From a142d59637ce406d1d5648553571471a171e3cae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=A4rtens?= Date: Tue, 27 Jul 2021 20:07:08 +0200 Subject: [PATCH] experiment with a git cache --- voxygen/i18n/src/bin/i18n-check.rs | 6 +- voxygen/i18n/src/gitfragments.rs | 172 +++++++++++++++++++++++++++++ voxygen/i18n/src/lib.rs | 6 +- voxygen/i18n/src/raw.rs | 45 ++++---- voxygen/i18n/src/verification.rs | 4 +- 5 files changed, 208 insertions(+), 25 deletions(-) create mode 100644 voxygen/i18n/src/gitfragments.rs diff --git a/voxygen/i18n/src/bin/i18n-check.rs b/voxygen/i18n/src/bin/i18n-check.rs index f3bdf0a490..174bb0b9d6 100644 --- a/voxygen/i18n/src/bin/i18n-check.rs +++ b/voxygen/i18n/src/bin/i18n-check.rs @@ -1,3 +1,4 @@ +/* use clap::{App, Arg}; use std::path::Path; use veloren_i18n::{analysis, verification}; @@ -60,4 +61,7 @@ fn main() { if matches.is_present("verify") { verification::verify_all_localizations(&root, &asset_path); } -} +}*/ + + +fn main() {} \ No newline at end of file diff --git a/voxygen/i18n/src/gitfragments.rs b/voxygen/i18n/src/gitfragments.rs new file mode 100644 index 0000000000..ac415e4ad8 --- /dev/null +++ b/voxygen/i18n/src/gitfragments.rs @@ -0,0 +1,172 @@ +//! fragment attached with git versioning information +use hashbrown::{HashMap}; +use std::path::{Path, PathBuf}; +use std::sync::RwLock; +use std::sync::Arc; +use crate::raw::{RawFragment}; + +struct GitCache<'a> { + pub root_dir: PathBuf, + pub blobs: RwLock>>>, + pub repo: git2::Repository, + //pub head_ref: git2::Reference<'a>, +} + +impl<'a> GitCache<'a> { + pub fn new(root_dir: &Path) -> Self { + let repo = git2::Repository::discover(&root_dir) + .unwrap_or_else(|_| panic!("Failed to open the Git repository at {:?}", &root_dir)); + //let head_ref = repo.head().expect("Impossible to get the HEAD reference"); + + let root_dir = root_dir.to_path_buf(); + let blobs = RwLock::new(HashMap::new()); + Self { + root_dir, + blobs, + repo, + //head_ref, + } + } + /// Returns the Git blob associated with the given reference and path + fn read_file_from_path( + &'a self, + reference: &git2::Reference, + path: &std::path::Path, + ) -> Arc> { + // return from cache + let lock = self.blobs.read().unwrap(); + if let Some(blob) = lock.get(path) { + return blob.clone(); + } + drop(lock); + // load file not in cache + let tree = reference + .peel_to_tree() + .expect("Impossible to peel HEAD to a tree object"); + let blob = Arc::new(tree.get_path(path) + .unwrap_or_else(|_| { + panic!( + "Impossible to find the file {:?} in reference {:?}", + path, + reference.name() + ) + }) + .to_object(&self.repo) + .unwrap() + .peel_to_blob() + .expect("Impossible to fetch the Git object")); + let mut lock = self.blobs.write().unwrap(); + let pathbuf = path.to_path_buf(); + lock.insert(pathbuf, blob.clone()); + blob + } +} + +/* +/// Extend a Fragment with historical git data +/// The actual translation gets dropped +fn generate_key_version<'a>( + repo: &'a GitCache, + path: &Path, + fragment: RawFragment, +) -> RawFragment { + let file_blob = repo.read_file_from_path(path); + // Find key start lines + let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file"); + let mut to_process: HashSet<&String> = localization.string_map.keys().collect(); + for (line_nb, line) in file_content.lines().enumerate() { + let mut found_key = None; + + for key in to_process.iter() { + if correspond(line, key) { + found_key = Some(key.to_owned()); + } + } + + if let Some(key) = found_key { + keys.get_mut(key).unwrap().key_line = Some(line_nb); + to_process.remove(key); + }; + } + + +}*/ + +/* + +fn generate_key_version<'a>( + repo: &'a git2::Repository, + fragment: &RawFragment, + path: &std::path::Path, + file_blob: &git2::Blob, +) -> HashMap { + let mut keys: HashMap = localization + .string_map + .keys() + .map(|k| (k.to_owned(), LocalizationEntryState::new())) + .collect(); + // Find key start lines + let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file"); + let mut to_process: HashSet<&String> = localization.string_map.keys().collect(); + for (line_nb, line) in file_content.lines().enumerate() { + let mut found_key = None; + + for key in to_process.iter() { + if correspond(line, key) { + found_key = Some(key.to_owned()); + } + } + + if let Some(key) = found_key { + keys.get_mut(key).unwrap().key_line = Some(line_nb); + to_process.remove(key); + }; + } + + let mut error_check_set: Vec = vec![]; + // Find commit for each keys + repo.blame_file(path, None) + .expect("Impossible to generate the Git blame") + .iter() + .for_each(|e: git2::BlameHunk| { + for (key, state) in keys.iter_mut() { + let line = match state.key_line { + Some(l) => l, + None => { + if !error_check_set.contains(key) { + eprintln!( + "Key {} does not have a git line in it's state! Skipping key.", + key + ); + error_check_set.push(key.clone()); + } + continue; + }, + }; + + if line + 1 >= e.final_start_line() + && line + 1 < e.final_start_line() + e.lines_in_hunk() + { + state.chuck_line_range = Some(( + e.final_start_line(), + e.final_start_line() + e.lines_in_hunk(), + )); + state.commit_id = match state.commit_id { + Some(existing_commit) => { + match repo.graph_descendant_of(e.final_commit_id(), existing_commit) { + Ok(true) => Some(e.final_commit_id()), + Ok(false) => Some(existing_commit), + Err(err) => panic!("{}", err), + } + }, + None => Some(e.final_commit_id()), + }; + } + } + }); + + keys +} + + + */ \ No newline at end of file diff --git a/voxygen/i18n/src/lib.rs b/voxygen/i18n/src/lib.rs index d0b9f85e8c..0c946db4c1 100644 --- a/voxygen/i18n/src/lib.rs +++ b/voxygen/i18n/src/lib.rs @@ -1,5 +1,7 @@ #[cfg(any(feature = "bin", test))] -pub mod analysis; +pub mod gitfragments; +//#[cfg(any(feature = "bin", test))] +//pub mod analysis; pub mod raw; pub mod verification; @@ -113,7 +115,7 @@ impl common_assets::Compound for Language { // inside the asked_localization let mut fragments = HashMap::new(); for fragment_asset in cache - .load_dir::(asset_key, true)? + .load_dir::>(asset_key, true)? .iter() { let read = fragment_asset.read(); diff --git a/voxygen/i18n/src/raw.rs b/voxygen/i18n/src/raw.rs index e04d64e706..318bd3bbcd 100644 --- a/voxygen/i18n/src/raw.rs +++ b/voxygen/i18n/src/raw.rs @@ -20,14 +20,14 @@ pub(crate) struct RawManifest { /// Raw localization data from one specific file /// These structs are meant to be merged into a Language #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] -pub(crate) struct RawFragment { - pub(crate) string_map: HashMap, - pub(crate) vector_map: HashMap>, +pub(crate) struct RawFragment { + pub(crate) string_map: HashMap, + pub(crate) vector_map: HashMap>, } -pub(crate) struct RawLanguage { +pub(crate) struct RawLanguage { pub(crate) manifest: RawManifest, - pub(crate) fragments: HashMap, + pub(crate) fragments: HashMap>, } #[derive(Debug)] @@ -44,18 +44,26 @@ pub(crate) fn load_manifest(i18n_root_path: &Path, language_identifier: &str) -> } /// `i18n_root_path` - absolute path to i18n path which contains `en`, `de_DE`, `fr_FR` files -pub(crate) fn load_raw_language(i18n_root_path: &Path, manifest: RawManifest) -> Result { +pub(crate) fn load_raw_language(i18n_root_path: &Path, manifest: RawManifest) -> Result, common_assets::Error> { + let language_identifier = &manifest.metadata.language_identifier; + let fragments = recursive_load_raw_language(i18n_root_path, language_identifier, Path::new(""))?; + Ok(RawLanguage{ + manifest, + fragments, + }) +} + +fn recursive_load_raw_language(i18n_root_path: &Path, language_identifier: &str, subfolder: &Path) -> Result>, common_assets::Error> { // Walk through each file in the directory let mut fragments = HashMap::new(); - let language_identifier = &manifest.metadata.language_identifier; - let language_dir = i18n_root_path.join(language_identifier); - for fragment_file in language_dir.read_dir().unwrap().flatten() { + let search_dir = i18n_root_path.join(language_identifier).join(subfolder); + for fragment_file in search_dir.read_dir().unwrap().flatten() { let file_type = fragment_file.file_type()?; if file_type.is_dir() { - // TODO: recursive - continue; - } - if file_type.is_file() { + let full_path = fragment_file.path(); + let relative_path = full_path.strip_prefix(&search_dir).unwrap(); + fragments.extend(recursive_load_raw_language(i18n_root_path, language_identifier, relative_path)?); + } else if file_type.is_file() { let full_path = fragment_file.path(); let relative_path = full_path.strip_prefix(&i18n_root_path).unwrap(); let f = fs::File::open(&full_path)?; @@ -63,14 +71,11 @@ pub(crate) fn load_raw_language(i18n_root_path: &Path, manifest: RawManifest) -> fragments.insert(relative_path.to_path_buf(), fragment); } } - Ok(RawLanguage{ - manifest, - fragments, - }) + Ok(fragments) } -impl From for Language { - fn from(raw: RawLanguage) -> Self { +impl From> for Language { + fn from(raw: RawLanguage) -> Self { let mut string_map = HashMap::new(); let mut vector_map = HashMap::new(); @@ -129,7 +134,7 @@ impl common_assets::Asset for RawManifest { const EXTENSION: &'static str = LANG_EXTENSION; } -impl common_assets::Asset for RawFragment { +impl common_assets::Asset for RawFragment { type Loader = common_assets::RonLoader; const EXTENSION: &'static str = LANG_EXTENSION; diff --git a/voxygen/i18n/src/verification.rs b/voxygen/i18n/src/verification.rs index 9a62974abc..89d8146757 100644 --- a/voxygen/i18n/src/verification.rs +++ b/voxygen/i18n/src/verification.rs @@ -31,8 +31,8 @@ pub fn verify_all_localizations(root_dir: &Path, asset_path: &Path) { folder is empty?" ); for i18n_directory in i18n_directories { - let display_language_identifier = i18n_directory.strip_prefix(&root_dir).unwrap().as_os_str().to_str().unwrap(); - let language_identifier = i18n_directory.strip_prefix(&i18n_root_path).unwrap().as_os_str().to_str().unwrap(); + let display_language_identifier = i18n_directory.strip_prefix(&root_dir).unwrap().to_str().unwrap(); + let language_identifier = i18n_directory.strip_prefix(&i18n_root_path).unwrap().to_str().unwrap(); println!( "verifying {:?}", display_language_identifier