From f626d27955790a8be2a0b07e05f2d60f53f8fad8 Mon Sep 17 00:00:00 2001
From: Illia Denysenko
Date: Mon, 10 May 2021 22:13:00 +0000
Subject: [PATCH] Refactor i18n analysis code and improve UX of testing binary

---
 Cargo.lock                         |   1 +
 voxygen/i18n/Cargo.toml            |  17 +-
 voxygen/i18n/README.md             |  12 +-
 voxygen/i18n/src/analysis.rs       | 611 ++++++++++++-----------
 voxygen/i18n/src/bin/i18n-check.rs |  59 ++-
 voxygen/i18n/src/data.rs           |  87 ++--
 voxygen/i18n/src/lib.rs            |   2 +
 voxygen/i18n/src/verification.rs   |  88 +++++
 8 files changed, 451 insertions(+), 426 deletions(-)
 create mode 100644 voxygen/i18n/src/verification.rs

diff --git a/Cargo.lock b/Cargo.lock
index 50e3df519a..1ff27e939b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5644,6 +5644,7 @@ dependencies = [
 name = "veloren-i18n"
 version = "0.9.0"
 dependencies = [
+ "clap",
  "deunicode",
  "git2",
  "hashbrown",
diff --git a/voxygen/i18n/Cargo.toml b/voxygen/i18n/Cargo.toml
index d330a5b3a0..03e3efaa11 100644
--- a/voxygen/i18n/Cargo.toml
+++ b/voxygen/i18n/Cargo.toml
@@ -7,6 +7,7 @@ version = "0.9.0"
 
 [[bin]]
 name = "i18n-check"
+required-features = ["bin"]
 
 [dependencies]
 # Assets
@@ -14,8 +15,16 @@ hashbrown = { version = "0.9", features = ["serde", "nightly"] }
 common-assets = {package = "veloren-common-assets", path = "../../common/assets"}
 deunicode = "1.0"
 serde = { version = "1.0", features = ["derive"] }
-# Diagnostic
-git2 = { version = "0.12", default-features = false }
-ron = "0.6"
-
 tracing = "0.1"
+# Diagnostic
+ron = "0.6"
+git2 = { version = "0.12", default-features = false, optional = true }
+
+# Binary
+clap = { version = "2.33", features = ["suggestions"], default-features = false, optional = true }
+
+[dev-dependencies]
+git2 = { version = "0.12", default-features = false }
+
+[features]
+bin = ["git2", "clap"]
diff --git a/voxygen/i18n/README.md b/voxygen/i18n/README.md
index 06704a4f36..4353f88c95 100644
--- a/voxygen/i18n/README.md
+++ b/voxygen/i18n/README.md
@@ -1,8 +1,6 @@
 # Usage
-Get diagnostic for specific language
-`$ cargo run --bin i18n-check -- --lang <lang_code>`
-Test all languages
-`$ cargo run --bin i18n-check -- --all`
-Verify all directories
-`$ cargo run --bin i18n-check -- --verify`
-
+`$ cargo run --features=bin -- --help`
+(Or if somewhere else in the repo)
+`$ cargo run -p veloren-i18n --features=bin -- --help`
+For example, diagnostic for specific language
+`$ cargo run -p veloren-i18n --features=bin -- <lang_code>`
diff --git a/voxygen/i18n/src/analysis.rs b/voxygen/i18n/src/analysis.rs index d7cdf7656d..13344b9c30 100644 --- a/voxygen/i18n/src/analysis.rs +++ b/voxygen/i18n/src/analysis.rs @@ -1,136 +1,12 @@ -use ron::de::{from_bytes, from_reader}; -use serde::{Deserialize, Serialize}; -use std::{ - fs, - path::{Path, PathBuf}, -}; +use ron::de::from_bytes; +use std::path::{Path, PathBuf}; +use crate::data::{ + i18n_directories, LocalizationFragment, RawLocalization, LANG_MANIFEST_FILE, REFERENCE_LANG, +}; use hashbrown::{HashMap, HashSet}; -/// The reference language, aka the more up-to-date localization data. Also the -/// default language at first startup. -const REFERENCE_LANG: &str = "en"; - -const LANG_MANIFEST_FILE: &str = "_manifest"; - -/// How a language can be described -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -struct LanguageMetadata { - /// A human friendly language name (e.g. "English (US)") - language_name: String, - - /// A short text identifier for this language (e.g. "en_US") - /// - /// On the opposite of `language_name` that can change freely, - /// `language_identifier` value shall be stable in time as it - /// is used by setting components to store the language - /// selected by the user. - language_identifier: String, -} - -/// Store font metadata -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -struct Font { - /// Key to retrieve the font in the asset system - asset_key: String, - - /// Scale ratio to resize the UI text dynamicly - scale_ratio: f32, -} - -/// Store font metadata -type Fonts = HashMap; - -/// Raw localization data, expect the strings to not be loaded here -/// However, metadata informations are correct -/// See `Localization` for more info on each attributes -#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] -struct RawLocalization { - sub_directories: Vec, - string_map: HashMap, - vector_map: HashMap>, - convert_utf8_to_ascii: bool, - fonts: Fonts, - metadata: LanguageMetadata, -} - -/// Store internationalization data -#[derive(Debug, PartialEq, Serialize, Deserialize)] -struct Localization { - /// A list of subdirectories to lookup for localization files - sub_directories: Vec, - - /// A map storing the localized texts - /// - /// Localized content can be accessed using a String key. - string_map: HashMap, - - /// A map for storing variations of localized texts, for example multiple - /// ways of saying "Help, I'm under attack". Used primarily for npc - /// dialogue. - vector_map: HashMap>, - - /// Whether to convert the input text encoded in UTF-8 - /// into a ASCII version by using the `deunicode` crate. - convert_utf8_to_ascii: bool, - - /// Font configuration is stored here - fonts: Fonts, - - metadata: LanguageMetadata, -} - -/// Store internationalization maps -/// These structs are meant to be merged into a Localization -#[derive(Debug, PartialEq, Serialize, Deserialize)] -struct LocalizationFragment { - /// A map storing the localized texts - /// - /// Localized content can be accessed using a String key. - string_map: HashMap, - - /// A map for storing variations of localized texts, for example multiple - /// ways of saying "Help, I'm under attack". Used primarily for npc - /// dialogue. 
- vector_map: HashMap>, -} - -impl Localization {} - -impl From for Localization { - fn from(raw: RawLocalization) -> Self { - Self { - sub_directories: raw.sub_directories, - string_map: raw.string_map, - vector_map: raw.vector_map, - convert_utf8_to_ascii: raw.convert_utf8_to_ascii, - fonts: raw.fonts, - metadata: raw.metadata, - } - } -} -impl From for LocalizationFragment { - fn from(raw: RawLocalization) -> Self { - Self { - string_map: raw.string_map, - vector_map: raw.vector_map, - } - } -} - -#[derive(Clone, Debug)] -struct LocalizationList(Vec); - -/// List localization directories as a PathBuf vector -fn i18n_directories(i18n_dir: &Path) -> Vec { - fs::read_dir(i18n_dir) - .unwrap() - .map(|res| res.map(|e| e.path()).unwrap()) - .filter(|e| e.is_dir()) - .collect() -} - -#[derive(Eq, Hash, Debug, PartialEq)] +#[derive(Copy, Clone, Eq, Hash, Debug, PartialEq)] enum LocalizationState { UpToDate, NotFound, @@ -140,7 +16,7 @@ enum LocalizationState { } #[derive(Debug, PartialEq)] -struct FindLocalization { +struct LocalizationStats { uptodate_entries: usize, outdated_entries: usize, unused_entries: usize, @@ -149,7 +25,61 @@ struct FindLocalization { real_entry_count: usize, } -#[derive(Debug)] +#[derive(Default)] +struct LocalizationAnalysis { + notfound: Vec<(String, Option)>, + unused: Vec<(String, Option)>, + outdated: Vec<(String, Option)>, + unknown: Vec<(String, Option)>, +} + +impl LocalizationAnalysis { + fn get_mut( + &mut self, + state: LocalizationState, + ) -> Option<&mut Vec<(String, Option)>> { + match state { + LocalizationState::NotFound => Some(&mut self.notfound), + LocalizationState::Unused => Some(&mut self.unused), + LocalizationState::Outdated => Some(&mut self.outdated), + LocalizationState::Unknown => Some(&mut self.unknown), + _ => None, + } + } + + fn show( + &mut self, + state: LocalizationState, + be_verbose: bool, + ref_i18n_map: &HashMap, + ) { + let entries = self + .get_mut(state) + .unwrap_or_else(|| panic!("called on invalid state: {:?}", state)); + if entries.is_empty() { + return; + } + println!("\n\t[{:?}]", state); + entries.sort(); + for (key, commit_id) in entries { + if be_verbose { + let our_commit = commit_id + .map(|s| format!("{}", s)) + .unwrap_or_else(|| "None".to_owned()); + let ref_commit = ref_i18n_map + .get(key) + .and_then(|s| s.commit_id) + .map(|s| format!("{}", s)) + .unwrap_or_else(|| "None".to_owned()); + println!("{:60}| {:40} | {:40}", key, our_commit, ref_commit,); + } else { + println!("{}", key); + } + } + } +} + +#[derive(Copy, Clone, Debug)] struct LocalizationEntryState { key_line: Option, chuck_line_range: Option<(usize, usize)>, @@ -220,11 +150,9 @@ fn generate_key_version<'a>( .keys() .map(|k| (k.to_owned(), LocalizationEntryState::new())) .collect(); - let mut to_process: HashSet<&String> = localization.string_map.keys().collect(); // Find key start lines let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file"); - - // Make the file hot + let mut to_process: HashSet<&String> = localization.string_map.keys().collect(); for (line_nb, line) in file_content.lines().enumerate() { let mut found_key = None; @@ -290,12 +218,12 @@ fn complete_key_versions<'a>( head_ref: &git2::Reference, i18n_key_versions: &mut HashMap, root_dir: &Path, - asset_path: &Path, + lang_dir: &Path, ) { //TODO: review unwraps in this file // For each file (if it's not a directory) in directory - for i18n_file in root_dir.join(&asset_path).read_dir().unwrap().flatten() { + for i18n_file in 
root_dir.join(&lang_dir).read_dir().unwrap().flatten() { if let Ok(file_type) = i18n_file.file_type() { if file_type.is_file() { println!("-> {:?}", i18n_file.file_name()); @@ -320,106 +248,54 @@ fn complete_key_versions<'a>( } } -fn verify_localization_directory(root_dir: &Path, directory_path: &Path) { - // Walk through each file in the directory - for i18n_file in root_dir.join(&directory_path).read_dir().unwrap().flatten() { - if let Ok(file_type) = i18n_file.file_type() { - // Skip folders and the manifest file (which does not contain the same struct we - // want to load) - if file_type.is_file() - && i18n_file.file_name().to_string_lossy() - != (LANG_MANIFEST_FILE.to_string() + ".ron") - { - let full_path = i18n_file.path(); - println!("-> {:?}", full_path.strip_prefix(&root_dir).unwrap()); - let f = fs::File::open(&full_path).expect("Failed opening file"); - let _: LocalizationFragment = match from_reader(f) { - Ok(v) => v, - Err(e) => { - panic!( - "Could not parse {} RON file, error: {}", - full_path.to_string_lossy(), - e - ); - }, - }; - } - } - } -} +fn gather_state( + loc: &RawLocalization, + i18n_blob: &git2::Blob, + ref_manifest: &Path, + root_dir: &Path, + lang_dir: &Path, + repo: &git2::Repository, + head_ref: &git2::Reference, +) -> HashMap { + // Generate map + let mut i18n_map = generate_key_version( + repo, + &LocalizationFragment::from(loc.clone()), + ref_manifest, + i18n_blob, + ); -/// Test to verify all languages that they are VALID and loadable, without -/// need of git just on the local assets folder -/// `root_dir` - absolute path to main repo -/// `asset_path` - relative path to asset directory (right now it is -/// 'assets/voxygen/i18n') -pub fn verify_all_localizations(root_dir: &Path, asset_path: &Path) { - let ref_i18n_dir_path = asset_path.join(REFERENCE_LANG); - let ref_i18n_path = ref_i18n_dir_path.join(LANG_MANIFEST_FILE.to_string() + ".ron"); - assert!( - root_dir.join(&ref_i18n_dir_path).is_dir(), - "Reference language folder doesn't exist, something is wrong!" - ); - assert!( - root_dir.join(&ref_i18n_path).is_file(), - "Reference language manifest file doesn't exist, something is wrong!" - ); - let i18n_directories = i18n_directories(&root_dir.join(asset_path)); - // This simple check ONLY guarantees that an arbitrary minimum of translation - // files exists. It's just to notice unintentional deletion of all - // files, or modifying the paths. In case you want to delete all - // language you have to adjust this number: - assert!( - i18n_directories.len() > 5, - "have less than 5 translation folders, arbitrary minimum check failed. Maybe the i18n \ - folder is empty?" 
- ); - for i18n_directory in i18n_directories { - // Attempt to load the manifest file - let manifest_path = i18n_directory.join(LANG_MANIFEST_FILE.to_string() + ".ron"); - println!( - "verifying {:?}", - manifest_path.strip_prefix(&root_dir).unwrap() - ); - let f = fs::File::open(&manifest_path).expect("Failed opening file"); - let raw_localization: RawLocalization = match from_reader(f) { - Ok(v) => v, - Err(e) => { - panic!( - "Could not parse {} RON file, error: {}", - i18n_directory.to_string_lossy(), - e - ); - }, - }; - // Walk through each files and try to load them - verify_localization_directory(root_dir, &i18n_directory); - // Walk through each subdirectories and try to load files in them - for sub_directory in raw_localization.sub_directories.iter() { - let subdir_path = &i18n_directory.join(sub_directory); - verify_localization_directory(root_dir, &subdir_path); - } + // Gathering info about keys from language + complete_key_versions(repo, head_ref, &mut i18n_map, root_dir, lang_dir); + + // read HEAD for the subfolders + for sub_directory in loc.sub_directories.iter() { + let subdir_path = &lang_dir.join(sub_directory); + complete_key_versions(repo, head_ref, &mut i18n_map, root_dir, subdir_path); } + + i18n_map } // Helper function to test localization directory -// `asset_path` - path to localization directory. Relative from root of the +// `lang_dir` - path to localization directory. Relative from root of the // repo. // `root_dir` - absolute path to repo -// `ref_i18n_path` - path to reference manifest +// `ref_manifest` - path to reference manifest // `i18n_references` - keys from reference language // `repo` - git object for main repo // `head_ref` - HEAD fn test_localization_directory( - asset_path: &Path, + lang_dir: &Path, root_dir: &Path, - ref_i18n_path: &Path, + ref_manifest: &Path, i18n_references: &HashMap, + be_verbose: bool, repo: &git2::Repository, head_ref: &git2::Reference, -) -> Option { - let relfile = asset_path.join(&(LANG_MANIFEST_FILE.to_string() + ".ron")); - if relfile == ref_i18n_path { +) -> Option { + let relfile = lang_dir.join(&(LANG_MANIFEST_FILE.to_string() + ".ron")); + if relfile == ref_manifest { return None; } println!("\n-----------------------------------"); @@ -439,20 +315,40 @@ fn test_localization_directory( return None; }, }; - let mut current_i18n = generate_key_version( - &repo, - &LocalizationFragment::from(current_loc.clone()), - &relfile, - ¤t_blob, - ); - // read HEAD for the fragment files - complete_key_versions(&repo, &head_ref, &mut current_i18n, root_dir, &asset_path); - // read HEAD for the subfolders - for sub_directory in current_loc.sub_directories.iter() { - let subdir_path = &asset_path.join(sub_directory); - complete_key_versions(&repo, &head_ref, &mut current_i18n, root_dir, &subdir_path); - } + // Gather state of current localization + let mut current_i18n = gather_state( + ¤t_loc, + ¤t_blob, + ref_manifest, + root_dir, + lang_dir, + repo, + head_ref, + ); + + // Comparing with reference localization + fill_info(&mut current_i18n, &i18n_references, repo, &relfile); + + let mut state_map = LocalizationAnalysis::default(); + let result = gather_results(current_i18n, &mut state_map); + print_translation_stats( + &i18n_references, + &result, + &mut state_map, + be_verbose, + relfile, + ref_manifest, + ); + Some(result) +} + +fn fill_info( + current_i18n: &mut HashMap, + i18n_references: &HashMap, + repo: &git2::Repository, + relfile: &Path, +) { for (ref_key, ref_state) in i18n_references.iter() { match 
current_i18n.get_mut(ref_key) { Some(state) => { @@ -505,109 +401,111 @@ fn test_localization_directory( { state.state = LocalizationState::Unused; } +} - let keys: Vec<&String> = current_i18n.keys().collect(); - let mut state_map: HashMap)>> = - HashMap::new(); - state_map.insert(LocalizationState::Outdated, Vec::new()); - state_map.insert(LocalizationState::NotFound, Vec::new()); - state_map.insert(LocalizationState::Unknown, Vec::new()); - state_map.insert(LocalizationState::Unused, Vec::new()); - - let current_i18n_entry_count = current_i18n.len(); +fn gather_results( + current_i18n: HashMap, + state_map: &mut LocalizationAnalysis, +) -> LocalizationStats { let mut uptodate_entries = 0; let mut outdated_entries = 0; let mut unused_entries = 0; let mut notfound_entries = 0; let mut unknown_entries = 0; + let keys: Vec<&String> = current_i18n.keys().collect(); for key in keys { let entry = current_i18n.get(key).unwrap(); + match entry.state { + LocalizationState::Outdated => outdated_entries += 1, + LocalizationState::NotFound => notfound_entries += 1, + LocalizationState::Unknown => unknown_entries += 1, + LocalizationState::Unused => unused_entries += 1, + LocalizationState::UpToDate => uptodate_entries += 1, + }; if entry.state != LocalizationState::UpToDate { let state_keys = state_map - .get_mut(&entry.state) + .get_mut(entry.state) .expect("vectors must be added"); - state_keys.push((key, entry.commit_id)); - match entry.state { - LocalizationState::Outdated => outdated_entries += 1, - LocalizationState::NotFound => notfound_entries += 1, - LocalizationState::Unknown => unknown_entries += 1, - LocalizationState::Unused => unused_entries += 1, - LocalizationState::UpToDate => unreachable!(), - }; - } else { - uptodate_entries += 1; + state_keys.push((key.to_owned(), entry.commit_id)); } } - // Display - println!( - "\n{:60}| {:40} | {:40}\n", - "Key name", - relfile.to_str().unwrap(), - ref_i18n_path.to_str().unwrap() - ); - - for (state, mut lines) in state_map { - if lines.is_empty() { - continue; - } - println!("\n\t[{:?}]", state); - lines.sort(); - for line in lines { - println!( - "{:60}| {:40} | {:40}", - line.0, - line.1 - .map(|s| format!("{}", s)) - .unwrap_or_else(|| "None".to_string()), - i18n_references - .get(line.0) - .map(|s| s.commit_id) - .flatten() - .map(|s| format!("{}", s)) - .unwrap_or_else(|| "None".to_string()), - ); - } - } - - println!( - "\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries", - uptodate_entries, outdated_entries, unused_entries, notfound_entries, unknown_entries - ); - // Calculate key count that actually matter for the status of the translation // Unused entries don't break the game + let current_i18n_entry_count = current_i18n.len(); let real_entry_count = current_i18n_entry_count - unused_entries; - let uptodate_percent = (uptodate_entries as f32 / real_entry_count as f32) * 100_f32; - let outdated_percent = (outdated_entries as f32 / real_entry_count as f32) * 100_f32; - let untranslated_percent = - ((notfound_entries + unknown_entries) as f32 / real_entry_count as f32) * 100_f32; - println!( - "{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n", - uptodate_percent, outdated_percent, untranslated_percent, - ); - - let result = FindLocalization { + LocalizationStats { uptodate_entries, unused_entries, outdated_entries, notfound_entries, errors: unknown_entries, real_entry_count, - }; - Some(result) + } +} + +fn print_translation_stats( + ref_i18n_map: &HashMap, + stats: &LocalizationStats, + state_map: &mut 
LocalizationAnalysis, + be_verbose: bool, + relfile: PathBuf, + ref_manifest: &Path, +) { + let uptodate_percent = + (stats.uptodate_entries as f32 / stats.real_entry_count as f32) * 100_f32; + let outdated_percent = + (stats.outdated_entries as f32 / stats.real_entry_count as f32) * 100_f32; + let untranslated_percent = + ((stats.errors + stats.errors) as f32 / stats.real_entry_count as f32) * 100_f32; + + // Display + if be_verbose { + println!( + "\n{:60}| {:40} | {:40}", + "Key name", + relfile.to_str().unwrap(), + ref_manifest.to_str().unwrap(), + ); + } else { + println!("\nKey name"); + } + + state_map.show(LocalizationState::NotFound, be_verbose, ref_i18n_map); + state_map.show(LocalizationState::Unused, be_verbose, ref_i18n_map); + state_map.show(LocalizationState::Outdated, be_verbose, ref_i18n_map); + state_map.show(LocalizationState::Unknown, be_verbose, ref_i18n_map); + + println!( + "\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries", + stats.uptodate_entries, + stats.outdated_entries, + stats.unused_entries, + stats.notfound_entries, + stats.errors, + ); + + println!( + "{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n", + uptodate_percent, outdated_percent, untranslated_percent, + ); } /// Test one language /// `code` - name of the directory in assets (de_DE for example) /// `root_dir` - absolute path to main repo -/// `asset_path` - relative path to asset directory (right now it is +/// `assets_path` - relative path to asset directory (right now it is /// 'assets/voxygen/i18n') -pub fn test_specific_localization(code: String, root_dir: &Path, asset_path: &Path) { +pub fn test_specific_localization( + code: &str, + root_dir: &Path, + assets_path: &Path, + be_verbose: bool, +) { // Relative paths from root of repo to assets - let ref_lang_dir = asset_path.join(REFERENCE_LANG); + let ref_lang_dir = assets_path.join(REFERENCE_LANG); let ref_manifest = ref_lang_dir.join(LANG_MANIFEST_FILE.to_string() + ".ron"); // Initialize Git objects @@ -619,39 +517,26 @@ pub fn test_specific_localization(code: String, root_dir: &Path, asset_path: &Pa let ref_manifest_blob = read_file_from_path(&repo, &head_ref, &ref_manifest); let loc: RawLocalization = from_bytes(ref_manifest_blob.content()) .expect("Expect to parse reference i18n RON file, can't proceed without it"); - let mut i18n_references: HashMap = generate_key_version( - &repo, - &LocalizationFragment::from(loc.clone()), - &ref_manifest, - &ref_manifest_blob, - ); // Gathering info about keys from reference language - complete_key_versions( - &repo, - &head_ref, - &mut i18n_references, + let reference_i18n = gather_state( + &loc, + &ref_manifest_blob, + &ref_manifest, root_dir, &ref_lang_dir, + &repo, + &head_ref, ); - for sub_directory in loc.sub_directories.iter() { - let subdir_path = &ref_lang_dir.join(sub_directory); - complete_key_versions( - &repo, - &head_ref, - &mut i18n_references, - root_dir, - &subdir_path, - ); - } // Testing how specific language is localized - let dir = asset_path.join(code); + let dir = assets_path.join(code); test_localization_directory( &dir, root_dir, &ref_manifest, - &i18n_references, + &reference_i18n, + be_verbose, &repo, &head_ref, ); @@ -659,20 +544,17 @@ pub fn test_specific_localization(code: String, root_dir: &Path, asset_path: &Pa /// Test all localizations /// `root_dir` - absolute path to main repo -/// `asset_path` - relative path to asset directory (right now it is +/// `assets_path` - relative path to asset directory (right now it is /// 
'assets/voxygen/i18n') -pub fn test_all_localizations(root_dir: &Path, asset_path: &Path) { - let ref_i18n_dir_path = asset_path.join(REFERENCE_LANG); - let ref_i18n_path = ref_i18n_dir_path.join(LANG_MANIFEST_FILE.to_string() + ".ron"); +pub fn test_all_localizations(root_dir: &Path, assets_path: &Path, be_verbose: bool) { + let ref_lang_dir = assets_path.join(REFERENCE_LANG); + let ref_manifest = ref_lang_dir.join(LANG_MANIFEST_FILE.to_string() + ".ron"); - if !root_dir.join(&ref_i18n_dir_path).is_dir() { - panic!( - "Reference language folder not found {:?}", - &ref_i18n_dir_path - ) + if !root_dir.join(&ref_lang_dir).is_dir() { + panic!("Reference language folder not found {:?}", &ref_lang_dir) } - if !root_dir.join(&ref_i18n_path).is_file() { - panic!("Reference language file not found {:?}", &ref_i18n_path) + if !root_dir.join(&ref_manifest).is_file() { + panic!("Reference language file not found {:?}", &ref_manifest) } // Initialize Git objects @@ -681,46 +563,32 @@ pub fn test_all_localizations(root_dir: &Path, asset_path: &Path) { let head_ref = repo.head().expect("Impossible to get the HEAD reference"); // Read HEAD for the reference language file - let i18n_ref_blob = read_file_from_path(&repo, &head_ref, &ref_i18n_path); - let loc: RawLocalization = from_bytes(i18n_ref_blob.content()) + let ref_manifest_blob = read_file_from_path(&repo, &head_ref, &ref_manifest); + let loc: RawLocalization = from_bytes(ref_manifest_blob.content()) .expect("Expect to parse reference i18n RON file, can't proceed without it"); - let mut i18n_references: HashMap = generate_key_version( - &repo, - &LocalizationFragment::from(loc.clone()), - &ref_i18n_path, - &i18n_ref_blob, - ); // Gathering info about keys from reference language - complete_key_versions( + let reference_i18n = gather_state( + &loc, + &ref_manifest_blob, + &ref_manifest, + root_dir, + &ref_lang_dir, &repo, &head_ref, - &mut i18n_references, - root_dir, - &ref_i18n_dir_path, ); - // read HEAD for the subfolders - for sub_directory in loc.sub_directories.iter() { - let subdir_path = &ref_i18n_dir_path.join(sub_directory); - complete_key_versions( - &repo, - &head_ref, - &mut i18n_references, - root_dir, - &subdir_path, - ); - } // Compare to other reference files - let i18n_directories = i18n_directories(&root_dir.join(asset_path)); - let mut i18n_entry_counts: HashMap = HashMap::new(); + let i18n_directories = i18n_directories(&root_dir.join(assets_path)); + let mut i18n_entry_counts: HashMap = HashMap::new(); for dir in &i18n_directories { let rel_dir = dir.strip_prefix(root_dir).unwrap(); let result = test_localization_directory( rel_dir, root_dir, - &ref_i18n_path, - &i18n_references, + &ref_manifest, + &reference_i18n, + be_verbose, &repo, &head_ref, ); @@ -729,6 +597,10 @@ pub fn test_all_localizations(root_dir: &Path, asset_path: &Path) { } } + print_overall_stats(i18n_entry_counts); +} + +fn print_overall_stats(i18n_entry_counts: HashMap) { let mut overall_uptodate_entry_count = 0; let mut overall_outdated_entry_count = 0; let mut overall_untranslated_entry_count = 0; @@ -742,8 +614,11 @@ pub fn test_all_localizations(root_dir: &Path, asset_path: &Path) { "", "up-to-date", "outdated", "untranslated", "unused", "errors", ); - for (path, test_result) in i18n_entry_counts { - let FindLocalization { + let mut i18n_stats: Vec<(&PathBuf, &LocalizationStats)> = i18n_entry_counts.iter().collect(); + i18n_stats.sort_by_key(|(_, result)| result.notfound_entries); + + for (path, test_result) in i18n_stats { + let LocalizationStats { 
uptodate_entries: uptodate, outdated_entries: outdated, unused_entries: unused, diff --git a/voxygen/i18n/src/bin/i18n-check.rs b/voxygen/i18n/src/bin/i18n-check.rs index 8f225f47ff..465bfd4586 100644 --- a/voxygen/i18n/src/bin/i18n-check.rs +++ b/voxygen/i18n/src/bin/i18n-check.rs @@ -1,22 +1,51 @@ -use std::{env::args, path::Path, vec::Vec}; -use veloren_i18n::analysis; +use clap::{App, Arg}; +use std::path::Path; +use veloren_i18n::{analysis, verification}; fn main() { - let cli: Vec = args().collect(); + let matches = App::new("i18n-check") + .version("0.1.0") + .author("juliancoffee ") + .about("Test veloren localizations") + .arg( + Arg::with_name("CODE") + .required(false) + .help("Run diagnostic for specific language code (de_DE as example)"), + ) + .arg( + Arg::with_name("verify") + .long("verify") + .help("verify all localizations"), + ) + .arg( + Arg::with_name("test") + .long("test") + .help("test all localizations"), + ) + .arg( + Arg::with_name("verbose") + .short("v") + .long("verbose") + .help("print additional information"), + ) + .get_matches(); // Generate paths - let curr_dir = std::env::current_dir().unwrap(); - let root = curr_dir.parent().unwrap().parent().unwrap(); + let root = veloren_i18n::find_root().expect("Failed to find root of repository"); let asset_path = Path::new("assets/voxygen/i18n/"); - for (i, arg) in cli.iter().enumerate() { - match arg.as_str() { - "--all" => analysis::test_all_localizations(root, asset_path), - "--verify" => analysis::verify_all_localizations(root, asset_path), - "--lang" => { - let code = cli[i + 1].clone(); - analysis::test_specific_localization(code, root, asset_path); - }, - _ => continue, - } + + if let Some(code) = matches.value_of("CODE") { + analysis::test_specific_localization( + code, + &root, + &asset_path, + matches.is_present("verbose"), + ); + } + if matches.is_present("test") { + analysis::test_all_localizations(&root, &asset_path, matches.is_present("verbose")); + } + if matches.is_present("verify") { + verification::verify_all_localizations(&root, &asset_path); } } diff --git a/voxygen/i18n/src/data.rs b/voxygen/i18n/src/data.rs index f86e45e649..9bf4de78d0 100644 --- a/voxygen/i18n/src/data.rs +++ b/voxygen/i18n/src/data.rs @@ -2,6 +2,10 @@ use crate::assets::{self, AssetExt, AssetGuard, AssetHandle}; use deunicode::deunicode; use hashbrown::{HashMap, HashSet}; use serde::{Deserialize, Serialize}; +use std::{ + fs, + path::{Path, PathBuf}, +}; use tracing::warn; /// The reference language, aka the more up-to-date localization data. @@ -49,11 +53,11 @@ pub type Fonts = HashMap; #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] pub(crate) struct RawLocalization { pub(crate) sub_directories: Vec, - pub(crate) string_map: HashMap, - pub(crate) vector_map: HashMap>, pub(crate) convert_utf8_to_ascii: bool, pub(crate) fonts: Fonts, pub(crate) metadata: LanguageMetadata, + pub(crate) string_map: HashMap, + pub(crate) vector_map: HashMap>, } /// Store internationalization data @@ -85,7 +89,7 @@ struct Language { /// Store internationalization maps /// These structs are meant to be merged into a Language #[derive(Debug, PartialEq, Serialize, Deserialize)] -struct LocalizationFragment { +pub(crate) struct LocalizationFragment { /// A map storing the localized texts /// /// Localized content can be accessed using a String key. 
@@ -100,7 +104,7 @@ struct LocalizationFragment { impl Language { /// Get a localized text from the given key pub fn get<'a>(&'a self, key: &'a str) -> Option<&str> { - self.string_map.get(key).map(|s| s.as_str()) + self.string_map.get(key).map(String::as_str) } /// Get a variation of localized text from the given key @@ -110,16 +114,13 @@ impl Language { /// If the key is not present in the localization object /// then the key is returned. pub fn get_variation<'a>(&'a self, key: &'a str, index: u16) -> Option<&str> { - self.vector_map - .get(key) - .map(|v| { - if !v.is_empty() { - Some(v[index as usize % v.len()].as_str()) - } else { - None - } - }) - .flatten() + self.vector_map.get(key).and_then(|v| { + if v.is_empty() { + None + } else { + Some(v[index as usize % v.len()].as_str()) + } + }) } } @@ -247,8 +248,7 @@ impl LocalizationGuard { self.active.get(key).unwrap_or_else(|| { self.fallback .as_ref() - .map(|f| f.get(key)) - .flatten() + .and_then(|f| f.get(key)) .unwrap_or(key) }) } @@ -263,8 +263,7 @@ impl LocalizationGuard { self.active.get_variation(key, index).unwrap_or_else(|| { self.fallback .as_ref() - .map(|f| f.get_variation(key, index)) - .flatten() + .and_then(|f| f.get_variation(key, index)) .unwrap_or(key) }) } @@ -389,42 +388,66 @@ pub fn list_localizations() -> Vec { /// Start hot reloading of i18n assets pub fn start_hot_reloading() { assets::start_hot_reloading(); } +/// Return path to repository by searching 10 directories back +pub fn find_root() -> Option { + std::env::current_dir().map_or(None, |path| { + // If we are in the root, push path + if path.join(".git").is_dir() { + return Some(path); + } + // Search .git directory in parent directries + for ancestor in path.ancestors().take(10) { + if ancestor.join(".git").is_dir() { + return Some(ancestor.to_path_buf()); + } + } + None + }) +} + +/// List localization directories as a `PathBuf` vector +pub fn i18n_directories(i18n_dir: &Path) -> Vec { + fs::read_dir(i18n_dir) + .unwrap() + .map(|res| res.map(|e| e.path()).unwrap()) + .filter(|e| e.is_dir()) + .collect() +} + #[cfg(test)] mod tests { - use super::*; - use crate::analysis; - use std::path::Path; - // Test that localization list is loaded (not empty) #[test] fn test_localization_list() { - let list = list_localizations(); + let list = super::list_localizations(); assert!(!list.is_empty()); } // Test that reference language can be loaded #[test] - fn test_localization_handle() { let _ = LocalizationHandle::load_expect(REFERENCE_LANG); } + fn test_localization_handle() { + let _ = super::LocalizationHandle::load_expect(super::REFERENCE_LANG); + } // Test to verify all languages that they are VALID and loadable, without // need of git just on the local assets folder #[test] fn verify_all_localizations() { // Generate paths - let i18n_asset_path = Path::new("assets/voxygen/i18n/"); - let curr_dir = std::env::current_dir().unwrap(); - let root_dir = curr_dir.parent().unwrap().parent().unwrap(); - analysis::verify_all_localizations(&root_dir, &i18n_asset_path); + let i18n_asset_path = std::path::Path::new("assets/voxygen/i18n/"); + let root_dir = super::find_root().expect("Failed to discover repository root"); + crate::verification::verify_all_localizations(&root_dir, &i18n_asset_path); } // Test to verify all languages and print missing and faulty localisation #[test] #[ignore] fn test_all_localizations() { + // Options + let be_verbose = true; // Generate paths - let i18n_asset_path = Path::new("assets/voxygen/i18n/"); - let curr_dir = 
std::env::current_dir().unwrap(); - let root_dir = curr_dir.parent().unwrap().parent().unwrap(); - analysis::test_all_localizations(&root_dir, &i18n_asset_path); + let i18n_asset_path = std::path::Path::new("assets/voxygen/i18n/"); + let root_dir = super::find_root().expect("Failed to discover repository root"); + crate::analysis::test_all_localizations(&root_dir, &i18n_asset_path, be_verbose); } } diff --git a/voxygen/i18n/src/lib.rs b/voxygen/i18n/src/lib.rs index 9377f81aba..1d8462e15d 100644 --- a/voxygen/i18n/src/lib.rs +++ b/voxygen/i18n/src/lib.rs @@ -1,5 +1,7 @@ +#[cfg(any(feature = "bin", test))] pub mod analysis; mod data; +pub mod verification; use common_assets as assets; pub use data::*; diff --git a/voxygen/i18n/src/verification.rs b/voxygen/i18n/src/verification.rs new file mode 100644 index 0000000000..066ee51980 --- /dev/null +++ b/voxygen/i18n/src/verification.rs @@ -0,0 +1,88 @@ +use ron::de::from_reader; +use std::{fs, path::Path}; + +use crate::data::{ + i18n_directories, LocalizationFragment, RawLocalization, LANG_MANIFEST_FILE, REFERENCE_LANG, +}; + +fn verify_localization_directory(root_dir: &Path, directory_path: &Path) { + // Walk through each file in the directory + for i18n_file in root_dir.join(&directory_path).read_dir().unwrap().flatten() { + if let Ok(file_type) = i18n_file.file_type() { + // Skip folders and the manifest file (which does not contain the same struct we + // want to load) + if file_type.is_file() + && i18n_file.file_name().to_string_lossy() + != (LANG_MANIFEST_FILE.to_string() + ".ron") + { + let full_path = i18n_file.path(); + println!("-> {:?}", full_path.strip_prefix(&root_dir).unwrap()); + let f = fs::File::open(&full_path).expect("Failed opening file"); + let _loc: LocalizationFragment = match from_reader(f) { + Ok(v) => v, + Err(e) => { + panic!( + "Could not parse {} RON file, error: {}", + full_path.to_string_lossy(), + e + ); + }, + }; + } + } + } +} + +/// Test to verify all languages that they are VALID and loadable, without +/// need of git just on the local assets folder +/// `root_dir` - absolute path to main repo +/// `asset_path` - relative path to asset directory (right now it is +/// 'assets/voxygen/i18n') +pub fn verify_all_localizations(root_dir: &Path, asset_path: &Path) { + let ref_i18n_dir_path = asset_path.join(REFERENCE_LANG); + let ref_i18n_path = ref_i18n_dir_path.join(LANG_MANIFEST_FILE.to_string() + ".ron"); + assert!( + root_dir.join(&ref_i18n_dir_path).is_dir(), + "Reference language folder doesn't exist, something is wrong!" + ); + assert!( + root_dir.join(&ref_i18n_path).is_file(), + "Reference language manifest file doesn't exist, something is wrong!" + ); + let i18n_directories = i18n_directories(&root_dir.join(asset_path)); + // This simple check ONLY guarantees that an arbitrary minimum of translation + // files exists. It's just to notice unintentional deletion of all + // files, or modifying the paths. In case you want to delete all + // language you have to adjust this number: + assert!( + i18n_directories.len() > 5, + "have less than 5 translation folders, arbitrary minimum check failed. Maybe the i18n \ + folder is empty?" 
+ ); + for i18n_directory in i18n_directories { + // Attempt to load the manifest file + let manifest_path = i18n_directory.join(LANG_MANIFEST_FILE.to_string() + ".ron"); + println!( + "verifying {:?}", + manifest_path.strip_prefix(&root_dir).unwrap() + ); + let f = fs::File::open(&manifest_path).expect("Failed opening file"); + let raw_localization: RawLocalization = match from_reader(f) { + Ok(v) => v, + Err(e) => { + panic!( + "Could not parse {} RON file, error: {}", + i18n_directory.to_string_lossy(), + e + ); + }, + }; + // Walk through each files and try to load them + verify_localization_directory(root_dir, &i18n_directory); + // Walk through each subdirectories and try to load files in them + for sub_directory in raw_localization.sub_directories.iter() { + let subdir_path = &i18n_directory.join(sub_directory); + verify_localization_directory(root_dir, &subdir_path); + } + } +}