use common::{
    assets,
    assets::{load_expect, load_glob, Asset},
};
use deunicode::deunicode;
use ron::de::from_reader;
use serde_derive::*;
use std::{
    collections::{HashMap, HashSet},
    fs::File,
    io::BufReader,
};
use tracing::warn;

/// The reference language, i.e. the one with the most up-to-date
/// localization data. Also the default language at first startup.
pub const REFERENCE_LANG: &str = "en";

/// How a language can be described
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct LanguageMetadata {
    /// A human friendly language name (e.g. "English (US)")
    pub language_name: String,

    /// A short text identifier for this language (e.g. "en_US")
    ///
    /// Unlike `language_name`, which can change freely, the
    /// `language_identifier` value must remain stable over time, as it is
    /// used by the settings components to store the language selected by
    /// the user.
    pub language_identifier: String,
}

/// Store font metadata
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Font {
    /// Key to retrieve the font in the asset system
    pub asset_key: String,

    /// Scale ratio to resize the UI text dynamically
    pub scale_ratio: f32,
}

impl Font {
    /// Scale input size to final UI size
    pub fn scale(&self, value: u32) -> u32 { (value as f32 * self.scale_ratio).round() as u32 }
}

/// Store font metadata, keyed by font name
pub type VoxygenFonts = HashMap<String, Font>;

/// Store internationalization data
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct VoxygenLocalization {
    /// A map storing the localized texts
    ///
    /// Localized content can be accessed using a String key.
    pub string_map: HashMap<String, String>,

    /// A map for storing variations of localized texts, for example multiple
    /// ways of saying "Help, I'm under attack". Used primarily for npc
    /// dialogue.
    pub vector_map: HashMap<String, Vec<String>>,

    /// Whether to convert the input text encoded in UTF-8
    /// into an ASCII version by using the `deunicode` crate.
    pub convert_utf8_to_ascii: bool,

    /// Font configuration is stored here
    pub fonts: VoxygenFonts,

    pub metadata: LanguageMetadata,
}

impl VoxygenLocalization {
    /// Get a localized text from the given key
    ///
    /// If the key is not present in the localization object
    /// then the key itself is returned.
    pub fn get<'a>(&'a self, key: &'a str) -> &str {
        match self.string_map.get(key) {
            Some(localized_text) => localized_text,
            None => key,
        }
    }

    /// Get a variation of localized text from the given key
    ///
    /// `index` should be a random number from `0` to `u16::max()`
    ///
    /// If the key is not present in the localization object
    /// then the key itself is returned.
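    ///
    /// A minimal usage sketch (the key below is hypothetical and may not
    /// exist in the actual asset files):
    ///
    /// ```ignore
    /// // With vector_map["npc.speech.attacked"] == ["Help!", "To arms!"],
    /// // any u16 index is valid: it is reduced modulo the vector length,
    /// // so index 3 selects entry 3 % 2 == 1.
    /// assert_eq!(loc.get_variation("npc.speech.attacked", 3), "To arms!");
    /// ```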
    pub fn get_variation<'a>(&'a self, key: &'a str, index: u16) -> &str {
        match self.vector_map.get(key) {
            Some(v) if !v.is_empty() => &v[index as usize % v.len()],
            _ => key,
        }
    }

    /// Return the missing keys compared to the reference language
    pub fn list_missing_entries(&self) -> (HashSet<String>, HashSet<String>) {
        let reference_localization =
            load_expect::<VoxygenLocalization>(i18n_asset_key(REFERENCE_LANG).as_ref());

        let reference_string_keys: HashSet<_> =
            reference_localization.string_map.keys().cloned().collect();
        let string_keys: HashSet<_> = self.string_map.keys().cloned().collect();
        let strings = reference_string_keys
            .difference(&string_keys)
            .cloned()
            .collect();

        let reference_vector_keys: HashSet<_> =
            reference_localization.vector_map.keys().cloned().collect();
        let vector_keys: HashSet<_> = self.vector_map.keys().cloned().collect();
        let vectors = reference_vector_keys
            .difference(&vector_keys)
            .cloned()
            .collect();

        (strings, vectors)
    }

    /// Log missing entries (compared to the reference language) as warnings
    pub fn log_missing_entries(&self) {
        let (missing_strings, missing_vectors) = self.list_missing_entries();
        for missing_key in missing_strings {
            warn!(
                "[{:?}] Missing string key {:?}",
                self.metadata.language_identifier, missing_key
            );
        }
        for missing_key in missing_vectors {
            warn!(
                "[{:?}] Missing vector key {:?}",
                self.metadata.language_identifier, missing_key
            );
        }
    }
}

impl Asset for VoxygenLocalization {
    const ENDINGS: &'static [&'static str] = &["ron"];

    /// Load the translations located in the input buffer and convert them
    /// into a `VoxygenLocalization` object.
    #[allow(clippy::into_iter_on_ref)] // TODO: Pending review in #587
    fn parse(buf_reader: BufReader<File>) -> Result<Self, assets::Error> {
        let mut asked_localization: VoxygenLocalization =
            from_reader(buf_reader).map_err(assets::Error::parse_error)?;

        // Update the text if UTF-8 to ASCII conversion is enabled
        if asked_localization.convert_utf8_to_ascii {
            for value in asked_localization.string_map.values_mut() {
                *value = deunicode(value);
            }

            for value in asked_localization.vector_map.values_mut() {
                *value = value.into_iter().map(|s| deunicode(s)).collect();
            }
        }
        asked_localization.metadata.language_name =
            deunicode(&asked_localization.metadata.language_name);

        Ok(asked_localization)
    }
}

/// Load all the available languages located in the Voxygen asset directory
pub fn list_localizations() -> Vec<LanguageMetadata> {
    let voxygen_locales_assets = "voxygen.i18n.*";
    let lang_list = load_glob::<VoxygenLocalization>(voxygen_locales_assets).unwrap();
    lang_list.iter().map(|e| (*e).metadata.clone()).collect()
}

/// Return the asset key associated with the given `language_id`
pub fn i18n_asset_key(language_id: &str) -> String { "voxygen.i18n.".to_string() + language_id }
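// A minimal sketch of how the pieces above fit together at startup
// (illustrative only: `selected_language` is a hypothetical settings value,
// and the key passed to `get` may not exist in the real asset files):
//
//     let localization = load_expect::<VoxygenLocalization>(
//         &i18n_asset_key(&selected_language),
//     );
//     localization.log_missing_entries();
//     let text = localization.get("main.login.username");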
#[cfg(test)]
mod tests {
    use super::VoxygenLocalization;
    use git2::Repository;
    use ron::de::{from_bytes, from_reader};
    use std::{
        collections::{HashMap, HashSet},
        fs,
        path::{Path, PathBuf},
    };

    /// List localization files as a PathBuf vector
    fn i18n_files(i18n_dir: &Path) -> Vec<PathBuf> {
        fs::read_dir(i18n_dir)
            .unwrap()
            .map(|res| res.map(|e| e.path()).unwrap())
            .filter(|e| match e.extension() {
                Some(ext) => ext == "ron",
                None => false,
            })
            .collect()
    }

    #[derive(Debug, PartialEq)]
    enum LocalizationState {
        UpToDate,
        NotFound,
        Outdated,
        Unknown,
        Unused,
    }

    #[derive(Debug)]
    struct LocalizationEntryState {
        pub key_line: Option<usize>,
        pub chunk_line_range: Option<(usize, usize)>,
        pub commit_id: Option<git2::Oid>,
        pub state: LocalizationState,
    }

    impl LocalizationEntryState {
        pub fn new() -> LocalizationEntryState {
            LocalizationEntryState {
                key_line: None,
                chunk_line_range: None,
                commit_id: None,
                state: LocalizationState::Unknown,
            }
        }
    }

    /// Returns the Git blob associated with the given reference and path
    #[allow(clippy::expect_fun_call)] // TODO: Pending review in #587
    fn read_file_from_path<'a>(
        repo: &'a git2::Repository,
        reference: &git2::Reference,
        path: &std::path::Path,
    ) -> git2::Blob<'a> {
        let tree = reference
            .peel_to_tree()
            .expect("Impossible to peel HEAD to a tree object");
        tree.get_path(path)
            .expect(&format!(
                "Impossible to find the file {:?} in reference {:?}",
                path,
                reference.name()
            ))
            .to_object(&repo)
            .unwrap()
            .peel_to_blob()
            .expect("Impossible to fetch the Git object")
    }

    fn generate_key_version<'a>(
        repo: &'a git2::Repository,
        localization: &VoxygenLocalization,
        path: &std::path::Path,
        file_blob: &git2::Blob,
    ) -> HashMap<String, LocalizationEntryState> {
        let mut keys: HashMap<String, LocalizationEntryState> = localization
            .string_map
            .keys()
            .map(|k| (k.to_owned(), LocalizationEntryState::new()))
            .collect();
        let mut to_process: HashSet<&String> = localization.string_map.keys().collect();
        // Find key start lines
        let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file");
        for (line_nb, line) in file_content.lines().enumerate() {
            let mut found_key = None;

            for key in to_process.iter() {
                if line.contains(key.as_str()) {
                    found_key = Some(key.to_owned());
                    break;
                }
            }

            if let Some(key) = found_key {
                keys.get_mut(key).unwrap().key_line = Some(line_nb);
                to_process.remove(&key);
            };
        }

        let mut error_check_set: Vec<String> = vec![];
        // Find the commit that last touched each key
        repo.blame_file(path, None)
            .expect("Impossible to generate the Git blame")
            .iter()
            .for_each(|e: git2::BlameHunk| {
                for (key, state) in keys.iter_mut() {
                    let line = match state.key_line {
                        Some(l) => l,
                        None => {
                            if !error_check_set.contains(key) {
                                eprintln!(
                                    "Key {} does not have a git line in its state! Skipping key.",
                                    key
                                );
                                error_check_set.push(key.clone());
                            }
                            continue;
                        },
                    };

                    if line >= e.final_start_line()
                        && line < e.final_start_line() + e.lines_in_hunk()
                    {
                        state.chunk_line_range = Some((
                            e.final_start_line(),
                            e.final_start_line() + e.lines_in_hunk(),
                        ));
                        state.commit_id = match state.commit_id {
                            Some(existing_commit) => {
                                match repo.graph_descendant_of(e.final_commit_id(), existing_commit)
                                {
                                    Ok(true) => Some(e.final_commit_id()),
                                    Ok(false) => Some(existing_commit),
                                    Err(err) => panic!("{}", err),
                                }
                            },
                            None => Some(e.final_commit_id()),
                        };
                    }
                }
            });

        keys
    }
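    // How `test_all_localizations` below uses the commit ids gathered above
    // (commit letters are hypothetical): a translation entry is flagged
    // `Outdated` when its last commit is not a descendant of the reference
    // key's last commit, i.e. `en.ron` changed after the translation was
    // last touched:
    //
    //     --- A ---------- B --- (HEAD)
    //         ^            ^
    //         fr.ron key   en.ron key
    //
    // A is not a descendant of B, so the fr.ron entry is Outdated; a later
    // fix of fr.ron in a commit descending from B makes it UpToDate again.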
    // Test that verifies that all localization files are VALID and loadable,
    // without needing Git: it relies only on the local assets folder.
    #[test]
    fn verify_all_localizations() {
        // Generate paths
        let i18n_asset_path = Path::new("assets/voxygen/i18n/");
        let en_i18n_path = i18n_asset_path.join("en.ron");
        let root_dir = std::env::current_dir()
            .map(|p| p.parent().expect("failed to get parent directory").to_owned())
            .unwrap();
        assert!(
            root_dir.join(&en_i18n_path).is_file(),
            "en reference file doesn't exist, something is wrong!"
        );
        let i18n_files = i18n_files(&root_dir.join(i18n_asset_path));
        // This simple check ONLY guarantees that an arbitrary minimum number of
        // translation files exists. It is just there to catch unintentional
        // deletion of all files or broken paths. If you deliberately remove
        // translation files, adjust this number:
        assert!(
            i18n_files.len() > 5,
            "have fewer than 5 translation files, arbitrary minimum check failed. Maybe the \
             i18n folder is empty?"
        );
        for path in i18n_files {
            let f = fs::File::open(&path).expect("Failed opening file");
            let _: VoxygenLocalization = match from_reader(f) {
                Ok(v) => v,
                Err(e) => {
                    panic!(
                        "Could not parse {} RON file, error: {}",
                        path.to_string_lossy(),
                        e
                    );
                },
            };
        }
    }
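    // The report below needs a full Git checkout, hence `#[ignore]`.
    // Assuming a standard cargo setup, it can be run on demand with:
    //
    //     cargo test test_all_localizations -- --ignored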
    // Test that verifies all languages and prints missing and faulty
    // localization entries
    #[test]
    #[ignore]
    #[allow(clippy::expect_fun_call)]
    fn test_all_localizations() {
        // Generate paths
        let i18n_asset_path = Path::new("assets/voxygen/i18n/");
        let en_i18n_path = i18n_asset_path.join("en.ron");
        let root_dir = std::env::current_dir()
            .map(|p| p.parent().expect("failed to get parent directory").to_owned())
            .unwrap();
        let i18n_path = root_dir.join(i18n_asset_path);

        if !root_dir.join(&en_i18n_path).is_file() {
            panic!("Reference language file not found {:?}", &en_i18n_path)
        }

        // Initialize Git objects
        let repo = Repository::discover(&root_dir).expect(&format!(
            "Failed to open the Git repository at {:?}",
            &root_dir
        ));
        let head_ref = repo.head().expect("Impossible to get the HEAD reference");

        // Read HEAD for the reference language file
        let i18n_en_blob = read_file_from_path(&repo, &head_ref, &en_i18n_path);
        let loc: VoxygenLocalization = from_bytes(i18n_en_blob.content())
            .expect("Failed to parse reference i18n RON file, can't proceed without it");
        let i18n_references: HashMap<String, LocalizationEntryState> =
            generate_key_version(&repo, &loc, &en_i18n_path, &i18n_en_blob);

        // Compare the other translation files to the reference
        let i18n_files = i18n_files(&i18n_path);
        let mut i18n_entry_counts: HashMap<PathBuf, (usize, usize, usize, usize)> = HashMap::new();
        for file in &i18n_files {
            let relfile = file.strip_prefix(&root_dir).unwrap();
            if relfile == en_i18n_path {
                continue;
            }
            println!("\n-----------------------------------");
            println!("{:?}", relfile);
            println!("-----------------------------------");

            // Find the localization entry state
            let current_blob = read_file_from_path(&repo, &head_ref, &relfile);
            let current_loc: VoxygenLocalization = match from_bytes(current_blob.content()) {
                Ok(v) => v,
                Err(e) => {
                    eprintln!(
                        "Could not parse {} RON file, skipping: {}",
                        relfile.to_string_lossy(),
                        e
                    );
                    continue;
                },
            };
            let mut current_i18n =
                generate_key_version(&repo, &current_loc, &relfile, &current_blob);
            for (ref_key, ref_state) in i18n_references.iter() {
                match current_i18n.get_mut(ref_key) {
                    Some(state) => {
                        let commit_id = match state.commit_id {
                            Some(c) => c,
                            None => {
                                eprintln!(
                                    "Commit ID of key {} in i18n file {} is missing! Skipping key.",
                                    ref_key,
                                    relfile.to_string_lossy()
                                );
                                continue;
                            },
                        };
                        let ref_commit_id = match ref_state.commit_id {
                            Some(c) => c,
                            None => {
                                eprintln!(
                                    "Commit ID of key {} in reference i18n file is missing! \
                                     Skipping key.",
                                    ref_key
                                );
                                continue;
                            },
                        };
                        if commit_id != ref_commit_id
                            && !repo
                                .graph_descendant_of(commit_id, ref_commit_id)
                                .unwrap_or(false)
                        {
                            state.state = LocalizationState::Outdated;
                        } else {
                            state.state = LocalizationState::UpToDate;
                        }
                    },
                    None => {
                        current_i18n.insert(ref_key.to_owned(), LocalizationEntryState {
                            key_line: None,
                            chunk_line_range: None,
                            commit_id: None,
                            state: LocalizationState::NotFound,
                        });
                    },
                }
            }

            let ref_keys: HashSet<&String> = i18n_references.keys().collect();
            for (_, state) in current_i18n
                .iter_mut()
                .filter(|&(k, _)| !ref_keys.contains(k))
            {
                state.state = LocalizationState::Unused;
            }

            // Display
            println!(
                "\n{:10} | {:60}| {:40} | {:40}\n",
                "State",
                "Key name",
                relfile.to_str().unwrap(),
                en_i18n_path.to_str().unwrap()
            );

            let mut sorted_keys: Vec<&String> = current_i18n.keys().collect();
            sorted_keys.sort();

            let current_i18n_entry_count = current_i18n.len();
            let mut uptodate_entries = 0;
            let mut outdated_entries = 0;
            let mut unused_entries = 0;
            let mut notfound_entries = 0;
            let mut unknown_entries = 0;
            for key in sorted_keys {
                let state = current_i18n.get(key).unwrap();
                if state.state != LocalizationState::UpToDate {
                    match state.state {
                        LocalizationState::Outdated => outdated_entries += 1,
                        LocalizationState::NotFound => notfound_entries += 1,
                        LocalizationState::Unknown => unknown_entries += 1,
                        LocalizationState::Unused => unused_entries += 1,
                        LocalizationState::UpToDate => unreachable!(),
                    };
                    println!(
                        "[{:9}] | {:60}| {:40} | {:40}",
                        format!("{:?}", state.state),
                        key,
                        state
                            .commit_id
                            .map(|s| format!("{}", s))
                            .unwrap_or_else(|| "None".to_string()),
                        i18n_references
                            .get(key)
                            .map(|s| s.commit_id)
                            .flatten()
                            .map(|s| format!("{}", s))
                            .unwrap_or_else(|| "None".to_string()),
                    );
                } else {
                    uptodate_entries += 1;
                }
            }
            println!(
                "\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries",
                uptodate_entries, outdated_entries, unused_entries, notfound_entries,
                unknown_entries
            );

            // Calculate the key count that actually matters for the status of
            // the translation: unused entries don't break the game.
            let real_entry_count = current_i18n_entry_count - unused_entries;
            let uptodate_percent = (uptodate_entries as f32 / real_entry_count as f32) * 100_f32;
            let outdated_percent = (outdated_entries as f32 / real_entry_count as f32) * 100_f32;
            let untranslated_percent =
                ((notfound_entries + unknown_entries) as f32 / real_entry_count as f32) * 100_f32;
            println!(
                "{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n",
                uptodate_percent, outdated_percent, untranslated_percent,
            );

            i18n_entry_counts.insert(
                file.clone(),
                (
                    uptodate_entries,
                    outdated_entries,
                    notfound_entries + unknown_entries,
                    real_entry_count,
                ),
            );
        }
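        // Aggregate the per-file counts collected above into a single
        // repository-wide summary.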
        let mut overall_uptodate_entry_count = 0;
        let mut overall_outdated_entry_count = 0;
        let mut overall_untranslated_entry_count = 0;
        let mut overall_real_entry_count = 0;

        println!("-----------------------------------------------------------------------------");
        println!("Overall Translation Status");
        println!("-----------------------------------------------------------------------------");
        println!(
            "{:12}| {:8} | {:8} | {:8}",
            "", "up-to-date", "outdated", "untranslated"
        );

        for (path, (uptodate, outdated, untranslated, real)) in i18n_entry_counts {
            overall_uptodate_entry_count += uptodate;
            overall_outdated_entry_count += outdated;
            overall_untranslated_entry_count += untranslated;
            overall_real_entry_count += real;

            println!(
                "{:12}|{:8} |{:6} |{:8}",
                path.file_name().unwrap().to_string_lossy(),
                uptodate,
                outdated,
                untranslated
            );
        }

        println!(
            "\n{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated",
            (overall_uptodate_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
            (overall_outdated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
            (overall_untranslated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
        );
        println!("-----------------------------------------------------------------------------\n");
    }
}