diff --git a/Cargo.lock b/Cargo.lock index 7a0288bc1f..8fd9ef793a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1902,6 +1902,47 @@ dependencies = [ "num-traits", ] +[[package]] +name = "fluent" +version = "0.16.0" +source = "git+https://github.com/juliancoffee/fluent-rs.git#efd8159736c0c5d5f00a1c1f91fe35492e9ab473" +dependencies = [ + "fluent-bundle", + "unic-langid", +] + +[[package]] +name = "fluent-bundle" +version = "0.15.2" +source = "git+https://github.com/juliancoffee/fluent-rs.git#efd8159736c0c5d5f00a1c1f91fe35492e9ab473" +dependencies = [ + "fluent-langneg", + "fluent-syntax", + "intl-memoizer", + "intl_pluralrules", + "rustc-hash", + "self_cell", + "smallvec", + "unic-langid", +] + +[[package]] +name = "fluent-langneg" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c4ad0989667548f06ccd0e306ed56b61bd4d35458d54df5ec7587c0e8ed5e94" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "fluent-syntax" +version = "0.11.0" +source = "git+https://github.com/juliancoffee/fluent-rs.git#efd8159736c0c5d5f00a1c1f91fe35492e9ab473" +dependencies = [ + "thiserror", +] + [[package]] name = "fnv" version = "1.0.7" @@ -2315,19 +2356,6 @@ version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4" -[[package]] -name = "git2" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3826a6e0e2215d7a41c2bfc7c9244123969273f3476b939a226aac0ab56e9e3c" -dependencies = [ - "bitflags", - "libc", - "libgit2-sys", - "log", - "url", -] - [[package]] name = "glam" version = "0.10.2" @@ -2812,6 +2840,25 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "intl-memoizer" +version = "0.5.1" +source = "git+https://github.com/juliancoffee/fluent-rs.git#efd8159736c0c5d5f00a1c1f91fe35492e9ab473" +dependencies = [ + "type-map", + "unic-langid", +] + +[[package]] +name = "intl_pluralrules" +version = "7.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b18f988384267d7066cc2be425e6faf352900652c046b6971d2e228d3b1c5ecf" +dependencies = [ + "tinystr", + "unic-langid", +] + [[package]] name = "io-kit-sys" version = "0.1.0" @@ -3006,18 +3053,6 @@ version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f" -[[package]] -name = "libgit2-sys" -version = "0.13.2+1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a42de9a51a5c12e00fc0e4ca6bc2ea43582fc6418488e8f615e905d886f258b" -dependencies = [ - "cc", - "libc", - "libz-sys", - "pkg-config", -] - [[package]] name = "libloading" version = "0.6.7" @@ -3080,18 +3115,6 @@ dependencies = [ "pkg-config", ] -[[package]] -name = "libz-sys" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f35facd4a5673cb5a48822be2be1d4236c1c99cb4113cab7061ac720d5bf859" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "linked-hash-map" version = "0.5.4" @@ -5262,6 +5285,12 @@ dependencies = [ "libc", ] +[[package]] +name = "self_cell" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ef965a420fe14fdac7dd018862966a4c14094f900e1650bbc71ddd7d580c8af" + [[package]] name = "semver" version = "0.9.0" @@ -5292,6 +5321,15 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-tuple-vec-map" 
+version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a04d0ebe0de77d7d445bb729a895dcb0a288854b267ca85f030ce51cdc578c82" +dependencies = [ + "serde", +] + [[package]] name = "serde_bytes" version = "0.11.5" @@ -5977,6 +6015,12 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29738eedb4388d9ea620eeab9384884fc3f06f586a2eddb56bedc5885126c7c1" + [[package]] name = "tinytemplate" version = "1.2.1" @@ -6255,6 +6299,15 @@ dependencies = [ "nom 5.1.2", ] +[[package]] +name = "type-map" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deb68604048ff8fa93347f02441e4487594adc20bb8a084f9e564d2b827a0a9f" +dependencies = [ + "rustc-hash", +] + [[package]] name = "typenum" version = "1.15.0" @@ -6270,6 +6323,24 @@ dependencies = [ "version_check 0.9.4", ] +[[package]] +name = "unic-langid" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73328fcd730a030bdb19ddf23e192187a6b01cd98be6d3140622a89129459ce5" +dependencies = [ + "unic-langid-impl", +] + +[[package]] +name = "unic-langid-impl" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a4a8eeaf0494862c1404c95ec2f4c33a2acff5076f64314b465e3ddae1b934d" +dependencies = [ + "tinystr", +] + [[package]] name = "unicode-bidi" version = "0.3.7" @@ -6856,15 +6927,18 @@ dependencies = [ [[package]] name = "veloren-voxygen-i18n" -version = "0.10.0" +version = "0.13.0" dependencies = [ - "clap 3.1.10", "deunicode", - "git2", + "fluent", + "fluent-bundle", "hashbrown 0.12.0", + "intl-memoizer", "ron 0.7.0", "serde", + "serde-tuple-vec-map", "tracing", + "unic-langid", "veloren-common-assets", ] diff --git a/common/assets/src/lib.rs b/common/assets/src/lib.rs index 3c24f23e44..e1bf8b1c29 100644 --- a/common/assets/src/lib.rs +++ b/common/assets/src/lib.rs @@ -16,6 +16,8 @@ pub use assets_manager::{ }; mod fs; +mod walk; +pub use walk::*; lazy_static! { /// The HashMap where all loaded assets are stored in. diff --git a/common/assets/src/walk.rs b/common/assets/src/walk.rs new file mode 100644 index 0000000000..976e325f85 --- /dev/null +++ b/common/assets/src/walk.rs @@ -0,0 +1,39 @@ +use std::{ + io, + path::{Path, PathBuf}, +}; + +/// Read `walk_tree` +#[derive(Debug)] +pub enum Walk { + File(PathBuf), + Dir { path: PathBuf, content: Vec }, +} + +/// Utility function to build a tree of directory, recursively +/// +/// At first iteration, use path to your directory as dir and root +pub fn walk_tree(dir: &Path, root: &Path) -> io::Result> { + let mut buff = Vec::new(); + for entry in std::fs::read_dir(dir)? 
{ + let entry = entry?; + let path = entry.path(); + if path.is_dir() { + buff.push(Walk::Dir { + path: path + .strip_prefix(root) + .expect("strip can't fail, this path is created from root") + .to_owned(), + content: walk_tree(&path, root)?, + }); + } else { + let filename = path + .strip_prefix(root) + .expect("strip can't fail, this file is created from root") + .to_owned(); + buff.push(Walk::File(filename)); + } + } + + Ok(buff) +} diff --git a/common/src/bin/asset_migrate.rs b/common/src/bin/asset_migrate.rs index a881f431c0..e05e4069ff 100644 --- a/common/src/bin/asset_migrate.rs +++ b/common/src/bin/asset_migrate.rs @@ -1,9 +1,6 @@ +use common_assets::{walk_tree, Walk}; use serde::{de::DeserializeOwned, Serialize}; -use std::{ - fs, io, - io::Write, - path::{Path, PathBuf}, -}; +use std::{fs, io, io::Write, path::Path}; // If you want to migrate assets. // 1) Copy-paste old asset type to own module @@ -19,37 +16,6 @@ mod new { pub type Config = (); } -#[derive(Debug)] -enum Walk { - File(PathBuf), - Dir { path: PathBuf, content: Vec }, -} - -fn walk_tree(dir: &Path, root: &Path) -> io::Result> { - let mut buff = Vec::new(); - for entry in fs::read_dir(dir)? { - let entry = entry?; - let path = entry.path(); - if path.is_dir() { - buff.push(Walk::Dir { - path: path - .strip_prefix(root) - .expect("strip can't fail, this path is created from root") - .to_owned(), - content: walk_tree(&path, root)?, - }); - } else { - let filename = path - .strip_prefix(root) - .expect("strip can't fail, this file is created from root") - .to_owned(); - buff.push(Walk::File(filename)); - } - } - - Ok(buff) -} - fn walk_with_migrate(tree: Walk, from: &Path, to: &Path) -> io::Result<()> where NewV: From, diff --git a/voxygen/i18n/Cargo.toml b/voxygen/i18n/Cargo.toml index 6734954e28..187b906e65 100644 --- a/voxygen/i18n/Cargo.toml +++ b/voxygen/i18n/Cargo.toml @@ -1,30 +1,32 @@ [package] -authors = ["juliancoffee ", "Rémy Phelipot"] +authors = ["juliancoffee "] edition = "2021" name = "veloren-voxygen-i18n" description = "Crate for internalization and diagnostic of existing localizations." 
-version = "0.10.0" - -[[bin]] -name = "i18n-check" -required-features = ["bin"] +version = "0.13.0" [dependencies] # Assets -hashbrown = { version = "0.12", features = ["serde", "nightly"] } common-assets = {package = "veloren-common-assets", path = "../../common/assets"} -deunicode = "1.0" -serde = { version = "1.0", features = ["derive"] } -tracing = "0.1" -# Diagnostic ron = "0.7" -git2 = { version = "0.14", default-features = false, optional = true } +serde = { version = "1.0", features = ["derive"] } +# Localization +unic-langid = { version = "0.9"} +intl-memoizer = { git = "https://github.com/juliancoffee/fluent-rs.git"} +fluent = { git = "https://github.com/juliancoffee/fluent-rs.git"} +fluent-bundle = { git = "https://github.com/juliancoffee/fluent-rs.git"} +# Utility +hashbrown = { version = "0.12", features = ["serde", "nightly"] } +deunicode = "1.0" +tracing = "0.1" +# Bin +serde-tuple-vec-map = "1.0" -# Binary -clap = { version = "3.1.8", features = ["suggestions", "std"], default-features = false, optional = true } - -[dev-dependencies] -git2 = { version = "0.14", default-features = false } +# FIXME: remove before merge +[[bin]] +name = "i18n-migrate" +required-features = ["i18n-migrate"] +path = "src/bin/migrate.rs" [features] -bin = ["git2", "clap"] +i18n-migrate = [] diff --git a/voxygen/i18n/README.md b/voxygen/i18n/README.md deleted file mode 100644 index 4353f88c95..0000000000 --- a/voxygen/i18n/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# Usage -`$ cargo run --features=bin -- --help`
-(Or if somewhere else in the repo)
-`$ cargo run -p veloren-i18n --features=bin -- --help`
-For example, diagnostic for specific language
-`$ cargo run -p veloren-i18n --features=bin -- <language_code>`
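The `walk_tree` helper that this change moves into `common-assets` (see the `common/assets/src/walk.rs` hunk above) is what both `asset_migrate` and the new `i18n-migrate` binary use to enumerate fragment files. A minimal sketch of how a caller might consume the returned `Walk` tree follows; the `print_tree` helper and the hard-coded path are illustrative, not part of this change:

```rust
use std::path::Path;

use common_assets::{walk_tree, Walk};

// Illustrative helper: print files, with directories indented.
fn print_tree(node: &Walk, depth: usize) {
    match node {
        Walk::File(path) => println!("{}{}", "  ".repeat(depth), path.display()),
        Walk::Dir { path, content } => {
            println!("{}{}/", "  ".repeat(depth), path.display());
            for child in content {
                print_tree(child, depth + 1);
            }
        },
    }
}

fn main() -> std::io::Result<()> {
    // As documented on `walk_tree`: on the first call, pass the directory
    // as both `dir` and `root`, so the returned paths are relative to it.
    let root = Path::new("assets/voxygen/i18n");
    for node in walk_tree(root, root)? {
        print_tree(&node, 0);
    }
    Ok(())
}
```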
diff --git a/voxygen/i18n/src/analysis.rs b/voxygen/i18n/src/analysis.rs deleted file mode 100644 index 9356166d74..0000000000 --- a/voxygen/i18n/src/analysis.rs +++ /dev/null @@ -1,247 +0,0 @@ -use crate::{ - gitfragments::{ - read_file_from_path, transform_fragment, LocalizationEntryState, LocalizationState, - }, - path::{BasePath, LangPath}, - raw::{self, RawFragment, RawLanguage}, - stats::{ - print_csv_stats, print_overall_stats, print_translation_stats, LocalizationAnalysis, - LocalizationStats, - }, - REFERENCE_LANG, -}; -use hashbrown::{hash_map::Entry, HashMap}; -use ron::de::from_bytes; - -/// Fill the entry State base information (except `state`) for a complete -/// language -fn gather_entry_state<'a>( - repo: &'a git2::Repository, - head_ref: &git2::Reference, - path: &LangPath, -) -> RawLanguage { - println!("-> {:?}", path.language_identifier()); - // load standard manifest - let manifest = raw::load_manifest(path).expect("failed to load language manifest"); - // transform language into LocalizationEntryState - let mut fragments = HashMap::new(); - - // For each file in directory - let files = path - .fragments() - .expect("failed to get all files in language"); - for sub_path in files { - let fullpath = path.sub_path(&sub_path); - let gitpath = fullpath.strip_prefix(path.base().root_path()).unwrap(); - println!(" -> {:?}", &sub_path); - let i18n_blob = read_file_from_path(repo, head_ref, gitpath); - let fragment: RawFragment = from_bytes(i18n_blob.content()) - .unwrap_or_else(|e| panic!("Could not parse {:?} RON file, error: {}", sub_path, e)); - let frag = transform_fragment(repo, (gitpath, fragment), &i18n_blob); - fragments.insert(sub_path, frag); - } - - RawLanguage:: { - manifest, - fragments, - } -} - -/// fills in the `state` -fn compare_lang_with_reference( - current_i18n: &mut RawLanguage, - i18n_references: &RawLanguage, - repo: &git2::Repository, -) { - // git graph descendant of is slow, so we cache it - let mut graph_descendant_of_cache = HashMap::new(); - - let mut cached_graph_descendant_of = |commit, ancestor| -> bool { - let key = (commit, ancestor); - match graph_descendant_of_cache.entry(key) { - Entry::Occupied(entry) => { - return *entry.get(); - }, - Entry::Vacant(entry) => { - let value = repo.graph_descendant_of(commit, ancestor).unwrap_or(false); - *entry.insert(value) - }, - } - }; - - const MISSING: LocalizationEntryState = LocalizationEntryState { - key_line: None, - chuck_line_range: None, - commit_id: None, - state: Some(LocalizationState::NotFound), - }; - - // match files - for (ref_path, ref_fragment) in i18n_references.fragments.iter() { - let cur_fragment = match current_i18n.fragments.get_mut(ref_path) { - Some(c) => c, - None => { - eprintln!( - "language {} is missing file: {:?}", - current_i18n.manifest.metadata.language_identifier, ref_path - ); - // add all keys as missing - let mut string_map = HashMap::new(); - for (ref_key, _) in ref_fragment.string_map.iter() { - string_map.insert(ref_key.to_owned(), MISSING.clone()); - } - current_i18n - .fragments - .insert(ref_path.to_owned(), RawFragment { - string_map, - vector_map: HashMap::new(), - }); - continue; - }, - }; - - for (ref_key, ref_state) in ref_fragment.string_map.iter() { - match cur_fragment.string_map.get_mut(ref_key) { - Some(state) => { - let commit_id = match state.commit_id { - Some(c) => c, - None => { - eprintln!( - "Commit ID of key {} in i18n file {} is missing! 
Skipping key.", - ref_key, - ref_path.to_string_lossy() - ); - continue; - }, - }; - let ref_commit_id = match ref_state.commit_id { - Some(c) => c, - None => { - eprintln!( - "Commit ID of key {} in reference i18n file is missing! Skipping \ - key.", - ref_key - ); - continue; - }, - }; - if commit_id != ref_commit_id - && !cached_graph_descendant_of(commit_id, ref_commit_id) - { - state.state = Some(LocalizationState::Outdated); - } else { - state.state = Some(LocalizationState::UpToDate); - } - }, - None => { - cur_fragment - .string_map - .insert(ref_key.to_owned(), MISSING.clone()); - }, - } - } - - for (_, state) in cur_fragment - .string_map - .iter_mut() - .filter(|&(k, _)| ref_fragment.string_map.get(k).is_none()) - { - state.state = Some(LocalizationState::Unused); - } - } -} - -fn gather_results( - current_i18n: &RawLanguage, -) -> (LocalizationAnalysis, LocalizationStats) { - let mut state_map = - LocalizationAnalysis::new(¤t_i18n.manifest.metadata.language_identifier); - let mut stats = LocalizationStats::default(); - - for (file, fragments) in ¤t_i18n.fragments { - for (key, entry) in &fragments.string_map { - match entry.state { - Some(LocalizationState::Outdated) => stats.outdated_entries += 1, - Some(LocalizationState::NotFound) => stats.notfound_entries += 1, - None => stats.errors += 1, - Some(LocalizationState::Unused) => stats.unused_entries += 1, - Some(LocalizationState::UpToDate) => stats.uptodate_entries += 1, - }; - let state_keys = state_map.data.get_mut(&entry.state).expect("prefiled"); - state_keys.push((file.clone(), key.to_owned(), entry.commit_id)); - } - } - - for (_, entries) in state_map.data.iter_mut() { - entries.sort(); - } - - (state_map, stats) -} - -/// Test one language -/// - `code`: name of the directory in assets (de_DE for example) -/// - `path`: path to repo -/// - `be_verbose`: print extra info -/// - `csv_enabled`: generate csv files in target folder -pub fn test_specific_localizations( - path: &BasePath, - language_identifiers: &[&str], - be_verbose: bool, - csv_enabled: bool, -) { - //complete analysis - let mut analysis = HashMap::new(); - // Initialize Git objects - let repo = git2::Repository::discover(path.root_path()) - .unwrap_or_else(|_| panic!("Failed to open the Git repository {:?}", path.root_path())); - let head_ref = repo.head().expect("Impossible to get the HEAD reference"); - - // Read Reference Language - let ref_language = gather_entry_state(&repo, &head_ref, &path.i18n_path(REFERENCE_LANG)); - for &language_identifier in language_identifiers { - let mut cur_language = - gather_entry_state(&repo, &head_ref, &path.i18n_path(language_identifier)); - compare_lang_with_reference(&mut cur_language, &ref_language, &repo); - let (state_map, stats) = gather_results(&cur_language); - analysis.insert(language_identifier.to_owned(), (state_map, stats)); - } - - let output = path.root_path().join("translation_analysis.csv"); - let mut f = std::fs::File::create(output).expect("couldn't write csv file"); - - use std::io::Write; - writeln!( - f, - "country_code,file_name,translation_key,status,git_commit" - ) - .unwrap(); - //printing - for (language_identifier, (state_map, stats)) in &analysis { - if csv_enabled { - print_csv_stats(state_map, &mut f); - } else { - print_translation_stats( - language_identifier, - &ref_language, - stats, - state_map, - be_verbose, - ); - } - } - if analysis.len() > 1 { - print_overall_stats(analysis); - } -} - -/// Test all localizations -pub fn test_all_localizations(path: &BasePath, be_verbose: bool, 
csv_enabled: bool) { - // Compare to other reference files - let languages = path.i18n_directories(); - let language_identifiers = languages - .iter() - .map(|s| s.language_identifier()) - .collect::>(); - test_specific_localizations(path, &language_identifiers, be_verbose, csv_enabled); -} diff --git a/voxygen/i18n/src/bin/i18n-check.rs b/voxygen/i18n/src/bin/i18n-check.rs deleted file mode 100644 index 4154c94011..0000000000 --- a/voxygen/i18n/src/bin/i18n-check.rs +++ /dev/null @@ -1,48 +0,0 @@ -use clap::{Arg, Command}; -use veloren_voxygen_i18n::{analysis, verification, BasePath}; - -fn main() { - let matches = Command::new("i18n-check") - .version("0.1.0") - .author("juliancoffee ") - .about("Test veloren localizations") - .arg( - Arg::new("CODE") - .required(false) - .help("Run diagnostic for specific language code (de_DE as example)"), - ) - .arg( - Arg::new("verify") - .long("verify") - .help("verify all localizations"), - ) - .arg(Arg::new("test").long("test").help("test all localizations")) - .arg( - Arg::new("verbose") - .short('v') - .long("verbose") - .help("print additional information"), - ) - .arg( - Arg::new("csv") - .long("csv") - .help("generate csv files per language in target folder"), - ) - .get_matches(); - - // Generate paths - let root_path = common_assets::find_root().expect("Failed to find root of repository"); - let path = BasePath::new(&root_path); - let be_verbose = matches.is_present("verbose"); - let csv_enabled = matches.is_present("csv"); - - if let Some(code) = matches.value_of("CODE") { - analysis::test_specific_localizations(&path, &[code], be_verbose, csv_enabled); - } - if matches.is_present("test") { - analysis::test_all_localizations(&path, be_verbose, csv_enabled); - } - if matches.is_present("verify") { - verification::verify_all_localizations(&path); - } -} diff --git a/voxygen/i18n/src/bin/migrate.rs b/voxygen/i18n/src/bin/migrate.rs new file mode 100644 index 0000000000..a68b46a498 --- /dev/null +++ b/voxygen/i18n/src/bin/migrate.rs @@ -0,0 +1,232 @@ +use std::{ffi::OsStr, fs, io::Write, path::Path}; + +use serde::Deserialize; + +use common_assets::{walk_tree, Walk}; + +/// Structure representing file for old .ron format +#[derive(Deserialize)] +struct RawFragment { + #[serde(with = "tuple_vec_map")] + string_map: Vec<(String, String)>, + #[serde(with = "tuple_vec_map")] + vector_map: Vec<(String, Vec)>, +} + +impl RawFragment { + fn read(path: &Path) -> Self { + let source = fs::File::open(path).unwrap(); + ron::de::from_reader(source).unwrap() + } +} + +/// Message value, may contain interpolated variables +struct Pattern { + view: String, +} + +impl Pattern { + fn expand(self) -> String { + let mut buff = String::new(); + if self.view.contains('\n') { + let mut first = true; + for line in self.view.lines() { + if line.is_empty() && first { + // fluent ignores space characters at the beginning + // so we need to encode \n explicitly + buff.push_str(r#"{"\u000A"}"#); + } else { + buff.push_str("\n "); + } + if first { + first = false; + } + buff.push_str(line); + } + } else { + buff.push_str(" "); + buff.push_str(&self.view); + } + + buff + } +} + +/// Fluent entry +struct Message { + value: Option, + attributes: Vec<(String, Pattern)>, +} + +impl Message { + fn stringify(self) -> String { + let mut buff = String::new(); + // append equal sign + buff.push_str(" ="); + // display value if any + if let Some(value) = self.value { + buff.push_str(&value.expand()); + } + // add attributes + for (attr_name, attr) in self.attributes { + // new line 
and append tab + buff.push_str("\n "); + // print attrname + buff.push('.'); + buff.push_str(&attr_name); + // equal sign + buff.push_str(" ="); + // display attr + buff.push_str(&attr.expand()); + } + + buff + } +} + +/// Structure representing file for new .ftl format +struct Source { + entries: Vec<(String, Message)>, +} + +impl Source { + fn write(self, path: &Path) { + let mut source = fs::File::create(path).unwrap(); + let mut first = true; + for (key, msg) in self.entries { + if !first { + source.write_all(b"\n").unwrap(); + } else { + first = false; + } + source.write_all(key.as_bytes()).unwrap(); + source.write_all(msg.stringify().as_bytes()).unwrap(); + } + } +} + +// Convert old i18n string to new fluent format +fn to_pattern(old: String) -> Pattern { + let mut buff = String::new(); + + let mut in_capture = false; + let mut need_sign = false; + + for ch in old.chars() { + if ch == '{' { + if !in_capture { + in_capture = true; + } else { + panic!("double {{"); + } + need_sign = true; + + buff.push(ch); + buff.push(' '); + } else if ch == '}' { + if in_capture { + in_capture = false; + } else { + panic!("}} without opening {{"); + } + + buff.push(' '); + buff.push(ch); + } else { + if need_sign { + buff.push('$'); + need_sign = false; + } + if ch == '.' && in_capture { + buff.push('-') + } else { + buff.push(ch) + } + } + } + + Pattern { view: buff } +} + +fn to_attributes(old: Vec) -> Message { + let mut attributes = Vec::new(); + for (i, string) in old.iter().enumerate() { + let attr_name = format!("a{i}"); + let attr = to_pattern(string.to_owned()); + attributes.push((attr_name, attr)) + } + + Message { + value: None, + attributes, + } +} + +fn convert(old: RawFragment) -> Source { + let mut entries = Vec::new(); + let mut cache = Vec::new(); + for (key, string) in old.string_map.into_iter() { + if cache.contains(&key) { + continue; + } else { + cache.push(key.clone()); + } + // common.weapon.tool -> common-weapon-tool + let key = key.replace('.', "-").to_owned(); + let msg = Message { + value: Some(to_pattern(string.to_owned())), + attributes: Vec::new(), + }; + entries.push((key, msg)) + } + + for (key, variation) in old.vector_map.into_iter() { + if cache.contains(&key) { + continue; + } else { + cache.push(key.clone()); + } + // common.weapon.tool -> common-weapon-tool + let key = key.replace('.', "-").to_owned(); + let msg = to_attributes(variation); + entries.push((key, msg)) + } + + Source { entries } +} + +fn migrate(tree: Walk, from: &Path, to: &Path) { + match tree { + Walk::Dir { path, content } => { + println!("{:?}", path); + let target_dir = to.join(path); + fs::create_dir(target_dir).unwrap(); + for entry in content { + migrate(entry, from, to); + } + }, + Walk::File(path) => { + if path.file_name() == Some(OsStr::new("_manifest.ron")) + || path.file_name() == Some(OsStr::new("README.md")) + { + fs::copy(from.join(&path), to.join(path)).unwrap(); + } else { + let old = RawFragment::read(&from.join(&path)); + let new = convert(old); + new.write(&to.join(path).with_extension("ftl")); + } + }, + } +} + +fn main() { + // it assumes that you have old i18n files in i18n-ron directory + let old_path = Path::new("assets/voxygen/i18n-ron"); + let new_path = Path::new("assets/voxygen/i18n"); + let tree = walk_tree(&old_path, &old_path).unwrap(); + let tree = Walk::Dir { + path: Path::new("").to_owned(), + content: tree, + }; + migrate(tree, &old_path, &new_path); +} diff --git a/voxygen/i18n/src/gitfragments.rs b/voxygen/i18n/src/gitfragments.rs deleted file mode 100644 
index 6ca48dce81..0000000000 --- a/voxygen/i18n/src/gitfragments.rs +++ /dev/null @@ -1,157 +0,0 @@ -//! fragment attached with git versioning information -use crate::raw::RawFragment; -use hashbrown::HashMap; -use std::path::Path; - -#[derive(Copy, Clone, Eq, Hash, Debug, PartialEq)] -pub(crate) enum LocalizationState { - UpToDate, - NotFound, - Outdated, - Unused, -} - -pub(crate) const ALL_LOCALIZATION_STATES: [Option; 5] = [ - Some(LocalizationState::UpToDate), - Some(LocalizationState::NotFound), - Some(LocalizationState::Outdated), - Some(LocalizationState::Unused), - None, -]; - -#[derive(Clone, Debug)] -pub(crate) struct LocalizationEntryState { - pub(crate) key_line: Option, - pub(crate) chuck_line_range: Option<(usize, usize)>, - pub(crate) commit_id: Option, - pub(crate) state: Option, -} - -impl LocalizationState { - pub(crate) fn print(this: &Option) -> String { - match this { - Some(LocalizationState::UpToDate) => "UpToDate", - Some(LocalizationState::NotFound) => "NotFound", - Some(LocalizationState::Outdated) => "Outdated", - Some(LocalizationState::Unused) => "Unused", - None => "Unknown", - } - .to_owned() - } -} - -impl LocalizationEntryState { - fn new(key_line: Option) -> LocalizationEntryState { - LocalizationEntryState { - key_line, - chuck_line_range: None, - commit_id: None, - state: None, - } - } -} - -/// Returns the Git blob associated with the given reference and path -pub(crate) fn read_file_from_path<'a>( - repo: &'a git2::Repository, - reference: &git2::Reference, - path: &Path, -) -> git2::Blob<'a> { - let tree = reference - .peel_to_tree() - .expect("Impossible to peel HEAD to a tree object"); - tree.get_path(path) - .unwrap_or_else(|_| { - panic!( - "Impossible to find the file {:?} in reference {:?}", - path, - reference.name() - ) - }) - .to_object(repo) - .unwrap() - .peel_to_blob() - .expect("Impossible to fetch the Git object") -} - -/// Extend a Fragment with historical git data -/// The actual translation gets dropped -/// TODO: transform vector_map too -pub(crate) fn transform_fragment<'a>( - repo: &'a git2::Repository, - fragment: (&Path, RawFragment), - file_blob: &git2::Blob, -) -> RawFragment { - let (path, fragment) = fragment; - // Find key start lines by searching all lines which have `:` in them (as they - // are probably keys) and getting the first part of such line trimming - // whitespace and quotes. Quite buggy heuristic - let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file"); - // we only need the key part of the file to process - let file_content_keys = file_content.lines().enumerate().filter_map(|(no, line)| { - line.split_once(':').map(|(key, _)| { - let mut key = key.trim().chars(); - key.next(); - key.next_back(); - (no, key.as_str()) - }) - }); - //speed up the search by sorting all keys! 
- let mut file_content_keys_sorted = file_content_keys.into_iter().collect::>(); - file_content_keys_sorted.sort_by_key(|(_, key)| *key); - - let mut result = RawFragment:: { - string_map: HashMap::new(), - vector_map: HashMap::new(), - }; - - for (original_key, _) in fragment.string_map { - let line_nb = file_content_keys_sorted - .binary_search_by_key(&original_key.as_str(), |(_, key)| *key) - .map_or_else( - |_| { - eprintln!( - "Key {} does not have a git line in it's state!", - original_key - ); - None - }, - |id| Some(file_content_keys_sorted[id].0), - ); - - result - .string_map - .insert(original_key, LocalizationEntryState::new(line_nb)); - } - - // Find commit for each keys, THIS PART IS SLOW (2s/4s) - for e in repo - .blame_file(path, None) - .expect("Impossible to generate the Git blame") - .iter() - { - for (_, state) in result.string_map.iter_mut() { - if let Some(line) = state.key_line { - let range = ( - e.final_start_line(), - e.final_start_line() + e.lines_in_hunk(), - ); - if line + 1 >= range.0 && line + 1 < range.1 { - state.chuck_line_range = Some(range); - state.commit_id = state.commit_id.map_or_else( - || Some(e.final_commit_id()), - |existing_commit| match repo - .graph_descendant_of(e.final_commit_id(), existing_commit) - { - Ok(true) => Some(e.final_commit_id()), - Ok(false) => Some(existing_commit), - Err(err) => panic!("{}", err), - }, - ); - } - } - } - } - - result -} diff --git a/voxygen/i18n/src/lib.rs b/voxygen/i18n/src/lib.rs index acf72606ed..2714103ffd 100644 --- a/voxygen/i18n/src/lib.rs +++ b/voxygen/i18n/src/lib.rs @@ -1,22 +1,21 @@ -#[cfg(any(feature = "bin", test))] -pub mod analysis; -#[cfg(any(feature = "bin", test))] -mod gitfragments; -mod path; mod raw; -#[cfg(any(feature = "bin", test))] pub mod stats; -pub mod verification; -//reexport -pub use path::BasePath; +use fluent_bundle::{bundle::FluentBundle, FluentResource}; +use intl_memoizer::concurrent::IntlLangMemoizer; +use unic_langid::LanguageIdentifier; -use crate::path::{LANG_EXTENSION, LANG_MANIFEST_FILE}; -use common_assets::{self, source::DirEntry, AssetExt, AssetGuard, AssetHandle, ReloadWatcher}; -use hashbrown::{HashMap, HashSet}; -use raw::{RawFragment, RawLanguage, RawManifest}; +use hashbrown::HashMap; use serde::{Deserialize, Serialize}; -use std::{io, path::PathBuf}; +use std::{borrow::Cow, io}; + +use assets::{source::DirEntry, AssetExt, AssetGuard, AssetHandle, ReloadWatcher}; use tracing::warn; +// Re-export because I don't like prefix +use common_assets as assets; + +// Re-export for argument creation +pub use fluent::fluent_args; +pub use fluent_bundle::FluentArgs; /// The reference language, aka the more up-to-date localization data. /// Also the default language at first startup. @@ -49,6 +48,7 @@ pub struct Font { impl Font { /// Scale input size to final UI size + #[must_use] pub fn scale(&self, value: u32) -> u32 { (value as f32 * self.scale_ratio).round() as u32 } } @@ -56,21 +56,14 @@ impl Font { pub type Fonts = HashMap; /// Store internationalization data -#[derive(Debug, PartialEq, Serialize, Deserialize)] struct Language { - /// A map storing the localized texts - /// - /// Localized content can be accessed using a String key. - pub(crate) string_map: HashMap, - - /// A map for storing variations of localized texts, for example multiple - /// ways of saying "Help, I'm under attack". Used primarily for npc - /// dialogue. 
- pub(crate) vector_map: HashMap>, - + /// The bundle storing all localized texts + pub(crate) bundle: FluentBundle, /// Whether to convert the input text encoded in UTF-8 /// into a ASCII version by using the `deunicode` crate. - pub(crate) convert_utf8_to_ascii: bool, + // FIXME (i18n convert_utf8_to_ascii): + #[allow(dead_code)] + convert_utf8_to_ascii: bool, /// Font configuration is stored here pub(crate) fonts: Fonts, @@ -79,68 +72,172 @@ struct Language { } impl Language { - /// Get a localized text from the given key - pub fn get(&self, key: &str) -> Option<&str> { self.string_map.get(key).map(String::as_str) } + fn try_msg<'a>(&'a self, key: &str, args: Option<&'a FluentArgs>) -> Option> { + let bundle = &self.bundle; + let msg = bundle.get_message(key)?; + let mut errs = Vec::new(); + let msg = bundle.format_pattern(msg.value()?, args, &mut errs); + for err in errs { + eprintln!("err: {err} for {key}"); + } - /// Get a variation of localized text from the given key - /// - /// `index` should be a random number from `0` to `u16::max()` - pub fn get_variation(&self, key: &str, index: u16) -> Option<&str> { - self.vector_map.get(key).and_then(|v| { - if v.is_empty() { - None - } else { - Some(v[index as usize % v.len()].as_str()) - } - }) + Some(msg) + } + + fn try_collect_attrs<'a>( + &'a self, + key: &str, + args: Option<&'a FluentArgs>, + ) -> Option>> { + let bundle = &self.bundle; + let msg = bundle.get_message(key)?; + + let mut errs = Vec::new(); + let mut attrs = Vec::new(); + + for attr in msg.attributes() { + let msg = bundle.format_pattern(attr.value(), args, &mut errs); + attrs.push(msg); + } + for err in errs { + eprintln!("err: {err} for {key}"); + } + + Some(attrs) + } + + fn try_variation<'a>( + &'a self, + key: &str, + seed: u16, + args: Option<&'a FluentArgs>, + ) -> Option> { + let mut attrs = self.try_collect_attrs(key, args)?; + + if attrs.is_empty() { + None + } else { + let variation = attrs.swap_remove(usize::from(seed) % attrs.len()); + Some(variation) + } } } -impl common_assets::Compound for Language { - fn load( - cache: common_assets::AnyCache, - asset_key: &str, - ) -> Result { +impl assets::Compound for Language { + fn load(cache: assets::AnyCache, path: &str) -> Result { let manifest = cache - .load::(&[asset_key, ".", LANG_MANIFEST_FILE].concat())? + .load::(&[path, ".", "_manifest"].concat())? .cloned(); + let raw::Manifest { + convert_utf8_to_ascii, + fonts, + metadata, + } = manifest; - // Walk through files in the folder, collecting localization fragment to merge - // inside the asked_localization - let mut fragments = HashMap::new(); - for id in cache - .load_dir::>(asset_key, true)? 
- .ids() - { - // Don't try to load manifests - if let Some(id) = id.strip_suffix(LANG_MANIFEST_FILE) { - if id.ends_with('.') { - continue; - } + let lang_id: LanguageIdentifier = metadata.language_identifier.parse()?; + let mut bundle = FluentBundle::new_concurrent(vec![lang_id]); + + // Here go dragons + for id in cache.load_dir::(path, true)?.ids() { + if id.ends_with("_manifest") { + continue; } match cache.load(id) { Ok(handle) => { - let fragment: &RawFragment = &*handle.read(); + use std::{error::Error, fmt, ops::Range}; - fragments.insert(PathBuf::from(id), fragment.clone()); + #[derive(Debug)] + struct Pos { + #[allow(dead_code)] // false-positive + line: usize, + #[allow(dead_code)] // false-positive + character: usize, + } + + fn unspan(src: &str, span: Range) -> Range { + let count = |idx| { + let mut line = 1; + let mut character = 1; + for ch in src.bytes().take(idx) { + // Count characters + character += 1; + + // Count newlines + if ch == b'\n' { + line += 1; + // If found new line, reset character count + character = 1; + } + } + Pos { line, character } + }; + let Range { start, end } = span; + count(start)..count(end) + } + + // TODO: + // better error handling? + #[derive(Debug)] + enum ResourceErr { + ParsingError { + #[allow(dead_code)] // false-positive + file: String, + #[allow(dead_code)] // false-positive + err: String, + }, + BundleError(String), + } + + impl fmt::Display for ResourceErr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{self:?}") + } + } + + impl Error for ResourceErr {} + + let source: &raw::Resource = &*handle.read(); + let resource = + FluentResource::try_new(source.src.clone()).map_err(|(_ast, errs)| { + let file = id.to_owned(); + let errs = errs + .into_iter() + .map(|e| { + let pos = unspan(&source.src, e.pos); + format!("{pos:?}, kind {:?}", e.kind) + }) + .collect::>(); + + ResourceErr::ParsingError { + file, + err: format!("{errs:?}"), + } + })?; + + bundle + .add_resource(resource) + .map_err(|e| ResourceErr::BundleError(format!("{e:?}")))?; }, - Err(e) => { - warn!("Unable to load asset {}, error={:?}", id, e); + Err(err) => { + // TODO: shouldn't we just panic here? + warn!("Unable to load asset {id}, error={err:?}"); }, } } - Ok(Language::from(RawLanguage { - manifest, - fragments, - })) + Ok(Self { + bundle, + convert_utf8_to_ascii, + fonts, + metadata, + }) } } -/// the central data structure to handle localization in veloren -// inherit Copy+Clone from AssetHandle -#[derive(Debug, Copy, Clone)] +/// The central data structure to handle localization in Veloren +// inherit Copy + Clone from AssetHandle (what?) +#[derive(Copy, Clone)] pub struct LocalizationHandle { active: AssetHandle, watcher: ReloadWatcher, @@ -148,24 +245,46 @@ pub struct LocalizationHandle { pub use_english_fallback: bool, } -// RAII guard returned from Localization::read(), resembles AssetGuard +/// Read `LocalizationGuard` +// arbitrary choice to minimize changing all of veloren +pub type Localization = LocalizationGuard; + +/// RAII guard returned from `Localization::read`(), resembles `AssetGuard` pub struct LocalizationGuard { active: AssetGuard, fallback: Option>, } -// arbitrary choice to minimize changing all of veloren -pub type Localization = LocalizationGuard; - impl LocalizationGuard { + /// DEPRECATED + /// /// Get a localized text from the given key /// /// First lookup is done in the active language, second in /// the fallback (if present). 
- pub fn get_opt(&self, key: &str) -> Option<&str> { + /// If the key is not present in the localization object + /// then the key itself is returned. + /// + /// NOTE: this function shouldn't be used in new code. + /// It is kept for compatibility with old code that uses + /// old style dot-separated keys and this function internally + /// replaces them with dashes. + // FIXME (i18n old style keys): + // this is deprecated, fix all usages of this asap + pub fn get(&self, key: &str) -> Cow { + // Fluent uses `-` as informal separator, while in the past with our + // RON based system we used `.` for that purpose. + self.get_msg(&key.replace('.', "-")) + } + + /// Get a localized text from the given key + /// + /// First lookup is done in the active language, second in + /// the fallback (if present). + pub fn try_msg(&self, key: &str) -> Option> { self.active - .get(key) - .or_else(|| self.fallback.as_ref().and_then(|f| f.get(key))) + .try_msg(key, None) + .or_else(|| self.fallback.as_ref().and_then(|fb| fb.try_msg(key, None))) } /// Get a localized text from the given key @@ -173,76 +292,95 @@ impl LocalizationGuard { /// First lookup is done in the active language, second in /// the fallback (if present). /// If the key is not present in the localization object - /// then the key is returned. - pub fn get<'a>(&'a self, key: &'a str) -> &str { self.get_opt(key).unwrap_or(key) } + /// then the key itself is returned. + pub fn get_msg(&self, key: &str) -> Cow { + // NOTE: we clone the key if translation was missing + // We could use borrowed version, but it would mean that + // `key`, `self`, and result should have the same lifetime. + // Which would make it impossible to use with runtime generated keys. + self.try_msg(key) + .unwrap_or_else(|| Cow::Owned(key.to_owned())) + } - /// Get a localized text from the given key + /// Get a localized text from the given key using given arguments /// /// First lookup is done in the active language, second in /// the fallback (if present). - pub fn get_or(&self, key: &str, fallback_key: &str) -> Option<&str> { - self.get_opt(key).or_else(|| self.get_opt(fallback_key)) + pub fn try_msg_ctx<'a>(&'a self, key: &str, args: &'a FluentArgs) -> Option> { + // NOTE: as after using args we get our result owned (because you need + // to clone pattern during forming value from args), this conversion + // to Cow;:Owned is no-op. + // We could use String here, but using Cow everywhere in i18n API is + // prefered for consistency. + self.active + .try_msg(key, Some(args)) + .or_else(|| { + self.fallback + .as_ref() + .and_then(|fb| fb.try_msg(key, Some(args))) + }) + .map(|x| { + // NOTE: + // Hack. Remove Unicode Directionality Marks, conrod doesn't support them. + let res = x.replace('\u{2068}', "").replace('\u{2069}', ""); + Cow::Owned(res) + }) } - /// Get a variation of localized text from the given key - /// - /// `index` should be a random number from `0` to `u16::max()` + /// Get a localized text from the given key using given arguments /// + /// First lookup is done in the active language, second in + /// the fallback (if present). /// If the key is not present in the localization object - /// then the key is returned. - pub fn get_variation<'a>(&'a self, key: &'a str, index: u16) -> &str { - self.active.get_variation(key, index).unwrap_or_else(|| { + /// then the key itself is returned. 
+ pub fn get_msg_ctx<'a>(&'a self, key: &str, args: &'a FluentArgs) -> Cow<'static, str> { + self.try_msg_ctx(key, args) + .unwrap_or_else(|| Cow::Owned(key.to_owned())) + } + + pub fn try_variation(&self, key: &str, seed: u16) -> Option> { + self.active.try_variation(key, seed, None).or_else(|| { self.fallback .as_ref() - .and_then(|f| f.get_variation(key, index)) - .unwrap_or(key) + .and_then(|fb| fb.try_variation(key, seed, None)) }) } - /// Return the missing keys compared to the reference language - fn list_missing_entries(&self) -> (HashSet, HashSet) { - if let Some(ref_lang) = &self.fallback { - let reference_string_keys: HashSet<_> = ref_lang.string_map.keys().cloned().collect(); - let string_keys: HashSet<_> = self.active.string_map.keys().cloned().collect(); - let strings = reference_string_keys - .difference(&string_keys) - .cloned() - .collect(); - - let reference_vector_keys: HashSet<_> = ref_lang.vector_map.keys().cloned().collect(); - let vector_keys: HashSet<_> = self.active.vector_map.keys().cloned().collect(); - let vectors = reference_vector_keys - .difference(&vector_keys) - .cloned() - .collect(); - - (strings, vectors) - } else { - (HashSet::default(), HashSet::default()) - } + pub fn get_variation(&self, key: &str, seed: u16) -> Cow { + self.try_variation(key, seed) + .unwrap_or_else(|| Cow::Owned(key.to_owned())) } - /// Log missing entries (compared to the reference language) as warnings - pub fn log_missing_entries(&self) { - let (missing_strings, missing_vectors) = self.list_missing_entries(); - for missing_key in missing_strings { - warn!( - "[{:?}] Missing string key {:?}", - self.metadata().language_identifier, - missing_key - ); - } - for missing_key in missing_vectors { - warn!( - "[{:?}] Missing vector key {:?}", - self.metadata().language_identifier, - missing_key - ); - } + pub fn try_variation_ctx<'a>( + &'a self, + key: &str, + seed: u16, + args: &'a FluentArgs, + ) -> Option> { + self.active + .try_variation(key, seed, Some(args)) + .or_else(|| { + self.fallback + .as_ref() + .and_then(|fb| fb.try_variation(key, seed, Some(args))) + }) + .map(|x| { + // NOTE: + // Hack. Remove Unicode Directionality Marks, conrod doesn't support them. + let res = x.replace('\u{2068}', "").replace('\u{2069}', ""); + Cow::Owned(res) + }) } + pub fn get_variation_ctx<'a>(&'a self, key: &str, seed: u16, args: &'a FluentArgs) -> Cow { + self.try_variation_ctx(key, seed, args) + .unwrap_or_else(|| Cow::Owned(key.to_owned())) + } + + #[must_use] pub fn fonts(&self) -> &Fonts { &self.active.fonts } + #[must_use] pub fn metadata(&self) -> &LanguageMetadata { &self.active.metadata } } @@ -251,6 +389,7 @@ impl LocalizationHandle { self.use_english_fallback = use_english_fallback; } + #[must_use] pub fn read(&self) -> LocalizationGuard { LocalizationGuard { active: self.active.read(), @@ -262,7 +401,9 @@ impl LocalizationHandle { } } - pub fn load(specifier: &str) -> Result { + /// # Errors + /// Returns error if active language can't be loaded + pub fn load(specifier: &str) -> Result { let default_key = ["voxygen.i18n.", REFERENCE_LANG].concat(); let language_key = ["voxygen.i18n.", specifier].concat(); let is_default = language_key == default_key; @@ -273,12 +414,14 @@ impl LocalizationHandle { fallback: if is_default { None } else { + // TODO: shouldn't this panic? 
Language::load(&default_key).ok() }, use_english_fallback: false, }) } + #[must_use] pub fn load_expect(specifier: &str) -> Self { Self::load(specifier).expect("Can't load language files") } @@ -288,17 +431,18 @@ impl LocalizationHandle { struct FindManifests; -impl common_assets::DirLoadable for FindManifests { - fn select_ids( +impl assets::DirLoadable for FindManifests { + fn select_ids( source: &S, specifier: &str, - ) -> io::Result> { + ) -> io::Result> { let mut specifiers = Vec::new(); source.read_dir(specifier, &mut |entry| { if let DirEntry::Directory(spec) = entry { - let manifest_spec = [spec, ".", LANG_MANIFEST_FILE].concat(); - if source.exists(DirEntry::File(&manifest_spec, LANG_EXTENSION)) { + let manifest_spec = [spec, ".", "_manifest"].concat(); + + if source.exists(DirEntry::File(&manifest_spec, "ron")) { specifiers.push(manifest_spec.into()); } } @@ -311,16 +455,13 @@ impl common_assets::DirLoadable for FindManifests { #[derive(Clone, Debug)] struct LocalizationList(Vec); -impl common_assets::Compound for LocalizationList { - fn load( - cache: common_assets::AnyCache, - specifier: &str, - ) -> Result { +impl assets::Compound for LocalizationList { + fn load(cache: assets::AnyCache, specifier: &str) -> Result { // List language directories - let languages = common_assets::load_dir::(specifier, false) + let languages = assets::load_dir::(specifier, false) .unwrap_or_else(|e| panic!("Failed to get manifests from {}: {:?}", specifier, e)) .ids() - .filter_map(|spec| cache.load::(spec).ok()) + .filter_map(|spec| cache.load::(spec).ok()) .map(|localization| localization.read().metadata.clone()) .collect(); @@ -329,42 +470,49 @@ impl common_assets::Compound for LocalizationList { } /// Load all the available languages located in the voxygen asset directory +#[must_use] pub fn list_localizations() -> Vec { - LocalizationList::load_expect_cloned("voxygen.i18n").0 + let LocalizationList(list) = LocalizationList::load_expect_cloned("voxygen.i18n"); + list } #[cfg(test)] mod tests { - use crate::path::BasePath; + use super::*; - // Test that localization list is loaded (not empty) #[test] - fn test_localization_list() { - let list = super::list_localizations(); + fn check_localization_list() { + let list = list_localizations(); assert!(!list.is_empty()); } - // Test that reference language can be loaded #[test] - fn test_localization_handle() { - let _ = super::LocalizationHandle::load_expect(super::REFERENCE_LANG); + fn validate_reference_language() { let _ = LocalizationHandle::load_expect(REFERENCE_LANG); } + + #[test] + fn validate_all_localizations() { + let list = list_localizations(); + for meta in list { + let _ = LocalizationHandle::load_expect(&meta.language_identifier); + } } - // Test to verify all languages that they are VALID and loadable, without - // need of git just on the local assets folder - #[test] - fn verify_all_localizations() { - // Generate paths - let root_dir = common_assets::find_root().expect("Failed to discover repository root"); - crate::verification::verify_all_localizations(&BasePath::new(&root_dir)); - } - - // Test to verify all languages and print missing and faulty localisation #[test] #[ignore] + // Generate translation stats fn test_all_localizations() { - // Generate paths - let root_dir = common_assets::find_root().expect("Failed to discover repository root"); - crate::analysis::test_all_localizations(&BasePath::new(&root_dir), true, true); + // FIXME (i18n translation stats): + use std::{fs, io::Write}; + + let output = assets::find_root() 
+ .unwrap() + .join("translation_analysis.csv"); + let mut f = fs::File::create(output).expect("couldn't write csv file"); + + writeln!( + f, + "country_code,file_name,translation_key,status,git_commit" + ) + .unwrap(); } } diff --git a/voxygen/i18n/src/path.rs b/voxygen/i18n/src/path.rs deleted file mode 100644 index 3f25e55adf..0000000000 --- a/voxygen/i18n/src/path.rs +++ /dev/null @@ -1,141 +0,0 @@ -use std::path::{Path, PathBuf}; - -pub(crate) const LANG_MANIFEST_FILE: &str = "_manifest"; -pub(crate) const LANG_EXTENSION: &str = "ron"; - -#[derive(Clone)] -pub struct BasePath { - ///repo part, git main folder - root_path: PathBuf, - ///relative path to i18n path which contains, currently - /// 'assets/voxygen/i18n' - relative_i18n_root_path: PathBuf, - ///i18n_root_folder - cache: PathBuf, -} - -impl BasePath { - pub fn new(root_path: &Path) -> Self { - let relative_i18n_root_path = Path::new("assets/voxygen/i18n").to_path_buf(); - let cache = root_path.join(&relative_i18n_root_path); - assert!( - cache.is_dir(), - "i18n_root_path folder doesn't exist, something is wrong!" - ); - Self { - root_path: root_path.to_path_buf(), - relative_i18n_root_path, - cache, - } - } - - pub fn root_path(&self) -> &Path { &self.root_path } - - pub fn relative_i18n_root_path(&self) -> &Path { &self.relative_i18n_root_path } - - /// absolute path to `relative_i18n_root_path` - pub fn i18n_root_path(&self) -> &Path { &self.cache } - - pub fn i18n_path(&self, language_identifier: &str) -> LangPath { - LangPath::new(self, language_identifier) - } - - /// List localization directories - pub fn i18n_directories(&self) -> Vec { - std::fs::read_dir(&self.cache) - .unwrap() - .map(|res| res.unwrap()) - .filter(|e| e.file_type().unwrap().is_dir()) - .map(|e| LangPath::new(self, e.file_name().to_str().unwrap())) - .collect() - } -} - -impl core::fmt::Debug for BasePath { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "{:?}", &self.cache) - } -} - -#[derive(Clone)] -pub struct LangPath { - base: BasePath, - /// `en`, `de_DE`, `fr_FR`, etc.. 
- language_identifier: String, - /// i18n_path - cache: PathBuf, -} - -impl LangPath { - fn new(base: &BasePath, language_identifier: &str) -> Self { - let cache = base.i18n_root_path().join(language_identifier); - if !cache.is_dir() { - panic!("language folder '{}' doesn't exist", language_identifier); - } - Self { - base: base.clone(), - language_identifier: language_identifier.to_owned(), - cache, - } - } - - pub fn base(&self) -> &BasePath { &self.base } - - pub fn language_identifier(&self) -> &str { &self.language_identifier } - - ///absolute path to `i18n_root_path` + `language_identifier` - pub fn i18n_path(&self) -> &Path { &self.cache } - - /// fragment or manifest file, based on a path - pub fn sub_path(&self, sub_path: &Path) -> PathBuf { self.cache.join(sub_path) } - - /// fragment or manifest file, based on a string without extension - pub fn file(&self, name_without_extension: &str) -> PathBuf { - self.cache - .join(format!("{}.{}", name_without_extension, LANG_EXTENSION)) - } - - /// return all fragments sub_pathes - pub(crate) fn fragments(&self) -> Result, std::io::Error> { - let mut result = vec![]; - recursive_fragments_paths_in_language(self, Path::new(""), &mut result)?; - Ok(result) - } -} - -//unwraps cant fail as they are in same Path -fn recursive_fragments_paths_in_language( - lpath: &LangPath, - subfolder: &Path, - result: &mut Vec, -) -> Result<(), std::io::Error> { - let manifest_path = PathBuf::from(&format!("{}.{}", LANG_MANIFEST_FILE, LANG_EXTENSION)); - let template_path = PathBuf::from(&format!("{}.{}", "template", LANG_EXTENSION)); - let search_dir = lpath.sub_path(subfolder); - for fragment_file in search_dir.read_dir()?.flatten() { - let file_type = fragment_file.file_type()?; - let full_path = fragment_file.path(); - let relative_path = full_path.strip_prefix(lpath.i18n_path()).unwrap(); - if file_type.is_dir() { - recursive_fragments_paths_in_language(lpath, relative_path, result)?; - } else if file_type.is_file() - && relative_path != manifest_path - && relative_path != template_path - { - result.push(relative_path.to_path_buf()); - } - } - Ok(()) -} - -impl core::fmt::Debug for LangPath { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!( - f, - "{:?}", - self.base - .relative_i18n_root_path - .join(&self.language_identifier) - ) - } -} diff --git a/voxygen/i18n/src/raw.rs b/voxygen/i18n/src/raw.rs index da814bfc1f..d869203582 100644 --- a/voxygen/i18n/src/raw.rs +++ b/voxygen/i18n/src/raw.rs @@ -1,113 +1,36 @@ -//! 
handle the loading of a `Language` -use crate::{ - path::{LangPath, LANG_EXTENSION, LANG_MANIFEST_FILE}, - Fonts, Language, LanguageMetadata, -}; -use deunicode::deunicode; -use hashbrown::hash_map::HashMap; -use ron::de::from_reader; +use crate::{Fonts, LanguageMetadata}; use serde::{Deserialize, Serialize}; -use std::{fs, path::PathBuf}; -/// Raw localization metadata from LANG_MANIFEST_FILE file +use std::str::FromStr; + +/// Localization metadata from manifest file /// See `Language` for more info on each attributes #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] -pub(crate) struct RawManifest { +pub(crate) struct Manifest { pub(crate) convert_utf8_to_ascii: bool, pub(crate) fonts: Fonts, pub(crate) metadata: LanguageMetadata, } -/// Raw localization data from one specific file -/// These structs are meant to be merged into a Language -#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] -pub(crate) struct RawFragment { - pub(crate) string_map: HashMap, - pub(crate) vector_map: HashMap>, +impl crate::assets::Asset for Manifest { + type Loader = crate::assets::RonLoader; + + const EXTENSION: &'static str = "ron"; } -pub(crate) struct RawLanguage { - pub(crate) manifest: RawManifest, - pub(crate) fragments: HashMap>, +#[derive(Clone)] +pub(crate) struct Resource { + pub(crate) src: String, } -pub(crate) fn load_manifest(path: &LangPath) -> Result { - let manifest_file = path.file(LANG_MANIFEST_FILE); - tracing::debug!(?manifest_file, "manifest loading"); - let f = fs::File::open(&manifest_file)?; - let manifest: RawManifest = from_reader(f)?; - // verify that the folder name `de_DE` matches the value inside the metadata! - assert_eq!( - manifest.metadata.language_identifier, - path.language_identifier() - ); - Ok(manifest) +impl FromStr for Resource { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { Ok(Self { src: s.to_owned() }) } } -pub(crate) fn load_raw_language( - path: &LangPath, - manifest: RawManifest, -) -> Result, common_assets::BoxedError> { - //get List of files - let files = path.fragments()?; +impl crate::assets::Asset for Resource { + type Loader = crate::assets::loader::ParseLoader; - // Walk through each file in the directory - let mut fragments = HashMap::new(); - for sub_path in files { - let f = fs::File::open(path.sub_path(&sub_path))?; - let fragment = from_reader(f)?; - fragments.insert(sub_path, fragment); - } - - Ok(RawLanguage { - manifest, - fragments, - }) -} - -impl From> for Language { - fn from(raw: RawLanguage) -> Self { - let mut string_map = HashMap::new(); - let mut vector_map = HashMap::new(); - - for (_, fragment) in raw.fragments { - string_map.extend(fragment.string_map); - vector_map.extend(fragment.vector_map); - } - - let convert_utf8_to_ascii = raw.manifest.convert_utf8_to_ascii; - - // Update the text if UTF-8 to ASCII conversion is enabled - if convert_utf8_to_ascii { - for value in string_map.values_mut() { - *value = deunicode(value); - } - - for value in vector_map.values_mut() { - *value = value.iter().map(|s| deunicode(s)).collect(); - } - } - let mut metadata = raw.manifest.metadata; - metadata.language_name = deunicode(&metadata.language_name); - - Self { - string_map, - vector_map, - convert_utf8_to_ascii, - fonts: raw.manifest.fonts, - metadata, - } - } -} - -impl common_assets::Asset for RawManifest { - type Loader = common_assets::RonLoader; - - const EXTENSION: &'static str = LANG_EXTENSION; -} - -impl common_assets::Asset for RawFragment { - type Loader = common_assets::RonLoader; - - 
const EXTENSION: &'static str = LANG_EXTENSION; + const EXTENSION: &'static str = "ftl"; } diff --git a/voxygen/i18n/src/stats.rs b/voxygen/i18n/src/stats.rs deleted file mode 100644 index 65b6e5cc1b..0000000000 --- a/voxygen/i18n/src/stats.rs +++ /dev/null @@ -1,199 +0,0 @@ -use crate::{ - gitfragments::{LocalizationEntryState, LocalizationState, ALL_LOCALIZATION_STATES}, - raw::RawLanguage, -}; -use hashbrown::HashMap; -use std::path::PathBuf; - -#[derive(Default, Debug, PartialEq)] -pub(crate) struct LocalizationStats { - pub(crate) uptodate_entries: usize, - pub(crate) notfound_entries: usize, - pub(crate) unused_entries: usize, - pub(crate) outdated_entries: usize, - pub(crate) errors: usize, -} - -pub(crate) struct LocalizationAnalysis { - language_identifier: String, - pub(crate) data: HashMap, Vec<(PathBuf, String, Option)>>, -} - -impl LocalizationStats { - /// Calculate key count that actually matter for the status of the - /// translation Unused entries don't break the game - pub(crate) fn get_real_entry_count(&self) -> usize { - self.outdated_entries + self.notfound_entries + self.errors + self.uptodate_entries - } -} - -impl LocalizationAnalysis { - pub(crate) fn new(language_identifier: &str) -> Self { - let mut data = HashMap::new(); - for key in ALL_LOCALIZATION_STATES.iter() { - data.insert(*key, vec![]); - } - Self { - language_identifier: language_identifier.to_owned(), - data, - } - } - - fn show( - &self, - state: Option, - ref_language: &RawLanguage, - be_verbose: bool, - output: &mut W, - ) { - let entries = self.data.get(&state).unwrap_or_else(|| { - panic!( - "called on invalid state: {}", - LocalizationState::print(&state) - ) - }); - if entries.is_empty() { - return; - } - writeln!(output, "\n\t[{}]", LocalizationState::print(&state)).unwrap(); - for (path, key, commit_id) in entries { - if be_verbose { - let our_commit = LocalizationAnalysis::print_commit(commit_id); - let ref_commit = ref_language - .fragments - .get(path) - .and_then(|entry| entry.string_map.get(key)) - .and_then(|s| s.commit_id) - .map(|s| format!("{}", s)) - .unwrap_or_else(|| "None".to_owned()); - writeln!(output, "{:60}| {:40} | {:40}", key, our_commit, ref_commit).unwrap(); - } else { - writeln!(output, "{}", key).unwrap(); - } - } - } - - fn csv(&self, state: Option, output: &mut W) { - let entries = self - .data - .get(&state) - .unwrap_or_else(|| panic!("called on invalid state: {:?}", state)); - for (path, key, commit_id) in entries { - let our_commit = LocalizationAnalysis::print_commit(commit_id); - writeln!( - output, - "{},{:?},{},{},{}", - self.language_identifier, - path, - key, - LocalizationState::print(&state), - our_commit - ) - .unwrap(); - } - } - - fn print_commit(commit_id: &Option) -> String { - commit_id - .map(|s| format!("{}", s)) - .unwrap_or_else(|| "None".to_owned()) - } -} - -pub(crate) fn print_translation_stats( - language_identifier: &str, - ref_language: &RawLanguage, - stats: &LocalizationStats, - state_map: &LocalizationAnalysis, - be_verbose: bool, -) { - let real_entry_count = stats.get_real_entry_count() as f32; - let uptodate_percent = (stats.uptodate_entries as f32 / real_entry_count) * 100_f32; - let outdated_percent = (stats.outdated_entries as f32 / real_entry_count) * 100_f32; - let untranslated_percent = - ((stats.notfound_entries + stats.errors) as f32 / real_entry_count) * 100_f32; - - // Display - if be_verbose { - println!( - "\n{:60}| {:40} | {:40}", - "Key name", language_identifier, ref_language.manifest.metadata.language_identifier, - ); - 
} else { - println!("\nKey name"); - } - - for state in &ALL_LOCALIZATION_STATES { - if state == &Some(LocalizationState::UpToDate) { - continue; - } - state_map.show(*state, ref_language, be_verbose, &mut std::io::stdout()); - } - - println!( - "\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries", - stats.uptodate_entries, - stats.outdated_entries, - stats.unused_entries, - stats.notfound_entries, - stats.errors, - ); - - println!( - "{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n", - uptodate_percent, outdated_percent, untranslated_percent, - ); -} - -pub(crate) fn print_csv_stats(state_map: &LocalizationAnalysis, output: &mut W) { - for state in &ALL_LOCALIZATION_STATES { - state_map.csv(*state, output); - } -} - -pub(crate) fn print_overall_stats( - analysis: HashMap, -) { - let mut overall_uptodate_entry_count = 0; - let mut overall_outdated_entry_count = 0; - let mut overall_untranslated_entry_count = 0; - let mut overall_real_entry_count = 0; - - println!("-----------------------------------------------------------------------------"); - println!("Overall Translation Status"); - println!("-----------------------------------------------------------------------------"); - println!( - "{:12}| {:8} | {:8} | {:8} | {:8} | {:8}", - "", "up-to-date", "outdated", "untranslated", "unused", "errors", - ); - - let mut i18n_stats: Vec<(&String, &(_, LocalizationStats))> = analysis.iter().collect(); - i18n_stats.sort_by_key(|(_, (_, v))| v.notfound_entries); - - for (path, (_, test_result)) in i18n_stats { - let LocalizationStats { - uptodate_entries: uptodate, - outdated_entries: outdated, - unused_entries: unused, - notfound_entries: untranslated, - errors, - } = test_result; - overall_uptodate_entry_count += uptodate; - overall_outdated_entry_count += outdated; - overall_untranslated_entry_count += untranslated; - overall_real_entry_count += test_result.get_real_entry_count(); - - println!( - "{:12}|{:8} |{:6} |{:8} |{:6} |{:8}", - path, uptodate, outdated, untranslated, unused, errors, - ); - } - - println!( - "\n{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated", - (overall_uptodate_entry_count as f32 / overall_real_entry_count as f32) * 100_f32, - (overall_outdated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32, - (overall_untranslated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32, - ); - println!("-----------------------------------------------------------------------------\n"); -} diff --git a/voxygen/i18n/src/verification.rs b/voxygen/i18n/src/verification.rs deleted file mode 100644 index fa784b9db5..0000000000 --- a/voxygen/i18n/src/verification.rs +++ /dev/null @@ -1,34 +0,0 @@ -use crate::path::{BasePath, LangPath, LANG_MANIFEST_FILE}; - -use crate::{raw, REFERENCE_LANG}; - -/// Test to verify all languages that they are VALID and loadable, without -/// need of git just on the local assets folder -pub fn verify_all_localizations(path: &BasePath) { - let ref_i18n_path = path.i18n_path(REFERENCE_LANG); - let ref_i18n_manifest_path = ref_i18n_path.file(LANG_MANIFEST_FILE); - assert!( - ref_i18n_manifest_path.is_file(), - "Reference language manifest file doesn't exist, something is wrong!" - ); - let i18n_directories = path.i18n_directories(); - // This simple check ONLY guarantees that an arbitrary minimum of translation - // files exists. It's just to notice unintentional deletion of all - // files, or modifying the paths. 
In case you want to delete all - // language you have to adjust this number: - assert!( - i18n_directories.len() > 5, - "have less than 5 translation folders, arbitrary minimum check failed. Maybe the i18n \ - folder is empty?" - ); - for i18n_directory in i18n_directories { - println!("verifying {:?}", i18n_directory); - // Walk through each files and try to load them - verify_localization_directory(&i18n_directory); - } -} - -fn verify_localization_directory(path: &LangPath) { - let manifest = raw::load_manifest(path).expect("error accessing manifest file"); - raw::load_raw_language(path, manifest).expect("error accessing fragment file"); -}
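For reference, the conversion performed by `i18n-migrate` (per `to_pattern`/`convert` in `voxygen/i18n/src/bin/migrate.rs` above) is: dot-separated RON keys become dash-separated Fluent message ids, `{var}` interpolations become `{ $var }` placeables, and `vector_map` variations become `.a0`, `.a1`, … attributes. A before/after sketch — the key and strings are hypothetical; only the rewrite rules come from the code above:

```rust
fn main() {
    // One old-style RON fragment entry plus a `vector_map` entry (hypothetical).
    let old_ron = r#"(
    string_map: {
        "hud.chat.online_msg": "{name} is now online",
    },
    vector_map: {
        "npc.speech.villager": ["Hello, traveller!", "Nice weather today."],
    },
)"#;

    // What `i18n-migrate` is expected to emit for it: dash-separated ids,
    // `{ $var }` placeables, and `.aN` attributes holding the variations.
    let new_ftl = "\
hud-chat-online_msg = { $name } is now online
npc-speech-villager =
    .a0 = Hello, traveller!
    .a1 = Nice weather today.
";

    println!("{old_ron}\n-- becomes -->\n{new_ftl}");
}
```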
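On the consumer side, the reworked `LocalizationGuard` exposes `get_msg`, `get_msg_ctx` and `get_variation` (plus their `try_*` counterparts) in place of the old string-map `get`/`get_variation` lookups. A short usage sketch, assuming the crate is imported under its default `veloren_voxygen_i18n` name; the message keys and the `"en"` specifier are illustrative assumptions, not taken from this diff:

```rust
use veloren_voxygen_i18n::{fluent_args, list_localizations, LocalizationHandle};

fn main() {
    // Enumerate every language that ships a `_manifest.ron`.
    for meta in list_localizations() {
        println!("{} ({})", meta.language_name, meta.language_identifier);
    }

    // Load a language (assumed here to be the reference language) and grab a read guard.
    let handle = LocalizationHandle::load_expect("en");
    let i18n = handle.read();

    // Plain message lookup: Fluent ids use `-` as separator.
    let title = i18n.get_msg("hud-map-title");

    // Message with arguments, built with the re-exported `fluent_args!` macro.
    let args = fluent_args! { "name" => "Sahagin" };
    let online = i18n.get_msg_ctx("hud-chat-online_msg", &args);

    // One of the `.aN` variations, picked from a seed.
    let rumor = i18n.get_variation("npc-speech-villager", 42);

    println!("{title} | {online} | {rumor}");
}
```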