From 847ee1c1b1480292fb556a3f269e4dbf386b262d Mon Sep 17 00:00:00 2001 From: juliancoffee Date: Tue, 9 Aug 2022 13:33:34 +0300 Subject: [PATCH] Implement i18n-tooling * Working csv export * Working i18n-check --- .gitlab/scripts/translation.sh | 9 +- Cargo.lock | 2 + common/assets/src/walk.rs | 60 +++++++++++- voxygen/i18n/Cargo.toml | 11 +++ voxygen/i18n/src/analysis.rs | 148 +++++++++++++++++++++++++++++ voxygen/i18n/src/bin/i18n-check.rs | 39 ++++++++ voxygen/i18n/src/error.rs | 75 +++++++++++++++ voxygen/i18n/src/lib.rs | 124 +++++++++++------------- 8 files changed, 391 insertions(+), 77 deletions(-) create mode 100644 voxygen/i18n/src/analysis.rs create mode 100644 voxygen/i18n/src/bin/i18n-check.rs create mode 100644 voxygen/i18n/src/error.rs diff --git a/.gitlab/scripts/translation.sh b/.gitlab/scripts/translation.sh index e6393c5933..4410ae57bb 100755 --- a/.gitlab/scripts/translation.sh +++ b/.gitlab/scripts/translation.sh @@ -1,3 +1,8 @@ #!/bin/bash -export VELOREN_ASSETS="$(pwd)/assets" -time cargo test --package veloren-voxygen-i18n --lib test_all_localizations -- --nocapture --ignored +VELOREN_ASSETS="$(pwd)/assets" +export VELOREN_ASSETS + +time cargo test --package veloren-voxygen-i18n \ + --lib test_all_localizations \ + --features="stat" \ + -- --nocapture --ignored diff --git a/Cargo.lock b/Cargo.lock index c5b7100f38..4376986dca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6908,9 +6908,11 @@ dependencies = [ name = "veloren-voxygen-i18n" version = "0.13.0" dependencies = [ + "clap 3.1.10", "deunicode", "fluent", "fluent-bundle", + "fluent-syntax", "hashbrown 0.12.0", "intl-memoizer", "ron 0.7.0", diff --git a/common/assets/src/walk.rs b/common/assets/src/walk.rs index 976e325f85..b1c7ecbf0c 100644 --- a/common/assets/src/walk.rs +++ b/common/assets/src/walk.rs @@ -3,16 +3,54 @@ use std::{ path::{Path, PathBuf}, }; -/// Read `walk_tree` -#[derive(Debug)] +/// Represent tree of directory, result of [generate_tree]. 
+/// +/// Note that paths are always relative to root it was generated from. +#[derive(Debug, Clone)] pub enum Walk { + /// Represents file node, path is relative to directory root Walk was + /// generated from. File(PathBuf), + /// Represents directory subtree, path is relative to directory root Walk + /// was generated from. Dir { path: PathBuf, content: Vec<Walk> }, } -/// Utility function to build a tree of directory, recursively -/// -/// At first iteration, use path to your directory as dir and root +impl Walk { + /// Utility function to build a tree of directory, recursively + /// + /// Path needs to be absolute. + pub fn generate(root: &Path) -> io::Result<Self> { + let trees = walk_tree(root, root); + Ok(Walk::Dir { + path: Path::new("").to_owned(), + content: trees?, + }) + } + + // TODO: implement iterator? + pub fn for_each_file<F>(&self, root: &Path, f: &mut F) + where + F: FnMut(&Path), + { + match self { + Self::File(filepath) => { + let path = root.join(filepath); + f(&path); + }, + Self::Dir { + path: _, + content: files, + } => { + for path in files { + path.for_each_file(root, f); + } + }, + } + } +} + +/// Helper function to [Walk::generate()], prefer using it instead. pub fn walk_tree(dir: &Path, root: &Path) -> io::Result<Vec<Walk>> { let mut buff = Vec::new(); for entry in std::fs::read_dir(dir)? 
{ @@ -37,3 +75,15 @@ pub fn walk_tree(dir: &Path, root: &Path) -> io::Result<Vec<Walk>> { Ok(buff) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn trie() { + let root = crate::find_root().unwrap(); + let assets = Path::new(&root).join("assets/"); + Walk::generate(&assets).unwrap(); + } +} diff --git a/voxygen/i18n/Cargo.toml b/voxygen/i18n/Cargo.toml index 89c784aa73..c5ff1c56d7 100644 --- a/voxygen/i18n/Cargo.toml +++ b/voxygen/i18n/Cargo.toml @@ -19,3 +19,14 @@ fluent-bundle = { git = "https://github.com/juliancoffee/fluent-rs.git", branch hashbrown = { version = "0.12", features = ["serde", "nightly"] } deunicode = "1.0" tracing = "0.1" +# Bin +clap = { version = "3.1.8", features = ["suggestions", "std"], default-features = false, optional = true } +fluent-syntax = { git = "https://github.com/juliancoffee/fluent-rs.git", branch = "patched"} + +[[bin]] +name = "i18n-check" +required-features = ["bin"] + +[features] +bin = ["clap"] +stat = [] diff --git a/voxygen/i18n/src/analysis.rs b/voxygen/i18n/src/analysis.rs new file mode 100644 index 0000000000..396bc1b292 --- /dev/null +++ b/voxygen/i18n/src/analysis.rs @@ -0,0 +1,148 @@ +use crate::{assets::Walk, error::ResourceErr}; +use fluent_syntax::{ast, parser}; +use std::{ + fs, io, + path::{Path, PathBuf}, +}; + +/// Generate tree of i18n files, path should be absolute. +/// We assume that all i18n directories should have the same tree structure, +/// so that we can generate tree once and reuse for all languages. +fn i18n_tree(reference: &Path) -> io::Result<Walk> { Walk::generate(reference) } + +/// Grab keys from one file +fn keys_from_file(filepath: &Path) -> Vec<MsgId> { + use ast::Entry; + + let file = format!("{}", filepath.display()); + + let content = match fs::read_to_string(filepath) { + Ok(content) => content, + Err(e) => { + eprintln!("failed to read from {filepath:?}. 
err={e}"); + return Vec::new(); + }, + }; + + let ast = parser::parse(&*content).unwrap_or_else(|(_parsed, errs)| { + panic!( + "{}", + ResourceErr::parsing_error(errs, file.clone(), &content) + ) + }); + let mut keys = Vec::new(); + for entry in ast.body { + match entry { + Entry::Message(m) => { + keys.push(MsgId { + key: m.id.name.to_owned(), + file: Some(file.clone()), + }); + }, + Entry::Term(_) + | Entry::Comment(_) + | Entry::GroupComment(_) + | Entry::ResourceComment(_) + | Entry::Junk { .. } => { + // these are not part of "public" API so do nothing + // comments linked to message are part of Message entry + // and we are not interested in global comments either, for now + }, + } + } + keys +} + +/// Grab keys from one language sitting at `from`. +/// +/// Tree of files assumed to have only .ftl files. +fn keys(from: &Path, tree: &Walk) -> Vec<MsgId> { + let mut keys = Vec::new(); + + tree.for_each_file(from, &mut |filepath| { + if !filepath.ends_with("_manifest.ron") { + keys.extend(keys_from_file(filepath)); + } + }); + + keys +} + +// TODO: +// Add versioning +// TODO: +// Do something with attributes? +// +// For some messages it makes sense to require that all attributes +// should match ones in reference language. +// For some it doesn't as of now. +#[derive(Clone, Debug)] +pub struct MsgId { + pub key: String, + pub file: Option<String>, +} + +// TODO: +// Add versioning +#[derive(Debug)] +pub struct Stats { + pub up_to_date: Vec<MsgId>, + pub not_found: Vec<MsgId>, + pub unused: Vec<MsgId>, +} + +pub struct ReferenceLanguage { + /// All keys. + pub keys: Vec<MsgId>, + /// Cached tree of files. + tree: Walk, +} + +impl ReferenceLanguage { + /// Generate reference language, path should be absolute. 
+ pub fn at(path: &Path) -> Self { + let tree = i18n_tree(path) + .unwrap_or_else(|e| panic!("{path:?}\nfailed to build file tree\n{e:?}")); + let keys = keys(path, &tree); + Self { keys, tree } + } + + /// Compare with other language + pub fn compare_with(&self, lang: &Language) -> Stats { + let keys = keys(&lang.path, &self.tree); + + let mut stats = Stats { + up_to_date: Vec::new(), + not_found: Vec::new(), + unused: Vec::new(), + }; + + for ref_key in &self.keys { + if let Some(key) = keys.iter().find(|MsgId { key, .. }| &ref_key.key == key) { + stats.up_to_date.push(key.clone()); + } else { + stats.not_found.push(MsgId { + key: ref_key.key.clone(), + file: None, + }); + } + } + + for key in &keys { + if !self + .keys + .iter() + .any(|MsgId { key: ref_key, .. }| ref_key == &key.key) + { + stats.unused.push(key.clone()) + } + } + + stats + } +} + +pub struct Language { + pub code: String, + pub path: PathBuf, +} diff --git a/voxygen/i18n/src/bin/i18n-check.rs b/voxygen/i18n/src/bin/i18n-check.rs new file mode 100644 index 0000000000..002d76e9a2 --- /dev/null +++ b/voxygen/i18n/src/bin/i18n-check.rs @@ -0,0 +1,39 @@ +use clap::{Arg, Command}; +use common_assets::find_root; +use veloren_voxygen_i18n::{ + analysis::{Language, ReferenceLanguage}, + REFERENCE_LANG, +}; + +fn main() { + let args = Command::new("i18n-check") + .about("Tool to check your Veloren localisation for correctness and missing keys") + .arg( + Arg::new("CODE") + .required(true) + .help("Run diagnostic for specific language code (de_DE, for example)"), + ) + .get_matches(); + + let root = find_root().unwrap(); + let i18n_directory = root.join("assets/voxygen/i18n"); + let reference = ReferenceLanguage::at(&i18n_directory.join(REFERENCE_LANG)); + + let code = args.value_of("CODE").expect("arg is required"); + let lang = Language { + code: code.to_owned(), + path: root.join(i18n_directory.join(code)), + }; + let stats = reference.compare_with(&lang); + println!("\t[Not found]: {}", 
stats.not_found.len()); + for key in stats.not_found { + let key = &key.key; + println!("{key}"); + } + + println!("\n\t[Unused]: {}", stats.unused.len()); + for key in stats.unused { + let key = &key.key; + println!("{key}") + } +} diff --git a/voxygen/i18n/src/error.rs b/voxygen/i18n/src/error.rs new file mode 100644 index 0000000000..99f47bada3 --- /dev/null +++ b/voxygen/i18n/src/error.rs @@ -0,0 +1,75 @@ +use fluent_syntax::parser::ParserError; +use std::{error::Error, fmt, ops::Range}; + +#[derive(Debug)] +struct Pos { + line: usize, + character: usize, +} + +impl fmt::Display for Pos { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{};{}", self.line, self.character) + } +} + +fn unspan(src: &str, span: Range<usize>) -> Range<Pos> { + let count = |idx| { + let mut line = 1; + let mut character = 1; + for ch in src.bytes().take(idx) { + // Count characters + character += 1; + + // Count newlines + if ch == b'\n' { + line += 1; + // If found new line, reset character count + character = 1; + } + } + Pos { line, character } + }; + let Range { start, end } = span; + count(start)..count(end) +} + +// TODO: +// Ideally we wouldn't write this code, check this issue in fluent-rs. 
+// https://github.com/projectfluent/fluent-rs/issues/176 +#[derive(Debug)] +pub enum ResourceErr { + ParsingError { + #[allow(dead_code)] // false-positive + file: String, + #[allow(dead_code)] // false-positive + err: String, + }, + BundleError(String), +} + +impl ResourceErr { + pub fn parsing_error(errs: Vec<ParserError>, file: String, src: &str) -> Self { + let errs = errs + .into_iter() + .map(|e| { + let Range { + start: from, + end: to, + } = unspan(src, e.pos); + format!("{from}..{to}, kind {:?}", e.kind) + }) + .collect::<Vec<_>>(); + + Self::ParsingError { + file, + err: format!("{errs:?}"), + } + } +} + +impl fmt::Display for ResourceErr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{self:#?}") } +} + +impl Error for ResourceErr {} diff --git a/voxygen/i18n/src/lib.rs b/voxygen/i18n/src/lib.rs index 1fff103e18..5c36c7eb6f 100644 --- a/voxygen/i18n/src/lib.rs +++ b/voxygen/i18n/src/lib.rs @@ -1,5 +1,11 @@ +mod error; mod raw; +use error::ResourceErr; + +#[cfg(any(feature = "bin", feature = "stat"))] +pub mod analysis; + use fluent_bundle::{bundle::FluentBundle, FluentResource}; use intl_memoizer::concurrent::IntlLangMemoizer; use unic_langid::LanguageIdentifier; @@ -147,74 +153,10 @@ impl assets::Compound for Language { match cache.load(id) { Ok(handle) => { - use std::{error::Error, fmt, ops::Range}; - - #[derive(Debug)] - struct Pos { - #[allow(dead_code)] // false-positive - line: usize, - #[allow(dead_code)] // false-positive - character: usize, - } - - fn unspan(src: &str, span: Range<usize>) -> Range<Pos> { - let count = |idx| { - let mut line = 1; - let mut character = 1; - for ch in src.bytes().take(idx) { - // Count characters - character += 1; - - // Count newlines - if ch == b'\n' { - line += 1; - // If found new line, reset character count - character = 1; - } - } - Pos { line, character } - }; - let Range { start, end } = span; - count(start)..count(end) - } - - // TODO: - // better error handling? 
- #[derive(Debug)] - enum ResourceErr { - ParsingError { - #[allow(dead_code)] // false-positive - file: String, - #[allow(dead_code)] // false-positive - err: String, - }, - BundleError(String), - } - - impl fmt::Display for ResourceErr { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{self:?}") - } - } - - impl Error for ResourceErr {} - let source: &raw::Resource = &*handle.read(); let resource = FluentResource::try_new(source.src.clone()).map_err(|(_ast, errs)| { - let file = id.to_owned(); - let errs = errs - .into_iter() - .map(|e| { - let pos = unspan(&source.src, e.pos); - format!("{pos:?}, kind {:?}", e.kind) - }) - .collect::<Vec<_>>(); - - ResourceErr::ParsingError { - file, - err: format!("{errs:?}"), - } + ResourceErr::parsing_error(errs, id.to_owned(), &source.src) })?; bundle @@ -505,14 +447,15 @@ mod tests { #[test] #[ignore] + #[cfg(feature = "stat")] // Generate translation stats fn test_all_localizations() { - // FIXME (i18n translation stats): - use std::{fs, io::Write}; + use analysis::{Language, ReferenceLanguage}; + use assets::find_root; + use std::{fs, io::Write, path::Path}; - let output = assets::find_root() - .unwrap() - .join("translation_analysis.csv"); + let root = find_root().unwrap(); + let output = root.join("translation_analysis.csv"); let mut f = fs::File::create(output).expect("couldn't write csv file"); writeln!( @@ -520,5 +463,46 @@ mod tests { "country_code,file_name,translation_key,status,git_commit" ) .unwrap(); + + let i18n_directory = root.join("assets/voxygen/i18n"); + let reference = ReferenceLanguage::at(&i18n_directory.join(REFERENCE_LANG)); + + let list = list_localizations(); + let file = |filename: Option<String>| { + let file = filename + .as_ref() + .map(|s| Path::new(s)) + .and_then(|p| p.file_name()) + .and_then(|s| s.to_str()) + .unwrap_or("None"); + + format!("{file}") + }; + for meta in list { + let code = meta.language_identifier; + let lang = Language { + code: code.clone(), + path: 
i18n_directory.join(code.clone()), + }; + let stats = reference.compare_with(&lang); + for key in stats.up_to_date { + let code = &code; + let filename = &file(key.file); + let key = &key.key; + writeln!(f, "{code},{filename},{key},UpToDate,None").unwrap(); + } + for key in stats.not_found { + let code = &code; + let filename = &file(key.file); + let key = &key.key; + writeln!(f, "{code},{filename},{key},NotFound,None").unwrap(); + } + for key in stats.unused { + let code = &code; + let filename = &file(key.file); + let key = &key.key; + writeln!(f, "{code},{filename},{key},Unused,None").unwrap(); + } + } } }