Implement i18n-tooling

* Working csv export
* Working i18n-check
This commit is contained in:
juliancoffee 2022-08-09 13:33:34 +03:00
parent 06d827af11
commit 847ee1c1b1
8 changed files with 391 additions and 77 deletions

View File

@ -1,3 +1,8 @@
#!/bin/bash #!/bin/bash
export VELOREN_ASSETS="$(pwd)/assets" VELOREN_ASSETS="$(pwd)/assets"
time cargo test --package veloren-voxygen-i18n --lib test_all_localizations -- --nocapture --ignored export VELOREN_ASSETS
time cargo test --package veloren-voxygen-i18n \
--lib test_all_localizations \
--features="stat" \
-- --nocapture --ignored

2
Cargo.lock generated
View File

@ -6908,9 +6908,11 @@ dependencies = [
name = "veloren-voxygen-i18n" name = "veloren-voxygen-i18n"
version = "0.13.0" version = "0.13.0"
dependencies = [ dependencies = [
"clap 3.1.10",
"deunicode", "deunicode",
"fluent", "fluent",
"fluent-bundle", "fluent-bundle",
"fluent-syntax",
"hashbrown 0.12.0", "hashbrown 0.12.0",
"intl-memoizer", "intl-memoizer",
"ron 0.7.0", "ron 0.7.0",

View File

@ -3,16 +3,54 @@ use std::{
path::{Path, PathBuf}, path::{Path, PathBuf},
}; };
/// Read `walk_tree` /// Represent tree of directory, result of [generate_tree].
#[derive(Debug)] ///
/// Note that paths are always relative to root it was generated from.
#[derive(Debug, Clone)]
pub enum Walk { pub enum Walk {
/// Represents file node, path is relative to directory root Walk was
/// generated from.
File(PathBuf), File(PathBuf),
/// Represents directory subtree, path is relative to directory root Walk
/// was generated from.
Dir { path: PathBuf, content: Vec<Walk> }, Dir { path: PathBuf, content: Vec<Walk> },
} }
impl Walk {
/// Utility function to build a tree of directory, recursively /// Utility function to build a tree of directory, recursively
/// ///
/// At first iteration, use path to your directory as dir and root /// Path needs to be absolute.
pub fn generate(root: &Path) -> io::Result<Walk> {
let trees = walk_tree(root, root);
Ok(Walk::Dir {
path: Path::new("").to_owned(),
content: trees?,
})
}
// TODO: implement iterator?
pub fn for_each_file<F>(&self, root: &Path, f: &mut F)
where
F: FnMut(&Path),
{
match self {
Self::File(filepath) => {
let path = root.join(filepath);
f(&path);
},
Self::Dir {
path: _,
content: files,
} => {
for path in files {
path.for_each_file(root, f);
}
},
}
}
}
/// Helper function to [Walk::generate()], prefer using it instead.
pub fn walk_tree(dir: &Path, root: &Path) -> io::Result<Vec<Walk>> { pub fn walk_tree(dir: &Path, root: &Path) -> io::Result<Vec<Walk>> {
let mut buff = Vec::new(); let mut buff = Vec::new();
for entry in std::fs::read_dir(dir)? { for entry in std::fs::read_dir(dir)? {
@ -37,3 +75,15 @@ pub fn walk_tree(dir: &Path, root: &Path) -> io::Result<Vec<Walk>> {
Ok(buff) Ok(buff)
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: generating the tree of the repository's `assets/`
    /// directory must not fail.
    #[test]
    fn trie() {
        let repo_root = crate::find_root().unwrap();
        let assets_dir = Path::new(&repo_root).join("assets/");
        let tree = Walk::generate(&assets_dir);
        tree.unwrap();
    }
}

View File

@ -19,3 +19,14 @@ fluent-bundle = { git = "https://github.com/juliancoffee/fluent-rs.git", branch
hashbrown = { version = "0.12", features = ["serde", "nightly"] } hashbrown = { version = "0.12", features = ["serde", "nightly"] }
deunicode = "1.0" deunicode = "1.0"
tracing = "0.1" tracing = "0.1"
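# Bin: clap is optional and only pulled in by the "bin" feature (i18n-check).
# fluent-syntax is a regular dependency, it is also used by the library itself.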
clap = { version = "3.1.8", features = ["suggestions", "std"], default-features = false, optional = true }
fluent-syntax = { git = "https://github.com/juliancoffee/fluent-rs.git", branch = "patched"}
[[bin]]
name = "i18n-check"
required-features = ["bin"]
[features]
bin = ["clap"]
stat = []

View File

@ -0,0 +1,148 @@
use crate::{assets::Walk, error::ResourceErr};
use fluent_syntax::{ast, parser};
use std::{
fs, io,
path::{Path, PathBuf},
};
/// Generate the tree of i18n files for the language directory at `reference`;
/// the path should be absolute.
///
/// We assume that all i18n directories should have the same tree structure,
/// so that we can generate the tree once and reuse it for all languages.
///
/// This is a thin wrapper around [`Walk::generate`]; any I/O error it reports
/// is returned unchanged.
fn i18n_tree(reference: &Path) -> io::Result<Walk> { Walk::generate(reference) }
/// Grab keys from one file.
///
/// Every message entry of the Fluent file at `filepath` yields one [`MsgId`]
/// carrying the message id and the displayed path of the file.
///
/// A file that cannot be read is reported on stderr and contributes no keys.
///
/// # Panics
///
/// Panics with a [`ResourceErr`] description if the content is not valid
/// Fluent syntax.
fn keys_from_file(filepath: &Path) -> Vec<MsgId> {
    use ast::Entry;

    let file = filepath.display().to_string();

    let content = match fs::read_to_string(filepath) {
        Ok(text) => text,
        Err(e) => {
            eprintln!("failed to read from {filepath:?}. err={e}");
            return Vec::new();
        },
    };

    let resource = match parser::parse(content.as_str()) {
        Ok(resource) => resource,
        Err((_partial, errs)) => panic!(
            "{}",
            ResourceErr::parsing_error(errs, file.clone(), &content)
        ),
    };

    resource
        .body
        .into_iter()
        .filter_map(|entry| match entry {
            Entry::Message(message) => Some(MsgId {
                key: message.id.name.to_owned(),
                file: Some(file.clone()),
            }),
            // These are not part of the "public" API, so they produce no key.
            // Comments linked to a message are part of the Message entry,
            // and we are not interested in global comments either, for now.
            Entry::Term(_)
            | Entry::Comment(_)
            | Entry::GroupComment(_)
            | Entry::ResourceComment(_)
            | Entry::Junk { .. } => None,
        })
        .collect()
}
/// Grab keys from one language sitting at `from`.
///
/// `tree` describes which files to visit; each visited path is `from` joined
/// with the path stored in the tree. Files named `_manifest.ron` are skipped.
///
/// Tree of files assumed to have only .ftl files.
fn keys(from: &Path, tree: &Walk) -> Vec<MsgId> {
    let mut collected = Vec::new();
    let mut visit = |filepath: &Path| {
        // The manifest is not a Fluent resource, so it has no keys to offer.
        if filepath.ends_with("_manifest.ron") {
            return;
        }
        collected.extend(keys_from_file(filepath));
    };
    tree.for_each_file(from, &mut visit);
    collected
}
// TODO:
// Add versioning
// TODO:
// Do something with attributes?
//
// For some messages it makes sense to require that all attributes
// should match ones in reference language.
// For some it doesn't as of now.
/// Identifier of a single localisation message, optionally together with the
/// file it was read from.
#[derive(Clone, Debug)]
pub struct MsgId {
/// Message id, as named in the Fluent file.
pub key: String,
/// Displayed path of the file the key was read from.
///
/// `None` for entries that `ReferenceLanguage::compare_with` reports as not
/// found, since there is no file in the compared language to point at.
pub file: Option<String>,
}
// TODO:
// Add versioning
/// Result of comparing one language with the reference language.
#[derive(Debug)]
pub struct Stats {
/// Keys of the reference language that also exist in the compared language;
/// each entry is the compared language's `MsgId`.
pub up_to_date: Vec<MsgId>,
/// Keys of the reference language that are missing from the compared
/// language; `file` is always `None` for these.
pub not_found: Vec<MsgId>,
/// Keys of the compared language that the reference language does not have.
pub unused: Vec<MsgId>,
}
/// The language every other language is compared against.
///
/// Built with `ReferenceLanguage::at`, which reads the keys once and keeps the
/// file tree so it can be reused when reading the compared languages.
pub struct ReferenceLanguage {
/// All keys.
pub keys: Vec<MsgId>,
/// Cached tree of files.
tree: Walk,
}
impl ReferenceLanguage {
/// Generate reference language, path should be absolute.
pub fn at(path: &Path) -> Self {
let tree = i18n_tree(path)
.unwrap_or_else(|e| panic!("{path:?}\nfailed to build file tree\n{e:?}"));
let keys = keys(path, &tree);
Self { keys, tree }
}
/// Compare with other language
pub fn compare_with(&self, lang: &Language) -> Stats {
let keys = keys(&lang.path, &self.tree);
let mut stats = Stats {
up_to_date: Vec::new(),
not_found: Vec::new(),
unused: Vec::new(),
};
for ref_key in &self.keys {
if let Some(key) = keys.iter().find(|MsgId { key, .. }| &ref_key.key == key) {
stats.up_to_date.push(key.clone());
} else {
stats.not_found.push(MsgId {
key: ref_key.key.clone(),
file: None,
});
}
}
for key in &keys {
if !self
.keys
.iter()
.any(|MsgId { key: ref_key, .. }| ref_key == &key.key)
{
stats.unused.push(key.clone())
}
}
stats
}
}
/// A language to be compared with the reference language.
pub struct Language {
/// Language code (for example `de_DE`).
pub code: String,
/// Directory holding the files of this language; keys are read from files
/// below it when the language is compared with the reference.
pub path: PathBuf,
}

View File

@ -0,0 +1,39 @@
use clap::{Arg, Command};
use common_assets::find_root;
use veloren_voxygen_i18n::{
analysis::{Language, ReferenceLanguage},
REFERENCE_LANG,
};
/// Entry point of the `i18n-check` tool.
///
/// Compares the language selected by the required `CODE` argument with the
/// reference language and prints, in this order, the reference keys missing
/// from it and the keys it defines that the reference does not have.
///
/// Exits with status 1 if there is no directory for the requested language.
fn main() {
    let args = Command::new("i18n-check")
        .about("Tool to check your Veloren localisation for correctness and missing keys")
        .arg(
            Arg::new("CODE")
                .required(true)
                .help("Run diagnostic for specific language code (de_DE, for example)"),
        )
        .get_matches();

    let root = find_root().unwrap();
    let i18n_directory = root.join("assets/voxygen/i18n");

    let code = args.value_of("CODE").expect("arg is required");
    let lang = Language {
        code: code.to_owned(),
        // `i18n_directory` already starts at `root`, so it must not be joined
        // onto `root` a second time.
        path: i18n_directory.join(code),
    };
    // Without this check an unknown code would produce one read error per
    // file and then report every single key as missing.
    if !lang.path.is_dir() {
        eprintln!(
            "no localisation directory for `{code}` at {:?}",
            lang.path
        );
        std::process::exit(1);
    }

    let reference = ReferenceLanguage::at(&i18n_directory.join(REFERENCE_LANG));
    let stats = reference.compare_with(&lang);

    println!("\t[Not found]: {}", stats.not_found.len());
    for msg in &stats.not_found {
        println!("{}", msg.key);
    }

    println!("\n\t[Unused]: {}", stats.unused.len());
    for msg in &stats.unused {
        println!("{}", msg.key);
    }
}

75
voxygen/i18n/src/error.rs Normal file
View File

@ -0,0 +1,75 @@
use fluent_syntax::parser::ParserError;
use std::{error::Error, fmt, ops::Range};
/// Human-readable position inside a source text; both fields are 1-based.
#[derive(Debug)]
struct Pos {
    /// Line number, starting at 1.
    line: usize,
    /// Column within the line, counted in characters and starting at 1.
    character: usize,
}

impl fmt::Display for Pos {
    /// Formats the position as `line;character`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{};{}", self.line, self.character)
    }
}

/// Converts a byte-offset span within `src` into a pair of line/column
/// positions.
///
/// `span` holds byte offsets into `src`. Columns are counted in characters,
/// not bytes, so a multi-byte character advances the column by exactly one.
/// Offsets past the end of `src` resolve to the position just after the last
/// character.
fn unspan(src: &str, span: Range<usize>) -> Range<Pos> {
    let count = |idx: usize| {
        let mut line = 1;
        let mut character = 1;
        // Walk over whole characters that start before the byte offset;
        // iterating bytes here would over-count columns on non-ASCII text.
        for (_, ch) in src.char_indices().take_while(|&(offset, _)| offset < idx) {
            if ch == '\n' {
                // A newline starts the next line and resets the column.
                line += 1;
                character = 1;
            } else {
                character += 1;
            }
        }
        Pos { line, character }
    };

    let Range { start, end } = span;
    count(start)..count(end)
}
// TODO:
// Ideally we wouldn't write this code, check this issue in fluent-rs.
// https://github.com/projectfluent/fluent-rs/issues/176
/// Errors raised while turning localisation sources into usable resources.
#[derive(Debug)]
pub enum ResourceErr {
/// A source file is not valid Fluent syntax.
ParsingError {
/// Name of the offending file, as given to `ResourceErr::parsing_error`.
#[allow(dead_code)] // false-positive
file: String,
/// Debug-formatted list of the parser errors, each with its position and
/// kind.
#[allow(dead_code)] // false-positive
err: String,
},
/// NOTE(review): presumably raised when a parsed resource cannot be added to
/// a bundle; the producer of this variant is not visible here, confirm.
BundleError(String),
}
impl ResourceErr {
pub fn parsing_error(errs: Vec<ParserError>, file: String, src: &str) -> Self {
let errs = errs
.into_iter()
.map(|e| {
let Range {
start: from,
end: to,
} = unspan(src, e.pos);
format!("{from}..{to}, kind {:?}", e.kind)
})
.collect::<Vec<_>>();
Self::ParsingError {
file,
err: format!("{errs:?}"),
}
}
}
/// The user-facing form of the error is simply its pretty-printed (`{:#?}`)
/// debug representation.
impl fmt::Display for ResourceErr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{self:#?}") }
}
// Marker impl: lets `ResourceErr` be used wherever a `std::error::Error` is
// expected; all methods keep their default implementations.
impl Error for ResourceErr {}

View File

@ -1,5 +1,11 @@
mod error;
mod raw; mod raw;
use error::ResourceErr;
#[cfg(any(feature = "bin", feature = "stat"))]
pub mod analysis;
use fluent_bundle::{bundle::FluentBundle, FluentResource}; use fluent_bundle::{bundle::FluentBundle, FluentResource};
use intl_memoizer::concurrent::IntlLangMemoizer; use intl_memoizer::concurrent::IntlLangMemoizer;
use unic_langid::LanguageIdentifier; use unic_langid::LanguageIdentifier;
@ -147,74 +153,10 @@ impl assets::Compound for Language {
match cache.load(id) { match cache.load(id) {
Ok(handle) => { Ok(handle) => {
use std::{error::Error, fmt, ops::Range};
#[derive(Debug)]
struct Pos {
#[allow(dead_code)] // false-positive
line: usize,
#[allow(dead_code)] // false-positive
character: usize,
}
fn unspan(src: &str, span: Range<usize>) -> Range<Pos> {
let count = |idx| {
let mut line = 1;
let mut character = 1;
for ch in src.bytes().take(idx) {
// Count characters
character += 1;
// Count newlines
if ch == b'\n' {
line += 1;
// If found new line, reset character count
character = 1;
}
}
Pos { line, character }
};
let Range { start, end } = span;
count(start)..count(end)
}
// TODO:
// better error handling?
#[derive(Debug)]
enum ResourceErr {
ParsingError {
#[allow(dead_code)] // false-positive
file: String,
#[allow(dead_code)] // false-positive
err: String,
},
BundleError(String),
}
impl fmt::Display for ResourceErr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{self:?}")
}
}
impl Error for ResourceErr {}
let source: &raw::Resource = &*handle.read(); let source: &raw::Resource = &*handle.read();
let resource = let resource =
FluentResource::try_new(source.src.clone()).map_err(|(_ast, errs)| { FluentResource::try_new(source.src.clone()).map_err(|(_ast, errs)| {
let file = id.to_owned(); ResourceErr::parsing_error(errs, id.to_owned(), &source.src)
let errs = errs
.into_iter()
.map(|e| {
let pos = unspan(&source.src, e.pos);
format!("{pos:?}, kind {:?}", e.kind)
})
.collect::<Vec<_>>();
ResourceErr::ParsingError {
file,
err: format!("{errs:?}"),
}
})?; })?;
bundle bundle
@ -505,14 +447,15 @@ mod tests {
#[test] #[test]
#[ignore] #[ignore]
#[cfg(feature = "stat")]
// Generate translation stats // Generate translation stats
fn test_all_localizations() { fn test_all_localizations() {
// FIXME (i18n translation stats): use analysis::{Language, ReferenceLanguage};
use std::{fs, io::Write}; use assets::find_root;
use std::{fs, io::Write, path::Path};
let output = assets::find_root() let root = find_root().unwrap();
.unwrap() let output = root.join("translation_analysis.csv");
.join("translation_analysis.csv");
let mut f = fs::File::create(output).expect("couldn't write csv file"); let mut f = fs::File::create(output).expect("couldn't write csv file");
writeln!( writeln!(
@ -520,5 +463,46 @@ mod tests {
"country_code,file_name,translation_key,status,git_commit" "country_code,file_name,translation_key,status,git_commit"
) )
.unwrap(); .unwrap();
let i18n_directory = root.join("assets/voxygen/i18n");
let reference = ReferenceLanguage::at(&i18n_directory.join(REFERENCE_LANG));
let list = list_localizations();
let file = |filename: Option<String>| {
let file = filename
.as_ref()
.map(|s| Path::new(s))
.and_then(|p| p.file_name())
.and_then(|s| s.to_str())
.unwrap_or("None");
format!("{file}")
};
for meta in list {
let code = meta.language_identifier;
let lang = Language {
code: code.clone(),
path: i18n_directory.join(code.clone()),
};
let stats = reference.compare_with(&lang);
for key in stats.up_to_date {
let code = &code;
let filename = &file(key.file);
let key = &key.key;
writeln!(f, "{code},{filename},{key},UpToDate,None").unwrap();
}
for key in stats.not_found {
let code = &code;
let filename = &file(key.file);
let key = &key.key;
writeln!(f, "{code},{filename},{key},NotFound,None").unwrap();
}
for key in stats.unused {
let code = &code;
let filename = &file(key.file);
let key = &key.key;
writeln!(f, "{code},{filename},{key},Unused,None").unwrap();
}
}
} }
} }