mirror of
https://gitlab.com/veloren/veloren.git
synced 2024-08-30 18:12:32 +00:00
Implement i18n-tooling
* Working csv export * Working i18n-check
This commit is contained in:
parent
06d827af11
commit
847ee1c1b1
@ -1,3 +1,8 @@
|
||||
#!/bin/bash
|
||||
export VELOREN_ASSETS="$(pwd)/assets"
|
||||
time cargo test --package veloren-voxygen-i18n --lib test_all_localizations -- --nocapture --ignored
|
||||
VELOREN_ASSETS="$(pwd)/assets"
|
||||
export VELOREN_ASSETS
|
||||
|
||||
time cargo test --package veloren-voxygen-i18n \
|
||||
--lib test_all_localizations \
|
||||
--features="stat" \
|
||||
-- --nocapture --ignored
|
||||
|
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -6908,9 +6908,11 @@ dependencies = [
|
||||
name = "veloren-voxygen-i18n"
|
||||
version = "0.13.0"
|
||||
dependencies = [
|
||||
"clap 3.1.10",
|
||||
"deunicode",
|
||||
"fluent",
|
||||
"fluent-bundle",
|
||||
"fluent-syntax",
|
||||
"hashbrown 0.12.0",
|
||||
"intl-memoizer",
|
||||
"ron 0.7.0",
|
||||
|
@ -3,16 +3,54 @@ use std::{
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
/// Read `walk_tree`
|
||||
#[derive(Debug)]
|
||||
/// Represents a directory tree, as produced by [`Walk::generate`].
|
||||
///
|
||||
/// Note that paths are always relative to root it was generated from.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Walk {
    /// A single file. The stored path is relative to the root directory the
    /// tree was generated from.
    File(PathBuf),
    /// A directory and the entries below it. The stored path is relative to
    /// the root directory the tree was generated from.
    Dir {
        path: PathBuf,
        content: Vec<Walk>,
    },
}
|
||||
|
||||
impl Walk {
|
||||
/// Utility function to build a tree of directory, recursively
|
||||
///
|
||||
/// At first iteration, use path to your directory as dir and root
|
||||
/// Path needs to be absolute.
|
||||
pub fn generate(root: &Path) -> io::Result<Walk> {
|
||||
let trees = walk_tree(root, root);
|
||||
Ok(Walk::Dir {
|
||||
path: Path::new("").to_owned(),
|
||||
content: trees?,
|
||||
})
|
||||
}
|
||||
|
||||
// TODO: implement iterator?
|
||||
pub fn for_each_file<F>(&self, root: &Path, f: &mut F)
|
||||
where
|
||||
F: FnMut(&Path),
|
||||
{
|
||||
match self {
|
||||
Self::File(filepath) => {
|
||||
let path = root.join(filepath);
|
||||
f(&path);
|
||||
},
|
||||
Self::Dir {
|
||||
path: _,
|
||||
content: files,
|
||||
} => {
|
||||
for path in files {
|
||||
path.for_each_file(root, f);
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to [Walk::generate()], prefer using it instead.
|
||||
pub fn walk_tree(dir: &Path, root: &Path) -> io::Result<Vec<Walk>> {
|
||||
let mut buff = Vec::new();
|
||||
for entry in std::fs::read_dir(dir)? {
|
||||
@ -37,3 +75,15 @@ pub fn walk_tree(dir: &Path, root: &Path) -> io::Result<Vec<Walk>> {
|
||||
|
||||
Ok(buff)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: the `assets/` directory under the root returned by
    /// `crate::find_root` can be walked without an error.
    #[test]
    fn trie() {
        let assets_dir = crate::find_root()
            .map(|root| Path::new(&root).join("assets/"))
            .unwrap();
        Walk::generate(&assets_dir).unwrap();
    }
}
|
||||
|
@ -19,3 +19,14 @@ fluent-bundle = { git = "https://github.com/juliancoffee/fluent-rs.git", branch
|
||||
hashbrown = { version = "0.12", features = ["serde", "nightly"] }
|
||||
deunicode = "1.0"
|
||||
tracing = "0.1"
|
||||
# Bin
|
||||
clap = { version = "3.1.8", features = ["suggestions", "std"], default-features = false, optional = true }
|
||||
fluent-syntax = { git = "https://github.com/juliancoffee/fluent-rs.git", branch = "patched"}
|
||||
|
||||
[[bin]]
|
||||
name = "i18n-check"
|
||||
required-features = ["bin"]
|
||||
|
||||
[features]
|
||||
bin = ["clap"]
|
||||
stat = []
|
||||
|
148
voxygen/i18n/src/analysis.rs
Normal file
148
voxygen/i18n/src/analysis.rs
Normal file
@ -0,0 +1,148 @@
|
||||
use crate::{assets::Walk, error::ResourceErr};
|
||||
use fluent_syntax::{ast, parser};
|
||||
use std::{
|
||||
fs, io,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
/// Generate tree of i18n files, path should be absolute.
|
||||
/// We assume that all i18n directories should have the same tree structure,
|
||||
/// so that we can generate tree once and reuse for all languages.
|
||||
fn i18n_tree(reference: &Path) -> io::Result<Walk> { Walk::generate(reference) }
|
||||
|
||||
/// Grab keys from one file
|
||||
fn keys_from_file(filepath: &Path) -> Vec<MsgId> {
|
||||
use ast::Entry;
|
||||
|
||||
let file = format!("{}", filepath.display());
|
||||
|
||||
let content = match fs::read_to_string(filepath) {
|
||||
Ok(content) => content,
|
||||
Err(e) => {
|
||||
eprintln!("failed to read from {filepath:?}. err={e}");
|
||||
return Vec::new();
|
||||
},
|
||||
};
|
||||
|
||||
let ast = parser::parse(&*content).unwrap_or_else(|(_parsed, errs)| {
|
||||
panic!(
|
||||
"{}",
|
||||
ResourceErr::parsing_error(errs, file.clone(), &content)
|
||||
)
|
||||
});
|
||||
let mut keys = Vec::new();
|
||||
for entry in ast.body {
|
||||
match entry {
|
||||
Entry::Message(m) => {
|
||||
keys.push(MsgId {
|
||||
key: m.id.name.to_owned(),
|
||||
file: Some(file.clone()),
|
||||
});
|
||||
},
|
||||
Entry::Term(_)
|
||||
| Entry::Comment(_)
|
||||
| Entry::GroupComment(_)
|
||||
| Entry::ResourceComment(_)
|
||||
| Entry::Junk { .. } => {
|
||||
// these are not part of "public" API so do nothing
|
||||
// comments linked to message are part of Message entry
|
||||
// and we are not interested in global comments either, for now
|
||||
},
|
||||
}
|
||||
}
|
||||
keys
|
||||
}
|
||||
|
||||
/// Grab keys from one language sitting at `from`.
|
||||
///
|
||||
/// Tree of files assumed to have only .ftl files.
|
||||
fn keys(from: &Path, tree: &Walk) -> Vec<MsgId> {
|
||||
let mut keys = Vec::new();
|
||||
|
||||
tree.for_each_file(from, &mut |filepath| {
|
||||
if !filepath.ends_with("_manifest.ron") {
|
||||
keys.extend(keys_from_file(filepath));
|
||||
}
|
||||
});
|
||||
|
||||
keys
|
||||
}
|
||||
|
||||
// TODO:
|
||||
// Add versioning
|
||||
// TODO:
|
||||
// Do something with attributes?
|
||||
//
|
||||
// For some messages it makes sense to require that all attributes
|
||||
// should match ones in reference language.
|
||||
// For some it doesn't as of now.
|
||||
#[derive(Clone, Debug)]
pub struct MsgId {
    /// Identifier of the message.
    pub key: String,
    /// File the message was read from, or `None` when no file is associated
    /// with it.
    pub file: Option<String>,
}
|
||||
|
||||
// TODO:
|
||||
// Add versioning
|
||||
#[derive(Debug)]
|
||||
pub struct Stats {
|
||||
pub up_to_date: Vec<MsgId>,
|
||||
pub not_found: Vec<MsgId>,
|
||||
pub unused: Vec<MsgId>,
|
||||
}
|
||||
|
||||
pub struct ReferenceLanguage {
|
||||
/// All keys.
|
||||
pub keys: Vec<MsgId>,
|
||||
/// Cached tree of files.
|
||||
tree: Walk,
|
||||
}
|
||||
|
||||
impl ReferenceLanguage {
|
||||
/// Generate reference language, path should be absolute.
|
||||
pub fn at(path: &Path) -> Self {
|
||||
let tree = i18n_tree(path)
|
||||
.unwrap_or_else(|e| panic!("{path:?}\nfailed to build file tree\n{e:?}"));
|
||||
let keys = keys(path, &tree);
|
||||
Self { keys, tree }
|
||||
}
|
||||
|
||||
/// Compare with other language
|
||||
pub fn compare_with(&self, lang: &Language) -> Stats {
|
||||
let keys = keys(&lang.path, &self.tree);
|
||||
|
||||
let mut stats = Stats {
|
||||
up_to_date: Vec::new(),
|
||||
not_found: Vec::new(),
|
||||
unused: Vec::new(),
|
||||
};
|
||||
|
||||
for ref_key in &self.keys {
|
||||
if let Some(key) = keys.iter().find(|MsgId { key, .. }| &ref_key.key == key) {
|
||||
stats.up_to_date.push(key.clone());
|
||||
} else {
|
||||
stats.not_found.push(MsgId {
|
||||
key: ref_key.key.clone(),
|
||||
file: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
for key in &keys {
|
||||
if !self
|
||||
.keys
|
||||
.iter()
|
||||
.any(|MsgId { key: ref_key, .. }| ref_key == &key.key)
|
||||
{
|
||||
stats.unused.push(key.clone())
|
||||
}
|
||||
}
|
||||
|
||||
stats
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Language {
    /// Code identifying the language.
    pub code: String,
    /// Directory the files of this language are read from.
    pub path: PathBuf,
}
|
39
voxygen/i18n/src/bin/i18n-check.rs
Normal file
39
voxygen/i18n/src/bin/i18n-check.rs
Normal file
@ -0,0 +1,39 @@
|
||||
use clap::{Arg, Command};
|
||||
use common_assets::find_root;
|
||||
use veloren_voxygen_i18n::{
|
||||
analysis::{Language, ReferenceLanguage},
|
||||
REFERENCE_LANG,
|
||||
};
|
||||
|
||||
fn main() {
|
||||
let args = Command::new("i18n-check")
|
||||
.about("Tool to check your Veloren localisation for correctness and missing keys")
|
||||
.arg(
|
||||
Arg::new("CODE")
|
||||
.required(true)
|
||||
.help("Run diagnostic for specific language code (de_DE, for example)"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let root = find_root().unwrap();
|
||||
let i18n_directory = root.join("assets/voxygen/i18n");
|
||||
let reference = ReferenceLanguage::at(&i18n_directory.join(REFERENCE_LANG));
|
||||
|
||||
let code = args.value_of("CODE").expect("arg is required");
|
||||
let lang = Language {
|
||||
code: code.to_owned(),
|
||||
path: root.join(i18n_directory.join(code)),
|
||||
};
|
||||
let stats = reference.compare_with(&lang);
|
||||
println!("\t[Not found]: {}", stats.not_found.len());
|
||||
for key in stats.not_found {
|
||||
let key = &key.key;
|
||||
println!("{key}");
|
||||
}
|
||||
|
||||
println!("\n\t[Unused]: {}", stats.unused.len());
|
||||
for key in stats.unused {
|
||||
let key = &key.key;
|
||||
println!("{key}")
|
||||
}
|
||||
}
|
75
voxygen/i18n/src/error.rs
Normal file
75
voxygen/i18n/src/error.rs
Normal file
@ -0,0 +1,75 @@
|
||||
use fluent_syntax::parser::ParserError;
|
||||
use std::{error::Error, fmt, ops::Range};
|
||||
|
||||
/// Position inside a source text, both components are 1-based.
#[derive(Debug)]
struct Pos {
    line: usize,
    character: usize,
}

impl fmt::Display for Pos {
    /// Writes the position as `line;character`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{};{}", self.line, self.character)
    }
}

/// Converts a span of byte offsets into `src` to line/character positions.
///
/// Offsets past the end of `src` resolve to the position right after its last
/// character.
fn unspan(src: &str, span: Range<usize>) -> Range<Pos> {
    let count = |idx: usize| {
        let mut line = 1;
        let mut character = 1;
        // Walk over characters rather than bytes, so that a multi-byte UTF-8
        // character advances the column by one and not by its byte length.
        let preceding = src.char_indices().take_while(|&(offset, _)| offset < idx);
        for (_, ch) in preceding {
            if ch == '\n' {
                // A new line starts, so the character count starts over.
                line += 1;
                character = 1;
            } else {
                character += 1;
            }
        }
        Pos { line, character }
    };
    let Range { start, end } = span;
    count(start)..count(end)
}
|
||||
|
||||
// TODO:
|
||||
// Ideally we wouldn't write this code, check this issue in fluent-rs.
|
||||
// https://github.com/projectfluent/fluent-rs/issues/176
|
||||
#[derive(Debug)]
pub enum ResourceErr {
    /// A resource failed to parse.
    ParsingError {
        /// Name of the resource that failed to parse.
        #[allow(dead_code)] // false-positive
        file: String,
        /// Textual description of the parser errors.
        #[allow(dead_code)] // false-positive
        err: String,
    },
    /// Error message for a bundle failure.
    // NOTE(review): no constructor of this variant is visible here; confirm
    // what exactly the message contains.
    BundleError(String),
}
|
||||
|
||||
impl ResourceErr {
|
||||
pub fn parsing_error(errs: Vec<ParserError>, file: String, src: &str) -> Self {
|
||||
let errs = errs
|
||||
.into_iter()
|
||||
.map(|e| {
|
||||
let Range {
|
||||
start: from,
|
||||
end: to,
|
||||
} = unspan(src, e.pos);
|
||||
format!("{from}..{to}, kind {:?}", e.kind)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Self::ParsingError {
|
||||
file,
|
||||
err: format!("{errs:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ResourceErr {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{self:#?}") }
|
||||
}
|
||||
|
||||
impl Error for ResourceErr {}
|
@ -1,5 +1,11 @@
|
||||
mod error;
|
||||
mod raw;
|
||||
|
||||
use error::ResourceErr;
|
||||
|
||||
#[cfg(any(feature = "bin", feature = "stat"))]
|
||||
pub mod analysis;
|
||||
|
||||
use fluent_bundle::{bundle::FluentBundle, FluentResource};
|
||||
use intl_memoizer::concurrent::IntlLangMemoizer;
|
||||
use unic_langid::LanguageIdentifier;
|
||||
@ -147,74 +153,10 @@ impl assets::Compound for Language {
|
||||
|
||||
match cache.load(id) {
|
||||
Ok(handle) => {
|
||||
use std::{error::Error, fmt, ops::Range};
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Pos {
|
||||
#[allow(dead_code)] // false-positive
|
||||
line: usize,
|
||||
#[allow(dead_code)] // false-positive
|
||||
character: usize,
|
||||
}
|
||||
|
||||
fn unspan(src: &str, span: Range<usize>) -> Range<Pos> {
|
||||
let count = |idx| {
|
||||
let mut line = 1;
|
||||
let mut character = 1;
|
||||
for ch in src.bytes().take(idx) {
|
||||
// Count characters
|
||||
character += 1;
|
||||
|
||||
// Count newlines
|
||||
if ch == b'\n' {
|
||||
line += 1;
|
||||
// If found new line, reset character count
|
||||
character = 1;
|
||||
}
|
||||
}
|
||||
Pos { line, character }
|
||||
};
|
||||
let Range { start, end } = span;
|
||||
count(start)..count(end)
|
||||
}
|
||||
|
||||
// TODO:
|
||||
// better error handling?
|
||||
#[derive(Debug)]
|
||||
enum ResourceErr {
|
||||
ParsingError {
|
||||
#[allow(dead_code)] // false-positive
|
||||
file: String,
|
||||
#[allow(dead_code)] // false-positive
|
||||
err: String,
|
||||
},
|
||||
BundleError(String),
|
||||
}
|
||||
|
||||
impl fmt::Display for ResourceErr {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{self:?}")
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for ResourceErr {}
|
||||
|
||||
let source: &raw::Resource = &*handle.read();
|
||||
let resource =
|
||||
FluentResource::try_new(source.src.clone()).map_err(|(_ast, errs)| {
|
||||
let file = id.to_owned();
|
||||
let errs = errs
|
||||
.into_iter()
|
||||
.map(|e| {
|
||||
let pos = unspan(&source.src, e.pos);
|
||||
format!("{pos:?}, kind {:?}", e.kind)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
ResourceErr::ParsingError {
|
||||
file,
|
||||
err: format!("{errs:?}"),
|
||||
}
|
||||
ResourceErr::parsing_error(errs, id.to_owned(), &source.src)
|
||||
})?;
|
||||
|
||||
bundle
|
||||
@ -505,14 +447,15 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
#[cfg(feature = "stat")]
|
||||
// Generate translation stats
|
||||
fn test_all_localizations() {
|
||||
// FIXME (i18n translation stats):
|
||||
use std::{fs, io::Write};
|
||||
use analysis::{Language, ReferenceLanguage};
|
||||
use assets::find_root;
|
||||
use std::{fs, io::Write, path::Path};
|
||||
|
||||
let output = assets::find_root()
|
||||
.unwrap()
|
||||
.join("translation_analysis.csv");
|
||||
let root = find_root().unwrap();
|
||||
let output = root.join("translation_analysis.csv");
|
||||
let mut f = fs::File::create(output).expect("couldn't write csv file");
|
||||
|
||||
writeln!(
|
||||
@ -520,5 +463,46 @@ mod tests {
|
||||
"country_code,file_name,translation_key,status,git_commit"
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let i18n_directory = root.join("assets/voxygen/i18n");
|
||||
let reference = ReferenceLanguage::at(&i18n_directory.join(REFERENCE_LANG));
|
||||
|
||||
let list = list_localizations();
|
||||
let file = |filename: Option<String>| {
|
||||
let file = filename
|
||||
.as_ref()
|
||||
.map(|s| Path::new(s))
|
||||
.and_then(|p| p.file_name())
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("None");
|
||||
|
||||
format!("{file}")
|
||||
};
|
||||
for meta in list {
|
||||
let code = meta.language_identifier;
|
||||
let lang = Language {
|
||||
code: code.clone(),
|
||||
path: i18n_directory.join(code.clone()),
|
||||
};
|
||||
let stats = reference.compare_with(&lang);
|
||||
for key in stats.up_to_date {
|
||||
let code = &code;
|
||||
let filename = &file(key.file);
|
||||
let key = &key.key;
|
||||
writeln!(f, "{code},{filename},{key},UpToDate,None").unwrap();
|
||||
}
|
||||
for key in stats.not_found {
|
||||
let code = &code;
|
||||
let filename = &file(key.file);
|
||||
let key = &key.key;
|
||||
writeln!(f, "{code},{filename},{key},NotFound,None").unwrap();
|
||||
}
|
||||
for key in stats.unused {
|
||||
let code = &code;
|
||||
let filename = &file(key.file);
|
||||
let key = &key.key;
|
||||
writeln!(f, "{code},{filename},{key},Unused,None").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user