New i18n implementation based on Fluent

This commit is contained in:
juliancoffee 2022-07-27 21:32:17 +03:00
parent 48eeabcc93
commit 8ba50e62d5
15 changed files with 736 additions and 1182 deletions

154
Cargo.lock generated
View File

@ -1902,6 +1902,47 @@ dependencies = [
"num-traits",
]
[[package]]
name = "fluent"
version = "0.16.0"
source = "git+https://github.com/juliancoffee/fluent-rs.git#efd8159736c0c5d5f00a1c1f91fe35492e9ab473"
dependencies = [
"fluent-bundle",
"unic-langid",
]
[[package]]
name = "fluent-bundle"
version = "0.15.2"
source = "git+https://github.com/juliancoffee/fluent-rs.git#efd8159736c0c5d5f00a1c1f91fe35492e9ab473"
dependencies = [
"fluent-langneg",
"fluent-syntax",
"intl-memoizer",
"intl_pluralrules",
"rustc-hash",
"self_cell",
"smallvec",
"unic-langid",
]
[[package]]
name = "fluent-langneg"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c4ad0989667548f06ccd0e306ed56b61bd4d35458d54df5ec7587c0e8ed5e94"
dependencies = [
"unic-langid",
]
[[package]]
name = "fluent-syntax"
version = "0.11.0"
source = "git+https://github.com/juliancoffee/fluent-rs.git#efd8159736c0c5d5f00a1c1f91fe35492e9ab473"
dependencies = [
"thiserror",
]
[[package]]
name = "fnv"
version = "1.0.7"
@ -2315,19 +2356,6 @@ version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4"
[[package]]
name = "git2"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3826a6e0e2215d7a41c2bfc7c9244123969273f3476b939a226aac0ab56e9e3c"
dependencies = [
"bitflags",
"libc",
"libgit2-sys",
"log",
"url",
]
[[package]]
name = "glam"
version = "0.10.2"
@ -2812,6 +2840,25 @@ dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "intl-memoizer"
version = "0.5.1"
source = "git+https://github.com/juliancoffee/fluent-rs.git#efd8159736c0c5d5f00a1c1f91fe35492e9ab473"
dependencies = [
"type-map",
"unic-langid",
]
[[package]]
name = "intl_pluralrules"
version = "7.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b18f988384267d7066cc2be425e6faf352900652c046b6971d2e228d3b1c5ecf"
dependencies = [
"tinystr",
"unic-langid",
]
[[package]]
name = "io-kit-sys"
version = "0.1.0"
@ -3006,18 +3053,6 @@ version = "0.2.121"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
[[package]]
name = "libgit2-sys"
version = "0.13.2+1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a42de9a51a5c12e00fc0e4ca6bc2ea43582fc6418488e8f615e905d886f258b"
dependencies = [
"cc",
"libc",
"libz-sys",
"pkg-config",
]
[[package]]
name = "libloading"
version = "0.6.7"
@ -3080,18 +3115,6 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "libz-sys"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f35facd4a5673cb5a48822be2be1d4236c1c99cb4113cab7061ac720d5bf859"
dependencies = [
"cc",
"libc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "linked-hash-map"
version = "0.5.4"
@ -5262,6 +5285,12 @@ dependencies = [
"libc",
]
[[package]]
name = "self_cell"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ef965a420fe14fdac7dd018862966a4c14094f900e1650bbc71ddd7d580c8af"
[[package]]
name = "semver"
version = "0.9.0"
@ -5292,6 +5321,15 @@ dependencies = [
"serde_derive",
]
[[package]]
name = "serde-tuple-vec-map"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a04d0ebe0de77d7d445bb729a895dcb0a288854b267ca85f030ce51cdc578c82"
dependencies = [
"serde",
]
[[package]]
name = "serde_bytes"
version = "0.11.5"
@ -5977,6 +6015,12 @@ dependencies = [
"crunchy",
]
[[package]]
name = "tinystr"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29738eedb4388d9ea620eeab9384884fc3f06f586a2eddb56bedc5885126c7c1"
[[package]]
name = "tinytemplate"
version = "1.2.1"
@ -6255,6 +6299,15 @@ dependencies = [
"nom 5.1.2",
]
[[package]]
name = "type-map"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "deb68604048ff8fa93347f02441e4487594adc20bb8a084f9e564d2b827a0a9f"
dependencies = [
"rustc-hash",
]
[[package]]
name = "typenum"
version = "1.15.0"
@ -6270,6 +6323,24 @@ dependencies = [
"version_check 0.9.4",
]
[[package]]
name = "unic-langid"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73328fcd730a030bdb19ddf23e192187a6b01cd98be6d3140622a89129459ce5"
dependencies = [
"unic-langid-impl",
]
[[package]]
name = "unic-langid-impl"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a4a8eeaf0494862c1404c95ec2f4c33a2acff5076f64314b465e3ddae1b934d"
dependencies = [
"tinystr",
]
[[package]]
name = "unicode-bidi"
version = "0.3.7"
@ -6856,15 +6927,18 @@ dependencies = [
[[package]]
name = "veloren-voxygen-i18n"
version = "0.10.0"
version = "0.13.0"
dependencies = [
"clap 3.1.10",
"deunicode",
"git2",
"fluent",
"fluent-bundle",
"hashbrown 0.12.0",
"intl-memoizer",
"ron 0.7.0",
"serde",
"serde-tuple-vec-map",
"tracing",
"unic-langid",
"veloren-common-assets",
]

View File

@ -16,6 +16,8 @@ pub use assets_manager::{
};
mod fs;
mod walk;
pub use walk::*;
lazy_static! {
/// The HashMap where all loaded assets are stored in.

39
common/assets/src/walk.rs Normal file
View File

@ -0,0 +1,39 @@
use std::{
io,
path::{Path, PathBuf},
};
/// Tree node produced by [`walk_tree`]
///
/// Paths stored in both variants are relative to the `root` argument
/// passed to `walk_tree` (the prefix is stripped), not absolute paths.
#[derive(Debug)]
pub enum Walk {
    File(PathBuf),
    Dir { path: PathBuf, content: Vec<Walk> },
}
/// Utility function to build a tree of directory, recursively
///
/// At first iteration, use path to your directory as dir and root.
/// Directory entries are visited in `read_dir` order (platform-dependent);
/// every returned path is relative to `root`.
pub fn walk_tree(dir: &Path, root: &Path) -> io::Result<Vec<Walk>> {
    let mut nodes = Vec::new();
    for dir_entry in std::fs::read_dir(dir)? {
        let full_path = dir_entry?.path();
        if full_path.is_dir() {
            // Recurse first-class: a directory node carries its children.
            let relative = full_path
                .strip_prefix(root)
                .expect("strip can't fail, this path is created from root")
                .to_owned();
            let children = walk_tree(&full_path, root)?;
            nodes.push(Walk::Dir {
                path: relative,
                content: children,
            });
        } else {
            let relative = full_path
                .strip_prefix(root)
                .expect("strip can't fail, this file is created from root")
                .to_owned();
            nodes.push(Walk::File(relative));
        }
    }
    Ok(nodes)
}

View File

@ -1,9 +1,6 @@
use common_assets::{walk_tree, Walk};
use serde::{de::DeserializeOwned, Serialize};
use std::{
fs, io,
io::Write,
path::{Path, PathBuf},
};
use std::{fs, io, io::Write, path::Path};
// If you want to migrate assets.
// 1) Copy-paste old asset type to own module
@ -19,37 +16,6 @@ mod new {
pub type Config = ();
}
// NOTE(review): local duplicate of `common_assets::Walk`; this commit
// replaces it with the shared implementation in common/assets/src/walk.rs
#[derive(Debug)]
enum Walk {
    File(PathBuf),
    Dir { path: PathBuf, content: Vec<Walk> },
}
// NOTE(review): local duplicate of `common_assets::walk_tree`, removed in
// favour of the shared implementation. Builds a `Walk` tree of `dir`,
// with all stored paths made relative to `root`.
fn walk_tree(dir: &Path, root: &Path) -> io::Result<Vec<Walk>> {
    let mut buff = Vec::new();
    for entry in fs::read_dir(dir)? {
        let entry = entry?;
        let path = entry.path();
        if path.is_dir() {
            buff.push(Walk::Dir {
                path: path
                    .strip_prefix(root)
                    .expect("strip can't fail, this path is created from root")
                    .to_owned(),
                content: walk_tree(&path, root)?,
            });
        } else {
            let filename = path
                .strip_prefix(root)
                .expect("strip can't fail, this file is created from root")
                .to_owned();
            buff.push(Walk::File(filename));
        }
    }
    Ok(buff)
}
fn walk_with_migrate<OldV, NewV>(tree: Walk, from: &Path, to: &Path) -> io::Result<()>
where
NewV: From<OldV>,

View File

@ -1,30 +1,32 @@
[package]
authors = ["juliancoffee <lightdarkdaughter@gmail.com>", "Rémy Phelipot"]
authors = ["juliancoffee <lightdarkdaughter@gmail.com>"]
edition = "2021"
name = "veloren-voxygen-i18n"
description = "Crate for internationalization and diagnostics of existing localizations."
version = "0.10.0"
[[bin]]
name = "i18n-check"
required-features = ["bin"]
version = "0.13.0"
[dependencies]
# Assets
hashbrown = { version = "0.12", features = ["serde", "nightly"] }
common-assets = {package = "veloren-common-assets", path = "../../common/assets"}
deunicode = "1.0"
serde = { version = "1.0", features = ["derive"] }
tracing = "0.1"
# Diagnostic
ron = "0.7"
git2 = { version = "0.14", default-features = false, optional = true }
serde = { version = "1.0", features = ["derive"] }
# Localization
unic-langid = { version = "0.9"}
intl-memoizer = { git = "https://github.com/juliancoffee/fluent-rs.git"}
fluent = { git = "https://github.com/juliancoffee/fluent-rs.git"}
fluent-bundle = { git = "https://github.com/juliancoffee/fluent-rs.git"}
# Utility
hashbrown = { version = "0.12", features = ["serde", "nightly"] }
deunicode = "1.0"
tracing = "0.1"
# Bin
serde-tuple-vec-map = "1.0"
# Binary
clap = { version = "3.1.8", features = ["suggestions", "std"], default-features = false, optional = true }
[dev-dependencies]
git2 = { version = "0.14", default-features = false }
# FIXME: remove before merge
[[bin]]
name = "i18n-migrate"
required-features = ["i18n-migrate"]
path = "src/bin/migrate.rs"
[features]
bin = ["git2", "clap"]
i18n-migrate = []

View File

@ -1,6 +0,0 @@
# Usage
`$ cargo run --features=bin -- --help` <br/>
(Or if somewhere else in the repo) <br/>
`$ cargo run -p veloren-i18n --features=bin -- --help` <br/>
For example, to run a diagnostic for a specific language <br/>
`$ cargo run -p veloren-i18n --features=bin -- <lang_code>` <br/>

View File

@ -1,247 +0,0 @@
use crate::{
gitfragments::{
read_file_from_path, transform_fragment, LocalizationEntryState, LocalizationState,
},
path::{BasePath, LangPath},
raw::{self, RawFragment, RawLanguage},
stats::{
print_csv_stats, print_overall_stats, print_translation_stats, LocalizationAnalysis,
LocalizationStats,
},
REFERENCE_LANG,
};
use hashbrown::{hash_map::Entry, HashMap};
use ron::de::from_bytes;
/// Fill the entry State base information (except `state`) for a complete
/// language
///
/// Loads the language manifest, then for every fragment file reads the
/// version committed at `head_ref` (not the working tree) and attaches
/// git blame data via `transform_fragment`.
fn gather_entry_state<'a>(
    repo: &'a git2::Repository,
    head_ref: &git2::Reference,
    path: &LangPath,
) -> RawLanguage<LocalizationEntryState> {
    println!("-> {:?}", path.language_identifier());
    // load standard manifest
    let manifest = raw::load_manifest(path).expect("failed to load language manifest");
    // transform language into LocalizationEntryState
    let mut fragments = HashMap::new();
    // For each file in directory
    let files = path
        .fragments()
        .expect("failed to get all files in language");
    for sub_path in files {
        let fullpath = path.sub_path(&sub_path);
        // path relative to the repository root — the form git2 APIs expect
        let gitpath = fullpath.strip_prefix(path.base().root_path()).unwrap();
        println!(" -> {:?}", &sub_path);
        let i18n_blob = read_file_from_path(repo, head_ref, gitpath);
        let fragment: RawFragment<String> = from_bytes(i18n_blob.content())
            .unwrap_or_else(|e| panic!("Could not parse {:?} RON file, error: {}", sub_path, e));
        let frag = transform_fragment(repo, (gitpath, fragment), &i18n_blob);
        fragments.insert(sub_path, frag);
    }
    RawLanguage::<LocalizationEntryState> {
        manifest,
        fragments,
    }
}
/// fills in the `state`
///
/// Compares `current_i18n` against the reference language: files/keys
/// absent in the current language are marked `NotFound`, keys whose
/// commit is not a descendant of the reference key's commit are marked
/// `Outdated`, keys missing from the reference are marked `Unused`,
/// everything else `UpToDate`.
fn compare_lang_with_reference(
    current_i18n: &mut RawLanguage<LocalizationEntryState>,
    i18n_references: &RawLanguage<LocalizationEntryState>,
    repo: &git2::Repository,
) {
    // git graph descendant of is slow, so we cache it
    let mut graph_descendant_of_cache = HashMap::new();
    let mut cached_graph_descendant_of = |commit, ancestor| -> bool {
        let key = (commit, ancestor);
        match graph_descendant_of_cache.entry(key) {
            Entry::Occupied(entry) => {
                return *entry.get();
            },
            Entry::Vacant(entry) => {
                // On git errors we conservatively assume "not a descendant"
                let value = repo.graph_descendant_of(commit, ancestor).unwrap_or(false);
                *entry.insert(value)
            },
        }
    };
    // Placeholder state for keys the current language doesn't have at all
    const MISSING: LocalizationEntryState = LocalizationEntryState {
        key_line: None,
        chuck_line_range: None,
        commit_id: None,
        state: Some(LocalizationState::NotFound),
    };
    // match files
    for (ref_path, ref_fragment) in i18n_references.fragments.iter() {
        let cur_fragment = match current_i18n.fragments.get_mut(ref_path) {
            Some(c) => c,
            None => {
                eprintln!(
                    "language {} is missing file: {:?}",
                    current_i18n.manifest.metadata.language_identifier, ref_path
                );
                // add all keys as missing
                let mut string_map = HashMap::new();
                for (ref_key, _) in ref_fragment.string_map.iter() {
                    string_map.insert(ref_key.to_owned(), MISSING.clone());
                }
                current_i18n
                    .fragments
                    .insert(ref_path.to_owned(), RawFragment {
                        string_map,
                        vector_map: HashMap::new(),
                    });
                continue;
            },
        };
        for (ref_key, ref_state) in ref_fragment.string_map.iter() {
            match cur_fragment.string_map.get_mut(ref_key) {
                Some(state) => {
                    let commit_id = match state.commit_id {
                        Some(c) => c,
                        None => {
                            eprintln!(
                                "Commit ID of key {} in i18n file {} is missing! Skipping key.",
                                ref_key,
                                ref_path.to_string_lossy()
                            );
                            continue;
                        },
                    };
                    let ref_commit_id = match ref_state.commit_id {
                        Some(c) => c,
                        None => {
                            eprintln!(
                                "Commit ID of key {} in reference i18n file is missing! Skipping \
                                 key.",
                                ref_key
                            );
                            continue;
                        },
                    };
                    // Up to date iff the translation's commit is the
                    // reference commit or a descendant of it
                    if commit_id != ref_commit_id
                        && !cached_graph_descendant_of(commit_id, ref_commit_id)
                    {
                        state.state = Some(LocalizationState::Outdated);
                    } else {
                        state.state = Some(LocalizationState::UpToDate);
                    }
                },
                None => {
                    cur_fragment
                        .string_map
                        .insert(ref_key.to_owned(), MISSING.clone());
                },
            }
        }
        // Keys present here but absent from the reference are unused
        for (_, state) in cur_fragment
            .string_map
            .iter_mut()
            .filter(|&(k, _)| ref_fragment.string_map.get(k).is_none())
        {
            state.state = Some(LocalizationState::Unused);
        }
    }
}
/// Aggregate the per-key states of one language into counters
/// (`LocalizationStats`) and per-state key lists (`LocalizationAnalysis`).
fn gather_results(
    current_i18n: &RawLanguage<LocalizationEntryState>,
) -> (LocalizationAnalysis, LocalizationStats) {
    let mut state_map =
        LocalizationAnalysis::new(&current_i18n.manifest.metadata.language_identifier);
    let mut stats = LocalizationStats::default();
    for (file, fragments) in &current_i18n.fragments {
        for (key, entry) in &fragments.string_map {
            match entry.state {
                Some(LocalizationState::Outdated) => stats.outdated_entries += 1,
                Some(LocalizationState::NotFound) => stats.notfound_entries += 1,
                None => stats.errors += 1,
                Some(LocalizationState::Unused) => stats.unused_entries += 1,
                Some(LocalizationState::UpToDate) => stats.uptodate_entries += 1,
            };
            // `data` is expected to be pre-filled with every possible state
            let state_keys = state_map.data.get_mut(&entry.state).expect("prefiled");
            state_keys.push((file.clone(), key.to_owned(), entry.commit_id));
        }
    }
    // deterministic output ordering
    for (_, entries) in state_map.data.iter_mut() {
        entries.sort();
    }
    (state_map, stats)
}
/// Test one language
/// - `code`: name of the directory in assets (de_DE for example)
/// - `path`: path to repo
/// - `be_verbose`: print extra info
/// - `csv_enabled`: generate csv files in target folder
pub fn test_specific_localizations(
    path: &BasePath,
    language_identifiers: &[&str],
    be_verbose: bool,
    csv_enabled: bool,
) {
    //complete analysis
    let mut analysis = HashMap::new();
    // Initialize Git objects
    let repo = git2::Repository::discover(path.root_path())
        .unwrap_or_else(|_| panic!("Failed to open the Git repository {:?}", path.root_path()));
    let head_ref = repo.head().expect("Impossible to get the HEAD reference");
    // Read Reference Language
    let ref_language = gather_entry_state(&repo, &head_ref, &path.i18n_path(REFERENCE_LANG));
    for &language_identifier in language_identifiers {
        let mut cur_language =
            gather_entry_state(&repo, &head_ref, &path.i18n_path(language_identifier));
        compare_lang_with_reference(&mut cur_language, &ref_language, &repo);
        let (state_map, stats) = gather_results(&cur_language);
        analysis.insert(language_identifier.to_owned(), (state_map, stats));
    }
    // NOTE(review): the csv file is created (truncated) even when
    // `csv_enabled` is false — confirm this is intended
    let output = path.root_path().join("translation_analysis.csv");
    let mut f = std::fs::File::create(output).expect("couldn't write csv file");
    use std::io::Write;
    writeln!(
        f,
        "country_code,file_name,translation_key,status,git_commit"
    )
    .unwrap();
    //printing
    for (language_identifier, (state_map, stats)) in &analysis {
        if csv_enabled {
            print_csv_stats(state_map, &mut f);
        } else {
            print_translation_stats(
                language_identifier,
                &ref_language,
                stats,
                state_map,
                be_verbose,
            );
        }
    }
    if analysis.len() > 1 {
        print_overall_stats(analysis);
    }
}
/// Test all localizations
///
/// Discovers every language directory under `path` and runs
/// [`test_specific_localizations`] over all of them at once.
pub fn test_all_localizations(path: &BasePath, be_verbose: bool, csv_enabled: bool) {
    // Compare to other reference files
    let languages = path.i18n_directories();
    let language_identifiers = languages
        .iter()
        .map(|s| s.language_identifier())
        .collect::<Vec<_>>();
    test_specific_localizations(path, &language_identifiers, be_verbose, csv_enabled);
}

View File

@ -1,48 +0,0 @@
use clap::{Arg, Command};
use veloren_voxygen_i18n::{analysis, verification, BasePath};
/// CLI entry point of the `i18n-check` diagnostic tool.
///
/// Parses flags with clap, locates the repository root, then dispatches
/// to analysis (single language or all) and/or verification.
fn main() {
    let matches = Command::new("i18n-check")
        .version("0.1.0")
        .author("juliancoffee <lightdarkdaughter@gmail.com>")
        .about("Test veloren localizations")
        .arg(
            Arg::new("CODE")
                .required(false)
                .help("Run diagnostic for specific language code (de_DE as example)"),
        )
        .arg(
            Arg::new("verify")
                .long("verify")
                .help("verify all localizations"),
        )
        .arg(Arg::new("test").long("test").help("test all localizations"))
        .arg(
            Arg::new("verbose")
                .short('v')
                .long("verbose")
                .help("print additional information"),
        )
        .arg(
            Arg::new("csv")
                .long("csv")
                .help("generate csv files per language in target folder"),
        )
        .get_matches();
    // Generate paths
    let root_path = common_assets::find_root().expect("Failed to find root of repository");
    let path = BasePath::new(&root_path);
    let be_verbose = matches.is_present("verbose");
    let csv_enabled = matches.is_present("csv");
    // The three modes are not exclusive: CODE, --test and --verify can run
    // in the same invocation
    if let Some(code) = matches.value_of("CODE") {
        analysis::test_specific_localizations(&path, &[code], be_verbose, csv_enabled);
    }
    if matches.is_present("test") {
        analysis::test_all_localizations(&path, be_verbose, csv_enabled);
    }
    if matches.is_present("verify") {
        verification::verify_all_localizations(&path);
    }
}

View File

@ -0,0 +1,232 @@
use std::{ffi::OsStr, fs, io::Write, path::Path};
use serde::Deserialize;
use common_assets::{walk_tree, Walk};
/// Structure representing file for old .ron format
#[derive(Deserialize)]
struct RawFragment {
    // `tuple_vec_map` deserializes the map as a Vec of pairs, keeping file
    // order; presumably also tolerates duplicated keys (convert() dedups)
    #[serde(with = "tuple_vec_map")]
    string_map: Vec<(String, String)>,
    #[serde(with = "tuple_vec_map")]
    vector_map: Vec<(String, Vec<String>)>,
}
impl RawFragment {
    /// Parse one old-format RON fragment from disk.
    ///
    /// Panics on I/O or parse errors — acceptable for a one-shot
    /// migration binary.
    fn read(path: &Path) -> Self {
        let source = fs::File::open(path).unwrap();
        ron::de::from_reader(source).unwrap()
    }
}
/// Message value, may contain interpolated variables
struct Pattern {
    view: String,
}

impl Pattern {
    /// Render the pattern as the right-hand side of a Fluent entry
    /// (the text after `=`).
    fn expand(self) -> String {
        // Single-line values go on the same line as the equal sign.
        if !self.view.contains('\n') {
            return format!(" {}", self.view);
        }
        // Multiline values become an indented Fluent block.
        let mut out = String::new();
        let mut is_first = true;
        for line in self.view.lines() {
            if is_first && line.is_empty() {
                // fluent ignores space characters at the beginning
                // so we need to encode \n explicitly
                out.push_str(r#"{"\u000A"}"#);
            } else {
                out.push_str("\n ");
            }
            is_first = false;
            out.push_str(line);
        }
        out
    }
}
/// Fluent entry
struct Message {
    value: Option<Pattern>,
    attributes: Vec<(String, Pattern)>,
}

impl Message {
    /// Serialize the message body: ` =<value>` followed by one indented
    /// `.name =<value>` line per attribute (the key itself is written by
    /// the caller).
    fn stringify(self) -> String {
        // equal sign first, value (if any) right after it
        let mut out = String::from(" =");
        if let Some(pattern) = self.value {
            out += &pattern.expand();
        }
        for (name, pattern) in self.attributes {
            // newline + indent, then `.name =<expanded pattern>`
            out.push_str("\n .");
            out.push_str(&name);
            out.push_str(" =");
            out.push_str(&pattern.expand());
        }
        out
    }
}
/// Structure representing file for new .ftl format
struct Source {
    // (key, message) pairs in output order
    entries: Vec<(String, Message)>,
}
impl Source {
    /// Write all entries to `path` in Fluent syntax: each entry is the key
    /// followed by `Message::stringify` output, entries separated by a
    /// newline. Panics on I/O errors (one-shot migration tool).
    fn write(self, path: &Path) {
        let mut source = fs::File::create(path).unwrap();
        let mut first = true;
        for (key, msg) in self.entries {
            // separator goes between entries, not before the first one
            if !first {
                source.write_all(b"\n").unwrap();
            } else {
                first = false;
            }
            source.write_all(key.as_bytes()).unwrap();
            source.write_all(msg.stringify().as_bytes()).unwrap();
        }
    }
}
// Convert old i18n string to new fluent format
//
// Rewrites `{var.name}` captures to fluent `{ $var-name }` form:
// a `$` sign is prepended to the variable and dots inside a capture
// become dashes. Panics on unbalanced or nested braces.
fn to_pattern(old: String) -> Pattern {
    let mut view = String::new();
    let mut in_capture = false;
    let mut need_sign = false;
    for ch in old.chars() {
        match ch {
            '{' => {
                if in_capture {
                    panic!("double {{");
                }
                in_capture = true;
                // the next ordinary char starts a variable name
                need_sign = true;
                view.push('{');
                view.push(' ');
            },
            '}' => {
                if !in_capture {
                    panic!("}} without opening {{");
                }
                in_capture = false;
                view.push(' ');
                view.push('}');
            },
            _ => {
                if need_sign {
                    view.push('$');
                    need_sign = false;
                }
                if in_capture && ch == '.' {
                    view.push('-');
                } else {
                    view.push(ch);
                }
            },
        }
    }
    Pattern { view }
}
/// Convert a list of old-format variations into a value-less Fluent
/// message whose attributes are named `a0`, `a1`, … in input order.
///
/// Takes the Vec by value, so each string is converted in place instead
/// of being cloned (the original iterated by reference and cloned every
/// string via `to_owned`).
fn to_attributes(old: Vec<String>) -> Message {
    let attributes = old
        .into_iter()
        .enumerate()
        .map(|(i, string)| (format!("a{i}"), to_pattern(string)))
        .collect();
    Message {
        value: None,
        attributes,
    }
}
/// Convert one old RON fragment into a Fluent source file.
///
/// Keys from `string_map` become plain messages, keys from `vector_map`
/// become attribute-only messages; dots in keys turn into dashes
/// (common.weapon.tool -> common-weapon-tool). Only the first occurrence
/// of a key is kept.
fn convert(old: RawFragment) -> Source {
    use std::collections::HashSet;

    let mut entries = Vec::new();
    // Keys already emitted. A set makes the duplicate check O(1) instead
    // of the previous O(n) `Vec::contains` scan per key; `insert` returns
    // false when the key was already present.
    let mut seen: HashSet<String> = HashSet::new();
    for (key, string) in old.string_map.into_iter() {
        if !seen.insert(key.clone()) {
            continue;
        }
        // common.weapon.tool -> common-weapon-tool
        // (`replace` already returns an owned String — the old
        // `.to_owned()` calls here were redundant clones)
        let key = key.replace('.', "-");
        let msg = Message {
            value: Some(to_pattern(string)),
            attributes: Vec::new(),
        };
        entries.push((key, msg))
    }
    for (key, variation) in old.vector_map.into_iter() {
        // string_map entries win over vector_map entries with the same key
        if !seen.insert(key.clone()) {
            continue;
        }
        // common.weapon.tool -> common-weapon-tool
        let key = key.replace('.', "-");
        let msg = to_attributes(variation);
        entries.push((key, msg))
    }
    Source { entries }
}
/// Recursively mirror `tree` from the old RON directory into the new
/// Fluent directory, converting fragments along the way.
///
/// `_manifest.ron` and `README.md` are copied verbatim; every other file
/// is parsed as an old RON fragment and rewritten with a `.ftl` extension.
/// Panics on any fs error (one-shot migration tool).
fn migrate(tree: Walk, from: &Path, to: &Path) {
    match tree {
        Walk::Dir { path, content } => {
            println!("{:?}", path);
            let target_dir = to.join(path);
            // NOTE(review): `create_dir` fails if the directory already
            // exists — the tool assumes a fresh target tree
            fs::create_dir(target_dir).unwrap();
            for entry in content {
                migrate(entry, from, to);
            }
        },
        Walk::File(path) => {
            if path.file_name() == Some(OsStr::new("_manifest.ron"))
                || path.file_name() == Some(OsStr::new("README.md"))
            {
                fs::copy(from.join(&path), to.join(path)).unwrap();
            } else {
                let old = RawFragment::read(&from.join(&path));
                let new = convert(old);
                new.write(&to.join(path).with_extension("ftl"));
            }
        },
    }
}
/// One-shot migration entry point: converts the old RON i18n tree into
/// the new Fluent tree.
///
/// It assumes that you have old i18n files in the `i18n-ron` directory;
/// the whole tree is wrapped in a synthetic root `Walk::Dir` so `migrate`
/// recreates the directory layout under `i18n`.
fn main() {
    // `Path::new` already yields `&Path`; passing it directly avoids the
    // needless `&old_path` double-borrows of the original.
    let old_path = Path::new("assets/voxygen/i18n-ron");
    let new_path = Path::new("assets/voxygen/i18n");
    let tree = walk_tree(old_path, old_path).unwrap();
    let tree = Walk::Dir {
        path: Path::new("").to_owned(),
        content: tree,
    };
    migrate(tree, old_path, new_path);
}

View File

@ -1,157 +0,0 @@
//! fragment attached with git versioning information
use crate::raw::RawFragment;
use hashbrown::HashMap;
use std::path::Path;
/// Translation state of a single key relative to the reference language
#[derive(Copy, Clone, Eq, Hash, Debug, PartialEq)]
pub(crate) enum LocalizationState {
    UpToDate,
    NotFound,
    Outdated,
    Unused,
}
/// Every possible state including `None` ("unknown"); presumably used to
/// pre-fill per-state collections — see the `expect("prefiled")` lookup
/// in `gather_results`
pub(crate) const ALL_LOCALIZATION_STATES: [Option<LocalizationState>; 5] = [
    Some(LocalizationState::UpToDate),
    Some(LocalizationState::NotFound),
    Some(LocalizationState::Outdated),
    Some(LocalizationState::Unused),
    None,
];
/// Git-derived state of one localization key
#[derive(Clone, Debug)]
pub(crate) struct LocalizationEntryState {
    // line the key was found on (0-based; blame comparison adds 1)
    pub(crate) key_line: Option<usize>,
    // blame hunk line range covering the key; "chuck" looks like a typo
    // for "chunk", kept for compatibility
    pub(crate) chuck_line_range: Option<(usize, usize)>,
    // latest (descendant-most) commit touching the key's line
    pub(crate) commit_id: Option<git2::Oid>,
    // translation state relative to the reference language
    pub(crate) state: Option<LocalizationState>,
}
impl LocalizationState {
pub(crate) fn print(this: &Option<Self>) -> String {
match this {
Some(LocalizationState::UpToDate) => "UpToDate",
Some(LocalizationState::NotFound) => "NotFound",
Some(LocalizationState::Outdated) => "Outdated",
Some(LocalizationState::Unused) => "Unused",
None => "Unknown",
}
.to_owned()
}
}
impl LocalizationEntryState {
    /// Fresh state: only the key's line number is known; the git-derived
    /// fields are filled in later (see `transform_fragment` and
    /// `compare_lang_with_reference`).
    fn new(key_line: Option<usize>) -> LocalizationEntryState {
        LocalizationEntryState {
            key_line,
            chuck_line_range: None,
            commit_id: None,
            state: None,
        }
    }
}
/// Returns the Git blob associated with the given reference and path
///
/// Reads the file content as committed at `reference` (e.g. HEAD), not
/// the working-tree version. Panics if the path does not exist in that
/// reference's tree.
pub(crate) fn read_file_from_path<'a>(
    repo: &'a git2::Repository,
    reference: &git2::Reference,
    path: &Path,
) -> git2::Blob<'a> {
    // resolve the reference down to the tree it points at
    let tree = reference
        .peel_to_tree()
        .expect("Impossible to peel HEAD to a tree object");
    tree.get_path(path)
        .unwrap_or_else(|_| {
            panic!(
                "Impossible to find the file {:?} in reference {:?}",
                path,
                reference.name()
            )
        })
        .to_object(repo)
        .unwrap()
        .peel_to_blob()
        .expect("Impossible to fetch the Git object")
}
/// Extend a Fragment with historical git data
/// The actual translation gets dropped
/// TODO: transform vector_map too
///
/// Two passes: first locate each key's line in the committed file via a
/// `:`-based heuristic, then walk the git blame of the file and record,
/// for every key, the blame hunk range and the most recent commit that
/// touched its line.
pub(crate) fn transform_fragment<'a>(
    repo: &'a git2::Repository,
    fragment: (&Path, RawFragment<String>),
    file_blob: &git2::Blob,
) -> RawFragment<LocalizationEntryState> {
    let (path, fragment) = fragment;
    // Find key start lines by searching all lines which have `:` in them (as they
    // are probably keys) and getting the first part of such line trimming
    // whitespace and quotes. Quite buggy heuristic
    let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file");
    // we only need the key part of the file to process
    let file_content_keys = file_content.lines().enumerate().filter_map(|(no, line)| {
        line.split_once(':').map(|(key, _)| {
            let mut key = key.trim().chars();
            // drop the surrounding quote characters
            key.next();
            key.next_back();
            (no, key.as_str())
        })
    });
    //speed up the search by sorting all keys!
    let mut file_content_keys_sorted = file_content_keys.into_iter().collect::<Vec<_>>();
    file_content_keys_sorted.sort_by_key(|(_, key)| *key);
    let mut result = RawFragment::<LocalizationEntryState> {
        string_map: HashMap::new(),
        vector_map: HashMap::new(),
    };
    for (original_key, _) in fragment.string_map {
        let line_nb = file_content_keys_sorted
            .binary_search_by_key(&original_key.as_str(), |(_, key)| *key)
            .map_or_else(
                |_| {
                    eprintln!(
                        "Key {} does not have a git line in it's state!",
                        original_key
                    );
                    None
                },
                |id| Some(file_content_keys_sorted[id].0),
            );
        result
            .string_map
            .insert(original_key, LocalizationEntryState::new(line_nb));
    }
    // Find commit for each keys, THIS PART IS SLOW (2s/4s)
    for e in repo
        .blame_file(path, None)
        .expect("Impossible to generate the Git blame")
        .iter()
    {
        for (_, state) in result.string_map.iter_mut() {
            if let Some(line) = state.key_line {
                // blame range is half-open: [start, start + len)
                let range = (
                    e.final_start_line(),
                    e.final_start_line() + e.lines_in_hunk(),
                );
                // `line` is 0-based, blame lines are 1-based — hence +1
                if line + 1 >= range.0 && line + 1 < range.1 {
                    state.chuck_line_range = Some(range);
                    // keep whichever commit is the descendant (more recent)
                    state.commit_id = state.commit_id.map_or_else(
                        || Some(e.final_commit_id()),
                        |existing_commit| match repo
                            .graph_descendant_of(e.final_commit_id(), existing_commit)
                        {
                            Ok(true) => Some(e.final_commit_id()),
                            Ok(false) => Some(existing_commit),
                            Err(err) => panic!("{}", err),
                        },
                    );
                }
            }
        }
    }
    result
}

View File

@ -1,22 +1,21 @@
#[cfg(any(feature = "bin", test))]
pub mod analysis;
#[cfg(any(feature = "bin", test))]
mod gitfragments;
mod path;
mod raw;
#[cfg(any(feature = "bin", test))] pub mod stats;
pub mod verification;
//reexport
pub use path::BasePath;
use fluent_bundle::{bundle::FluentBundle, FluentResource};
use intl_memoizer::concurrent::IntlLangMemoizer;
use unic_langid::LanguageIdentifier;
use crate::path::{LANG_EXTENSION, LANG_MANIFEST_FILE};
use common_assets::{self, source::DirEntry, AssetExt, AssetGuard, AssetHandle, ReloadWatcher};
use hashbrown::{HashMap, HashSet};
use raw::{RawFragment, RawLanguage, RawManifest};
use hashbrown::HashMap;
use serde::{Deserialize, Serialize};
use std::{io, path::PathBuf};
use std::{borrow::Cow, io};
use assets::{source::DirEntry, AssetExt, AssetGuard, AssetHandle, ReloadWatcher};
use tracing::warn;
// Re-export because I don't like prefix
use common_assets as assets;
// Re-export for argument creation
pub use fluent::fluent_args;
pub use fluent_bundle::FluentArgs;
/// The reference language, aka the more up-to-date localization data.
/// Also the default language at first startup.
@ -49,6 +48,7 @@ pub struct Font {
impl Font {
    /// Scale input size to final UI size
    ///
    /// Multiplies by `scale_ratio` and rounds to the nearest integer.
    #[must_use]
    pub fn scale(&self, value: u32) -> u32 { (value as f32 * self.scale_ratio).round() as u32 }
}
@ -56,21 +56,14 @@ impl Font {
pub type Fonts = HashMap<String, Font>;
/// Store internationalization data
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Language {
/// A map storing the localized texts
///
/// Localized content can be accessed using a String key.
pub(crate) string_map: HashMap<String, String>,
/// A map for storing variations of localized texts, for example multiple
/// ways of saying "Help, I'm under attack". Used primarily for npc
/// dialogue.
pub(crate) vector_map: HashMap<String, Vec<String>>,
/// The bundle storing all localized texts
pub(crate) bundle: FluentBundle<FluentResource, IntlLangMemoizer>,
/// Whether to convert the input text encoded in UTF-8
/// into a ASCII version by using the `deunicode` crate.
pub(crate) convert_utf8_to_ascii: bool,
// FIXME (i18n convert_utf8_to_ascii):
#[allow(dead_code)]
convert_utf8_to_ascii: bool,
/// Font configuration is stored here
pub(crate) fonts: Fonts,
@ -79,68 +72,172 @@ struct Language {
}
impl Language {
/// Get a localized text from the given key
pub fn get(&self, key: &str) -> Option<&str> { self.string_map.get(key).map(String::as_str) }
fn try_msg<'a>(&'a self, key: &str, args: Option<&'a FluentArgs>) -> Option<Cow<str>> {
let bundle = &self.bundle;
let msg = bundle.get_message(key)?;
let mut errs = Vec::new();
let msg = bundle.format_pattern(msg.value()?, args, &mut errs);
for err in errs {
eprintln!("err: {err} for {key}");
}
/// Get a variation of localized text from the given key
///
/// `index` should be a random number from `0` to `u16::max()`
pub fn get_variation(&self, key: &str, index: u16) -> Option<&str> {
self.vector_map.get(key).and_then(|v| {
if v.is_empty() {
None
} else {
Some(v[index as usize % v.len()].as_str())
}
})
Some(msg)
}
fn try_collect_attrs<'a>(
&'a self,
key: &str,
args: Option<&'a FluentArgs>,
) -> Option<Vec<Cow<str>>> {
let bundle = &self.bundle;
let msg = bundle.get_message(key)?;
let mut errs = Vec::new();
let mut attrs = Vec::new();
for attr in msg.attributes() {
let msg = bundle.format_pattern(attr.value(), args, &mut errs);
attrs.push(msg);
}
for err in errs {
eprintln!("err: {err} for {key}");
}
Some(attrs)
}
fn try_variation<'a>(
&'a self,
key: &str,
seed: u16,
args: Option<&'a FluentArgs>,
) -> Option<Cow<'a, str>> {
let mut attrs = self.try_collect_attrs(key, args)?;
if attrs.is_empty() {
None
} else {
let variation = attrs.swap_remove(usize::from(seed) % attrs.len());
Some(variation)
}
}
}
impl common_assets::Compound for Language {
fn load(
cache: common_assets::AnyCache,
asset_key: &str,
) -> Result<Self, common_assets::BoxedError> {
impl assets::Compound for Language {
fn load(cache: assets::AnyCache, path: &str) -> Result<Self, assets::BoxedError> {
let manifest = cache
.load::<RawManifest>(&[asset_key, ".", LANG_MANIFEST_FILE].concat())?
.load::<raw::Manifest>(&[path, ".", "_manifest"].concat())?
.cloned();
let raw::Manifest {
convert_utf8_to_ascii,
fonts,
metadata,
} = manifest;
// Walk through files in the folder, collecting localization fragment to merge
// inside the asked_localization
let mut fragments = HashMap::new();
for id in cache
.load_dir::<RawFragment<String>>(asset_key, true)?
.ids()
{
// Don't try to load manifests
if let Some(id) = id.strip_suffix(LANG_MANIFEST_FILE) {
if id.ends_with('.') {
continue;
}
let lang_id: LanguageIdentifier = metadata.language_identifier.parse()?;
let mut bundle = FluentBundle::new_concurrent(vec![lang_id]);
// Here go dragons
for id in cache.load_dir::<raw::Resource>(path, true)?.ids() {
if id.ends_with("_manifest") {
continue;
}
match cache.load(id) {
Ok(handle) => {
let fragment: &RawFragment<String> = &*handle.read();
use std::{error::Error, fmt, ops::Range};
fragments.insert(PathBuf::from(id), fragment.clone());
#[derive(Debug)]
struct Pos {
#[allow(dead_code)] // false-positive
line: usize,
#[allow(dead_code)] // false-positive
character: usize,
}
fn unspan(src: &str, span: Range<usize>) -> Range<Pos> {
let count = |idx| {
let mut line = 1;
let mut character = 1;
for ch in src.bytes().take(idx) {
// Count characters
character += 1;
// Count newlines
if ch == b'\n' {
line += 1;
// If found new line, reset character count
character = 1;
}
}
Pos { line, character }
};
let Range { start, end } = span;
count(start)..count(end)
}
// TODO:
// better error handling?
#[derive(Debug)]
enum ResourceErr {
ParsingError {
#[allow(dead_code)] // false-positive
file: String,
#[allow(dead_code)] // false-positive
err: String,
},
BundleError(String),
}
impl fmt::Display for ResourceErr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{self:?}")
}
}
impl Error for ResourceErr {}
let source: &raw::Resource = &*handle.read();
let resource =
FluentResource::try_new(source.src.clone()).map_err(|(_ast, errs)| {
let file = id.to_owned();
let errs = errs
.into_iter()
.map(|e| {
let pos = unspan(&source.src, e.pos);
format!("{pos:?}, kind {:?}", e.kind)
})
.collect::<Vec<_>>();
ResourceErr::ParsingError {
file,
err: format!("{errs:?}"),
}
})?;
bundle
.add_resource(resource)
.map_err(|e| ResourceErr::BundleError(format!("{e:?}")))?;
},
Err(e) => {
warn!("Unable to load asset {}, error={:?}", id, e);
Err(err) => {
// TODO: shouldn't we just panic here?
warn!("Unable to load asset {id}, error={err:?}");
},
}
}
Ok(Language::from(RawLanguage {
manifest,
fragments,
}))
Ok(Self {
bundle,
convert_utf8_to_ascii,
fonts,
metadata,
})
}
}
/// the central data structure to handle localization in veloren
// inherit Copy+Clone from AssetHandle
#[derive(Debug, Copy, Clone)]
/// The central data structure to handle localization in Veloren
// inherit Copy + Clone from AssetHandle (what?)
#[derive(Copy, Clone)]
pub struct LocalizationHandle {
active: AssetHandle<Language>,
watcher: ReloadWatcher,
@ -148,24 +245,46 @@ pub struct LocalizationHandle {
pub use_english_fallback: bool,
}
// RAII guard returned from Localization::read(), resembles AssetGuard
/// Read `LocalizationGuard`
// arbitrary choice to minimize changing all of veloren
pub type Localization = LocalizationGuard;
/// RAII guard returned from `Localization::read`(), resembles `AssetGuard`
pub struct LocalizationGuard {
active: AssetGuard<Language>,
fallback: Option<AssetGuard<Language>>,
}
// arbitrary choice to minimize changing all of veloren
pub type Localization = LocalizationGuard;
impl LocalizationGuard {
/// DEPRECATED
///
/// Get a localized text from the given key
///
/// First lookup is done in the active language, second in
/// the fallback (if present).
pub fn get_opt(&self, key: &str) -> Option<&str> {
/// If the key is not present in the localization object
/// then the key itself is returned.
///
/// NOTE: this function shouldn't be used in new code.
/// It is kept for compatibility with old code that uses
/// old style dot-separated keys and this function internally
/// replaces them with dashes.
// FIXME (i18n old style keys):
// this is deprecated, fix all usages of this asap
pub fn get(&self, key: &str) -> Cow<str> {
    // Old-style keys use `.` as a separator; Fluent's informal convention
    // is `-`, so translate the key before doing the lookup.
    let fluent_key = key.replace('.', "-");
    self.get_msg(&fluent_key)
}
/// Get a localized text from the given key
///
/// First lookup is done in the active language, second in
/// the fallback (if present).
pub fn try_msg(&self, key: &str) -> Option<Cow<str>> {
self.active
.get(key)
.or_else(|| self.fallback.as_ref().and_then(|f| f.get(key)))
.try_msg(key, None)
.or_else(|| self.fallback.as_ref().and_then(|fb| fb.try_msg(key, None)))
}
/// Get a localized text from the given key
@ -173,76 +292,95 @@ impl LocalizationGuard {
/// First lookup is done in the active language, second in
/// the fallback (if present).
/// If the key is not present in the localization object
/// then the key is returned.
pub fn get<'a>(&'a self, key: &'a str) -> &str { self.get_opt(key).unwrap_or(key) }
/// then the key itself is returned.
pub fn get_msg(&self, key: &str) -> Cow<str> {
    // Fall back to echoing the key itself when no translation exists.
    // The key is cloned in that case: borrowing it would tie the result's
    // lifetime to `key`, `self` and the result together, which would make
    // lookups with runtime-generated keys impossible.
    match self.try_msg(key) {
        Some(msg) => msg,
        None => Cow::Owned(key.to_owned()),
    }
}
/// Get a localized text from the given key
/// Get a localized text from the given key using given arguments
///
/// First lookup is done in the active language, second in
/// the fallback (if present).
pub fn get_or(&self, key: &str, fallback_key: &str) -> Option<&str> {
self.get_opt(key).or_else(|| self.get_opt(fallback_key))
pub fn try_msg_ctx<'a>(&'a self, key: &str, args: &'a FluentArgs) -> Option<Cow<'static, str>> {
    // Formatting with arguments always yields an owned value (the pattern
    // must be cloned to interpolate the args), so the conversion to
    // Cow::Owned below is effectively a no-op.  We still return Cow (and
    // not String) to keep the i18n API consistent.
    let msg = match self.active.try_msg(key, Some(args)) {
        Some(m) => Some(m),
        None => self
            .fallback
            .as_ref()
            .and_then(|fb| fb.try_msg(key, Some(args))),
    };
    msg.map(|m| {
        // NOTE:
        // Hack. Remove Unicode Directionality Marks, conrod doesn't support them.
        Cow::Owned(m.replace('\u{2068}', "").replace('\u{2069}', ""))
    })
}
/// Get a variation of localized text from the given key
///
/// `index` should be a random number from `0` to `u16::max()`
/// Get a localized text from the given key using given arguments
///
/// First lookup is done in the active language, second in
/// the fallback (if present).
/// If the key is not present in the localization object
/// then the key is returned.
pub fn get_variation<'a>(&'a self, key: &'a str, index: u16) -> &str {
self.active.get_variation(key, index).unwrap_or_else(|| {
/// then the key itself is returned.
pub fn get_msg_ctx<'a>(&'a self, key: &str, args: &'a FluentArgs) -> Cow<'static, str> {
    // Degrade gracefully: when neither the active nor the fallback
    // language knows the key, echo the key itself back.
    match self.try_msg_ctx(key, args) {
        Some(msg) => msg,
        None => Cow::Owned(key.to_owned()),
    }
}
pub fn try_variation(&self, key: &str, seed: u16) -> Option<Cow<str>> {
self.active.try_variation(key, seed, None).or_else(|| {
self.fallback
.as_ref()
.and_then(|f| f.get_variation(key, index))
.unwrap_or(key)
.and_then(|fb| fb.try_variation(key, seed, None))
})
}
/// Return the missing keys compared to the reference language
fn list_missing_entries(&self) -> (HashSet<String>, HashSet<String>) {
if let Some(ref_lang) = &self.fallback {
let reference_string_keys: HashSet<_> = ref_lang.string_map.keys().cloned().collect();
let string_keys: HashSet<_> = self.active.string_map.keys().cloned().collect();
let strings = reference_string_keys
.difference(&string_keys)
.cloned()
.collect();
let reference_vector_keys: HashSet<_> = ref_lang.vector_map.keys().cloned().collect();
let vector_keys: HashSet<_> = self.active.vector_map.keys().cloned().collect();
let vectors = reference_vector_keys
.difference(&vector_keys)
.cloned()
.collect();
(strings, vectors)
} else {
(HashSet::default(), HashSet::default())
}
pub fn get_variation(&self, key: &str, seed: u16) -> Cow<str> {
    // Missing translations degrade to the raw key (owned, so the result
    // doesn't borrow from `key`).
    match self.try_variation(key, seed) {
        Some(msg) => msg,
        None => Cow::Owned(key.to_owned()),
    }
}
/// Log missing entries (compared to the reference language) as warnings
pub fn log_missing_entries(&self) {
let (missing_strings, missing_vectors) = self.list_missing_entries();
for missing_key in missing_strings {
warn!(
"[{:?}] Missing string key {:?}",
self.metadata().language_identifier,
missing_key
);
}
for missing_key in missing_vectors {
warn!(
"[{:?}] Missing vector key {:?}",
self.metadata().language_identifier,
missing_key
);
}
pub fn try_variation_ctx<'a>(
    &'a self,
    key: &str,
    seed: u16,
    args: &'a FluentArgs,
) -> Option<Cow<str>> {
    // Active language first, then the fallback (if any).
    let raw = self
        .active
        .try_variation(key, seed, Some(args))
        .or_else(|| {
            self.fallback
                .as_ref()
                .and_then(|fb| fb.try_variation(key, seed, Some(args)))
        });
    raw.map(|msg| {
        // NOTE:
        // Hack. Remove Unicode Directionality Marks, conrod doesn't support them.
        Cow::Owned(msg.replace('\u{2068}', "").replace('\u{2069}', ""))
    })
}
pub fn get_variation_ctx<'a>(&'a self, key: &str, seed: u16, args: &'a FluentArgs) -> Cow<str> {
    // Echo the key back when no translation is found in either language.
    self.try_variation_ctx(key, seed, args)
        .map_or_else(|| Cow::Owned(key.to_owned()), |msg| msg)
}
/// Font configuration of the active language.
#[must_use]
pub fn fonts(&self) -> &Fonts { &self.active.fonts }

/// Metadata of the active language.
#[must_use]
pub fn metadata(&self) -> &LanguageMetadata { &self.active.metadata }
}
@ -251,6 +389,7 @@ impl LocalizationHandle {
self.use_english_fallback = use_english_fallback;
}
#[must_use]
pub fn read(&self) -> LocalizationGuard {
LocalizationGuard {
active: self.active.read(),
@ -262,7 +401,9 @@ impl LocalizationHandle {
}
}
pub fn load(specifier: &str) -> Result<Self, common_assets::Error> {
/// # Errors
/// Returns error if active language can't be loaded
pub fn load(specifier: &str) -> Result<Self, assets::Error> {
let default_key = ["voxygen.i18n.", REFERENCE_LANG].concat();
let language_key = ["voxygen.i18n.", specifier].concat();
let is_default = language_key == default_key;
@ -273,12 +414,14 @@ impl LocalizationHandle {
fallback: if is_default {
None
} else {
// TODO: shouldn't this panic?
Language::load(&default_key).ok()
},
use_english_fallback: false,
})
}
/// Like `load`, but panics when the language files can't be loaded.
#[must_use]
pub fn load_expect(specifier: &str) -> Self {
    Self::load(specifier).expect("Can't load language files")
}
@ -288,17 +431,18 @@ impl LocalizationHandle {
struct FindManifests;
impl common_assets::DirLoadable for FindManifests {
fn select_ids<S: common_assets::Source + ?Sized>(
impl assets::DirLoadable for FindManifests {
fn select_ids<S: assets::Source + ?Sized>(
source: &S,
specifier: &str,
) -> io::Result<Vec<common_assets::SharedString>> {
) -> io::Result<Vec<assets::SharedString>> {
let mut specifiers = Vec::new();
source.read_dir(specifier, &mut |entry| {
if let DirEntry::Directory(spec) = entry {
let manifest_spec = [spec, ".", LANG_MANIFEST_FILE].concat();
if source.exists(DirEntry::File(&manifest_spec, LANG_EXTENSION)) {
let manifest_spec = [spec, ".", "_manifest"].concat();
if source.exists(DirEntry::File(&manifest_spec, "ron")) {
specifiers.push(manifest_spec.into());
}
}
@ -311,16 +455,13 @@ impl common_assets::DirLoadable for FindManifests {
#[derive(Clone, Debug)]
struct LocalizationList(Vec<LanguageMetadata>);
impl common_assets::Compound for LocalizationList {
fn load(
cache: common_assets::AnyCache,
specifier: &str,
) -> Result<Self, common_assets::BoxedError> {
impl assets::Compound for LocalizationList {
fn load(cache: assets::AnyCache, specifier: &str) -> Result<Self, assets::BoxedError> {
// List language directories
let languages = common_assets::load_dir::<FindManifests>(specifier, false)
let languages = assets::load_dir::<FindManifests>(specifier, false)
.unwrap_or_else(|e| panic!("Failed to get manifests from {}: {:?}", specifier, e))
.ids()
.filter_map(|spec| cache.load::<RawManifest>(spec).ok())
.filter_map(|spec| cache.load::<raw::Manifest>(spec).ok())
.map(|localization| localization.read().metadata.clone())
.collect();
@ -329,42 +470,49 @@ impl common_assets::Compound for LocalizationList {
}
/// Load all the available languages located in the voxygen asset directory
#[must_use]
pub fn list_localizations() -> Vec<LanguageMetadata> {
LocalizationList::load_expect_cloned("voxygen.i18n").0
let LocalizationList(list) = LocalizationList::load_expect_cloned("voxygen.i18n");
list
}
#[cfg(test)]
mod tests {
use crate::path::BasePath;
use super::*;
// Test that localization list is loaded (not empty)
#[test]
fn test_localization_list() {
let list = super::list_localizations();
fn check_localization_list() {
let list = list_localizations();
assert!(!list.is_empty());
}
// Test that reference language can be loaded
#[test]
fn test_localization_handle() {
let _ = super::LocalizationHandle::load_expect(super::REFERENCE_LANG);
fn validate_reference_language() { let _ = LocalizationHandle::load_expect(REFERENCE_LANG); }
#[test]
fn validate_all_localizations() {
let list = list_localizations();
for meta in list {
let _ = LocalizationHandle::load_expect(&meta.language_identifier);
}
}
// Test to verify all languages that they are VALID and loadable, without
// need of git just on the local assets folder
#[test]
fn verify_all_localizations() {
// Generate paths
let root_dir = common_assets::find_root().expect("Failed to discover repository root");
crate::verification::verify_all_localizations(&BasePath::new(&root_dir));
}
// Test to verify all languages and print missing and faulty localisation
#[test]
#[ignore]
// Generate translation stats
fn test_all_localizations() {
// Generate paths
let root_dir = common_assets::find_root().expect("Failed to discover repository root");
crate::analysis::test_all_localizations(&BasePath::new(&root_dir), true, true);
// FIXME (i18n translation stats):
use std::{fs, io::Write};
let output = assets::find_root()
.unwrap()
.join("translation_analysis.csv");
let mut f = fs::File::create(output).expect("couldn't write csv file");
writeln!(
f,
"country_code,file_name,translation_key,status,git_commit"
)
.unwrap();
}
}

View File

@ -1,141 +0,0 @@
use std::path::{Path, PathBuf};
/// File stem of the per-language manifest file (`_manifest.ron`).
pub(crate) const LANG_MANIFEST_FILE: &str = "_manifest";
/// File extension used by all localization files.
pub(crate) const LANG_EXTENSION: &str = "ron";
#[derive(Clone)]
pub struct BasePath {
    /// Repository root (the main git folder).
    root_path: PathBuf,
    /// Path of the i18n root relative to `root_path`;
    /// currently `assets/voxygen/i18n`.
    relative_i18n_root_path: PathBuf,
    /// Cached absolute i18n root folder
    /// (`root_path` joined with `relative_i18n_root_path`).
    cache: PathBuf,
}
impl BasePath {
pub fn new(root_path: &Path) -> Self {
let relative_i18n_root_path = Path::new("assets/voxygen/i18n").to_path_buf();
let cache = root_path.join(&relative_i18n_root_path);
assert!(
cache.is_dir(),
"i18n_root_path folder doesn't exist, something is wrong!"
);
Self {
root_path: root_path.to_path_buf(),
relative_i18n_root_path,
cache,
}
}
pub fn root_path(&self) -> &Path { &self.root_path }
pub fn relative_i18n_root_path(&self) -> &Path { &self.relative_i18n_root_path }
/// absolute path to `relative_i18n_root_path`
pub fn i18n_root_path(&self) -> &Path { &self.cache }
pub fn i18n_path(&self, language_identifier: &str) -> LangPath {
LangPath::new(self, language_identifier)
}
/// List localization directories
pub fn i18n_directories(&self) -> Vec<LangPath> {
std::fs::read_dir(&self.cache)
.unwrap()
.map(|res| res.unwrap())
.filter(|e| e.file_type().unwrap().is_dir())
.map(|e| LangPath::new(self, e.file_name().to_str().unwrap()))
.collect()
}
}
impl core::fmt::Debug for BasePath {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Only the cached absolute i18n root is interesting for debugging;
        // the other fields are derivable from it.
        let shown = &self.cache;
        write!(f, "{:?}", shown)
    }
}
#[derive(Clone)]
pub struct LangPath {
    /// The repository/i18n roots this language lives under.
    base: BasePath,
    /// `en`, `de_DE`, `fr_FR`, etc..
    language_identifier: String,
    /// Cached absolute path of this language's folder
    /// (`base.i18n_root_path()` + `language_identifier`).
    cache: PathBuf,
}
impl LangPath {
    /// Construct the path handle for `language_identifier` under `base`.
    ///
    /// # Panics
    /// Panics when the language folder does not exist.
    fn new(base: &BasePath, language_identifier: &str) -> Self {
        let cache = base.i18n_root_path().join(language_identifier);
        assert!(
            cache.is_dir(),
            "language folder '{}' doesn't exist",
            language_identifier
        );
        Self {
            base: base.clone(),
            language_identifier: language_identifier.to_owned(),
            cache,
        }
    }

    /// The `BasePath` this language belongs to.
    pub fn base(&self) -> &BasePath { &self.base }

    /// `en`, `de_DE`, `fr_FR`, etc..
    pub fn language_identifier(&self) -> &str { &self.language_identifier }

    /// Absolute path of this language's folder
    /// (`i18n_root_path` + `language_identifier`).
    pub fn i18n_path(&self) -> &Path { &self.cache }

    /// Fragment or manifest file, addressed by a relative path.
    pub fn sub_path(&self, sub_path: &Path) -> PathBuf { self.cache.join(sub_path) }

    /// Fragment or manifest file, addressed by a name without extension.
    pub fn file(&self, name_without_extension: &str) -> PathBuf {
        let file_name = format!("{}.{}", name_without_extension, LANG_EXTENSION);
        self.cache.join(file_name)
    }

    /// Collect the relative sub-paths of all fragment files of this
    /// language.
    pub(crate) fn fragments(&self) -> Result<Vec</* sub_path */ PathBuf>, std::io::Error> {
        let mut sub_paths = Vec::new();
        recursive_fragments_paths_in_language(self, Path::new(""), &mut sub_paths)?;
        Ok(sub_paths)
    }
}
// The `unwrap`s can't fail: every visited path lies inside `lpath.i18n_path()`.
fn recursive_fragments_paths_in_language(
    lpath: &LangPath,
    subfolder: &Path,
    result: &mut Vec<PathBuf>,
) -> Result<(), std::io::Error> {
    // Files that are never fragments and must be skipped.
    let manifest = PathBuf::from(format!("{}.{}", LANG_MANIFEST_FILE, LANG_EXTENSION));
    let template = PathBuf::from(format!("{}.{}", "template", LANG_EXTENSION));

    let dir = lpath.sub_path(subfolder);
    for entry in dir.read_dir()?.flatten() {
        let kind = entry.file_type()?;
        let absolute = entry.path();
        let relative = absolute.strip_prefix(lpath.i18n_path()).unwrap();
        if kind.is_dir() {
            // Descend into nested folders.
            recursive_fragments_paths_in_language(lpath, relative, result)?;
        } else if kind.is_file() && relative != manifest && relative != template {
            result.push(relative.to_path_buf());
        }
    }
    Ok(())
}
impl core::fmt::Debug for LangPath {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Render as the language folder path relative to the repo root.
        let shown = self
            .base
            .relative_i18n_root_path
            .join(&self.language_identifier);
        write!(f, "{:?}", shown)
    }
}

View File

@ -1,113 +1,36 @@
//! handle the loading of a `Language`
use crate::{
path::{LangPath, LANG_EXTENSION, LANG_MANIFEST_FILE},
Fonts, Language, LanguageMetadata,
};
use deunicode::deunicode;
use hashbrown::hash_map::HashMap;
use ron::de::from_reader;
use crate::{Fonts, LanguageMetadata};
use serde::{Deserialize, Serialize};
use std::{fs, path::PathBuf};
/// Raw localization metadata from LANG_MANIFEST_FILE file
use std::str::FromStr;
/// Localization metadata from manifest file
/// See `Language` for more info on each attributes
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
pub(crate) struct RawManifest {
pub(crate) struct Manifest {
pub(crate) convert_utf8_to_ascii: bool,
pub(crate) fonts: Fonts,
pub(crate) metadata: LanguageMetadata,
}
/// Raw localization data from one specific file
/// These structs are meant to be merged into a Language
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
pub(crate) struct RawFragment<T> {
pub(crate) string_map: HashMap<String, T>,
pub(crate) vector_map: HashMap<String, Vec<T>>,
// The manifest is stored as a RON asset file (`_manifest.ron`).
impl crate::assets::Asset for Manifest {
    type Loader = crate::assets::RonLoader;

    const EXTENSION: &'static str = "ron";
}
pub(crate) struct RawLanguage<T> {
pub(crate) manifest: RawManifest,
pub(crate) fragments: HashMap</* relative to i18n_path */ PathBuf, RawFragment<T>>,
/// Raw, unparsed content of one localization resource file.
#[derive(Clone)]
pub(crate) struct Resource {
    /// The file's source text, verbatim.
    pub(crate) src: String,
}
pub(crate) fn load_manifest(path: &LangPath) -> Result<RawManifest, common_assets::BoxedError> {
let manifest_file = path.file(LANG_MANIFEST_FILE);
tracing::debug!(?manifest_file, "manifest loading");
let f = fs::File::open(&manifest_file)?;
let manifest: RawManifest = from_reader(f)?;
// verify that the folder name `de_DE` matches the value inside the metadata!
assert_eq!(
manifest.metadata.language_identifier,
path.language_identifier()
);
Ok(manifest)
// A `Resource` is created verbatim from the file's text; this can't fail.
impl FromStr for Resource {
    type Err = std::convert::Infallible;

    fn from_str(s: &str) -> Result<Self, Self::Err> { Ok(Self { src: s.to_owned() }) }
}
pub(crate) fn load_raw_language(
path: &LangPath,
manifest: RawManifest,
) -> Result<RawLanguage<String>, common_assets::BoxedError> {
//get List of files
let files = path.fragments()?;
impl crate::assets::Asset for Resource {
type Loader = crate::assets::loader::ParseLoader;
// Walk through each file in the directory
let mut fragments = HashMap::new();
for sub_path in files {
let f = fs::File::open(path.sub_path(&sub_path))?;
let fragment = from_reader(f)?;
fragments.insert(sub_path, fragment);
}
Ok(RawLanguage {
manifest,
fragments,
})
}
impl From<RawLanguage<String>> for Language {
fn from(raw: RawLanguage<String>) -> Self {
let mut string_map = HashMap::new();
let mut vector_map = HashMap::new();
for (_, fragment) in raw.fragments {
string_map.extend(fragment.string_map);
vector_map.extend(fragment.vector_map);
}
let convert_utf8_to_ascii = raw.manifest.convert_utf8_to_ascii;
// Update the text if UTF-8 to ASCII conversion is enabled
if convert_utf8_to_ascii {
for value in string_map.values_mut() {
*value = deunicode(value);
}
for value in vector_map.values_mut() {
*value = value.iter().map(|s| deunicode(s)).collect();
}
}
let mut metadata = raw.manifest.metadata;
metadata.language_name = deunicode(&metadata.language_name);
Self {
string_map,
vector_map,
convert_utf8_to_ascii,
fonts: raw.manifest.fonts,
metadata,
}
}
}
impl common_assets::Asset for RawManifest {
type Loader = common_assets::RonLoader;
const EXTENSION: &'static str = LANG_EXTENSION;
}
impl common_assets::Asset for RawFragment<String> {
type Loader = common_assets::RonLoader;
const EXTENSION: &'static str = LANG_EXTENSION;
const EXTENSION: &'static str = "ftl";
}

View File

@ -1,199 +0,0 @@
use crate::{
gitfragments::{LocalizationEntryState, LocalizationState, ALL_LOCALIZATION_STATES},
raw::RawLanguage,
};
use hashbrown::HashMap;
use std::path::PathBuf;
/// Per-language counters summarising the translation state.
#[derive(Default, Debug, PartialEq)]
pub(crate) struct LocalizationStats {
    /// Keys considered up to date with the reference language.
    pub(crate) uptodate_entries: usize,
    /// Keys not found, i.e. untranslated.
    pub(crate) notfound_entries: usize,
    /// Keys present here but unused by the reference language.
    pub(crate) unused_entries: usize,
    /// Keys whose translation is outdated.
    pub(crate) outdated_entries: usize,
    /// Keys whose state could not be determined
    /// (printed as "unknown entries" in the summary).
    pub(crate) errors: usize,
}
/// Per-state listing of the entries of one language.
pub(crate) struct LocalizationAnalysis {
    /// Language this analysis belongs to (e.g. `de_DE`).
    language_identifier: String,
    /// For each `LocalizationState` (`None` = unknown state): the entries
    /// in that state, as (fragment sub-path, key, commit id) tuples.
    pub(crate) data: HashMap<Option<LocalizationState>, Vec<(PathBuf, String, Option<git2::Oid>)>>,
}
impl LocalizationStats {
    /// Calculate the key count that actually matters for the status of
    /// the translation. Unused entries are excluded: they don't break
    /// the game.
    pub(crate) fn get_real_entry_count(&self) -> usize {
        [
            self.uptodate_entries,
            self.outdated_entries,
            self.notfound_entries,
            self.errors,
        ]
        .iter()
        .sum()
    }
}
impl LocalizationAnalysis {
    /// Create an empty analysis for `language_identifier`, with a bucket
    /// for every possible localization state.
    pub(crate) fn new(language_identifier: &str) -> Self {
        let mut data = HashMap::new();
        for key in ALL_LOCALIZATION_STATES.iter() {
            data.insert(*key, vec![]);
        }
        Self {
            language_identifier: language_identifier.to_owned(),
            data,
        }
    }

    /// Print every entry recorded for `state` to `output`.
    ///
    /// When `be_verbose` is set, each row also shows this language's
    /// commit and the reference language's commit for the key.
    ///
    /// # Panics
    /// Panics when `state` has no bucket in `data` (i.e. it is not one of
    /// `ALL_LOCALIZATION_STATES`), or on write errors.
    fn show<W: std::io::Write>(
        &self,
        state: Option<LocalizationState>,
        ref_language: &RawLanguage<LocalizationEntryState>,
        be_verbose: bool,
        output: &mut W,
    ) {
        let entries = self.data.get(&state).unwrap_or_else(|| {
            panic!(
                "called on invalid state: {}",
                LocalizationState::print(&state)
            )
        });
        // Skip the section header entirely when there is nothing to report.
        if entries.is_empty() {
            return;
        }
        writeln!(output, "\n\t[{}]", LocalizationState::print(&state)).unwrap();
        for (path, key, commit_id) in entries {
            if be_verbose {
                let our_commit = LocalizationAnalysis::print_commit(commit_id);
                // Commit recorded for the same key in the reference language.
                let ref_commit = ref_language
                    .fragments
                    .get(path)
                    .and_then(|entry| entry.string_map.get(key))
                    .and_then(|s| s.commit_id)
                    .map(|s| format!("{}", s))
                    .unwrap_or_else(|| "None".to_owned());
                writeln!(output, "{:60}| {:40} | {:40}", key, our_commit, ref_commit).unwrap();
            } else {
                writeln!(output, "{}", key).unwrap();
            }
        }
    }

    /// Write every entry recorded for `state` to `output` as CSV rows:
    /// `language,path,key,state,commit`.
    ///
    /// # Panics
    /// Panics when `state` has no bucket in `data`, or on write errors.
    fn csv<W: std::io::Write>(&self, state: Option<LocalizationState>, output: &mut W) {
        let entries = self
            .data
            .get(&state)
            .unwrap_or_else(|| panic!("called on invalid state: {:?}", state));
        for (path, key, commit_id) in entries {
            let our_commit = LocalizationAnalysis::print_commit(commit_id);
            writeln!(
                output,
                "{},{:?},{},{},{}",
                self.language_identifier,
                path,
                key,
                LocalizationState::print(&state),
                our_commit
            )
            .unwrap();
        }
    }

    /// Format an optional commit id, rendering `None` as the string `"None"`.
    fn print_commit(commit_id: &Option<git2::Oid>) -> String {
        commit_id
            .map(|s| format!("{}", s))
            .unwrap_or_else(|| "None".to_owned())
    }
}
/// Print a per-language translation-status report to stdout.
///
/// Lists every non-up-to-date entry (via `LocalizationAnalysis::show`),
/// followed by absolute counts and percentages.
pub(crate) fn print_translation_stats(
    language_identifier: &str,
    ref_language: &RawLanguage<LocalizationEntryState>,
    stats: &LocalizationStats,
    state_map: &LocalizationAnalysis,
    be_verbose: bool,
) {
    // Percentages are relative to the entries that actually matter:
    // unused entries are excluded by `get_real_entry_count`.
    let real_entry_count = stats.get_real_entry_count() as f32;
    let uptodate_percent = (stats.uptodate_entries as f32 / real_entry_count) * 100_f32;
    let outdated_percent = (stats.outdated_entries as f32 / real_entry_count) * 100_f32;
    let untranslated_percent =
        ((stats.notfound_entries + stats.errors) as f32 / real_entry_count) * 100_f32;

    // Display
    if be_verbose {
        println!(
            "\n{:60}| {:40} | {:40}",
            "Key name", language_identifier, ref_language.manifest.metadata.language_identifier,
        );
    } else {
        println!("\nKey name");
    }

    for state in &ALL_LOCALIZATION_STATES {
        // Up-to-date entries are only counted below, never listed.
        if state == &Some(LocalizationState::UpToDate) {
            continue;
        }
        state_map.show(*state, ref_language, be_verbose, &mut std::io::stdout());
    }

    println!(
        "\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries",
        stats.uptodate_entries,
        stats.outdated_entries,
        stats.unused_entries,
        stats.notfound_entries,
        stats.errors,
    );

    println!(
        "{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n",
        uptodate_percent, outdated_percent, untranslated_percent,
    );
}
/// Dump every state bucket of `state_map` as CSV rows into `output`.
pub(crate) fn print_csv_stats<W: std::io::Write>(state_map: &LocalizationAnalysis, output: &mut W) {
    ALL_LOCALIZATION_STATES
        .iter()
        .for_each(|state| state_map.csv(*state, output));
}
/// Print an overall, cross-language translation-status table to stdout.
///
/// Rows are sorted by each language's number of not-found keys.
pub(crate) fn print_overall_stats(
    analysis: HashMap<String, (LocalizationAnalysis, LocalizationStats)>,
) {
    // Running totals over all languages.
    let mut overall_uptodate_entry_count = 0;
    let mut overall_outdated_entry_count = 0;
    let mut overall_untranslated_entry_count = 0;
    let mut overall_real_entry_count = 0;
    println!("-----------------------------------------------------------------------------");
    println!("Overall Translation Status");
    println!("-----------------------------------------------------------------------------");
    println!(
        "{:12}| {:8} | {:8} | {:8} | {:8} | {:8}",
        "", "up-to-date", "outdated", "untranslated", "unused", "errors",
    );
    // Sort so the languages with the most untranslated keys come last.
    let mut i18n_stats: Vec<(&String, &(_, LocalizationStats))> = analysis.iter().collect();
    i18n_stats.sort_by_key(|(_, (_, v))| v.notfound_entries);
    for (path, (_, test_result)) in i18n_stats {
        let LocalizationStats {
            uptodate_entries: uptodate,
            outdated_entries: outdated,
            unused_entries: unused,
            notfound_entries: untranslated,
            errors,
        } = test_result;
        overall_uptodate_entry_count += uptodate;
        overall_outdated_entry_count += outdated;
        overall_untranslated_entry_count += untranslated;
        // Percentages below exclude unused entries (see get_real_entry_count).
        overall_real_entry_count += test_result.get_real_entry_count();
        println!(
            "{:12}|{:8} |{:6} |{:8} |{:6} |{:8}",
            path, uptodate, outdated, untranslated, unused, errors,
        );
    }
    println!(
        "\n{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated",
        (overall_uptodate_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
        (overall_outdated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
        (overall_untranslated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
    );
    println!("-----------------------------------------------------------------------------\n");
}

View File

@ -1,34 +0,0 @@
use crate::path::{BasePath, LangPath, LANG_MANIFEST_FILE};
use crate::{raw, REFERENCE_LANG};
/// Test to verify all languages that they are VALID and loadable, without
/// need of git just on the local assets folder
/// Test to verify all languages that they are VALID and loadable, without
/// need of git just on the local assets folder
pub fn verify_all_localizations(path: &BasePath) {
    // The reference language must always ship a manifest.
    let ref_manifest = path.i18n_path(REFERENCE_LANG).file(LANG_MANIFEST_FILE);
    assert!(
        ref_manifest.is_file(),
        "Reference language manifest file doesn't exist, something is wrong!"
    );

    let i18n_directories = path.i18n_directories();
    // This simple check ONLY guarantees that an arbitrary minimum of
    // translation files exists: it catches unintentional deletion of all
    // files or broken paths. If you want to delete all languages you have
    // to adjust this number:
    assert!(
        i18n_directories.len() > 5,
        "have less than 5 translation folders, arbitrary minimum check failed. Maybe the i18n \
         folder is empty?"
    );
    for i18n_directory in i18n_directories {
        println!("verifying {:?}", i18n_directory);
        // Walk through every file of the language and try to load it.
        verify_localization_directory(&i18n_directory);
    }
}
fn verify_localization_directory(path: &LangPath) {
let manifest = raw::load_manifest(path).expect("error accessing manifest file");
raw::load_raw_language(path, manifest).expect("error accessing fragment file");
}