Merge branch 'juliancoffee/improve_i18n_check' into 'master'

Refactor i18n analysis code and improve UX of testing binary

See merge request veloren/veloren!2289
This commit is contained in:
Marcel 2021-05-10 22:13:01 +00:00
commit 217462805d
8 changed files with 451 additions and 426 deletions

1
Cargo.lock generated
View File

@ -5644,6 +5644,7 @@ dependencies = [
name = "veloren-i18n" name = "veloren-i18n"
version = "0.9.0" version = "0.9.0"
dependencies = [ dependencies = [
"clap",
"deunicode", "deunicode",
"git2", "git2",
"hashbrown", "hashbrown",

View File

@ -7,6 +7,7 @@ version = "0.9.0"
[[bin]] [[bin]]
name = "i18n-check" name = "i18n-check"
required-features = ["bin"]
[dependencies] [dependencies]
# Assets # Assets
@ -14,8 +15,16 @@ hashbrown = { version = "0.9", features = ["serde", "nightly"] }
common-assets = {package = "veloren-common-assets", path = "../../common/assets"} common-assets = {package = "veloren-common-assets", path = "../../common/assets"}
deunicode = "1.0" deunicode = "1.0"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
# Diagnostic
git2 = { version = "0.12", default-features = false }
ron = "0.6"
tracing = "0.1" tracing = "0.1"
# Diagnostic
ron = "0.6"
git2 = { version = "0.12", default-features = false, optional = true }
# Binary
clap = { version = "2.33", features = ["suggestions"], default-features = false, optional = true }
[dev-dependencies]
git2 = { version = "0.12", default-features = false }
[features]
bin = ["git2", "clap"]

View File

@ -1,8 +1,6 @@
# Usage # Usage
Get diagnostic for specific language <br/> `$ cargo run --features=bin -- --help` <br/>
`$ cargo run --bin i18n-check -- --lang <lang_code>` <br/> (Or if somewhere else in the repo) <br/>
Test all languages <br/> `$ cargo run -p veloren-i18n --features=bin -- --help` <br/>
`$ cargo run --bin i18n-check -- --all` For example, diagnostic for specific language <br/>
Verify all directories <br/> `$ cargo run -p veloren-i18n --features=bin -- <lang_code>` <br/>
`$ cargo run --bin i18n-check -- --verify`

View File

@ -1,136 +1,12 @@
use ron::de::{from_bytes, from_reader}; use ron::de::from_bytes;
use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf};
use std::{
fs,
path::{Path, PathBuf},
};
use crate::data::{
i18n_directories, LocalizationFragment, RawLocalization, LANG_MANIFEST_FILE, REFERENCE_LANG,
};
use hashbrown::{HashMap, HashSet}; use hashbrown::{HashMap, HashSet};
/// The reference language, aka the more up-to-date localization data. Also the #[derive(Copy, Clone, Eq, Hash, Debug, PartialEq)]
/// default language at first startup.
const REFERENCE_LANG: &str = "en";
const LANG_MANIFEST_FILE: &str = "_manifest";
/// How a language can be described
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
struct LanguageMetadata {
/// A human friendly language name (e.g. "English (US)")
language_name: String,
/// A short text identifier for this language (e.g. "en_US")
///
/// On the opposite of `language_name` that can change freely,
/// `language_identifier` value shall be stable in time as it
/// is used by setting components to store the language
/// selected by the user.
language_identifier: String,
}
/// Store font metadata
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
struct Font {
/// Key to retrieve the font in the asset system
asset_key: String,
/// Scale ratio to resize the UI text dynamicly
scale_ratio: f32,
}
/// Store font metadata
type Fonts = HashMap<String, Font>;
/// Raw localization data, expect the strings to not be loaded here
/// However, metadata informations are correct
/// See `Localization` for more info on each attributes
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
struct RawLocalization {
sub_directories: Vec<String>,
string_map: HashMap<String, String>,
vector_map: HashMap<String, Vec<String>>,
convert_utf8_to_ascii: bool,
fonts: Fonts,
metadata: LanguageMetadata,
}
/// Store internationalization data
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Localization {
/// A list of subdirectories to lookup for localization files
sub_directories: Vec<String>,
/// A map storing the localized texts
///
/// Localized content can be accessed using a String key.
string_map: HashMap<String, String>,
/// A map for storing variations of localized texts, for example multiple
/// ways of saying "Help, I'm under attack". Used primarily for npc
/// dialogue.
vector_map: HashMap<String, Vec<String>>,
/// Whether to convert the input text encoded in UTF-8
/// into a ASCII version by using the `deunicode` crate.
convert_utf8_to_ascii: bool,
/// Font configuration is stored here
fonts: Fonts,
metadata: LanguageMetadata,
}
/// Store internationalization maps
/// These structs are meant to be merged into a Localization
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct LocalizationFragment {
/// A map storing the localized texts
///
/// Localized content can be accessed using a String key.
string_map: HashMap<String, String>,
/// A map for storing variations of localized texts, for example multiple
/// ways of saying "Help, I'm under attack". Used primarily for npc
/// dialogue.
vector_map: HashMap<String, Vec<String>>,
}
impl Localization {}
impl From<RawLocalization> for Localization {
fn from(raw: RawLocalization) -> Self {
Self {
sub_directories: raw.sub_directories,
string_map: raw.string_map,
vector_map: raw.vector_map,
convert_utf8_to_ascii: raw.convert_utf8_to_ascii,
fonts: raw.fonts,
metadata: raw.metadata,
}
}
}
impl From<RawLocalization> for LocalizationFragment {
fn from(raw: RawLocalization) -> Self {
Self {
string_map: raw.string_map,
vector_map: raw.vector_map,
}
}
}
#[derive(Clone, Debug)]
struct LocalizationList(Vec<LanguageMetadata>);
/// List localization directories as a PathBuf vector
fn i18n_directories(i18n_dir: &Path) -> Vec<PathBuf> {
fs::read_dir(i18n_dir)
.unwrap()
.map(|res| res.map(|e| e.path()).unwrap())
.filter(|e| e.is_dir())
.collect()
}
#[derive(Eq, Hash, Debug, PartialEq)]
enum LocalizationState { enum LocalizationState {
UpToDate, UpToDate,
NotFound, NotFound,
@ -140,7 +16,7 @@ enum LocalizationState {
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
struct FindLocalization { struct LocalizationStats {
uptodate_entries: usize, uptodate_entries: usize,
outdated_entries: usize, outdated_entries: usize,
unused_entries: usize, unused_entries: usize,
@ -149,7 +25,61 @@ struct FindLocalization {
real_entry_count: usize, real_entry_count: usize,
} }
#[derive(Debug)] #[derive(Default)]
struct LocalizationAnalysis {
notfound: Vec<(String, Option<git2::Oid>)>,
unused: Vec<(String, Option<git2::Oid>)>,
outdated: Vec<(String, Option<git2::Oid>)>,
unknown: Vec<(String, Option<git2::Oid>)>,
}
impl LocalizationAnalysis {
fn get_mut(
&mut self,
state: LocalizationState,
) -> Option<&mut Vec<(String, Option<git2::Oid>)>> {
match state {
LocalizationState::NotFound => Some(&mut self.notfound),
LocalizationState::Unused => Some(&mut self.unused),
LocalizationState::Outdated => Some(&mut self.outdated),
LocalizationState::Unknown => Some(&mut self.unknown),
_ => None,
}
}
fn show(
&mut self,
state: LocalizationState,
be_verbose: bool,
ref_i18n_map: &HashMap<String, LocalizationEntryState>,
) {
let entries = self
.get_mut(state)
.unwrap_or_else(|| panic!("called on invalid state: {:?}", state));
if entries.is_empty() {
return;
}
println!("\n\t[{:?}]", state);
entries.sort();
for (key, commit_id) in entries {
if be_verbose {
let our_commit = commit_id
.map(|s| format!("{}", s))
.unwrap_or_else(|| "None".to_owned());
let ref_commit = ref_i18n_map
.get(key)
.and_then(|s| s.commit_id)
.map(|s| format!("{}", s))
.unwrap_or_else(|| "None".to_owned());
println!("{:60}| {:40} | {:40}", key, our_commit, ref_commit,);
} else {
println!("{}", key);
}
}
}
}
#[derive(Copy, Clone, Debug)]
struct LocalizationEntryState { struct LocalizationEntryState {
key_line: Option<usize>, key_line: Option<usize>,
chuck_line_range: Option<(usize, usize)>, chuck_line_range: Option<(usize, usize)>,
@ -220,11 +150,9 @@ fn generate_key_version<'a>(
.keys() .keys()
.map(|k| (k.to_owned(), LocalizationEntryState::new())) .map(|k| (k.to_owned(), LocalizationEntryState::new()))
.collect(); .collect();
let mut to_process: HashSet<&String> = localization.string_map.keys().collect();
// Find key start lines // Find key start lines
let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file"); let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file");
let mut to_process: HashSet<&String> = localization.string_map.keys().collect();
// Make the file hot
for (line_nb, line) in file_content.lines().enumerate() { for (line_nb, line) in file_content.lines().enumerate() {
let mut found_key = None; let mut found_key = None;
@ -290,12 +218,12 @@ fn complete_key_versions<'a>(
head_ref: &git2::Reference, head_ref: &git2::Reference,
i18n_key_versions: &mut HashMap<String, LocalizationEntryState>, i18n_key_versions: &mut HashMap<String, LocalizationEntryState>,
root_dir: &Path, root_dir: &Path,
asset_path: &Path, lang_dir: &Path,
) { ) {
//TODO: review unwraps in this file //TODO: review unwraps in this file
// For each file (if it's not a directory) in directory // For each file (if it's not a directory) in directory
for i18n_file in root_dir.join(&asset_path).read_dir().unwrap().flatten() { for i18n_file in root_dir.join(&lang_dir).read_dir().unwrap().flatten() {
if let Ok(file_type) = i18n_file.file_type() { if let Ok(file_type) = i18n_file.file_type() {
if file_type.is_file() { if file_type.is_file() {
println!("-> {:?}", i18n_file.file_name()); println!("-> {:?}", i18n_file.file_name());
@ -320,106 +248,54 @@ fn complete_key_versions<'a>(
} }
} }
fn verify_localization_directory(root_dir: &Path, directory_path: &Path) { fn gather_state(
// Walk through each file in the directory loc: &RawLocalization,
for i18n_file in root_dir.join(&directory_path).read_dir().unwrap().flatten() { i18n_blob: &git2::Blob,
if let Ok(file_type) = i18n_file.file_type() { ref_manifest: &Path,
// Skip folders and the manifest file (which does not contain the same struct we root_dir: &Path,
// want to load) lang_dir: &Path,
if file_type.is_file() repo: &git2::Repository,
&& i18n_file.file_name().to_string_lossy() head_ref: &git2::Reference,
!= (LANG_MANIFEST_FILE.to_string() + ".ron") ) -> HashMap<String, LocalizationEntryState> {
{ // Generate map
let full_path = i18n_file.path(); let mut i18n_map = generate_key_version(
println!("-> {:?}", full_path.strip_prefix(&root_dir).unwrap()); repo,
let f = fs::File::open(&full_path).expect("Failed opening file"); &LocalizationFragment::from(loc.clone()),
let _: LocalizationFragment = match from_reader(f) { ref_manifest,
Ok(v) => v, i18n_blob,
Err(e) => { );
panic!(
"Could not parse {} RON file, error: {}",
full_path.to_string_lossy(),
e
);
},
};
}
}
}
}
/// Test to verify all languages that they are VALID and loadable, without // Gathering info about keys from language
/// need of git just on the local assets folder complete_key_versions(repo, head_ref, &mut i18n_map, root_dir, lang_dir);
/// `root_dir` - absolute path to main repo
/// `asset_path` - relative path to asset directory (right now it is // read HEAD for the subfolders
/// 'assets/voxygen/i18n') for sub_directory in loc.sub_directories.iter() {
pub fn verify_all_localizations(root_dir: &Path, asset_path: &Path) { let subdir_path = &lang_dir.join(sub_directory);
let ref_i18n_dir_path = asset_path.join(REFERENCE_LANG); complete_key_versions(repo, head_ref, &mut i18n_map, root_dir, subdir_path);
let ref_i18n_path = ref_i18n_dir_path.join(LANG_MANIFEST_FILE.to_string() + ".ron");
assert!(
root_dir.join(&ref_i18n_dir_path).is_dir(),
"Reference language folder doesn't exist, something is wrong!"
);
assert!(
root_dir.join(&ref_i18n_path).is_file(),
"Reference language manifest file doesn't exist, something is wrong!"
);
let i18n_directories = i18n_directories(&root_dir.join(asset_path));
// This simple check ONLY guarantees that an arbitrary minimum of translation
// files exists. It's just to notice unintentional deletion of all
// files, or modifying the paths. In case you want to delete all
// language you have to adjust this number:
assert!(
i18n_directories.len() > 5,
"have less than 5 translation folders, arbitrary minimum check failed. Maybe the i18n \
folder is empty?"
);
for i18n_directory in i18n_directories {
// Attempt to load the manifest file
let manifest_path = i18n_directory.join(LANG_MANIFEST_FILE.to_string() + ".ron");
println!(
"verifying {:?}",
manifest_path.strip_prefix(&root_dir).unwrap()
);
let f = fs::File::open(&manifest_path).expect("Failed opening file");
let raw_localization: RawLocalization = match from_reader(f) {
Ok(v) => v,
Err(e) => {
panic!(
"Could not parse {} RON file, error: {}",
i18n_directory.to_string_lossy(),
e
);
},
};
// Walk through each files and try to load them
verify_localization_directory(root_dir, &i18n_directory);
// Walk through each subdirectories and try to load files in them
for sub_directory in raw_localization.sub_directories.iter() {
let subdir_path = &i18n_directory.join(sub_directory);
verify_localization_directory(root_dir, &subdir_path);
}
} }
i18n_map
} }
// Helper function to test localization directory // Helper function to test localization directory
// `asset_path` - path to localization directory. Relative from root of the // `lang_dir` - path to localization directory. Relative from root of the
// repo. // repo.
// `root_dir` - absolute path to repo // `root_dir` - absolute path to repo
// `ref_i18n_path` - path to reference manifest // `ref_manifest` - path to reference manifest
// `i18n_references` - keys from reference language // `i18n_references` - keys from reference language
// `repo` - git object for main repo // `repo` - git object for main repo
// `head_ref` - HEAD // `head_ref` - HEAD
fn test_localization_directory( fn test_localization_directory(
asset_path: &Path, lang_dir: &Path,
root_dir: &Path, root_dir: &Path,
ref_i18n_path: &Path, ref_manifest: &Path,
i18n_references: &HashMap<String, LocalizationEntryState>, i18n_references: &HashMap<String, LocalizationEntryState>,
be_verbose: bool,
repo: &git2::Repository, repo: &git2::Repository,
head_ref: &git2::Reference, head_ref: &git2::Reference,
) -> Option<FindLocalization> { ) -> Option<LocalizationStats> {
let relfile = asset_path.join(&(LANG_MANIFEST_FILE.to_string() + ".ron")); let relfile = lang_dir.join(&(LANG_MANIFEST_FILE.to_string() + ".ron"));
if relfile == ref_i18n_path { if relfile == ref_manifest {
return None; return None;
} }
println!("\n-----------------------------------"); println!("\n-----------------------------------");
@ -439,20 +315,40 @@ fn test_localization_directory(
return None; return None;
}, },
}; };
let mut current_i18n = generate_key_version(
&repo,
&LocalizationFragment::from(current_loc.clone()),
&relfile,
&current_blob,
);
// read HEAD for the fragment files
complete_key_versions(&repo, &head_ref, &mut current_i18n, root_dir, &asset_path);
// read HEAD for the subfolders
for sub_directory in current_loc.sub_directories.iter() {
let subdir_path = &asset_path.join(sub_directory);
complete_key_versions(&repo, &head_ref, &mut current_i18n, root_dir, &subdir_path);
}
// Gather state of current localization
let mut current_i18n = gather_state(
&current_loc,
&current_blob,
ref_manifest,
root_dir,
lang_dir,
repo,
head_ref,
);
// Comparing with reference localization
fill_info(&mut current_i18n, &i18n_references, repo, &relfile);
let mut state_map = LocalizationAnalysis::default();
let result = gather_results(current_i18n, &mut state_map);
print_translation_stats(
&i18n_references,
&result,
&mut state_map,
be_verbose,
relfile,
ref_manifest,
);
Some(result)
}
fn fill_info(
current_i18n: &mut HashMap<String, LocalizationEntryState>,
i18n_references: &HashMap<String, LocalizationEntryState>,
repo: &git2::Repository,
relfile: &Path,
) {
for (ref_key, ref_state) in i18n_references.iter() { for (ref_key, ref_state) in i18n_references.iter() {
match current_i18n.get_mut(ref_key) { match current_i18n.get_mut(ref_key) {
Some(state) => { Some(state) => {
@ -505,109 +401,111 @@ fn test_localization_directory(
{ {
state.state = LocalizationState::Unused; state.state = LocalizationState::Unused;
} }
}
let keys: Vec<&String> = current_i18n.keys().collect(); fn gather_results(
let mut state_map: HashMap<LocalizationState, Vec<(&String, Option<git2::Oid>)>> = current_i18n: HashMap<String, LocalizationEntryState>,
HashMap::new(); state_map: &mut LocalizationAnalysis,
state_map.insert(LocalizationState::Outdated, Vec::new()); ) -> LocalizationStats {
state_map.insert(LocalizationState::NotFound, Vec::new());
state_map.insert(LocalizationState::Unknown, Vec::new());
state_map.insert(LocalizationState::Unused, Vec::new());
let current_i18n_entry_count = current_i18n.len();
let mut uptodate_entries = 0; let mut uptodate_entries = 0;
let mut outdated_entries = 0; let mut outdated_entries = 0;
let mut unused_entries = 0; let mut unused_entries = 0;
let mut notfound_entries = 0; let mut notfound_entries = 0;
let mut unknown_entries = 0; let mut unknown_entries = 0;
let keys: Vec<&String> = current_i18n.keys().collect();
for key in keys { for key in keys {
let entry = current_i18n.get(key).unwrap(); let entry = current_i18n.get(key).unwrap();
match entry.state {
LocalizationState::Outdated => outdated_entries += 1,
LocalizationState::NotFound => notfound_entries += 1,
LocalizationState::Unknown => unknown_entries += 1,
LocalizationState::Unused => unused_entries += 1,
LocalizationState::UpToDate => uptodate_entries += 1,
};
if entry.state != LocalizationState::UpToDate { if entry.state != LocalizationState::UpToDate {
let state_keys = state_map let state_keys = state_map
.get_mut(&entry.state) .get_mut(entry.state)
.expect("vectors must be added"); .expect("vectors must be added");
state_keys.push((key, entry.commit_id)); state_keys.push((key.to_owned(), entry.commit_id));
match entry.state {
LocalizationState::Outdated => outdated_entries += 1,
LocalizationState::NotFound => notfound_entries += 1,
LocalizationState::Unknown => unknown_entries += 1,
LocalizationState::Unused => unused_entries += 1,
LocalizationState::UpToDate => unreachable!(),
};
} else {
uptodate_entries += 1;
} }
} }
// Display
println!(
"\n{:60}| {:40} | {:40}\n",
"Key name",
relfile.to_str().unwrap(),
ref_i18n_path.to_str().unwrap()
);
for (state, mut lines) in state_map {
if lines.is_empty() {
continue;
}
println!("\n\t[{:?}]", state);
lines.sort();
for line in lines {
println!(
"{:60}| {:40} | {:40}",
line.0,
line.1
.map(|s| format!("{}", s))
.unwrap_or_else(|| "None".to_string()),
i18n_references
.get(line.0)
.map(|s| s.commit_id)
.flatten()
.map(|s| format!("{}", s))
.unwrap_or_else(|| "None".to_string()),
);
}
}
println!(
"\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries",
uptodate_entries, outdated_entries, unused_entries, notfound_entries, unknown_entries
);
// Calculate key count that actually matter for the status of the translation // Calculate key count that actually matter for the status of the translation
// Unused entries don't break the game // Unused entries don't break the game
let current_i18n_entry_count = current_i18n.len();
let real_entry_count = current_i18n_entry_count - unused_entries; let real_entry_count = current_i18n_entry_count - unused_entries;
let uptodate_percent = (uptodate_entries as f32 / real_entry_count as f32) * 100_f32;
let outdated_percent = (outdated_entries as f32 / real_entry_count as f32) * 100_f32;
let untranslated_percent =
((notfound_entries + unknown_entries) as f32 / real_entry_count as f32) * 100_f32;
println!( LocalizationStats {
"{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n",
uptodate_percent, outdated_percent, untranslated_percent,
);
let result = FindLocalization {
uptodate_entries, uptodate_entries,
unused_entries, unused_entries,
outdated_entries, outdated_entries,
notfound_entries, notfound_entries,
errors: unknown_entries, errors: unknown_entries,
real_entry_count, real_entry_count,
}; }
Some(result) }
fn print_translation_stats(
ref_i18n_map: &HashMap<String, LocalizationEntryState>,
stats: &LocalizationStats,
state_map: &mut LocalizationAnalysis,
be_verbose: bool,
relfile: PathBuf,
ref_manifest: &Path,
) {
let uptodate_percent =
(stats.uptodate_entries as f32 / stats.real_entry_count as f32) * 100_f32;
let outdated_percent =
(stats.outdated_entries as f32 / stats.real_entry_count as f32) * 100_f32;
let untranslated_percent =
((stats.errors + stats.errors) as f32 / stats.real_entry_count as f32) * 100_f32;
// Display
if be_verbose {
println!(
"\n{:60}| {:40} | {:40}",
"Key name",
relfile.to_str().unwrap(),
ref_manifest.to_str().unwrap(),
);
} else {
println!("\nKey name");
}
state_map.show(LocalizationState::NotFound, be_verbose, ref_i18n_map);
state_map.show(LocalizationState::Unused, be_verbose, ref_i18n_map);
state_map.show(LocalizationState::Outdated, be_verbose, ref_i18n_map);
state_map.show(LocalizationState::Unknown, be_verbose, ref_i18n_map);
println!(
"\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries",
stats.uptodate_entries,
stats.outdated_entries,
stats.unused_entries,
stats.notfound_entries,
stats.errors,
);
println!(
"{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n",
uptodate_percent, outdated_percent, untranslated_percent,
);
} }
/// Test one language /// Test one language
/// `code` - name of the directory in assets (de_DE for example) /// `code` - name of the directory in assets (de_DE for example)
/// `root_dir` - absolute path to main repo /// `root_dir` - absolute path to main repo
/// `asset_path` - relative path to asset directory (right now it is /// `assets_path` - relative path to asset directory (right now it is
/// 'assets/voxygen/i18n') /// 'assets/voxygen/i18n')
pub fn test_specific_localization(code: String, root_dir: &Path, asset_path: &Path) { pub fn test_specific_localization(
code: &str,
root_dir: &Path,
assets_path: &Path,
be_verbose: bool,
) {
// Relative paths from root of repo to assets // Relative paths from root of repo to assets
let ref_lang_dir = asset_path.join(REFERENCE_LANG); let ref_lang_dir = assets_path.join(REFERENCE_LANG);
let ref_manifest = ref_lang_dir.join(LANG_MANIFEST_FILE.to_string() + ".ron"); let ref_manifest = ref_lang_dir.join(LANG_MANIFEST_FILE.to_string() + ".ron");
// Initialize Git objects // Initialize Git objects
@ -619,39 +517,26 @@ pub fn test_specific_localization(code: String, root_dir: &Path, asset_path: &Pa
let ref_manifest_blob = read_file_from_path(&repo, &head_ref, &ref_manifest); let ref_manifest_blob = read_file_from_path(&repo, &head_ref, &ref_manifest);
let loc: RawLocalization = from_bytes(ref_manifest_blob.content()) let loc: RawLocalization = from_bytes(ref_manifest_blob.content())
.expect("Expect to parse reference i18n RON file, can't proceed without it"); .expect("Expect to parse reference i18n RON file, can't proceed without it");
let mut i18n_references: HashMap<String, LocalizationEntryState> = generate_key_version(
&repo,
&LocalizationFragment::from(loc.clone()),
&ref_manifest,
&ref_manifest_blob,
);
// Gathering info about keys from reference language // Gathering info about keys from reference language
complete_key_versions( let reference_i18n = gather_state(
&repo, &loc,
&head_ref, &ref_manifest_blob,
&mut i18n_references, &ref_manifest,
root_dir, root_dir,
&ref_lang_dir, &ref_lang_dir,
&repo,
&head_ref,
); );
for sub_directory in loc.sub_directories.iter() {
let subdir_path = &ref_lang_dir.join(sub_directory);
complete_key_versions(
&repo,
&head_ref,
&mut i18n_references,
root_dir,
&subdir_path,
);
}
// Testing how specific language is localized // Testing how specific language is localized
let dir = asset_path.join(code); let dir = assets_path.join(code);
test_localization_directory( test_localization_directory(
&dir, &dir,
root_dir, root_dir,
&ref_manifest, &ref_manifest,
&i18n_references, &reference_i18n,
be_verbose,
&repo, &repo,
&head_ref, &head_ref,
); );
@ -659,20 +544,17 @@ pub fn test_specific_localization(code: String, root_dir: &Path, asset_path: &Pa
/// Test all localizations /// Test all localizations
/// `root_dir` - absolute path to main repo /// `root_dir` - absolute path to main repo
/// `asset_path` - relative path to asset directory (right now it is /// `assets_path` - relative path to asset directory (right now it is
/// 'assets/voxygen/i18n') /// 'assets/voxygen/i18n')
pub fn test_all_localizations(root_dir: &Path, asset_path: &Path) { pub fn test_all_localizations(root_dir: &Path, assets_path: &Path, be_verbose: bool) {
let ref_i18n_dir_path = asset_path.join(REFERENCE_LANG); let ref_lang_dir = assets_path.join(REFERENCE_LANG);
let ref_i18n_path = ref_i18n_dir_path.join(LANG_MANIFEST_FILE.to_string() + ".ron"); let ref_manifest = ref_lang_dir.join(LANG_MANIFEST_FILE.to_string() + ".ron");
if !root_dir.join(&ref_i18n_dir_path).is_dir() { if !root_dir.join(&ref_lang_dir).is_dir() {
panic!( panic!("Reference language folder not found {:?}", &ref_lang_dir)
"Reference language folder not found {:?}",
&ref_i18n_dir_path
)
} }
if !root_dir.join(&ref_i18n_path).is_file() { if !root_dir.join(&ref_manifest).is_file() {
panic!("Reference language file not found {:?}", &ref_i18n_path) panic!("Reference language file not found {:?}", &ref_manifest)
} }
// Initialize Git objects // Initialize Git objects
@ -681,46 +563,32 @@ pub fn test_all_localizations(root_dir: &Path, asset_path: &Path) {
let head_ref = repo.head().expect("Impossible to get the HEAD reference"); let head_ref = repo.head().expect("Impossible to get the HEAD reference");
// Read HEAD for the reference language file // Read HEAD for the reference language file
let i18n_ref_blob = read_file_from_path(&repo, &head_ref, &ref_i18n_path); let ref_manifest_blob = read_file_from_path(&repo, &head_ref, &ref_manifest);
let loc: RawLocalization = from_bytes(i18n_ref_blob.content()) let loc: RawLocalization = from_bytes(ref_manifest_blob.content())
.expect("Expect to parse reference i18n RON file, can't proceed without it"); .expect("Expect to parse reference i18n RON file, can't proceed without it");
let mut i18n_references: HashMap<String, LocalizationEntryState> = generate_key_version(
&repo,
&LocalizationFragment::from(loc.clone()),
&ref_i18n_path,
&i18n_ref_blob,
);
// Gathering info about keys from reference language // Gathering info about keys from reference language
complete_key_versions( let reference_i18n = gather_state(
&loc,
&ref_manifest_blob,
&ref_manifest,
root_dir,
&ref_lang_dir,
&repo, &repo,
&head_ref, &head_ref,
&mut i18n_references,
root_dir,
&ref_i18n_dir_path,
); );
// read HEAD for the subfolders
for sub_directory in loc.sub_directories.iter() {
let subdir_path = &ref_i18n_dir_path.join(sub_directory);
complete_key_versions(
&repo,
&head_ref,
&mut i18n_references,
root_dir,
&subdir_path,
);
}
// Compare to other reference files // Compare to other reference files
let i18n_directories = i18n_directories(&root_dir.join(asset_path)); let i18n_directories = i18n_directories(&root_dir.join(assets_path));
let mut i18n_entry_counts: HashMap<PathBuf, FindLocalization> = HashMap::new(); let mut i18n_entry_counts: HashMap<PathBuf, LocalizationStats> = HashMap::new();
for dir in &i18n_directories { for dir in &i18n_directories {
let rel_dir = dir.strip_prefix(root_dir).unwrap(); let rel_dir = dir.strip_prefix(root_dir).unwrap();
let result = test_localization_directory( let result = test_localization_directory(
rel_dir, rel_dir,
root_dir, root_dir,
&ref_i18n_path, &ref_manifest,
&i18n_references, &reference_i18n,
be_verbose,
&repo, &repo,
&head_ref, &head_ref,
); );
@ -729,6 +597,10 @@ pub fn test_all_localizations(root_dir: &Path, asset_path: &Path) {
} }
} }
print_overall_stats(i18n_entry_counts);
}
fn print_overall_stats(i18n_entry_counts: HashMap<PathBuf, LocalizationStats>) {
let mut overall_uptodate_entry_count = 0; let mut overall_uptodate_entry_count = 0;
let mut overall_outdated_entry_count = 0; let mut overall_outdated_entry_count = 0;
let mut overall_untranslated_entry_count = 0; let mut overall_untranslated_entry_count = 0;
@ -742,8 +614,11 @@ pub fn test_all_localizations(root_dir: &Path, asset_path: &Path) {
"", "up-to-date", "outdated", "untranslated", "unused", "errors", "", "up-to-date", "outdated", "untranslated", "unused", "errors",
); );
for (path, test_result) in i18n_entry_counts { let mut i18n_stats: Vec<(&PathBuf, &LocalizationStats)> = i18n_entry_counts.iter().collect();
let FindLocalization { i18n_stats.sort_by_key(|(_, result)| result.notfound_entries);
for (path, test_result) in i18n_stats {
let LocalizationStats {
uptodate_entries: uptodate, uptodate_entries: uptodate,
outdated_entries: outdated, outdated_entries: outdated,
unused_entries: unused, unused_entries: unused,

View File

@ -1,22 +1,51 @@
use std::{env::args, path::Path, vec::Vec}; use clap::{App, Arg};
use veloren_i18n::analysis; use std::path::Path;
use veloren_i18n::{analysis, verification};
fn main() { fn main() {
let cli: Vec<String> = args().collect(); let matches = App::new("i18n-check")
.version("0.1.0")
.author("juliancoffee <lightdarkdaughter@gmail.com>")
.about("Test veloren localizations")
.arg(
Arg::with_name("CODE")
.required(false)
.help("Run diagnostic for specific language code (de_DE as example)"),
)
.arg(
Arg::with_name("verify")
.long("verify")
.help("verify all localizations"),
)
.arg(
Arg::with_name("test")
.long("test")
.help("test all localizations"),
)
.arg(
Arg::with_name("verbose")
.short("v")
.long("verbose")
.help("print additional information"),
)
.get_matches();
// Generate paths // Generate paths
let curr_dir = std::env::current_dir().unwrap(); let root = veloren_i18n::find_root().expect("Failed to find root of repository");
let root = curr_dir.parent().unwrap().parent().unwrap();
let asset_path = Path::new("assets/voxygen/i18n/"); let asset_path = Path::new("assets/voxygen/i18n/");
for (i, arg) in cli.iter().enumerate() {
match arg.as_str() { if let Some(code) = matches.value_of("CODE") {
"--all" => analysis::test_all_localizations(root, asset_path), analysis::test_specific_localization(
"--verify" => analysis::verify_all_localizations(root, asset_path), code,
"--lang" => { &root,
let code = cli[i + 1].clone(); &asset_path,
analysis::test_specific_localization(code, root, asset_path); matches.is_present("verbose"),
}, );
_ => continue, }
} if matches.is_present("test") {
analysis::test_all_localizations(&root, &asset_path, matches.is_present("verbose"));
}
if matches.is_present("verify") {
verification::verify_all_localizations(&root, &asset_path);
} }
} }

View File

@ -2,6 +2,10 @@ use crate::assets::{self, AssetExt, AssetGuard, AssetHandle};
use deunicode::deunicode; use deunicode::deunicode;
use hashbrown::{HashMap, HashSet}; use hashbrown::{HashMap, HashSet};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{
fs,
path::{Path, PathBuf},
};
use tracing::warn; use tracing::warn;
/// The reference language, aka the more up-to-date localization data. /// The reference language, aka the more up-to-date localization data.
@ -49,11 +53,11 @@ pub type Fonts = HashMap<String, Font>;
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
pub(crate) struct RawLocalization { pub(crate) struct RawLocalization {
pub(crate) sub_directories: Vec<String>, pub(crate) sub_directories: Vec<String>,
pub(crate) string_map: HashMap<String, String>,
pub(crate) vector_map: HashMap<String, Vec<String>>,
pub(crate) convert_utf8_to_ascii: bool, pub(crate) convert_utf8_to_ascii: bool,
pub(crate) fonts: Fonts, pub(crate) fonts: Fonts,
pub(crate) metadata: LanguageMetadata, pub(crate) metadata: LanguageMetadata,
pub(crate) string_map: HashMap<String, String>,
pub(crate) vector_map: HashMap<String, Vec<String>>,
} }
/// Store internationalization data /// Store internationalization data
@ -85,7 +89,7 @@ struct Language {
/// Store internationalization maps /// Store internationalization maps
/// These structs are meant to be merged into a Language /// These structs are meant to be merged into a Language
#[derive(Debug, PartialEq, Serialize, Deserialize)] #[derive(Debug, PartialEq, Serialize, Deserialize)]
struct LocalizationFragment { pub(crate) struct LocalizationFragment {
/// A map storing the localized texts /// A map storing the localized texts
/// ///
/// Localized content can be accessed using a String key. /// Localized content can be accessed using a String key.
@ -100,7 +104,7 @@ struct LocalizationFragment {
impl Language { impl Language {
/// Get a localized text from the given key /// Get a localized text from the given key
pub fn get<'a>(&'a self, key: &'a str) -> Option<&str> { pub fn get<'a>(&'a self, key: &'a str) -> Option<&str> {
self.string_map.get(key).map(|s| s.as_str()) self.string_map.get(key).map(String::as_str)
} }
/// Get a variation of localized text from the given key /// Get a variation of localized text from the given key
@ -110,16 +114,13 @@ impl Language {
/// If the key is not present in the localization object /// If the key is not present in the localization object
/// then the key is returned. /// then the key is returned.
pub fn get_variation<'a>(&'a self, key: &'a str, index: u16) -> Option<&str> { pub fn get_variation<'a>(&'a self, key: &'a str, index: u16) -> Option<&str> {
self.vector_map self.vector_map.get(key).and_then(|v| {
.get(key) if v.is_empty() {
.map(|v| { None
if !v.is_empty() { } else {
Some(v[index as usize % v.len()].as_str()) Some(v[index as usize % v.len()].as_str())
} else { }
None })
}
})
.flatten()
} }
} }
@ -247,8 +248,7 @@ impl LocalizationGuard {
self.active.get(key).unwrap_or_else(|| { self.active.get(key).unwrap_or_else(|| {
self.fallback self.fallback
.as_ref() .as_ref()
.map(|f| f.get(key)) .and_then(|f| f.get(key))
.flatten()
.unwrap_or(key) .unwrap_or(key)
}) })
} }
@ -263,8 +263,7 @@ impl LocalizationGuard {
self.active.get_variation(key, index).unwrap_or_else(|| { self.active.get_variation(key, index).unwrap_or_else(|| {
self.fallback self.fallback
.as_ref() .as_ref()
.map(|f| f.get_variation(key, index)) .and_then(|f| f.get_variation(key, index))
.flatten()
.unwrap_or(key) .unwrap_or(key)
}) })
} }
@ -389,42 +388,66 @@ pub fn list_localizations() -> Vec<LanguageMetadata> {
/// Start hot reloading of i18n assets /// Start hot reloading of i18n assets
pub fn start_hot_reloading() { assets::start_hot_reloading(); } pub fn start_hot_reloading() { assets::start_hot_reloading(); }
/// Return the path to the repository root, searching up to 10 directories
/// back from the current working directory.
///
/// Returns `None` if the current directory cannot be read or no ancestor
/// (including the current directory itself) contains a `.git` directory.
pub fn find_root() -> Option<PathBuf> {
    std::env::current_dir().ok().and_then(|path| {
        // `Path::ancestors()` yields the path itself first, so this single
        // search also covers the case where the current directory already is
        // the repository root.
        path.ancestors()
            .take(10)
            .find(|dir| dir.join(".git").is_dir())
            .map(Path::to_path_buf)
    })
}
/// List localization directories as a `PathBuf` vector
pub fn i18n_directories(i18n_dir: &Path) -> Vec<PathBuf> {
    // Panics (like the read itself) if the directory or any entry is unreadable.
    let entries = fs::read_dir(i18n_dir).unwrap();
    let mut dirs = Vec::new();
    for entry in entries {
        let path = entry.unwrap().path();
        if path.is_dir() {
            dirs.push(path);
        }
    }
    dirs
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*;
use crate::analysis;
use std::path::Path;
// Test that localization list is loaded (not empty) // Test that localization list is loaded (not empty)
#[test] #[test]
fn test_localization_list() { fn test_localization_list() {
let list = list_localizations(); let list = super::list_localizations();
assert!(!list.is_empty()); assert!(!list.is_empty());
} }
// Test that reference language can be loaded // Test that reference language can be loaded
#[test] #[test]
fn test_localization_handle() { let _ = LocalizationHandle::load_expect(REFERENCE_LANG); } fn test_localization_handle() {
let _ = super::LocalizationHandle::load_expect(super::REFERENCE_LANG);
}
// Test to verify all languages that they are VALID and loadable, without // Test to verify all languages that they are VALID and loadable, without
// need of git just on the local assets folder // need of git just on the local assets folder
#[test] #[test]
fn verify_all_localizations() { fn verify_all_localizations() {
// Generate paths // Generate paths
let i18n_asset_path = Path::new("assets/voxygen/i18n/"); let i18n_asset_path = std::path::Path::new("assets/voxygen/i18n/");
let curr_dir = std::env::current_dir().unwrap(); let root_dir = super::find_root().expect("Failed to discover repository root");
let root_dir = curr_dir.parent().unwrap().parent().unwrap(); crate::verification::verify_all_localizations(&root_dir, &i18n_asset_path);
analysis::verify_all_localizations(&root_dir, &i18n_asset_path);
} }
// Test to verify all languages and print missing and faulty localisation // Test to verify all languages and print missing and faulty localisation
#[test] #[test]
#[ignore] #[ignore]
fn test_all_localizations() { fn test_all_localizations() {
// Options
let be_verbose = true;
// Generate paths // Generate paths
let i18n_asset_path = Path::new("assets/voxygen/i18n/"); let i18n_asset_path = std::path::Path::new("assets/voxygen/i18n/");
let curr_dir = std::env::current_dir().unwrap(); let root_dir = super::find_root().expect("Failed to discover repository root");
let root_dir = curr_dir.parent().unwrap().parent().unwrap(); crate::analysis::test_all_localizations(&root_dir, &i18n_asset_path, be_verbose);
analysis::test_all_localizations(&root_dir, &i18n_asset_path);
} }
} }

View File

@ -1,5 +1,7 @@
#[cfg(any(feature = "bin", test))]
pub mod analysis; pub mod analysis;
mod data; mod data;
pub mod verification;
use common_assets as assets; use common_assets as assets;
pub use data::*; pub use data::*;

View File

@ -0,0 +1,88 @@
use ron::de::from_reader;
use std::{fs, path::Path};
use crate::data::{
i18n_directories, LocalizationFragment, RawLocalization, LANG_MANIFEST_FILE, REFERENCE_LANG,
};
/// Parse every `.ron` translation fragment in `directory_path` (relative to
/// `root_dir`), panicking on the first file that fails to deserialize.
fn verify_localization_directory(root_dir: &Path, directory_path: &Path) {
    // The manifest file has a different schema than a fragment, so it is
    // skipped here and handled by the caller.
    let manifest_file = LANG_MANIFEST_FILE.to_string() + ".ron";
    // Walk through each entry in the directory.
    for i18n_file in root_dir.join(&directory_path).read_dir().unwrap().flatten() {
        let is_file = i18n_file
            .file_type()
            .map(|t| t.is_file())
            .unwrap_or(false);
        // Skip folders and the manifest file (which does not contain the same
        // struct we want to load).
        if !is_file || i18n_file.file_name().to_string_lossy() == manifest_file {
            continue;
        }
        let full_path = i18n_file.path();
        println!("-> {:?}", full_path.strip_prefix(&root_dir).unwrap());
        let f = fs::File::open(&full_path).expect("Failed opening file");
        if let Err(e) = from_reader::<_, LocalizationFragment>(f) {
            panic!(
                "Could not parse {} RON file, error: {}",
                full_path.to_string_lossy(),
                e
            );
        }
    }
}
/// Test to verify all languages that they are VALID and loadable, without
/// need of git just on the local assets folder
/// `root_dir` - absolute path to main repo
/// `asset_path` - relative path to asset directory (right now it is
/// 'assets/voxygen/i18n')
pub fn verify_all_localizations(root_dir: &Path, asset_path: &Path) {
    let ref_i18n_dir_path = asset_path.join(REFERENCE_LANG);
    let ref_i18n_path = ref_i18n_dir_path.join(LANG_MANIFEST_FILE.to_string() + ".ron");
    assert!(
        root_dir.join(&ref_i18n_dir_path).is_dir(),
        "Reference language folder doesn't exist, something is wrong!"
    );
    assert!(
        root_dir.join(&ref_i18n_path).is_file(),
        "Reference language manifest file doesn't exist, something is wrong!"
    );
    let i18n_directories = i18n_directories(&root_dir.join(asset_path));
    // This simple check ONLY guarantees that an arbitrary minimum of translation
    // files exists. It's just to notice unintentional deletion of all
    // files, or modifying the paths. In case you want to delete all
    // language you have to adjust this number:
    assert!(
        i18n_directories.len() > 5,
        "have less than 5 translation folders, arbitrary minimum check failed. Maybe the i18n \
         folder is empty?"
    );
    for i18n_directory in i18n_directories {
        // Attempt to load the manifest file
        let manifest_path = i18n_directory.join(LANG_MANIFEST_FILE.to_string() + ".ron");
        println!(
            "verifying {:?}",
            manifest_path.strip_prefix(&root_dir).unwrap()
        );
        let f = fs::File::open(&manifest_path).expect("Failed opening file");
        let raw_localization: RawLocalization = match from_reader(f) {
            Ok(v) => v,
            Err(e) => {
                panic!(
                    // Report the manifest file that actually failed to parse,
                    // not its parent directory, so the error is actionable.
                    "Could not parse {} RON file, error: {}",
                    manifest_path.to_string_lossy(),
                    e
                );
            },
        };
        // Walk through each file and try to load it
        verify_localization_directory(root_dir, &i18n_directory);
        // Walk through each subdirectory and try to load the files in it
        for sub_directory in raw_localization.sub_directories.iter() {
            let subdir_path = &i18n_directory.join(sub_directory);
            verify_localization_directory(root_dir, &subdir_path);
        }
    }
}