Merge branch 'juliancoffee/improve_i18n_check' into 'master'

Refactor i18n analysis code and improve UX of testing binary

See merge request veloren/veloren!2289
This commit is contained in:
Marcel 2021-05-10 22:13:01 +00:00
commit 217462805d
8 changed files with 451 additions and 426 deletions

1
Cargo.lock generated
View File

@ -5644,6 +5644,7 @@ dependencies = [
name = "veloren-i18n"
version = "0.9.0"
dependencies = [
"clap",
"deunicode",
"git2",
"hashbrown",

View File

@ -7,6 +7,7 @@ version = "0.9.0"
[[bin]]
name = "i18n-check"
required-features = ["bin"]
[dependencies]
# Assets
@ -14,8 +15,16 @@ hashbrown = { version = "0.9", features = ["serde", "nightly"] }
common-assets = {package = "veloren-common-assets", path = "../../common/assets"}
deunicode = "1.0"
serde = { version = "1.0", features = ["derive"] }
# Diagnostic
git2 = { version = "0.12", default-features = false }
ron = "0.6"
tracing = "0.1"
# Diagnostic
ron = "0.6"
git2 = { version = "0.12", default-features = false, optional = true }
# Binary
clap = { version = "2.33", features = ["suggestions"], default-features = false, optional = true }
[dev-dependencies]
git2 = { version = "0.12", default-features = false }
[features]
bin = ["git2", "clap"]

View File

@ -1,8 +1,6 @@
# Usage
Get diagnostic for specific language <br/>
`$ cargo run --bin i18n-check -- --lang <lang_code>` <br/>
Test all languages <br/>
`$ cargo run --bin i18n-check -- --all`
Verify all directories <br/>
`$ cargo run --bin i18n-check -- --verify`
`$ cargo run --features=bin -- --help` <br/>
(Or if somewhere else in the repo) <br/>
`$ cargo run -p veloren-i18n --features=bin -- --help` <br/>
For example, diagnostic for specific language <br/>
`$ cargo run -p veloren-i18n --features=bin -- <lang_code>` <br/>

View File

@ -1,136 +1,12 @@
use ron::de::{from_bytes, from_reader};
use serde::{Deserialize, Serialize};
use std::{
fs,
path::{Path, PathBuf},
};
use ron::de::from_bytes;
use std::path::{Path, PathBuf};
use crate::data::{
i18n_directories, LocalizationFragment, RawLocalization, LANG_MANIFEST_FILE, REFERENCE_LANG,
};
use hashbrown::{HashMap, HashSet};
/// The reference language, aka the more up-to-date localization data. Also the
/// default language at first startup.
const REFERENCE_LANG: &str = "en";
const LANG_MANIFEST_FILE: &str = "_manifest";
/// How a language can be described: a display name plus a stable identifier
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
struct LanguageMetadata {
/// A human friendly language name (e.g. "English (US)")
language_name: String,
/// A short text identifier for this language (e.g. "en_US")
///
/// Unlike `language_name`, which can change freely, the
/// `language_identifier` value shall be stable over time as it
/// is used by setting components to store the language
/// selected by the user.
language_identifier: String,
}
/// Store font metadata
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
struct Font {
/// Key to retrieve the font in the asset system
asset_key: String,
/// Scale ratio to resize the UI text dynamically
scale_ratio: f32,
}
/// Collection of `Font` metadata entries, keyed by a `String`
type Fonts = HashMap<String, Font>;
/// Raw localization data; the strings are not expected to be loaded here.
/// However, the metadata information is correct.
/// See `Localization` for more info on each attribute.
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
struct RawLocalization {
sub_directories: Vec<String>,
string_map: HashMap<String, String>,
vector_map: HashMap<String, Vec<String>>,
convert_utf8_to_ascii: bool,
fonts: Fonts,
metadata: LanguageMetadata,
}
/// Store internationalization data
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Localization {
/// A list of subdirectories to look up for localization files
sub_directories: Vec<String>,
/// A map storing the localized texts
///
/// Localized content can be accessed using a String key.
string_map: HashMap<String, String>,
/// A map for storing variations of localized texts, for example multiple
/// ways of saying "Help, I'm under attack". Used primarily for npc
/// dialogue.
vector_map: HashMap<String, Vec<String>>,
/// Whether to convert the input text encoded in UTF-8
/// into an ASCII version by using the `deunicode` crate.
convert_utf8_to_ascii: bool,
/// Font configuration is stored here
fonts: Fonts,
/// Name and identifier of the language (see `LanguageMetadata`)
metadata: LanguageMetadata,
}
/// Store internationalization maps only (`string_map` and `vector_map`).
/// These structs are meant to be merged into a Localization.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct LocalizationFragment {
/// A map storing the localized texts
///
/// Localized content can be accessed using a String key.
string_map: HashMap<String, String>,
/// A map for storing variations of localized texts, for example multiple
/// ways of saying "Help, I'm under attack". Used primarily for npc
/// dialogue.
vector_map: HashMap<String, Vec<String>>,
}
impl Localization {}
impl From<RawLocalization> for Localization {
fn from(raw: RawLocalization) -> Self {
Self {
sub_directories: raw.sub_directories,
string_map: raw.string_map,
vector_map: raw.vector_map,
convert_utf8_to_ascii: raw.convert_utf8_to_ascii,
fonts: raw.fonts,
metadata: raw.metadata,
}
}
}
impl From<RawLocalization> for LocalizationFragment {
fn from(raw: RawLocalization) -> Self {
Self {
string_map: raw.string_map,
vector_map: raw.vector_map,
}
}
}
/// Newtype wrapping a list of `LanguageMetadata` entries
#[derive(Clone, Debug)]
struct LocalizationList(Vec<LanguageMetadata>);
/// Collect the paths of all sub-directories found directly inside
/// `i18n_dir`.
///
/// # Panics
/// Panics if `i18n_dir` cannot be read or if one of its entries cannot be
/// read.
fn i18n_directories(i18n_dir: &Path) -> Vec<PathBuf> {
    let mut directories = Vec::new();
    for entry in fs::read_dir(i18n_dir).unwrap() {
        let candidate = entry.unwrap().path();
        // Plain files are not localization directories.
        if candidate.is_dir() {
            directories.push(candidate);
        }
    }
    directories
}
#[derive(Eq, Hash, Debug, PartialEq)]
#[derive(Copy, Clone, Eq, Hash, Debug, PartialEq)]
enum LocalizationState {
UpToDate,
NotFound,
@ -140,7 +16,7 @@ enum LocalizationState {
}
#[derive(Debug, PartialEq)]
struct FindLocalization {
struct LocalizationStats {
uptodate_entries: usize,
outdated_entries: usize,
unused_entries: usize,
@ -149,7 +25,61 @@ struct FindLocalization {
real_entry_count: usize,
}
#[derive(Debug)]
/// Per-state lists of localization entries.
///
/// Each list holds `(key, commit id)` pairs; the commit id is optional.
#[derive(Default)]
struct LocalizationAnalysis {
// Entries recorded under `LocalizationState::NotFound`
notfound: Vec<(String, Option<git2::Oid>)>,
// Entries recorded under `LocalizationState::Unused`
unused: Vec<(String, Option<git2::Oid>)>,
// Entries recorded under `LocalizationState::Outdated`
outdated: Vec<(String, Option<git2::Oid>)>,
// Entries recorded under `LocalizationState::Unknown`
unknown: Vec<(String, Option<git2::Oid>)>,
}
impl LocalizationAnalysis {
    /// Returns the list that collects entries for `state`.
    ///
    /// `UpToDate` entries are not collected, so `None` is returned for that
    /// state. The match is exhaustive on purpose: adding a new
    /// `LocalizationState` variant must force a decision here.
    fn get_mut(
        &mut self,
        state: LocalizationState,
    ) -> Option<&mut Vec<(String, Option<git2::Oid>)>> {
        match state {
            LocalizationState::NotFound => Some(&mut self.notfound),
            LocalizationState::Unused => Some(&mut self.unused),
            LocalizationState::Outdated => Some(&mut self.outdated),
            LocalizationState::Unknown => Some(&mut self.unknown),
            LocalizationState::UpToDate => None,
        }
    }

    /// Prints the entries collected for `state`, sorted, under a
    /// `[State]` header. Prints nothing when there are no such entries.
    ///
    /// * `be_verbose` - when set, every key is followed by its own commit id
    ///   and the commit id recorded for the same key in `ref_i18n_map`
    ///   (`None` is printed when an id is missing); otherwise only the key is
    ///   printed.
    /// * `ref_i18n_map` - entry states of the reference language.
    ///
    /// # Panics
    /// Panics when `state` has no associated list (see [`Self::get_mut`]).
    fn show(
        &mut self,
        state: LocalizationState,
        be_verbose: bool,
        ref_i18n_map: &HashMap<String, LocalizationEntryState>,
    ) {
        let entries = self
            .get_mut(state)
            .unwrap_or_else(|| panic!("called on invalid state: {:?}", state));
        if entries.is_empty() {
            return;
        }

        println!("\n\t[{:?}]", state);
        // Sorting in place is the reason this method needs `&mut self`.
        entries.sort();

        // Shared rendering for "our" and the reference commit id.
        let render_commit = |commit: Option<git2::Oid>| {
            commit.map_or_else(|| "None".to_owned(), |id| id.to_string())
        };

        for (key, commit_id) in entries.iter() {
            if be_verbose {
                let our_commit = render_commit(*commit_id);
                let ref_commit = render_commit(ref_i18n_map.get(key).and_then(|s| s.commit_id));
                println!("{:60}| {:40} | {:40}", key, our_commit, ref_commit);
            } else {
                println!("{}", key);
            }
        }
    }
}
#[derive(Copy, Clone, Debug)]
struct LocalizationEntryState {
key_line: Option<usize>,
chuck_line_range: Option<(usize, usize)>,
@ -220,11 +150,9 @@ fn generate_key_version<'a>(
.keys()
.map(|k| (k.to_owned(), LocalizationEntryState::new()))
.collect();
let mut to_process: HashSet<&String> = localization.string_map.keys().collect();
// Find key start lines
let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file");
// Make the file hot
let mut to_process: HashSet<&String> = localization.string_map.keys().collect();
for (line_nb, line) in file_content.lines().enumerate() {
let mut found_key = None;
@ -290,12 +218,12 @@ fn complete_key_versions<'a>(
head_ref: &git2::Reference,
i18n_key_versions: &mut HashMap<String, LocalizationEntryState>,
root_dir: &Path,
asset_path: &Path,
lang_dir: &Path,
) {
//TODO: review unwraps in this file
// For each file (if it's not a directory) in directory
for i18n_file in root_dir.join(&asset_path).read_dir().unwrap().flatten() {
for i18n_file in root_dir.join(&lang_dir).read_dir().unwrap().flatten() {
if let Ok(file_type) = i18n_file.file_type() {
if file_type.is_file() {
println!("-> {:?}", i18n_file.file_name());
@ -320,106 +248,54 @@ fn complete_key_versions<'a>(
}
}
fn verify_localization_directory(root_dir: &Path, directory_path: &Path) {
// Walk through each file in the directory
for i18n_file in root_dir.join(&directory_path).read_dir().unwrap().flatten() {
if let Ok(file_type) = i18n_file.file_type() {
// Skip folders and the manifest file (which does not contain the same struct we
// want to load)
if file_type.is_file()
&& i18n_file.file_name().to_string_lossy()
!= (LANG_MANIFEST_FILE.to_string() + ".ron")
{
let full_path = i18n_file.path();
println!("-> {:?}", full_path.strip_prefix(&root_dir).unwrap());
let f = fs::File::open(&full_path).expect("Failed opening file");
let _: LocalizationFragment = match from_reader(f) {
Ok(v) => v,
Err(e) => {
panic!(
"Could not parse {} RON file, error: {}",
full_path.to_string_lossy(),
e
fn gather_state(
loc: &RawLocalization,
i18n_blob: &git2::Blob,
ref_manifest: &Path,
root_dir: &Path,
lang_dir: &Path,
repo: &git2::Repository,
head_ref: &git2::Reference,
) -> HashMap<String, LocalizationEntryState> {
// Generate map
let mut i18n_map = generate_key_version(
repo,
&LocalizationFragment::from(loc.clone()),
ref_manifest,
i18n_blob,
);
},
};
}
}
}
// Gathering info about keys from language
complete_key_versions(repo, head_ref, &mut i18n_map, root_dir, lang_dir);
// read HEAD for the subfolders
for sub_directory in loc.sub_directories.iter() {
let subdir_path = &lang_dir.join(sub_directory);
complete_key_versions(repo, head_ref, &mut i18n_map, root_dir, subdir_path);
}
/// Test to verify all languages that they are VALID and loadable, without
/// need of git just on the local assets folder
/// `root_dir` - absolute path to main repo
/// `asset_path` - relative path to asset directory (right now it is
/// 'assets/voxygen/i18n')
pub fn verify_all_localizations(root_dir: &Path, asset_path: &Path) {
let ref_i18n_dir_path = asset_path.join(REFERENCE_LANG);
let ref_i18n_path = ref_i18n_dir_path.join(LANG_MANIFEST_FILE.to_string() + ".ron");
assert!(
root_dir.join(&ref_i18n_dir_path).is_dir(),
"Reference language folder doesn't exist, something is wrong!"
);
assert!(
root_dir.join(&ref_i18n_path).is_file(),
"Reference language manifest file doesn't exist, something is wrong!"
);
let i18n_directories = i18n_directories(&root_dir.join(asset_path));
// This simple check ONLY guarantees that an arbitrary minimum of translation
// files exists. It's just to notice unintentional deletion of all
// files, or modifying the paths. In case you want to delete all
// language you have to adjust this number:
assert!(
i18n_directories.len() > 5,
"have less than 5 translation folders, arbitrary minimum check failed. Maybe the i18n \
folder is empty?"
);
for i18n_directory in i18n_directories {
// Attempt to load the manifest file
let manifest_path = i18n_directory.join(LANG_MANIFEST_FILE.to_string() + ".ron");
println!(
"verifying {:?}",
manifest_path.strip_prefix(&root_dir).unwrap()
);
let f = fs::File::open(&manifest_path).expect("Failed opening file");
let raw_localization: RawLocalization = match from_reader(f) {
Ok(v) => v,
Err(e) => {
panic!(
"Could not parse {} RON file, error: {}",
i18n_directory.to_string_lossy(),
e
);
},
};
// Walk through each files and try to load them
verify_localization_directory(root_dir, &i18n_directory);
// Walk through each subdirectories and try to load files in them
for sub_directory in raw_localization.sub_directories.iter() {
let subdir_path = &i18n_directory.join(sub_directory);
verify_localization_directory(root_dir, &subdir_path);
}
}
i18n_map
}
// Helper function to test localization directory
// `asset_path` - path to localization directory. Relative from root of the
// `lang_dir` - path to localization directory. Relative from root of the
// repo.
// `root_dir` - absolute path to repo
// `ref_i18n_path` - path to reference manifest
// `ref_manifest` - path to reference manifest
// `i18n_references` - keys from reference language
// `repo` - git object for main repo
// `head_ref` - HEAD
fn test_localization_directory(
asset_path: &Path,
lang_dir: &Path,
root_dir: &Path,
ref_i18n_path: &Path,
ref_manifest: &Path,
i18n_references: &HashMap<String, LocalizationEntryState>,
be_verbose: bool,
repo: &git2::Repository,
head_ref: &git2::Reference,
) -> Option<FindLocalization> {
let relfile = asset_path.join(&(LANG_MANIFEST_FILE.to_string() + ".ron"));
if relfile == ref_i18n_path {
) -> Option<LocalizationStats> {
let relfile = lang_dir.join(&(LANG_MANIFEST_FILE.to_string() + ".ron"));
if relfile == ref_manifest {
return None;
}
println!("\n-----------------------------------");
@ -439,20 +315,40 @@ fn test_localization_directory(
return None;
},
};
let mut current_i18n = generate_key_version(
&repo,
&LocalizationFragment::from(current_loc.clone()),
&relfile,
// Gather state of current localization
let mut current_i18n = gather_state(
&current_loc,
&current_blob,
ref_manifest,
root_dir,
lang_dir,
repo,
head_ref,
);
// read HEAD for the fragment files
complete_key_versions(&repo, &head_ref, &mut current_i18n, root_dir, &asset_path);
// read HEAD for the subfolders
for sub_directory in current_loc.sub_directories.iter() {
let subdir_path = &asset_path.join(sub_directory);
complete_key_versions(&repo, &head_ref, &mut current_i18n, root_dir, &subdir_path);
// Comparing with reference localization
fill_info(&mut current_i18n, &i18n_references, repo, &relfile);
let mut state_map = LocalizationAnalysis::default();
let result = gather_results(current_i18n, &mut state_map);
print_translation_stats(
&i18n_references,
&result,
&mut state_map,
be_verbose,
relfile,
ref_manifest,
);
Some(result)
}
fn fill_info(
current_i18n: &mut HashMap<String, LocalizationEntryState>,
i18n_references: &HashMap<String, LocalizationEntryState>,
repo: &git2::Repository,
relfile: &Path,
) {
for (ref_key, ref_state) in i18n_references.iter() {
match current_i18n.get_mut(ref_key) {
Some(state) => {
@ -505,109 +401,111 @@ fn test_localization_directory(
{
state.state = LocalizationState::Unused;
}
}
let keys: Vec<&String> = current_i18n.keys().collect();
let mut state_map: HashMap<LocalizationState, Vec<(&String, Option<git2::Oid>)>> =
HashMap::new();
state_map.insert(LocalizationState::Outdated, Vec::new());
state_map.insert(LocalizationState::NotFound, Vec::new());
state_map.insert(LocalizationState::Unknown, Vec::new());
state_map.insert(LocalizationState::Unused, Vec::new());
let current_i18n_entry_count = current_i18n.len();
fn gather_results(
current_i18n: HashMap<String, LocalizationEntryState>,
state_map: &mut LocalizationAnalysis,
) -> LocalizationStats {
let mut uptodate_entries = 0;
let mut outdated_entries = 0;
let mut unused_entries = 0;
let mut notfound_entries = 0;
let mut unknown_entries = 0;
let keys: Vec<&String> = current_i18n.keys().collect();
for key in keys {
let entry = current_i18n.get(key).unwrap();
if entry.state != LocalizationState::UpToDate {
let state_keys = state_map
.get_mut(&entry.state)
.expect("vectors must be added");
state_keys.push((key, entry.commit_id));
match entry.state {
LocalizationState::Outdated => outdated_entries += 1,
LocalizationState::NotFound => notfound_entries += 1,
LocalizationState::Unknown => unknown_entries += 1,
LocalizationState::Unused => unused_entries += 1,
LocalizationState::UpToDate => unreachable!(),
LocalizationState::UpToDate => uptodate_entries += 1,
};
} else {
uptodate_entries += 1;
if entry.state != LocalizationState::UpToDate {
let state_keys = state_map
.get_mut(entry.state)
.expect("vectors must be added");
state_keys.push((key.to_owned(), entry.commit_id));
}
}
// Display
println!(
"\n{:60}| {:40} | {:40}\n",
"Key name",
relfile.to_str().unwrap(),
ref_i18n_path.to_str().unwrap()
);
for (state, mut lines) in state_map {
if lines.is_empty() {
continue;
}
println!("\n\t[{:?}]", state);
lines.sort();
for line in lines {
println!(
"{:60}| {:40} | {:40}",
line.0,
line.1
.map(|s| format!("{}", s))
.unwrap_or_else(|| "None".to_string()),
i18n_references
.get(line.0)
.map(|s| s.commit_id)
.flatten()
.map(|s| format!("{}", s))
.unwrap_or_else(|| "None".to_string()),
);
}
}
println!(
"\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries",
uptodate_entries, outdated_entries, unused_entries, notfound_entries, unknown_entries
);
// Calculate key count that actually matter for the status of the translation
// Unused entries don't break the game
let current_i18n_entry_count = current_i18n.len();
let real_entry_count = current_i18n_entry_count - unused_entries;
let uptodate_percent = (uptodate_entries as f32 / real_entry_count as f32) * 100_f32;
let outdated_percent = (outdated_entries as f32 / real_entry_count as f32) * 100_f32;
let untranslated_percent =
((notfound_entries + unknown_entries) as f32 / real_entry_count as f32) * 100_f32;
println!(
"{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n",
uptodate_percent, outdated_percent, untranslated_percent,
);
let result = FindLocalization {
LocalizationStats {
uptodate_entries,
unused_entries,
outdated_entries,
notfound_entries,
errors: unknown_entries,
real_entry_count,
};
Some(result)
}
}
/// Prints the report for one language: the table header, the entries of
/// every non-up-to-date state, the absolute counters and the percentages.
///
/// * `ref_i18n_map` - entry states of the reference language, used for the
///   reference commit column in verbose mode.
/// * `stats` - counters gathered for the language.
/// * `state_map` - per-state entry lists; taken mutably because showing a
///   list sorts it.
/// * `be_verbose` - also print the commit id columns.
/// * `relfile` - path of the language manifest, used as a column title.
/// * `ref_manifest` - path of the reference manifest, used as a column title.
///
/// Percentages are relative to `stats.real_entry_count`.
///
/// # Panics
/// Panics in verbose mode if `relfile` or `ref_manifest` is not valid UTF-8.
fn print_translation_stats(
    ref_i18n_map: &HashMap<String, LocalizationEntryState>,
    stats: &LocalizationStats,
    state_map: &mut LocalizationAnalysis,
    be_verbose: bool,
    relfile: PathBuf,
    ref_manifest: &Path,
) {
    let percent_of_real =
        |count: usize| (count as f32 / stats.real_entry_count as f32) * 100_f32;

    let uptodate_percent = percent_of_real(stats.uptodate_entries);
    let outdated_percent = percent_of_real(stats.outdated_entries);
    // Untranslated = keys that were not found plus keys in unknown state
    // (`errors`). Counting `errors` twice would ignore missing keys entirely.
    let untranslated_percent = percent_of_real(stats.notfound_entries + stats.errors);

    // Display
    if be_verbose {
        println!(
            "\n{:60}| {:40} | {:40}",
            "Key name",
            relfile.to_str().unwrap(),
            ref_manifest.to_str().unwrap(),
        );
    } else {
        println!("\nKey name");
    }

    state_map.show(LocalizationState::NotFound, be_verbose, ref_i18n_map);
    state_map.show(LocalizationState::Unused, be_verbose, ref_i18n_map);
    state_map.show(LocalizationState::Outdated, be_verbose, ref_i18n_map);
    state_map.show(LocalizationState::Unknown, be_verbose, ref_i18n_map);

    println!(
        "\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries",
        stats.uptodate_entries,
        stats.outdated_entries,
        stats.unused_entries,
        stats.notfound_entries,
        stats.errors,
    );

    println!(
        "{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n",
        uptodate_percent, outdated_percent, untranslated_percent,
    );
}
/// Test one language
/// `code` - name of the directory in assets (de_DE for example)
/// `root_dir` - absolute path to main repo
/// `asset_path` - relative path to asset directory (right now it is
/// `assets_path` - relative path to asset directory (right now it is
/// 'assets/voxygen/i18n')
pub fn test_specific_localization(code: String, root_dir: &Path, asset_path: &Path) {
pub fn test_specific_localization(
code: &str,
root_dir: &Path,
assets_path: &Path,
be_verbose: bool,
) {
// Relative paths from root of repo to assets
let ref_lang_dir = asset_path.join(REFERENCE_LANG);
let ref_lang_dir = assets_path.join(REFERENCE_LANG);
let ref_manifest = ref_lang_dir.join(LANG_MANIFEST_FILE.to_string() + ".ron");
// Initialize Git objects
@ -619,39 +517,26 @@ pub fn test_specific_localization(code: String, root_dir: &Path, asset_path: &Pa
let ref_manifest_blob = read_file_from_path(&repo, &head_ref, &ref_manifest);
let loc: RawLocalization = from_bytes(ref_manifest_blob.content())
.expect("Expect to parse reference i18n RON file, can't proceed without it");
let mut i18n_references: HashMap<String, LocalizationEntryState> = generate_key_version(
&repo,
&LocalizationFragment::from(loc.clone()),
&ref_manifest,
&ref_manifest_blob,
);
// Gathering info about keys from reference language
complete_key_versions(
&repo,
&head_ref,
&mut i18n_references,
let reference_i18n = gather_state(
&loc,
&ref_manifest_blob,
&ref_manifest,
root_dir,
&ref_lang_dir,
);
for sub_directory in loc.sub_directories.iter() {
let subdir_path = &ref_lang_dir.join(sub_directory);
complete_key_versions(
&repo,
&head_ref,
&mut i18n_references,
root_dir,
&subdir_path,
);
}
// Testing how specific language is localized
let dir = asset_path.join(code);
let dir = assets_path.join(code);
test_localization_directory(
&dir,
root_dir,
&ref_manifest,
&i18n_references,
&reference_i18n,
be_verbose,
&repo,
&head_ref,
);
@ -659,20 +544,17 @@ pub fn test_specific_localization(code: String, root_dir: &Path, asset_path: &Pa
/// Test all localizations
/// `root_dir` - absolute path to main repo
/// `asset_path` - relative path to asset directory (right now it is
/// `assets_path` - relative path to asset directory (right now it is
/// 'assets/voxygen/i18n')
pub fn test_all_localizations(root_dir: &Path, asset_path: &Path) {
let ref_i18n_dir_path = asset_path.join(REFERENCE_LANG);
let ref_i18n_path = ref_i18n_dir_path.join(LANG_MANIFEST_FILE.to_string() + ".ron");
pub fn test_all_localizations(root_dir: &Path, assets_path: &Path, be_verbose: bool) {
let ref_lang_dir = assets_path.join(REFERENCE_LANG);
let ref_manifest = ref_lang_dir.join(LANG_MANIFEST_FILE.to_string() + ".ron");
if !root_dir.join(&ref_i18n_dir_path).is_dir() {
panic!(
"Reference language folder not found {:?}",
&ref_i18n_dir_path
)
if !root_dir.join(&ref_lang_dir).is_dir() {
panic!("Reference language folder not found {:?}", &ref_lang_dir)
}
if !root_dir.join(&ref_i18n_path).is_file() {
panic!("Reference language file not found {:?}", &ref_i18n_path)
if !root_dir.join(&ref_manifest).is_file() {
panic!("Reference language file not found {:?}", &ref_manifest)
}
// Initialize Git objects
@ -681,46 +563,32 @@ pub fn test_all_localizations(root_dir: &Path, asset_path: &Path) {
let head_ref = repo.head().expect("Impossible to get the HEAD reference");
// Read HEAD for the reference language file
let i18n_ref_blob = read_file_from_path(&repo, &head_ref, &ref_i18n_path);
let loc: RawLocalization = from_bytes(i18n_ref_blob.content())
let ref_manifest_blob = read_file_from_path(&repo, &head_ref, &ref_manifest);
let loc: RawLocalization = from_bytes(ref_manifest_blob.content())
.expect("Expect to parse reference i18n RON file, can't proceed without it");
let mut i18n_references: HashMap<String, LocalizationEntryState> = generate_key_version(
&repo,
&LocalizationFragment::from(loc.clone()),
&ref_i18n_path,
&i18n_ref_blob,
);
// Gathering info about keys from reference language
complete_key_versions(
let reference_i18n = gather_state(
&loc,
&ref_manifest_blob,
&ref_manifest,
root_dir,
&ref_lang_dir,
&repo,
&head_ref,
&mut i18n_references,
root_dir,
&ref_i18n_dir_path,
);
// read HEAD for the subfolders
for sub_directory in loc.sub_directories.iter() {
let subdir_path = &ref_i18n_dir_path.join(sub_directory);
complete_key_versions(
&repo,
&head_ref,
&mut i18n_references,
root_dir,
&subdir_path,
);
}
// Compare to other reference files
let i18n_directories = i18n_directories(&root_dir.join(asset_path));
let mut i18n_entry_counts: HashMap<PathBuf, FindLocalization> = HashMap::new();
let i18n_directories = i18n_directories(&root_dir.join(assets_path));
let mut i18n_entry_counts: HashMap<PathBuf, LocalizationStats> = HashMap::new();
for dir in &i18n_directories {
let rel_dir = dir.strip_prefix(root_dir).unwrap();
let result = test_localization_directory(
rel_dir,
root_dir,
&ref_i18n_path,
&i18n_references,
&ref_manifest,
&reference_i18n,
be_verbose,
&repo,
&head_ref,
);
@ -729,6 +597,10 @@ pub fn test_all_localizations(root_dir: &Path, asset_path: &Path) {
}
}
print_overall_stats(i18n_entry_counts);
}
fn print_overall_stats(i18n_entry_counts: HashMap<PathBuf, LocalizationStats>) {
let mut overall_uptodate_entry_count = 0;
let mut overall_outdated_entry_count = 0;
let mut overall_untranslated_entry_count = 0;
@ -742,8 +614,11 @@ pub fn test_all_localizations(root_dir: &Path, asset_path: &Path) {
"", "up-to-date", "outdated", "untranslated", "unused", "errors",
);
for (path, test_result) in i18n_entry_counts {
let FindLocalization {
let mut i18n_stats: Vec<(&PathBuf, &LocalizationStats)> = i18n_entry_counts.iter().collect();
i18n_stats.sort_by_key(|(_, result)| result.notfound_entries);
for (path, test_result) in i18n_stats {
let LocalizationStats {
uptodate_entries: uptodate,
outdated_entries: outdated,
unused_entries: unused,

View File

@ -1,22 +1,51 @@
use std::{env::args, path::Path, vec::Vec};
use veloren_i18n::analysis;
use clap::{App, Arg};
use std::path::Path;
use veloren_i18n::{analysis, verification};
fn main() {
let cli: Vec<String> = args().collect();
let matches = App::new("i18n-check")
.version("0.1.0")
.author("juliancoffee <lightdarkdaughter@gmail.com>")
.about("Test veloren localizations")
.arg(
Arg::with_name("CODE")
.required(false)
.help("Run diagnostic for specific language code (de_DE as example)"),
)
.arg(
Arg::with_name("verify")
.long("verify")
.help("verify all localizations"),
)
.arg(
Arg::with_name("test")
.long("test")
.help("test all localizations"),
)
.arg(
Arg::with_name("verbose")
.short("v")
.long("verbose")
.help("print additional information"),
)
.get_matches();
// Generate paths
let curr_dir = std::env::current_dir().unwrap();
let root = curr_dir.parent().unwrap().parent().unwrap();
let root = veloren_i18n::find_root().expect("Failed to find root of repository");
let asset_path = Path::new("assets/voxygen/i18n/");
for (i, arg) in cli.iter().enumerate() {
match arg.as_str() {
"--all" => analysis::test_all_localizations(root, asset_path),
"--verify" => analysis::verify_all_localizations(root, asset_path),
"--lang" => {
let code = cli[i + 1].clone();
analysis::test_specific_localization(code, root, asset_path);
},
_ => continue,
}
if let Some(code) = matches.value_of("CODE") {
analysis::test_specific_localization(
code,
&root,
&asset_path,
matches.is_present("verbose"),
);
}
if matches.is_present("test") {
analysis::test_all_localizations(&root, &asset_path, matches.is_present("verbose"));
}
if matches.is_present("verify") {
verification::verify_all_localizations(&root, &asset_path);
}
}

View File

@ -2,6 +2,10 @@ use crate::assets::{self, AssetExt, AssetGuard, AssetHandle};
use deunicode::deunicode;
use hashbrown::{HashMap, HashSet};
use serde::{Deserialize, Serialize};
use std::{
fs,
path::{Path, PathBuf},
};
use tracing::warn;
/// The reference language, aka the more up-to-date localization data.
@ -49,11 +53,11 @@ pub type Fonts = HashMap<String, Font>;
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
pub(crate) struct RawLocalization {
pub(crate) sub_directories: Vec<String>,
pub(crate) string_map: HashMap<String, String>,
pub(crate) vector_map: HashMap<String, Vec<String>>,
pub(crate) convert_utf8_to_ascii: bool,
pub(crate) fonts: Fonts,
pub(crate) metadata: LanguageMetadata,
pub(crate) string_map: HashMap<String, String>,
pub(crate) vector_map: HashMap<String, Vec<String>>,
}
/// Store internationalization data
@ -85,7 +89,7 @@ struct Language {
/// Store internationalization maps
/// These structs are meant to be merged into a Language
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct LocalizationFragment {
pub(crate) struct LocalizationFragment {
/// A map storing the localized texts
///
/// Localized content can be accessed using a String key.
@ -100,7 +104,7 @@ struct LocalizationFragment {
impl Language {
/// Get a localized text from the given key
pub fn get<'a>(&'a self, key: &'a str) -> Option<&str> {
self.string_map.get(key).map(|s| s.as_str())
self.string_map.get(key).map(String::as_str)
}
/// Get a variation of localized text from the given key
@ -110,16 +114,13 @@ impl Language {
/// If the key is not present in the localization object
/// then the key is returned.
pub fn get_variation<'a>(&'a self, key: &'a str, index: u16) -> Option<&str> {
self.vector_map
.get(key)
.map(|v| {
if !v.is_empty() {
Some(v[index as usize % v.len()].as_str())
} else {
self.vector_map.get(key).and_then(|v| {
if v.is_empty() {
None
} else {
Some(v[index as usize % v.len()].as_str())
}
})
.flatten()
}
}
@ -247,8 +248,7 @@ impl LocalizationGuard {
self.active.get(key).unwrap_or_else(|| {
self.fallback
.as_ref()
.map(|f| f.get(key))
.flatten()
.and_then(|f| f.get(key))
.unwrap_or(key)
})
}
@ -263,8 +263,7 @@ impl LocalizationGuard {
self.active.get_variation(key, index).unwrap_or_else(|| {
self.fallback
.as_ref()
.map(|f| f.get_variation(key, index))
.flatten()
.and_then(|f| f.get_variation(key, index))
.unwrap_or(key)
})
}
@ -389,42 +388,66 @@ pub fn list_localizations() -> Vec<LanguageMetadata> {
/// Start hot reloading of i18n assets
pub fn start_hot_reloading() { assets::start_hot_reloading(); }
/// Return the path to the repository root.
///
/// Starting at the current working directory, the directory itself and then
/// its parents are inspected (ten directories in total) and the first one
/// containing a `.git` directory is returned. `None` is returned when the
/// current directory cannot be determined or no such directory is found.
pub fn find_root() -> Option<PathBuf> {
    let start = std::env::current_dir().ok()?;

    // `ancestors` yields `start` itself first, then each parent in turn.
    start
        .ancestors()
        .take(10)
        .find(|dir| dir.join(".git").is_dir())
        .map(|dir| dir.to_path_buf())
}
/// List localization directories as a `PathBuf` vector.
///
/// Returns the path of every directory found directly inside `i18n_dir`;
/// plain files are skipped.
///
/// # Panics
/// Panics, naming `i18n_dir`, if the directory cannot be read or if one of
/// its entries cannot be read.
pub fn i18n_directories(i18n_dir: &Path) -> Vec<PathBuf> {
    fs::read_dir(i18n_dir)
        .unwrap_or_else(|e| panic!("Failed to read i18n directory {:?}: {}", i18n_dir, e))
        .map(|entry| {
            entry
                .unwrap_or_else(|e| panic!("Failed to read an entry of {:?}: {}", i18n_dir, e))
                .path()
        })
        .filter(|path| path.is_dir())
        .collect()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::analysis;
use std::path::Path;
// Test that localization list is loaded (not empty)
#[test]
fn test_localization_list() {
let list = list_localizations();
let list = super::list_localizations();
assert!(!list.is_empty());
}
// Test that reference language can be loaded
#[test]
fn test_localization_handle() { let _ = LocalizationHandle::load_expect(REFERENCE_LANG); }
fn test_localization_handle() {
let _ = super::LocalizationHandle::load_expect(super::REFERENCE_LANG);
}
// Test to verify all languages that they are VALID and loadable, without
// need of git just on the local assets folder
#[test]
fn verify_all_localizations() {
// Generate paths
let i18n_asset_path = Path::new("assets/voxygen/i18n/");
let curr_dir = std::env::current_dir().unwrap();
let root_dir = curr_dir.parent().unwrap().parent().unwrap();
analysis::verify_all_localizations(&root_dir, &i18n_asset_path);
let i18n_asset_path = std::path::Path::new("assets/voxygen/i18n/");
let root_dir = super::find_root().expect("Failed to discover repository root");
crate::verification::verify_all_localizations(&root_dir, &i18n_asset_path);
}
// Test to verify all languages and print missing and faulty localisation
#[test]
#[ignore]
fn test_all_localizations() {
// Options
let be_verbose = true;
// Generate paths
let i18n_asset_path = Path::new("assets/voxygen/i18n/");
let curr_dir = std::env::current_dir().unwrap();
let root_dir = curr_dir.parent().unwrap().parent().unwrap();
analysis::test_all_localizations(&root_dir, &i18n_asset_path);
let i18n_asset_path = std::path::Path::new("assets/voxygen/i18n/");
let root_dir = super::find_root().expect("Failed to discover repository root");
crate::analysis::test_all_localizations(&root_dir, &i18n_asset_path, be_verbose);
}
}

View File

@ -1,5 +1,7 @@
#[cfg(any(feature = "bin", test))]
pub mod analysis;
mod data;
pub mod verification;
use common_assets as assets;
pub use data::*;

View File

@ -0,0 +1,88 @@
use ron::de::from_reader;
use std::{fs, path::Path};
use crate::data::{
i18n_directories, LocalizationFragment, RawLocalization, LANG_MANIFEST_FILE, REFERENCE_LANG,
};
/// Tries to parse every fragment file located directly inside one
/// localization directory.
///
/// `directory_path` is joined onto `root_dir` to locate the directory.
/// Sub-directories and the manifest file are skipped; every other file must
/// deserialize as a `LocalizationFragment`.
///
/// # Panics
/// Panics if the directory cannot be read, if a visited path does not start
/// with `root_dir`, or if a file cannot be opened or parsed.
fn verify_localization_directory(root_dir: &Path, directory_path: &Path) {
    // The manifest does not contain the struct we want to load here.
    let manifest_name = LANG_MANIFEST_FILE.to_string() + ".ron";

    let entries = root_dir.join(&directory_path).read_dir().unwrap().flatten();
    for entry in entries {
        // Entries whose type cannot be determined are ignored.
        let file_type = match entry.file_type() {
            Ok(file_type) => file_type,
            Err(_) => continue,
        };
        if !file_type.is_file() || entry.file_name().to_string_lossy() == manifest_name {
            continue;
        }

        let full_path = entry.path();
        println!("-> {:?}", full_path.strip_prefix(&root_dir).unwrap());

        let f = fs::File::open(&full_path).expect("Failed opening file");
        let _loc: LocalizationFragment = from_reader(f).unwrap_or_else(|e| {
            panic!(
                "Could not parse {} RON file, error: {}",
                full_path.to_string_lossy(),
                e
            )
        });
    }
}
/// Verify that every language is VALID and loadable, using only the local
/// assets folder (no git needed).
/// `root_dir` - absolute path to main repo
/// `asset_path` - relative path to asset directory (right now it is
/// 'assets/voxygen/i18n')
///
/// Panics when the reference language folder or manifest is missing, when
/// the translation folder count check fails, or when any manifest or
/// fragment file cannot be opened or parsed.
pub fn verify_all_localizations(root_dir: &Path, asset_path: &Path) {
    let manifest_name = LANG_MANIFEST_FILE.to_string() + ".ron";

    let ref_i18n_dir_path = asset_path.join(REFERENCE_LANG);
    let ref_i18n_path = ref_i18n_dir_path.join(&manifest_name);
    assert!(
        root_dir.join(&ref_i18n_dir_path).is_dir(),
        "Reference language folder doesn't exist, something is wrong!"
    );
    assert!(
        root_dir.join(&ref_i18n_path).is_file(),
        "Reference language manifest file doesn't exist, something is wrong!"
    );

    let lang_dirs = i18n_directories(&root_dir.join(asset_path));
    // This simple check ONLY guarantees that an arbitrary minimum of
    // translation folders exists. It is there to notice unintentional
    // deletion of all files, or modified paths. Adjust the number if the
    // languages are removed on purpose.
    assert!(
        lang_dirs.len() > 5,
        "have less than 5 translation folders, arbitrary minimum check failed. Maybe the i18n \
         folder is empty?"
    );

    for lang_dir in lang_dirs {
        // Attempt to load the manifest file
        let manifest_path = lang_dir.join(&manifest_name);
        println!(
            "verifying {:?}",
            manifest_path.strip_prefix(&root_dir).unwrap()
        );
        let f = fs::File::open(&manifest_path).expect("Failed opening file");
        let raw_localization: RawLocalization = from_reader(f).unwrap_or_else(|e| {
            panic!(
                "Could not parse {} RON file, error: {}",
                lang_dir.to_string_lossy(),
                e
            )
        });

        // Fragment files that sit next to the manifest
        verify_localization_directory(root_dir, &lang_dir);

        // Fragment files in each sub-directory listed by the manifest
        raw_localization
            .sub_directories
            .iter()
            .map(|sub_directory| lang_dir.join(sub_directory))
            .for_each(|subdir_path| verify_localization_directory(root_dir, &subdir_path));
    }
}