redo i18n git analysis completely and introduce a way to export csv data

also improve the performance by caching git_graph_descendant
This commit is contained in:
Marcel Märtens 2021-07-28 15:20:09 +02:00
parent a142d59637
commit 2a314c3cab
7 changed files with 743 additions and 884 deletions

View File

@ -1,369 +1,99 @@
use crate::{
gitfragments::{
read_file_from_path, transform_fragment, LocalizationEntryState, LocalizationState,
},
i18n_directories,
raw::{self, RawFragment, RawLanguage},
stats::{
print_csv_file, print_overall_stats, print_translation_stats, LocalizationAnalysis,
LocalizationStats,
},
REFERENCE_LANG,
};
use hashbrown::{hash_map::Entry, HashMap, HashSet};
use ron::de::from_bytes; use ron::de::from_bytes;
use std::path::{Path, PathBuf}; use std::path::Path;
use crate::raw::{ /// Fill the entry State base information (except `state`) for a complete
i18n_directories, LocalizationFragment, RawLocalization, LANG_MANIFEST_FILE, REFERENCE_LANG, /// language
}; fn gather_entry_state<'a>(
use hashbrown::{HashMap, HashSet};
/// Translation status of a single localization key, relative to the
/// reference language.
#[derive(Copy, Clone, Eq, Hash, Debug, PartialEq)]
enum LocalizationState {
/// The entry's commit equals the reference entry's commit or is a git
/// descendant of it.
UpToDate,
/// The key exists in the reference language but not in this language.
NotFound,
/// The entry's commit differs from the reference entry's commit and is not
/// a git descendant of it.
Outdated,
/// Initial state of a freshly created entry; entries left in this state are
/// counted as errors in the statistics.
Unknown,
/// The key exists in this language but not in the reference language.
Unused,
}
/// Per-language entry counters, one per `LocalizationState`.
#[derive(Debug, PartialEq)]
struct LocalizationStats {
// Number of entries in state `UpToDate`.
uptodate_entries: usize,
// Number of entries in state `Outdated`.
outdated_entries: usize,
// Number of entries in state `Unused`.
unused_entries: usize,
// Number of entries in state `NotFound`.
notfound_entries: usize,
// Number of entries still in state `Unknown`.
errors: usize,
// Total number of entries minus the unused ones; used as the denominator
// for the percentage output.
real_entry_count: usize,
}
/// Entries of one language grouped by their `LocalizationState`.
///
/// Each list holds `(key, commit id)` pairs, where the commit id is the one
/// recorded on the entry (or `None` if no commit was found).
#[derive(Default)]
struct LocalizationAnalysis {
uptodate: Vec<(String, Option<git2::Oid>)>,
notfound: Vec<(String, Option<git2::Oid>)>,
unused: Vec<(String, Option<git2::Oid>)>,
outdated: Vec<(String, Option<git2::Oid>)>,
unknown: Vec<(String, Option<git2::Oid>)>,
}
impl LocalizationAnalysis {
    /// Returns the list collecting the entries that are in `state`.
    ///
    /// Every variant maps to a list, so the result is always `Some`.
    fn get_mut(
        &mut self,
        state: LocalizationState,
    ) -> Option<&mut Vec<(String, Option<git2::Oid>)>> {
        let bucket = match state {
            LocalizationState::UpToDate => &mut self.uptodate,
            LocalizationState::NotFound => &mut self.notfound,
            LocalizationState::Unused => &mut self.unused,
            LocalizationState::Outdated => &mut self.outdated,
            LocalizationState::Unknown => &mut self.unknown,
        };
        Some(bucket)
    }

    /// Prints all keys in `state` to stdout, sorted; prints nothing if there
    /// are none.
    ///
    /// With `be_verbose` each key is followed by the commit recorded for this
    /// language and the commit recorded for the same key in `ref_i18n_map`
    /// (`None` when missing).
    fn show(
        &mut self,
        state: LocalizationState,
        be_verbose: bool,
        ref_i18n_map: &HashMap<String, LocalizationEntryState>,
    ) {
        let entries = self.unwrap_entries(state);
        if entries.is_empty() {
            return;
        }
        println!("\n\t[{:?}]", state);
        // Sorting happens in place, which is why `self` is borrowed mutably.
        entries.sort();
        for (key, commit_id) in entries.iter_mut() {
            if !be_verbose {
                println!("{}", key);
                continue;
            }
            let our_commit = Self::create_our_commit(commit_id);
            let ref_commit = match ref_i18n_map.get(&*key).and_then(|entry| entry.commit_id) {
                Some(oid) => format!("{}", oid),
                None => "None".to_owned(),
            };
            println!("{:60}| {:40} | {:40}", key, our_commit, ref_commit,);
        }
    }

    //TODO: Add which file each faulty translation is in
    /// Prints one CSV row per entry in `state` to stdout.
    ///
    /// NOTE: the language code (`sv`) and file name (`_manifest.yml`) columns
    /// are hard-coded and do not reflect the analysed language.
    fn csv(&mut self, state: LocalizationState) {
        for (key, commit_id) in self.unwrap_entries(state).iter_mut() {
            let our_commit = Self::create_our_commit(commit_id);
            println!(
                "{},{},{},{:?},{}",
                "sv", "_manifest.yml", key, state, our_commit
            );
        }
    }

    /// Like `get_mut`, but panics instead of returning `None`.
    fn unwrap_entries(
        &mut self,
        state: LocalizationState,
    ) -> &mut Vec<(String, Option<git2::Oid>)> {
        match self.get_mut(state) {
            Some(entries) => entries,
            None => panic!("called on invalid state: {:?}", state),
        }
    }

    /// Formats a commit id for display, using `None` when it is absent.
    fn create_our_commit(commit_id: &mut Option<git2::Oid>) -> String {
        match *commit_id {
            Some(oid) => format!("{}", oid),
            None => "None".to_owned(),
        }
    }
}
/// Git-related bookkeeping for a single localization key.
#[derive(Copy, Clone, Debug)]
struct LocalizationEntryState {
// Index (from `lines().enumerate()`) of the line in the file that declares
// the key, if it was found.
key_line: Option<usize>,
// `(start line, start line + hunk length)` of the git blame hunk that
// covers the key line.
chuck_line_range: Option<(usize, usize)>,
// Commit taken from the blame hunk covering the key line.
commit_id: Option<git2::Oid>,
// Result of comparing this entry with the reference language.
state: LocalizationState,
}
impl LocalizationEntryState {
    /// Creates an entry without any line or commit information, in state
    /// `Unknown`.
    fn new() -> LocalizationEntryState {
        Self {
            state: LocalizationState::Unknown,
            commit_id: None,
            chuck_line_range: None,
            key_line: None,
        }
    }
}
/// Returns the Git blob associated with the given reference and path
fn read_file_from_path<'a>(
repo: &'a git2::Repository, repo: &'a git2::Repository,
reference: &git2::Reference, head_ref: &git2::Reference,
path: &std::path::Path, language_identifier: &str,
) -> git2::Blob<'a> { root_path: &Path,
let tree = reference relative_i18n_root_path: &Path,
.peel_to_tree() ) -> RawLanguage<LocalizationEntryState> {
.expect("Impossible to peel HEAD to a tree object"); println!("-> {:?}", &language_identifier);
tree.get_path(path) let i18n_root_path = root_path.join(relative_i18n_root_path);
.unwrap_or_else(|_| { // load standard manifest
let manifest = raw::load_manifest(&i18n_root_path, language_identifier)
.expect("failed to load language manifest");
// transform language into LocalizationEntryState
let mut fragments = HashMap::new();
// For each file in directory
let files = raw::fragments_pathes_in_language(&i18n_root_path, language_identifier)
.expect("failed to get all files in language");
for subpath in files {
let path = relative_i18n_root_path
.join(language_identifier)
.join(&subpath);
println!(" -> {:?}", &subpath);
let i18n_blob = read_file_from_path(repo, head_ref, &path);
let fragment: RawFragment<String> = from_bytes(i18n_blob.content()).unwrap_or_else(|e| {
panic!( panic!(
"Impossible to find the file {:?} in reference {:?}", "Could not parse {} RON file, skipping: {}",
path, subpath.to_string_lossy(),
reference.name() e
) )
}) });
.to_object(repo) let frag = transform_fragment(repo, (&path, fragment), &i18n_blob);
.unwrap() fragments.insert(subpath.to_path_buf(), frag);
.peel_to_blob()
.expect("Impossible to fetch the Git object")
} }
fn correspond(line: &str, key: &str) -> bool { RawLanguage::<LocalizationEntryState> {
let pat = { manifest,
// Get left part of split fragments,
let mut begin = line }
.split(':') }
.next()
.expect("split always produces value") /// fills in the `state`
.trim() fn compare_lang_with_reference(
.chars(); current_i18n: &mut RawLanguage<LocalizationEntryState>,
// Remove quotes i18n_references: &RawLanguage<LocalizationEntryState>,
begin.next(); repo: &git2::Repository,
begin.next_back(); ) {
begin.as_str() // git graph decendent of is slow, so we cache it
let mut graph_decendent_of_cache = HashMap::new();
let mut cached_graph_descendant_of = |commit, ancestor| -> bool {
let key = (commit, ancestor);
match graph_decendent_of_cache.entry(key) {
Entry::Occupied(entry) => {
return *entry.get();
},
Entry::Vacant(entry) => {
let value = repo.graph_descendant_of(commit, ancestor).unwrap_or(false);
*entry.insert(value)
},
}
}; };
pat == key // match files
} for (ref_path, ref_fragment) in i18n_references.fragments.iter() {
let cur_fragment = match current_i18n.fragments.get_mut(ref_path) {
fn generate_key_version<'a>( Some(c) => c,
repo: &'a git2::Repository,
localization: &LocalizationFragment,
path: &std::path::Path,
file_blob: &git2::Blob,
) -> HashMap<String, LocalizationEntryState> {
let mut keys: HashMap<String, LocalizationEntryState> = localization
.string_map
.keys()
.map(|k| (k.to_owned(), LocalizationEntryState::new()))
.collect();
// Find key start lines
let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file");
let mut to_process: HashSet<&String> = localization.string_map.keys().collect();
for (line_nb, line) in file_content.lines().enumerate() {
let mut found_key = None;
for key in to_process.iter() {
if correspond(line, key) {
found_key = Some(key.to_owned());
}
}
if let Some(key) = found_key {
keys.get_mut(key).unwrap().key_line = Some(line_nb);
to_process.remove(key);
};
}
let mut error_check_set: Vec<String> = vec![];
// Find commit for each keys
repo.blame_file(path, None)
.expect("Impossible to generate the Git blame")
.iter()
.for_each(|e: git2::BlameHunk| {
for (key, state) in keys.iter_mut() {
let line = match state.key_line {
Some(l) => l,
None => { None => {
if !error_check_set.contains(key) {
eprintln!( eprintln!(
"Key {} does not have a git line in it's state! Skipping key.", "language {} is missing file: {:?}",
key current_i18n.manifest.metadata.language_identifier, ref_path
); );
error_check_set.push(key.clone());
}
continue; continue;
}, },
}; };
if line + 1 >= e.final_start_line() for (ref_key, ref_state) in ref_fragment.string_map.iter() {
&& line + 1 < e.final_start_line() + e.lines_in_hunk() match cur_fragment.string_map.get_mut(ref_key) {
{
state.chuck_line_range = Some((
e.final_start_line(),
e.final_start_line() + e.lines_in_hunk(),
));
state.commit_id = match state.commit_id {
Some(existing_commit) => {
match repo.graph_descendant_of(e.final_commit_id(), existing_commit) {
Ok(true) => Some(e.final_commit_id()),
Ok(false) => Some(existing_commit),
Err(err) => panic!("{}", err),
}
},
None => Some(e.final_commit_id()),
};
}
}
});
keys
}
/// Walks `lang_dir` (joined onto `root_dir`) recursively and merges the key
/// versions of every file found into `i18n_key_versions`.
///
/// Each regular file is read from `head_ref` via git, parsed as a RON
/// `LocalizationFragment` and passed to `generate_key_version`.
/// Directory entries that cannot be read, or whose type cannot be determined,
/// are skipped silently.
///
/// # Panics
/// Panics if the directory cannot be listed, if a file path is not below
/// `root_dir`, or if a file cannot be parsed as RON.
fn complete_key_versions<'a>(
    repo: &'a git2::Repository,
    head_ref: &git2::Reference,
    i18n_key_versions: &mut HashMap<String, LocalizationEntryState>,
    root_dir: &Path,
    lang_dir: &Path,
) {
    //TODO: review unwraps in this file
    let dir_entries = root_dir.join(&lang_dir).read_dir().unwrap();
    for i18n_file in dir_entries.flatten() {
        let file_type = match i18n_file.file_type() {
            Ok(file_type) => file_type,
            Err(_) => continue,
        };

        if file_type.is_file() {
            println!("-> {:?}", i18n_file.file_name());
            let full_path = i18n_file.path();
            // Git lookups need the path relative to the repository root.
            let path = full_path.strip_prefix(root_dir).unwrap();
            let i18n_blob = read_file_from_path(repo, head_ref, path);
            let i18n: LocalizationFragment = match from_bytes(i18n_blob.content()) {
                Ok(fragment) => fragment,
                Err(e) => panic!(
                    "Could not parse {} RON file, skipping: {}",
                    i18n_file.path().to_string_lossy(),
                    e
                ),
            };
            let key_versions = generate_key_version(repo, &i18n, path, &i18n_blob);
            i18n_key_versions.extend(key_versions);
        } else if file_type.is_dir() {
            // Descend into sub-directories.
            let sub_dir = i18n_file.path();
            complete_key_versions(repo, head_ref, i18n_key_versions, root_dir, &sub_dir);
        }
    }
}
/// Builds the key -> entry state map for one language.
///
/// The map is seeded from the manifest `loc` (using `i18n_blob` for the key
/// lines and `ref_manifest` as the path handed to `generate_key_version`) and
/// then extended with every file found below `lang_dir`.
fn gather_state(
    loc: &RawLocalization,
    i18n_blob: &git2::Blob,
    ref_manifest: &Path,
    root_dir: &Path,
    lang_dir: &Path,
    repo: &git2::Repository,
    head_ref: &git2::Reference,
) -> HashMap<String, LocalizationEntryState> {
    // Seed the map with the keys of the manifest itself.
    let manifest_fragment = LocalizationFragment::from(loc.clone());
    let mut i18n_map = generate_key_version(repo, &manifest_fragment, ref_manifest, i18n_blob);

    // Add the keys of all remaining files of the language.
    complete_key_versions(repo, head_ref, &mut i18n_map, root_dir, lang_dir);

    i18n_map
}
// Helper function to test localization directory
// `lang_dir` - path to localization directory. Relative from root of the
// repo.
// `root_dir` - absolute path to repo
// `ref_manifest` - path to reference manifest
// `i18n_references` - keys from reference language
// `repo` - git object for main repo
// `head_ref` - HEAD
fn test_localization_directory(
lang_dir: &Path,
root_dir: &Path,
ref_manifest: &Path,
i18n_references: &HashMap<String, LocalizationEntryState>,
be_verbose: bool,
csv_enabled: bool,
repo: &git2::Repository,
head_ref: &git2::Reference,
) -> Option<LocalizationStats> {
let relfile = lang_dir.join(&(LANG_MANIFEST_FILE.to_string() + ".ron"));
if relfile == ref_manifest {
return None;
}
println!("\n-----------------------------------");
println!("{:?}", relfile);
println!("-----------------------------------");
// Find the localization entry state
let current_blob = read_file_from_path(repo, head_ref, &relfile);
let current_loc: RawLocalization = from_bytes(current_blob.content()).unwrap_or_else(|e| {
panic!(
"Could not parse {} RON file, skipping: {}",
relfile.to_string_lossy(),
e
)
});
// Gather state of current localization
let mut current_i18n = gather_state(
&current_loc,
&current_blob,
ref_manifest,
root_dir,
lang_dir,
repo,
head_ref,
);
// Comparing with reference localization
fill_info(&mut current_i18n, i18n_references, repo, &relfile);
let mut state_map = LocalizationAnalysis::default();
let result = gather_results(current_i18n, &mut state_map);
print_csv_file(&mut state_map, relfile);
Some(result)
}
fn fill_info(
current_i18n: &mut HashMap<String, LocalizationEntryState>,
i18n_references: &HashMap<String, LocalizationEntryState>,
repo: &git2::Repository,
relfile: &Path,
) {
for (ref_key, ref_state) in i18n_references.iter() {
match current_i18n.get_mut(ref_key) {
Some(state) => { Some(state) => {
let commit_id = match state.commit_id { let commit_id = match state.commit_id {
Some(c) => c, Some(c) => c,
@ -371,7 +101,7 @@ fn fill_info(
eprintln!( eprintln!(
"Commit ID of key {} in i18n file {} is missing! Skipping key.", "Commit ID of key {} in i18n file {} is missing! Skipping key.",
ref_key, ref_key,
relfile.to_string_lossy() ref_path.to_string_lossy()
); );
continue; continue;
}, },
@ -380,307 +110,174 @@ fn fill_info(
Some(c) => c, Some(c) => c,
None => { None => {
eprintln!( eprintln!(
"Commit ID of key {} in reference i18n file is missing! Skipping key.", "Commit ID of key {} in reference i18n file is missing! Skipping \
key.",
ref_key ref_key
); );
continue; continue;
}, },
}; };
if commit_id != ref_commit_id if commit_id != ref_commit_id
&& !repo && !cached_graph_descendant_of(commit_id, ref_commit_id)
.graph_descendant_of(commit_id, ref_commit_id)
.unwrap_or(false)
{ {
state.state = LocalizationState::Outdated; state.state = Some(LocalizationState::Outdated);
} else { } else {
state.state = LocalizationState::UpToDate; state.state = Some(LocalizationState::UpToDate);
} }
}, },
None => { None => {
current_i18n.insert(ref_key.to_owned(), LocalizationEntryState { cur_fragment
.string_map
.insert(ref_key.to_owned(), LocalizationEntryState {
key_line: None, key_line: None,
chuck_line_range: None, chuck_line_range: None,
commit_id: None, commit_id: None,
state: LocalizationState::NotFound, state: Some(LocalizationState::NotFound),
}); });
}, },
} }
} }
let ref_keys: HashSet<&String> = i18n_references.keys().collect(); let ref_keys: HashSet<&String> = ref_fragment.string_map.keys().collect();
for (_, state) in current_i18n for (_, state) in cur_fragment
.string_map
.iter_mut() .iter_mut()
.filter(|&(k, _)| !ref_keys.contains(k)) .filter(|&(k, _)| !ref_keys.contains(k))
{ {
state.state = LocalizationState::Unused; state.state = Some(LocalizationState::Unused);
}
} }
} }
fn gather_results( fn gather_results(
current_i18n: HashMap<String, LocalizationEntryState>, current_i18n: &RawLanguage<LocalizationEntryState>,
state_map: &mut LocalizationAnalysis, ) -> (LocalizationAnalysis, LocalizationStats) {
) -> LocalizationStats { let mut state_map =
let mut uptodate_entries = 0; LocalizationAnalysis::new(&current_i18n.manifest.metadata.language_identifier);
let mut outdated_entries = 0; let mut stats = LocalizationStats::default();
let mut unused_entries = 0;
let mut notfound_entries = 0;
let mut unknown_entries = 0;
let keys: Vec<&String> = current_i18n.keys().collect(); for (file, fragments) in &current_i18n.fragments {
for key in keys { for (key, entry) in &fragments.string_map {
let entry = current_i18n.get(key).unwrap();
match entry.state { match entry.state {
LocalizationState::Outdated => outdated_entries += 1, Some(LocalizationState::Outdated) => stats.outdated_entries += 1,
LocalizationState::NotFound => notfound_entries += 1, Some(LocalizationState::NotFound) => stats.notfound_entries += 1,
LocalizationState::Unknown => unknown_entries += 1, None => stats.errors += 1,
LocalizationState::Unused => unused_entries += 1, Some(LocalizationState::Unused) => stats.unused_entries += 1,
LocalizationState::UpToDate => uptodate_entries += 1, Some(LocalizationState::UpToDate) => stats.uptodate_entries += 1,
}; };
if entry.state != LocalizationState::UpToDate { if entry.state != Some(LocalizationState::UpToDate) {
let state_keys = state_map let state_keys = state_map.data.get_mut(&entry.state).expect("prefiled");
.get_mut(entry.state) state_keys.push((file.clone(), key.to_owned(), entry.commit_id));
.expect("vectors must be added"); }
state_keys.push((key.to_owned(), entry.commit_id));
} }
} }
// Calculate key count that actually matter for the status of the translation for (_, entries) in state_map.data.iter_mut() {
// Unused entries don't break the game entries.sort();
let current_i18n_entry_count = current_i18n.len();
let real_entry_count = current_i18n_entry_count - unused_entries;
LocalizationStats {
uptodate_entries,
unused_entries,
outdated_entries,
notfound_entries,
errors: unknown_entries,
real_entry_count,
}
} }
fn print_translation_stats( (state_map, stats)
ref_i18n_map: &HashMap<String, LocalizationEntryState>, }
stats: &LocalizationStats,
state_map: &mut LocalizationAnalysis, /// completely analysis multiple languages without printing
be_verbose: bool, fn complete_analysis(
relfile: PathBuf, language_identifiers: &[&str],
ref_manifest: &Path, root_path: &Path,
relative_i18n_root_path: &Path,
) -> (
HashMap<String, (LocalizationAnalysis, LocalizationStats)>,
/* ref lang */ RawLanguage<LocalizationEntryState>,
) { ) {
let uptodate_percent = let mut result = HashMap::new();
(stats.uptodate_entries as f32 / stats.real_entry_count as f32) * 100_f32; // Initialize Git objects
let outdated_percent = let repo = git2::Repository::discover(&root_path)
(stats.outdated_entries as f32 / stats.real_entry_count as f32) * 100_f32; .unwrap_or_else(|_| panic!("Failed to open the Git repository at {:?}", &root_path));
let untranslated_percent = let head_ref = repo.head().expect("Impossible to get the HEAD reference");
((stats.errors + stats.errors) as f32 / stats.real_entry_count as f32) * 100_f32;
// Display // Read Reference Language
if be_verbose { let ref_language = gather_entry_state(
println!( &repo,
"\n{:60}| {:40} | {:40}", &head_ref,
"Key name", REFERENCE_LANG,
relfile.to_str().unwrap(), root_path,
ref_manifest.to_str().unwrap(), relative_i18n_root_path,
); );
} else { for &language_identifier in language_identifiers {
println!("\nKey name"); let mut cur_language = gather_entry_state(
&repo,
&head_ref,
language_identifier,
root_path,
relative_i18n_root_path,
);
compare_lang_with_reference(&mut cur_language, &ref_language, &repo);
let (state_map, stats) = gather_results(&cur_language);
result.insert(language_identifier.to_owned(), (state_map, stats));
} }
(result, ref_language)
state_map.show(LocalizationState::NotFound, be_verbose, ref_i18n_map);
state_map.show(LocalizationState::Unused, be_verbose, ref_i18n_map);
state_map.show(LocalizationState::Outdated, be_verbose, ref_i18n_map);
state_map.show(LocalizationState::Unknown, be_verbose, ref_i18n_map);
println!(
"\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries",
stats.uptodate_entries,
stats.outdated_entries,
stats.unused_entries,
stats.notfound_entries,
stats.errors,
);
println!(
"{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n",
uptodate_percent, outdated_percent, untranslated_percent,
);
}
fn print_csv_file(state_map: &mut LocalizationAnalysis, relfile: PathBuf) {
println!("country_code,file_name,translation_code,status,git_commit");
state_map.csv(LocalizationState::UpToDate);
state_map.csv(LocalizationState::NotFound);
state_map.csv(LocalizationState::Unused);
state_map.csv(LocalizationState::Outdated);
state_map.csv(LocalizationState::Unknown);
} }
/// Test one language /// Test one language
/// `code` - name of the directory in assets (de_DE for example) /// - `code`: name of the directory in assets (de_DE for example)
/// `root_dir` - absolute path to main repo /// - `root_path`: absolute path to main repo
/// `assets_path` - relative path to asset directory (right now it is /// - `relative_i18n_root_path`: relative path to asset directory (right now it
/// 'assets/voxygen/i18n') /// is 'assets/voxygen/i18n')
/// be_verbose - /// - be_verbose: print extra info
/// csv_enabled - generate csv files in target folder /// - csv_enabled: generate csv files in target folder
pub fn test_specific_localization( pub fn test_specific_localizations(
code: &str, language_identifiers: &[&str],
root_dir: &Path, root_path: &Path,
assets_path: &Path, relative_i18n_root_path: &Path,
be_verbose: bool, be_verbose: bool,
csv_enabled: bool, csv_enabled: bool,
) { ) {
// Relative paths from root of repo to assets let (analysis, reference_language) =
let ref_lang_dir = assets_path.join(REFERENCE_LANG); complete_analysis(language_identifiers, root_path, relative_i18n_root_path);
let ref_manifest = ref_lang_dir.join(LANG_MANIFEST_FILE.to_string() + ".ron"); for (language_identifier, (state_map, stats)) in &analysis {
if csv_enabled {
// Initialize Git objects print_csv_file(state_map);
let repo = git2::Repository::discover(&root_dir) } else {
.unwrap_or_else(|_| panic!("Failed to open the Git repository at {:?}", &root_dir)); print_translation_stats(
let head_ref = repo.head().expect("Impossible to get the HEAD reference"); language_identifier,
&reference_language,
// Read HEAD for the reference language manifest stats,
let ref_manifest_blob = read_file_from_path(&repo, &head_ref, &ref_manifest); state_map,
let loc: RawLocalization = from_bytes(ref_manifest_blob.content())
.expect("Expect to parse reference i18n RON file, can't proceed without it");
// Gathering info about keys from reference language
let reference_i18n = gather_state(
&loc,
&ref_manifest_blob,
&ref_manifest,
root_dir,
&ref_lang_dir,
&repo,
&head_ref,
);
// Testing how specific language is localized
let dir = assets_path.join(code);
test_localization_directory(
&dir,
root_dir,
&ref_manifest,
&reference_i18n,
be_verbose, be_verbose,
csv_enabled,
&repo,
&head_ref,
); );
} }
}
if analysis.len() > 1 {
print_overall_stats(analysis);
}
}
/// Test all localizations /// Test all localizations
/// `root_dir` - absolute path to main repo
/// `assets_path` - relative path to asset directory (right now it is
/// 'assets/voxygen/i18n')
/// csv_enabled - generate csv files in target folder
pub fn test_all_localizations( pub fn test_all_localizations(
root_dir: &Path, root_path: &Path,
assets_path: &Path, relative_i18n_root_path: &Path,
be_verbose: bool, be_verbose: bool,
csv_enabled: bool, csv_enabled: bool,
) { ) {
let ref_lang_dir = assets_path.join(REFERENCE_LANG); let i18n_root_path = root_path.join(relative_i18n_root_path);
let ref_manifest = ref_lang_dir.join(LANG_MANIFEST_FILE.to_string() + ".ron");
if !root_dir.join(&ref_lang_dir).is_dir() {
panic!("Reference language folder not found {:?}", &ref_lang_dir)
}
if !root_dir.join(&ref_manifest).is_file() {
panic!("Reference language file not found {:?}", &ref_manifest)
}
// Initialize Git objects
let repo = git2::Repository::discover(&root_dir)
.unwrap_or_else(|_| panic!("Failed to open the Git repository at {:?}", &root_dir));
let head_ref = repo.head().expect("Impossible to get the HEAD reference");
// Read HEAD for the reference language file
let ref_manifest_blob = read_file_from_path(&repo, &head_ref, &ref_manifest);
let loc: RawLocalization = from_bytes(ref_manifest_blob.content())
.expect("Expect to parse reference i18n RON file, can't proceed without it");
// Gathering info about keys from reference language
let reference_i18n = gather_state(
&loc,
&ref_manifest_blob,
&ref_manifest,
root_dir,
&ref_lang_dir,
&repo,
&head_ref,
);
// Compare to other reference files // Compare to other reference files
let i18n_directories = i18n_directories(&root_dir.join(assets_path)); let language_identifiers = i18n_directories(&i18n_root_path)
let mut i18n_entry_counts: HashMap<PathBuf, LocalizationStats> = HashMap::new(); .into_iter()
for dir in &i18n_directories { .map(|p| {
let rel_dir = dir.strip_prefix(root_dir).unwrap(); p.strip_prefix(&i18n_root_path)
let result = test_localization_directory( .unwrap()
rel_dir, .to_str()
root_dir, .unwrap()
&ref_manifest, .to_owned()
&reference_i18n, })
.collect::<Vec<_>>();
test_specific_localizations(
&language_identifiers
.iter()
.map(|s| s.as_str())
.collect::<Vec<_>>(),
root_path,
relative_i18n_root_path,
be_verbose, be_verbose,
csv_enabled, csv_enabled,
&repo,
&head_ref,
);
if let Some(values) = result {
i18n_entry_counts.insert(dir.clone(), values);
}
}
print_overall_stats(i18n_entry_counts);
}
fn print_overall_stats(i18n_entry_counts: HashMap<PathBuf, LocalizationStats>) {
let mut overall_uptodate_entry_count = 0;
let mut overall_outdated_entry_count = 0;
let mut overall_untranslated_entry_count = 0;
let mut overall_real_entry_count = 0;
println!("-----------------------------------------------------------------------------");
println!("Overall Translation Status");
println!("-----------------------------------------------------------------------------");
println!(
"{:12}| {:8} | {:8} | {:8} | {:8} | {:8}",
"", "up-to-date", "outdated", "untranslated", "unused", "errors",
);
let mut i18n_stats: Vec<(&PathBuf, &LocalizationStats)> = i18n_entry_counts.iter().collect();
i18n_stats.sort_by_key(|(_, result)| result.notfound_entries);
for (path, test_result) in i18n_stats {
let LocalizationStats {
uptodate_entries: uptodate,
outdated_entries: outdated,
unused_entries: unused,
notfound_entries: untranslated,
errors,
real_entry_count: real,
} = test_result;
overall_uptodate_entry_count += uptodate;
overall_outdated_entry_count += outdated;
overall_untranslated_entry_count += untranslated;
overall_real_entry_count += real;
println!(
"{:12}|{:8} |{:6} |{:8} |{:6} |{:8}",
path.file_name().unwrap().to_string_lossy(),
uptodate,
outdated,
untranslated,
unused,
errors,
); );
} }
println!(
"\n{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated",
(overall_uptodate_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
(overall_outdated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
(overall_untranslated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
);
println!("-----------------------------------------------------------------------------\n");
}

View File

@ -1,4 +1,3 @@
/*
use clap::{App, Arg}; use clap::{App, Arg};
use std::path::Path; use std::path::Path;
use veloren_i18n::{analysis, verification}; use veloren_i18n::{analysis, verification};
@ -37,31 +36,29 @@ fn main() {
.get_matches(); .get_matches();
// Generate paths // Generate paths
let root = common_assets::find_root().expect("Failed to find root of repository"); let root_path = common_assets::find_root().expect("Failed to find root of repository");
let asset_path = Path::new("assets/voxygen/i18n/"); let relative_i18n_root_path = Path::new("assets/voxygen/i18n/");
let be_verbose = matches.is_present("verbose");
let csv_enabled = matches.is_present("csv"); let csv_enabled = matches.is_present("csv");
if let Some(code) = matches.value_of("CODE") { if let Some(code) = matches.value_of("CODE") {
analysis::test_specific_localization( analysis::test_specific_localizations(
code, &[code],
&root, &root_path,
&asset_path, relative_i18n_root_path,
matches.is_present("verbose"), be_verbose,
csv_enabled, csv_enabled,
); );
} }
if matches.is_present("test") { if matches.is_present("test") {
analysis::test_all_localizations( analysis::test_all_localizations(
&root, &root_path,
&asset_path, relative_i18n_root_path,
matches.is_present("verbose"), be_verbose,
csv_enabled, csv_enabled,
); );
} }
if matches.is_present("verify") { if matches.is_present("verify") {
verification::verify_all_localizations(&root, &asset_path); verification::verify_all_localizations(&root_path, relative_i18n_root_path);
}
} }
}*/
fn main() {}

View File

@ -1,49 +1,66 @@
//! fragment attached with git versioning information //! fragment attached with git versioning information
use hashbrown::{HashMap}; use crate::raw::RawFragment;
use std::path::{Path, PathBuf}; use hashbrown::HashMap;
use std::sync::RwLock; use std::path::Path;
use std::sync::Arc;
use crate::raw::{RawFragment};
struct GitCache<'a> { #[derive(Copy, Clone, Eq, Hash, Debug, PartialEq)]
pub root_dir: PathBuf, pub(crate) enum LocalizationState {
pub blobs: RwLock<HashMap<PathBuf, Arc<git2::Blob<'a>>>>, UpToDate,
pub repo: git2::Repository, NotFound,
//pub head_ref: git2::Reference<'a>, Outdated,
Unused,
} }
impl<'a> GitCache<'a> { pub(crate) const ALL_LOCALIZATION_STATES: [Option<LocalizationState>; 5] = [
pub fn new(root_dir: &Path) -> Self { Some(LocalizationState::UpToDate),
let repo = git2::Repository::discover(&root_dir) Some(LocalizationState::NotFound),
.unwrap_or_else(|_| panic!("Failed to open the Git repository at {:?}", &root_dir)); Some(LocalizationState::Outdated),
//let head_ref = repo.head().expect("Impossible to get the HEAD reference"); Some(LocalizationState::Unused),
None,
];
let root_dir = root_dir.to_path_buf(); #[derive(Copy, Clone, Debug)]
let blobs = RwLock::new(HashMap::new()); pub(crate) struct LocalizationEntryState {
Self { pub(crate) key_line: Option<usize>,
root_dir, pub(crate) chuck_line_range: Option<(usize, usize)>,
blobs, pub(crate) commit_id: Option<git2::Oid>,
repo, pub(crate) state: Option<LocalizationState>,
//head_ref, }
impl LocalizationState {
pub(crate) fn print(this: &Option<Self>) -> String {
match this {
Some(LocalizationState::UpToDate) => "UpToDate",
Some(LocalizationState::NotFound) => "NotFound",
Some(LocalizationState::Outdated) => "Outdated",
Some(LocalizationState::Unused) => "Unused",
None => "Unknown",
}
.to_owned()
} }
} }
impl LocalizationEntryState {
fn new(key_line: Option<usize>) -> LocalizationEntryState {
LocalizationEntryState {
key_line,
chuck_line_range: None,
commit_id: None,
state: None,
}
}
}
/// Returns the Git blob associated with the given reference and path /// Returns the Git blob associated with the given reference and path
fn read_file_from_path( pub(crate) fn read_file_from_path<'a>(
&'a self, repo: &'a git2::Repository,
reference: &git2::Reference, reference: &git2::Reference,
path: &std::path::Path, path: &std::path::Path,
) -> Arc<git2::Blob<'a>> { ) -> git2::Blob<'a> {
// return from cache
let lock = self.blobs.read().unwrap();
if let Some(blob) = lock.get(path) {
return blob.clone();
}
drop(lock);
// load file not in cache
let tree = reference let tree = reference
.peel_to_tree() .peel_to_tree()
.expect("Impossible to peel HEAD to a tree object"); .expect("Impossible to peel HEAD to a tree object");
let blob = Arc::new(tree.get_path(path) tree.get_path(path)
.unwrap_or_else(|_| { .unwrap_or_else(|_| {
panic!( panic!(
"Impossible to find the file {:?} in reference {:?}", "Impossible to find the file {:?} in reference {:?}",
@ -51,122 +68,90 @@ impl<'a> GitCache<'a> {
reference.name() reference.name()
) )
}) })
.to_object(&self.repo) .to_object(repo)
.unwrap() .unwrap()
.peel_to_blob() .peel_to_blob()
.expect("Impossible to fetch the Git object")); .expect("Impossible to fetch the Git object")
let mut lock = self.blobs.write().unwrap();
let pathbuf = path.to_path_buf();
lock.insert(pathbuf, blob.clone());
blob
}
} }
/*
/// Extend a Fragment with historical git data /// Extend a Fragment with historical git data
/// The actual translation gets dropped /// The actual translation gets dropped
fn generate_key_version<'a>( /// TODO: transform vector_map too
repo: &'a GitCache, pub(crate) fn transform_fragment<'a>(
path: &Path,
fragment: RawFragment<String>,
) -> RawFragment<LocalizationEntryState> {
let file_blob = repo.read_file_from_path(path);
// Find key start lines
let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file");
let mut to_process: HashSet<&String> = localization.string_map.keys().collect();
for (line_nb, line) in file_content.lines().enumerate() {
let mut found_key = None;
for key in to_process.iter() {
if correspond(line, key) {
found_key = Some(key.to_owned());
}
}
if let Some(key) = found_key {
keys.get_mut(key).unwrap().key_line = Some(line_nb);
to_process.remove(key);
};
}
}*/
/*
fn generate_key_version<'a>(
repo: &'a git2::Repository, repo: &'a git2::Repository,
fragment: &RawFragment<String>, fragment: (&Path, RawFragment<String>),
path: &std::path::Path,
file_blob: &git2::Blob, file_blob: &git2::Blob,
) -> HashMap<String, LocalizationEntryState> { ) -> RawFragment<LocalizationEntryState> {
let mut keys: HashMap<String, LocalizationEntryState> = localization let (path, fragment) = fragment;
.string_map // Find key start lines by searching all lines which have `:` in them (as they
.keys() // are probably keys) and getting the first part of such line trimming
.map(|k| (k.to_owned(), LocalizationEntryState::new())) // whitespace and quotes. Quite buggy heuristic
.collect();
// Find key start lines
let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file"); let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file");
let mut to_process: HashSet<&String> = localization.string_map.keys().collect(); // we only need the key part of the file to process
for (line_nb, line) in file_content.lines().enumerate() { let file_content_keys = file_content.lines().enumerate().filter_map(|(no, line)| {
let mut found_key = None; line.split_once(':').map(|(key, _)| {
let mut key = key.trim().chars();
key.next();
key.next_back();
(no, key.as_str())
})
});
//speed up the search by sorting all keys!
let mut file_content_keys_sorted = file_content_keys.into_iter().collect::<Vec<_>>();
file_content_keys_sorted.sort_by_key(|(_, key)| *key);
for key in to_process.iter() { let mut result = RawFragment::<LocalizationEntryState> {
if correspond(line, key) { string_map: HashMap::new(),
found_key = Some(key.to_owned()); vector_map: HashMap::new(),
}
}
if let Some(key) = found_key {
keys.get_mut(key).unwrap().key_line = Some(line_nb);
to_process.remove(key);
}; };
for (original_key, _) in fragment.string_map {
let line_nb = file_content_keys_sorted
.binary_search_by_key(&original_key.as_str(), |(_, key)| *key)
.map_or_else(
|_| {
eprintln!(
"Key {} does not have a git line in it's state!",
original_key
);
None
},
|id| Some(file_content_keys_sorted[id].0),
);
result
.string_map
.insert(original_key, LocalizationEntryState::new(line_nb));
} }
let mut error_check_set: Vec<String> = vec![]; // Find commit for each keys, THIS PART IS SLOW (2s/4s)
// Find commit for each keys for e in repo
repo.blame_file(path, None) .blame_file(path, None)
.expect("Impossible to generate the Git blame") .expect("Impossible to generate the Git blame")
.iter() .iter()
.for_each(|e: git2::BlameHunk| {
for (key, state) in keys.iter_mut() {
let line = match state.key_line {
Some(l) => l,
None => {
if !error_check_set.contains(key) {
eprintln!(
"Key {} does not have a git line in it's state! Skipping key.",
key
);
error_check_set.push(key.clone());
}
continue;
},
};
if line + 1 >= e.final_start_line()
&& line + 1 < e.final_start_line() + e.lines_in_hunk()
{ {
state.chuck_line_range = Some(( for (_, state) in result.string_map.iter_mut() {
if let Some(line) = state.key_line {
let range = (
e.final_start_line(), e.final_start_line(),
e.final_start_line() + e.lines_in_hunk(), e.final_start_line() + e.lines_in_hunk(),
)); );
state.commit_id = match state.commit_id { if line + 1 >= range.0 && line + 1 < range.1 {
Some(existing_commit) => { state.chuck_line_range = Some(range);
match repo.graph_descendant_of(e.final_commit_id(), existing_commit) { state.commit_id = state.commit_id.map_or_else(
|| Some(e.final_commit_id()),
|existing_commit| match repo
.graph_descendant_of(e.final_commit_id(), existing_commit)
{
Ok(true) => Some(e.final_commit_id()), Ok(true) => Some(e.final_commit_id()),
Ok(false) => Some(existing_commit), Ok(false) => Some(existing_commit),
Err(err) => panic!("{}", err), Err(err) => panic!("{}", err),
}
}, },
None => Some(e.final_commit_id()), );
}; }
} }
} }
});
keys
} }
result
*/ }

View File

@ -1,19 +1,20 @@
#[cfg(any(feature = "bin", test))] #[cfg(any(feature = "bin", test))]
pub mod analysis;
#[cfg(any(feature = "bin", test))]
pub mod gitfragments; pub mod gitfragments;
//#[cfg(any(feature = "bin", test))]
//pub mod analysis;
pub mod raw; pub mod raw;
#[cfg(any(feature = "bin", test))] pub mod stats;
pub mod verification; pub mod verification;
use common_assets::{self, source::DirEntry, AssetExt, AssetGuard, AssetHandle}; use common_assets::{self, source::DirEntry, AssetExt, AssetGuard, AssetHandle};
use hashbrown::{HashMap, HashSet}; use hashbrown::{HashMap, HashSet};
use raw::{RawFragment, RawLanguage, RawManifest};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{ use std::{
fs, io, fs, io,
path::{Path, PathBuf}, path::{Path, PathBuf},
}; };
use tracing::warn; use tracing::warn;
use raw::{RawManifest, RawFragment, RawLanguage};
/// The reference language, aka the more up-to-date localization data. /// The reference language, aka the more up-to-date localization data.
/// Also the default language at first startup. /// Also the default language at first startup.
@ -118,8 +119,15 @@ impl common_assets::Compound for Language {
.load_dir::<RawFragment<String>>(asset_key, true)? .load_dir::<RawFragment<String>>(asset_key, true)?
.iter() .iter()
{ {
let id = fragment_asset.id();
// Activate this once ._manifest is fully transformed and only contains metadata
// or regex: "<veloren\.\w+\._manifest"
/*
if id.starts_with("voxygen.") && id.ends_with("._manifest") {
continue;
}*/
let read = fragment_asset.read(); let read = fragment_asset.read();
fragments.insert(PathBuf::from(fragment_asset.id()), read.clone()); fragments.insert(PathBuf::from(id), read.clone());
} }
Ok(Language::from(RawLanguage { Ok(Language::from(RawLanguage {
@ -266,7 +274,10 @@ impl LocalizationHandle {
struct FindManifests; struct FindManifests;
impl common_assets::Compound for FindManifests { impl common_assets::Compound for FindManifests {
fn load<S: common_assets::Source>(_: &common_assets::AssetCache<S>, _: &str) -> Result<Self, common_assets::Error> { fn load<S: common_assets::Source>(
_: &common_assets::AssetCache<S>,
_: &str,
) -> Result<Self, common_assets::Error> {
Ok(Self) Ok(Self)
} }
} }
@ -328,7 +339,6 @@ pub fn i18n_directories(i18n_dir: &Path) -> Vec<PathBuf> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::path::Path; use std::path::Path;
use common_assets;
// Test that localization list is loaded (not empty) // Test that localization list is loaded (not empty)
#[test] #[test]
@ -357,11 +367,9 @@ mod tests {
#[test] #[test]
#[ignore] #[ignore]
fn test_all_localizations() { fn test_all_localizations() {
// Options
let be_verbose = true;
// Generate paths // Generate paths
let i18n_root_path = Path::new("assets/voxygen/i18n/"); let i18n_root_path = Path::new("assets/voxygen/i18n/");
let root_dir = common_assets::find_root().expect("Failed to discover repository root"); let root_dir = common_assets::find_root().expect("Failed to discover repository root");
crate::analysis::test_all_localizations(&root_dir, i18n_root_path, be_verbose); crate::analysis::test_all_localizations(&root_dir, i18n_root_path, true, false);
} }
} }

View File

@ -1,12 +1,24 @@
//! handle the loading of a `Language` //! handle the loading of a `Language`
use hashbrown::hash_map::HashMap; //! Paths:
use std::path::{Path, PathBuf}; //! - `root_path`: repo part, git main folder
use serde::{Deserialize, Serialize}; //! - `language_identifier`: `en`, `de_DE`, `fr_FR`, etc..
use std::fs; //! - `relative_i18n_root_path`: relative path to i18n path which contains
use ron::de::from_reader; //! `language_identifier` folders from `root_path`
//! - `i18n_root_path`: absolute path to `relative_i18n_root_path`
//! - `i18n_path`: absolute path to `i18n_root_path` + `language_identifier`
//! - `subfolder`: all folders in `i18n_path`
//!
//! wherever possible we use relative paths only. So expect 1 absolute
//! `root_path` or `i18n_root_path` to be required and all others be relative.
use crate::{Fonts, Language, LanguageMetadata, LANG_EXTENSION, LANG_MANIFEST_FILE};
use deunicode::deunicode; use deunicode::deunicode;
use crate::{Fonts, LanguageMetadata, LANG_MANIFEST_FILE, LANG_EXTENSION}; use hashbrown::hash_map::HashMap;
use crate::Language; use ron::de::from_reader;
use serde::{Deserialize, Serialize};
use std::{
fs,
path::{Path, PathBuf},
};
/// Raw localization metadata from LANG_MANIFEST_FILE file /// Raw localization metadata from LANG_MANIFEST_FILE file
/// See `Language` for more info on each attributes /// See `Language` for more info on each attributes
@ -27,7 +39,7 @@ pub(crate) struct RawFragment<T> {
pub(crate) struct RawLanguage<T> { pub(crate) struct RawLanguage<T> {
pub(crate) manifest: RawManifest, pub(crate) manifest: RawManifest,
pub(crate) fragments: HashMap<PathBuf, RawFragment<T>>, pub(crate) fragments: HashMap</* relative to i18n_path */ PathBuf, RawFragment<T>>,
} }
#[derive(Debug)] #[derive(Debug)]
@ -35,48 +47,52 @@ pub(crate) enum RawError {
RonError(ron::Error), RonError(ron::Error),
} }
/// `i18n_root_path` - absolute path to i18n path which contains `en`, `de_DE`, `fr_FR` folders /// `i18n_root_path` - absolute path to i18n path which contains `en`, `de_DE`,
pub(crate) fn load_manifest(i18n_root_path: &Path, language_identifier: &str) -> Result<RawManifest, common_assets::Error> { /// `fr_FR` folders
let manifest_file = i18n_root_path.join(language_identifier).join(format!("{}.{}", LANG_MANIFEST_FILE, LANG_EXTENSION)); pub(crate) fn load_manifest(
println!("file , {:?}", manifest_file); i18n_root_path: &Path,
language_identifier: &str,
) -> Result<RawManifest, common_assets::Error> {
let manifest_file = i18n_root_path
.join(language_identifier)
.join(format!("{}.{}", LANG_MANIFEST_FILE, LANG_EXTENSION));
tracing::debug!(?manifest_file, "manifest loaded");
let f = fs::File::open(&manifest_file)?; let f = fs::File::open(&manifest_file)?;
Ok(from_reader(f).map_err(RawError::RonError)?) let manifest: RawManifest = from_reader(f).map_err(RawError::RonError)?;
// verify that the folder name `de_DE` matches the value inside the metadata!
assert_eq!(manifest.metadata.language_identifier, language_identifier);
Ok(manifest)
} }
/// `i18n_root_path` - absolute path to i18n path which contains `en`, `de_DE`, `fr_FR` files /// `i18n_root_path` - absolute path to i18n path which contains `en`, `de_DE`,
pub(crate) fn load_raw_language(i18n_root_path: &Path, manifest: RawManifest) -> Result<RawLanguage<String>, common_assets::Error> { /// `fr_FR` files
pub(crate) fn load_raw_language(
i18n_root_path: &Path,
manifest: RawManifest,
) -> Result<RawLanguage<String>, common_assets::Error> {
let language_identifier = &manifest.metadata.language_identifier; let language_identifier = &manifest.metadata.language_identifier;
let fragments = recursive_load_raw_language(i18n_root_path, language_identifier, Path::new(""))?; let i18n_path = i18n_root_path.join(language_identifier);
//get List of files
let files = fragments_pathes_in_language(i18n_root_path, language_identifier)?;
// Walk through each file in the directory
let mut fragments = HashMap::new();
for fragment_file in &files {
let relative_path = fragment_file.strip_prefix(&i18n_path).unwrap();
let f = fs::File::open(fragment_file)?;
let fragment = from_reader(f).map_err(RawError::RonError)?;
fragments.insert(relative_path.to_path_buf(), fragment);
}
Ok(RawLanguage { Ok(RawLanguage {
manifest, manifest,
fragments, fragments,
}) })
} }
fn recursive_load_raw_language(i18n_root_path: &Path, language_identifier: &str, subfolder: &Path) -> Result<HashMap<PathBuf,RawFragment<String>>, common_assets::Error> {
// Walk through each file in the directory
let mut fragments = HashMap::new();
let search_dir = i18n_root_path.join(language_identifier).join(subfolder);
for fragment_file in search_dir.read_dir().unwrap().flatten() {
let file_type = fragment_file.file_type()?;
if file_type.is_dir() {
let full_path = fragment_file.path();
let relative_path = full_path.strip_prefix(&search_dir).unwrap();
fragments.extend(recursive_load_raw_language(i18n_root_path, language_identifier, relative_path)?);
} else if file_type.is_file() {
let full_path = fragment_file.path();
let relative_path = full_path.strip_prefix(&i18n_root_path).unwrap();
let f = fs::File::open(&full_path)?;
let fragment = from_reader(f).map_err(RawError::RonError)?;
fragments.insert(relative_path.to_path_buf(), fragment);
}
}
Ok(fragments)
}
impl From<RawLanguage<String>> for Language { impl From<RawLanguage<String>> for Language {
fn from(raw: RawLanguage<String>) -> Self { fn from(raw: RawLanguage<String>) -> Self {
let mut string_map = HashMap::new(); let mut string_map = HashMap::new();
let mut vector_map = HashMap::new(); let mut vector_map = HashMap::new();
@ -105,11 +121,56 @@ impl From<RawLanguage<String>> for Language {
vector_map, vector_map,
convert_utf8_to_ascii, convert_utf8_to_ascii,
fonts: raw.manifest.fonts, fonts: raw.manifest.fonts,
metadata: metadata, metadata,
} }
} }
} }
/// Collect the paths of all fragment files that belong to one language.
///
/// The search starts at the language folder
/// `i18n_root_path/language_identifier` itself (empty subfolder) and descends
/// into every sub directory. The collected paths are relative to that
/// language folder.
///
/// * `i18n_root_path` - path to the folder which contains the language folders
/// * `language_identifier` - name of the language folder, e.g. `en`, `de_DE`
pub(crate) fn fragments_pathes_in_language(
    i18n_root_path: &Path,
    language_identifier: &str,
) -> Result<Vec</* relative to i18n_path */ PathBuf>, std::io::Error> {
    let mut collected = Vec::new();
    let language_folder_itself = Path::new("");
    let walk = recursive_fragments_paths_in_language(
        i18n_root_path,
        language_identifier,
        language_folder_itself,
        &mut collected,
    );
    // Only hand out the list when the whole walk succeeded.
    walk.map(|()| collected)
}
/// Recursively collect the fragment files found below
/// `i18n_root_path/language_identifier/subfolder` into `result`.
///
/// Inside this function `i18n_path = i18n_root_path.join(language_identifier)`.
/// `subfolder` and every path pushed into `result` are relative to that
/// `i18n_path`. The language manifest
/// (`{LANG_MANIFEST_FILE}.{LANG_EXTENSION}` directly inside `i18n_path`) is
/// not a fragment and is therefore left out.
///
/// # Errors
/// Returns the I/O error when a directory cannot be listed or when the file
/// type of an entry cannot be determined.
fn recursive_fragments_paths_in_language(
    i18n_root_path: &Path,
    language_identifier: &str,
    subfolder: &Path,
    result: &mut Vec<PathBuf>,
) -> Result<(), std::io::Error> {
    let i18n_path = i18n_root_path.join(language_identifier);
    let search_dir = i18n_path.join(subfolder);
    // Loop invariant: build the manifest file name once, not once per file.
    let manifest_file = format!("{}.{}", LANG_MANIFEST_FILE, LANG_EXTENSION);
    // A directory that cannot be listed is reported to the caller instead of
    // panicking. Single entries that fail to be read are still skipped.
    for fragment_file in search_dir.read_dir()?.flatten() {
        let file_type = fragment_file.file_type()?;
        let full_path = fragment_file.path();
        // Every entry was listed from `search_dir`, which lies below
        // `i18n_path`, so the prefix is always present.
        let relative_path = full_path
            .strip_prefix(&i18n_path)
            .expect("directory entry is not located below the i18n_path");
        if file_type.is_dir() {
            recursive_fragments_paths_in_language(
                i18n_root_path,
                language_identifier,
                relative_path,
                result,
            )?;
        } else if file_type.is_file() && relative_path != Path::new(&manifest_file) {
            result.push(relative_path.to_path_buf());
        }
    }
    Ok(())
}
impl core::fmt::Display for RawError { impl core::fmt::Display for RawError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self { match self {
@ -120,13 +181,9 @@ impl core::fmt::Display for RawError {
impl std::error::Error for RawError {} impl std::error::Error for RawError {}
impl From<RawError> for common_assets::Error { impl From<RawError> for common_assets::Error {
fn from(e: RawError) -> Self { fn from(e: RawError) -> Self { Self::Conversion(Box::new(e)) }
Self::Conversion(Box::new(e))
} }
}
impl common_assets::Asset for RawManifest { impl common_assets::Asset for RawManifest {
type Loader = common_assets::RonLoader; type Loader = common_assets::RonLoader;

205
voxygen/i18n/src/stats.rs Normal file
View File

@ -0,0 +1,205 @@
use crate::{
gitfragments::{LocalizationEntryState, LocalizationState, ALL_LOCALIZATION_STATES},
raw::RawLanguage,
};
use hashbrown::HashMap;
use std::path::PathBuf;
/// Counters of translation keys for one language, one counter per state.
#[derive(Default, Debug, PartialEq)]
pub(crate) struct LocalizationStats {
/// Number of keys reported as "up-to-date".
pub(crate) uptodate_entries: usize,
/// Number of keys reported as "not found" ("untranslated" in the overall
/// table).
pub(crate) notfound_entries: usize,
/// Number of keys reported as "unused"; the only counter which is not part
/// of `get_real_entry_count`.
pub(crate) unused_entries: usize,
/// Number of keys reported as "outdated".
pub(crate) outdated_entries: usize,
/// Number of keys reported as "unknown entries" in the per-language
/// summary and as "errors" in the overall table.
pub(crate) errors: usize,
}
/// Keys of one language grouped by their localization state.
// NOTE(review): the lint is presumably silenced because of the nested
// map-of-vec-of-tuple type of `data` - confirm before removing.
#[allow(clippy::type_complexity)]
pub(crate) struct LocalizationAnalysis {
/// Identifier of the analysed language, printed as first csv column.
language_identifier: String,
/// For every state (`None` is a valid bucket of its own) the list of
/// `(fragment path, key, commit id)` entries in that state. The fragment
/// path is also used to look up the entry in the reference language.
pub(crate) data: HashMap<Option<LocalizationState>, Vec<(PathBuf, String, Option<git2::Oid>)>>,
}
impl LocalizationStats {
    /// Number of keys which are relevant for the state of the translation.
    ///
    /// Unused entries don't break the game, so they are the only counter
    /// which is not part of the sum.
    pub(crate) fn get_real_entry_count(&self) -> usize {
        let Self {
            uptodate_entries,
            notfound_entries,
            unused_entries: _,
            outdated_entries,
            errors,
        } = self;
        outdated_entries + notfound_entries + errors + uptodate_entries
    }
}
impl LocalizationAnalysis {
    /// Create an empty analysis for `language_identifier`.
    ///
    /// An empty bucket is registered up front for `UpToDate`, `NotFound`,
    /// `Unused`, `Outdated` and for `None`.
    pub(crate) fn new(language_identifier: &str) -> Self {
        let registered_states = [
            Some(LocalizationState::UpToDate),
            Some(LocalizationState::NotFound),
            Some(LocalizationState::Unused),
            Some(LocalizationState::Outdated),
            None,
        ];
        Self {
            language_identifier: language_identifier.to_owned(),
            data: registered_states
                .iter()
                .map(|state| (*state, Vec::new()))
                .collect(),
        }
    }

    /// Print all keys which are in `state` to stdout, nothing if there are
    /// none.
    ///
    /// In verbose mode every key is followed by the commit id stored for it
    /// and the commit id of the same key in `reference_language` (`None` if
    /// either is missing).
    ///
    /// # Panics
    /// If no bucket is registered for `state`.
    fn show(
        &self,
        state: Option<LocalizationState>,
        reference_language: &RawLanguage<LocalizationEntryState>,
        be_verbose: bool,
    ) {
        let entries = match self.data.get(&state) {
            Some(entries) => entries,
            None => panic!(
                "called on invalid state: {}",
                LocalizationState::print(&state)
            ),
        };
        if entries.is_empty() {
            return;
        }
        println!("\n\t[{}]", LocalizationState::print(&state));
        for (path, key, commit_id) in entries {
            if !be_verbose {
                println!("{}", key);
                continue;
            }
            let our_commit = Self::print_commit(commit_id);
            // Same fragment, same key, but looked up in the reference language.
            let ref_commit_id = reference_language
                .fragments
                .get(path)
                .and_then(|fragment| fragment.string_map.get(key))
                .and_then(|entry| entry.commit_id);
            let ref_commit = Self::print_commit(&ref_commit_id);
            println!("{:60}| {:40} | {:40}", key, our_commit, ref_commit);
        }
    }

    /// Print one csv row per key which is in `state` to stdout:
    /// language, fragment path, key, state, commit id.
    ///
    /// # Panics
    /// If no bucket is registered for `state`.
    fn csv(&self, state: Option<LocalizationState>) {
        let entries = match self.data.get(&state) {
            Some(entries) => entries,
            None => panic!("called on invalid state: {:?}", state),
        };
        let status = LocalizationState::print(&state);
        for (path, key, commit_id) in entries {
            println!(
                "{},{:?},{},{},{}",
                self.language_identifier,
                path,
                key,
                status,
                Self::print_commit(commit_id)
            );
        }
    }

    /// Textual form of an optional commit id, `None` if there is no commit.
    fn print_commit(commit_id: &Option<git2::Oid>) -> String {
        match commit_id {
            Some(oid) => oid.to_string(),
            None => "None".to_owned(),
        }
    }
}
/// Print the translation report of a single language to stdout.
///
/// The report lists the keys of every state except the up-to-date ones,
/// followed by the absolute counters and the up-to-date / outdated /
/// untranslated percentages. The percentages are relative to
/// `stats.get_real_entry_count()`, i.e. unused keys are not part of them.
///
/// * `language_identifier` - header of the commit column of this language
///   (verbose mode only)
/// * `reference_language` - language the commit ids are compared with
/// * `stats` - counters of the language
/// * `state_map` - keys of the language grouped by state
/// * `be_verbose` - additionally print the commit ids of both languages
pub(crate) fn print_translation_stats(
    language_identifier: &str,
    reference_language: &RawLanguage<LocalizationEntryState>,
    stats: &LocalizationStats,
    state_map: &LocalizationAnalysis,
    be_verbose: bool,
) {
    let real_entry_count = stats.get_real_entry_count() as f32;
    let uptodate_percent = (stats.uptodate_entries as f32 / real_entry_count) * 100_f32;
    let outdated_percent = (stats.outdated_entries as f32 / real_entry_count) * 100_f32;
    // Untranslated are the keys which were not found plus the ones in an
    // unknown state. Together with up-to-date and outdated this covers the
    // whole real entry count, so the three percentages add up to 100%.
    let untranslated_percent =
        ((stats.notfound_entries + stats.errors) as f32 / real_entry_count) * 100_f32;

    // Display
    if be_verbose {
        println!(
            "\n{:60}| {:40} | {:40}",
            "Key name",
            language_identifier,
            reference_language.manifest.metadata.language_identifier,
        );
    } else {
        println!("\nKey name");
    }

    for state in &ALL_LOCALIZATION_STATES {
        // Up-to-date keys need no attention, so they are not listed.
        if state == &Some(LocalizationState::UpToDate) {
            continue;
        }
        state_map.show(*state, reference_language, be_verbose);
    }

    println!(
        "\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries",
        stats.uptodate_entries,
        stats.outdated_entries,
        stats.unused_entries,
        stats.notfound_entries,
        stats.errors,
    );

    println!(
        "{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n",
        uptodate_percent, outdated_percent, untranslated_percent,
    );
}
/// Print the analysis of one language as csv to stdout.
///
/// The header line is followed by one row per key; keys which are up-to-date
/// are not exported.
pub(crate) fn print_csv_file(state_map: &LocalizationAnalysis) {
    println!("country_code,file_name,translation_code,status,git_commit");

    let up_to_date = Some(LocalizationState::UpToDate);
    for state in ALL_LOCALIZATION_STATES.iter() {
        if *state != up_to_date {
            state_map.csv(*state);
        }
    }
}
pub(crate) fn print_overall_stats(
analysis: HashMap<String, (LocalizationAnalysis, LocalizationStats)>,
) {
let mut overall_uptodate_entry_count = 0;
let mut overall_outdated_entry_count = 0;
let mut overall_untranslated_entry_count = 0;
let mut overall_real_entry_count = 0;
println!("-----------------------------------------------------------------------------");
println!("Overall Translation Status");
println!("-----------------------------------------------------------------------------");
println!(
"{:12}| {:8} | {:8} | {:8} | {:8} | {:8}",
"", "up-to-date", "outdated", "untranslated", "unused", "errors",
);
let mut i18n_stats: Vec<(&String, &(_, LocalizationStats))> = analysis.iter().collect();
i18n_stats.sort_by_key(|(_, (_, v))| v.notfound_entries);
for (path, (_, test_result)) in i18n_stats {
let LocalizationStats {
uptodate_entries: uptodate,
outdated_entries: outdated,
unused_entries: unused,
notfound_entries: untranslated,
errors,
} = test_result;
overall_uptodate_entry_count += uptodate;
overall_outdated_entry_count += outdated;
overall_untranslated_entry_count += untranslated;
overall_real_entry_count += test_result.get_real_entry_count();
println!(
"{:12}|{:8} |{:6} |{:8} |{:6} |{:8}",
path, uptodate, outdated, untranslated, unused, errors,
);
}
println!(
"\n{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated",
(overall_uptodate_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
(overall_outdated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
(overall_untranslated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
);
println!("-----------------------------------------------------------------------------\n");
}

View File

@ -1,23 +1,23 @@
use std::{path::Path}; use std::path::Path;
use crate::{i18n_directories, LANG_MANIFEST_FILE, REFERENCE_LANG}; use crate::{i18n_directories, raw, LANG_MANIFEST_FILE, REFERENCE_LANG};
use crate::raw;
/// Test to verify all languages that they are VALID and loadable, without /// Test to verify all languages that they are VALID and loadable, without
/// need of git just on the local assets folder /// need of git just on the local assets folder
/// `root_dir` - absolute path to main repo /// `root_path` - absolute path to main repo
/// `asset_path` - relative path to asset directory (right now it is /// `relative_i18n_root_path` - relative path to asset directory (right now it
/// 'assets/voxygen/i18n') /// is 'assets/voxygen/i18n')
pub fn verify_all_localizations(root_dir: &Path, asset_path: &Path) { pub fn verify_all_localizations(root_path: &Path, relative_i18n_root_path: &Path) {
let i18n_root_path = root_dir.join(asset_path); let i18n_root_path = root_path.join(relative_i18n_root_path);
let ref_i18n_path = i18n_root_path.join(REFERENCE_LANG); let ref_i18n_path = i18n_root_path.join(REFERENCE_LANG);
let ref_i18n_manifest_path = ref_i18n_path.join(LANG_MANIFEST_FILE.to_string() + "." + crate::LANG_EXTENSION); let ref_i18n_manifest_path =
ref_i18n_path.join(LANG_MANIFEST_FILE.to_string() + "." + crate::LANG_EXTENSION);
assert!( assert!(
root_dir.join(&ref_i18n_path).is_dir(), root_path.join(&ref_i18n_path).is_dir(),
"Reference language folder doesn't exist, something is wrong!" "Reference language folder doesn't exist, something is wrong!"
); );
assert!( assert!(
root_dir.join(&ref_i18n_manifest_path).is_file(), root_path.join(&ref_i18n_manifest_path).is_file(),
"Reference language manifest file doesn't exist, something is wrong!" "Reference language manifest file doesn't exist, something is wrong!"
); );
let i18n_directories = i18n_directories(&i18n_root_path); let i18n_directories = i18n_directories(&i18n_root_path);
@ -31,19 +31,29 @@ pub fn verify_all_localizations(root_dir: &Path, asset_path: &Path) {
folder is empty?" folder is empty?"
); );
for i18n_directory in i18n_directories { for i18n_directory in i18n_directories {
let display_language_identifier = i18n_directory.strip_prefix(&root_dir).unwrap().to_str().unwrap(); let display_language_identifier = i18n_directory
let language_identifier = i18n_directory.strip_prefix(&i18n_root_path).unwrap().to_str().unwrap(); .strip_prefix(&root_path)
println!( .unwrap()
"verifying {:?}", .to_str()
display_language_identifier .unwrap();
); let language_identifier = i18n_directory
.strip_prefix(&i18n_root_path)
.unwrap()
.to_str()
.unwrap();
println!("verifying {:?}", display_language_identifier);
// Walk through each files and try to load them // Walk through each files and try to load them
verify_localization_directory(root_dir, &asset_path, language_identifier); verify_localization_directory(root_path, relative_i18n_root_path, language_identifier);
} }
} }
fn verify_localization_directory(root_dir: &Path, asset_path: &Path, language_identifier: &str) { fn verify_localization_directory(
let i18n_path = root_dir.join(asset_path); root_path: &Path,
let manifest = raw::load_manifest(&i18n_path, language_identifier).expect("error accessing manifest file"); relative_i18n_root_path: &Path,
language_identifier: &str,
) {
let i18n_path = root_path.join(relative_i18n_root_path);
let manifest =
raw::load_manifest(&i18n_path, language_identifier).expect("error accessing manifest file");
raw::load_raw_language(&i18n_path, manifest).expect("error accessing fragment file"); raw::load_raw_language(&i18n_path, manifest).expect("error accessing fragment file");
} }