redo i18n git analysis completely and introduce a way to export CSV data

also improve performance by caching git graph_descendant_of results
This commit is contained in:
Marcel Märtens 2021-07-28 15:20:09 +02:00
parent a5696e83a9
commit bdda5ccd72
7 changed files with 743 additions and 884 deletions

View File

@ -1,686 +1,283 @@
use ron::de::from_bytes;
use std::path::{Path, PathBuf};
use crate::raw::{
i18n_directories, LocalizationFragment, RawLocalization, LANG_MANIFEST_FILE, REFERENCE_LANG,
use crate::{
gitfragments::{
read_file_from_path, transform_fragment, LocalizationEntryState, LocalizationState,
},
i18n_directories,
raw::{self, RawFragment, RawLanguage},
stats::{
print_csv_file, print_overall_stats, print_translation_stats, LocalizationAnalysis,
LocalizationStats,
},
REFERENCE_LANG,
};
use hashbrown::{HashMap, HashSet};
use hashbrown::{hash_map::Entry, HashMap, HashSet};
use ron::de::from_bytes;
use std::path::Path;
/// How a single translation key compares to the reference language.
#[derive(Copy, Clone, Eq, Hash, Debug, PartialEq)]
enum LocalizationState {
    UpToDate,
    NotFound,
    Outdated,
    Unknown,
    Unused,
}
/// Aggregated per-language counters produced by the analysis.
#[derive(Debug, PartialEq)]
struct LocalizationStats {
    uptodate_entries: usize,
    outdated_entries: usize,
    unused_entries: usize,
    notfound_entries: usize,
    // Keys whose state could not be determined.
    errors: usize,
    // Entries that matter for translation status (total minus unused).
    real_entry_count: usize,
}
/// One bucket of (key, last touching commit) pairs per `LocalizationState`.
#[derive(Default)]
struct LocalizationAnalysis {
    uptodate: Vec<(String, Option<git2::Oid>)>,
    notfound: Vec<(String, Option<git2::Oid>)>,
    unused: Vec<(String, Option<git2::Oid>)>,
    outdated: Vec<(String, Option<git2::Oid>)>,
    unknown: Vec<(String, Option<git2::Oid>)>,
}
impl LocalizationAnalysis {
    /// Return the bucket of (key, commit) pairs collected for `state`.
    ///
    /// Always returns `Some` today; the `Option` is kept so the signature
    /// survives future states that may have no bucket.
    fn get_mut(
        &mut self,
        state: LocalizationState,
    ) -> Option<&mut Vec<(String, Option<git2::Oid>)>> {
        match state {
            LocalizationState::UpToDate => Some(&mut self.uptodate),
            LocalizationState::NotFound => Some(&mut self.notfound),
            LocalizationState::Unused => Some(&mut self.unused),
            LocalizationState::Outdated => Some(&mut self.outdated),
            LocalizationState::Unknown => Some(&mut self.unknown),
        }
    }

    /// Print all entries collected for `state` to stdout, sorted by key.
    ///
    /// With `be_verbose`, each line also shows our commit and the commit of
    /// the same key in the reference language (`ref_i18n_map`).
    fn show(
        &mut self,
        state: LocalizationState,
        be_verbose: bool,
        ref_i18n_map: &HashMap<String, LocalizationEntryState>,
    ) {
        let entries = self.unwrap_entries(state);
        if entries.is_empty() {
            return;
        }
        println!("\n\t[{:?}]", state);
        entries.sort();
        for (key, commit_id) in entries {
            if be_verbose {
                let our_commit = LocalizationAnalysis::create_our_commit(*commit_id);
                // Reuse the same Oid formatting for the reference commit
                // instead of duplicating the map/unwrap_or_else chain.
                let ref_commit = LocalizationAnalysis::create_our_commit(
                    ref_i18n_map.get(key).and_then(|s| s.commit_id),
                );
                println!("{:60}| {:40} | {:40}", key, our_commit, ref_commit,);
            } else {
                println!("{}", key);
            }
        }
    }

    //TODO: Add which file each faulty translation is in
    /// Print all entries collected for `state` as CSV rows to stdout.
    ///
    /// NOTE(review): language code and file name are still hard-coded
    /// ("sv", "_manifest.yml") — see the TODO above.
    fn csv(&mut self, state: LocalizationState) {
        let entries = self.unwrap_entries(state);
        for (key, commit_id) in entries {
            let our_commit = LocalizationAnalysis::create_our_commit(*commit_id);
            println!(
                "{},{},{},{:?},{}",
                "sv", "_manifest.yml", key, state, our_commit
            );
        }
    }

    /// Like `get_mut` but panics on a state without a bucket.
    fn unwrap_entries(
        &mut self,
        state: LocalizationState,
    ) -> &mut Vec<(String, Option<git2::Oid>)> {
        self.get_mut(state)
            .unwrap_or_else(|| panic!("called on invalid state: {:?}", state))
    }

    /// Format an optional commit id for display ("None" when absent).
    /// Takes the `Option<Oid>` by value — `Oid` is `Copy`, so the previous
    /// `&mut` borrow was unnecessary.
    fn create_our_commit(commit_id: Option<git2::Oid>) -> String {
        commit_id
            .map(|s| format!("{}", s))
            .unwrap_or_else(|| "None".to_owned())
    }
}
/// Git-derived information about a single translation key.
#[derive(Copy, Clone, Debug)]
struct LocalizationEntryState {
    // Line (0-based) on which the key was found in its file, if located.
    key_line: Option<usize>,
    // Blame-hunk line range covering the key (1-based, end exclusive).
    // NOTE(review): field name keeps the historical "chuck" typo ("chunk").
    chuck_line_range: Option<(usize, usize)>,
    // Most recent commit that touched the key's line.
    commit_id: Option<git2::Oid>,
    state: LocalizationState,
}
impl LocalizationEntryState {
    /// Create a blank entry with no git information attached yet.
    /// The state starts out as `Unknown` until analysis fills it in.
    fn new() -> Self {
        Self {
            key_line: None,
            chuck_line_range: None,
            commit_id: None,
            state: LocalizationState::Unknown,
        }
    }
}
/// Returns the Git blob associated with the given reference and path.
/// Panics if the path does not exist in the reference's tree or the object
/// cannot be peeled to a blob.
fn read_file_from_path<'a>(
    repo: &'a git2::Repository,
    reference: &git2::Reference,
    path: &std::path::Path,
) -> git2::Blob<'a> {
    // Resolve the reference to the tree it points at.
    let tree = reference
        .peel_to_tree()
        .expect("Impossible to peel HEAD to a tree object");
    // Locate the entry for `path` inside that tree.
    let entry = tree.get_path(path).unwrap_or_else(|_| {
        panic!(
            "Impossible to find the file {:?} in reference {:?}",
            path,
            reference.name()
        )
    });
    entry
        .to_object(repo)
        .unwrap()
        .peel_to_blob()
        .expect("Impossible to fetch the Git object")
}
/// Heuristically decide whether `line` defines the translation key `key`:
/// take the text before the first ':', trim whitespace, then strip one
/// leading and one trailing character (the surrounding quotes) and compare.
fn correspond(line: &str, key: &str) -> bool {
    // Everything left of the first ':' (the whole line if there is none).
    let head = line
        .splitn(2, ':')
        .next()
        .expect("split always produces value")
        .trim();
    // Drop the quote characters: first and last char of the trimmed head.
    let mut inner = head.chars();
    inner.next();
    inner.next_back();
    inner.as_str() == key
}
/// Build the per-key git state for one localization fragment.
///
/// For every key in `localization.string_map`, finds the line on which the
/// key is defined inside `file_blob` (via the `correspond` heuristic), then
/// walks the git blame of `path` to attach the most recent commit touching
/// that line. Keys whose line cannot be located keep `key_line == None`,
/// are reported to stderr once, and are skipped during blame matching.
fn generate_key_version<'a>(
    repo: &'a git2::Repository,
    localization: &LocalizationFragment,
    path: &std::path::Path,
    file_blob: &git2::Blob,
) -> HashMap<String, LocalizationEntryState> {
    // Start every key out in the default (Unknown) state.
    let mut keys: HashMap<String, LocalizationEntryState> = localization
        .string_map
        .keys()
        .map(|k| (k.to_owned(), LocalizationEntryState::new()))
        .collect();
    // Find key start lines
    let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file");
    let mut to_process: HashSet<&String> = localization.string_map.keys().collect();
    for (line_nb, line) in file_content.lines().enumerate() {
        let mut found_key = None;
        for key in to_process.iter() {
            if correspond(line, key) {
                found_key = Some(key.to_owned());
            }
        }
        if let Some(key) = found_key {
            keys.get_mut(key).unwrap().key_line = Some(line_nb);
            // A key only needs to be located once.
            to_process.remove(key);
        };
    }
    let mut error_check_set: Vec<String> = vec![];
    // Find commit for each keys
    repo.blame_file(path, None)
        .expect("Impossible to generate the Git blame")
        .iter()
        .for_each(|e: git2::BlameHunk| {
            for (key, state) in keys.iter_mut() {
                let line = match state.key_line {
                    Some(l) => l,
                    None => {
                        // Warn about a missing line only once per key.
                        if !error_check_set.contains(key) {
                            eprintln!(
                                "Key {} does not have a git line in it's state! Skipping key.",
                                key
                            );
                            error_check_set.push(key.clone());
                        }
                        continue;
                    },
                };
                // Blame hunks use 1-based line numbers, hence `line + 1`.
                if line + 1 >= e.final_start_line()
                    && line + 1 < e.final_start_line() + e.lines_in_hunk()
                {
                    state.chuck_line_range = Some((
                        e.final_start_line(),
                        e.final_start_line() + e.lines_in_hunk(),
                    ));
                    // Keep the newer commit: a descendant wins over its ancestor.
                    state.commit_id = match state.commit_id {
                        Some(existing_commit) => {
                            match repo.graph_descendant_of(e.final_commit_id(), existing_commit) {
                                Ok(true) => Some(e.final_commit_id()),
                                Ok(false) => Some(existing_commit),
                                Err(err) => panic!("{}", err),
                            }
                        },
                        None => Some(e.final_commit_id()),
                    };
                }
            }
        });
    keys
}
fn complete_key_versions<'a>(
/// Fill the entry State base information (except `state`) for a complete
/// language
fn gather_entry_state<'a>(
repo: &'a git2::Repository,
head_ref: &git2::Reference,
i18n_key_versions: &mut HashMap<String, LocalizationEntryState>,
root_dir: &Path,
lang_dir: &Path,
) {
//TODO: review unwraps in this file
language_identifier: &str,
root_path: &Path,
relative_i18n_root_path: &Path,
) -> RawLanguage<LocalizationEntryState> {
println!("-> {:?}", &language_identifier);
let i18n_root_path = root_path.join(relative_i18n_root_path);
// load standard manifest
let manifest = raw::load_manifest(&i18n_root_path, language_identifier)
.expect("failed to load language manifest");
// transform language into LocalizationEntryState
let mut fragments = HashMap::new();
// For each file in directory
for i18n_file in root_dir.join(&lang_dir).read_dir().unwrap().flatten() {
if let Ok(file_type) = i18n_file.file_type() {
if file_type.is_file() {
println!("-> {:?}", i18n_file.file_name());
let files = raw::fragments_pathes_in_language(&i18n_root_path, language_identifier)
.expect("failed to get all files in language");
for subpath in files {
let path = relative_i18n_root_path
.join(language_identifier)
.join(&subpath);
println!(" -> {:?}", &subpath);
let i18n_blob = read_file_from_path(repo, head_ref, &path);
let fragment: RawFragment<String> = from_bytes(i18n_blob.content()).unwrap_or_else(|e| {
panic!(
"Could not parse {} RON file, skipping: {}",
subpath.to_string_lossy(),
e
)
});
let frag = transform_fragment(repo, (&path, fragment), &i18n_blob);
fragments.insert(subpath.to_path_buf(), frag);
}
let full_path = i18n_file.path();
let path = full_path.strip_prefix(root_dir).unwrap();
let i18n_blob = read_file_from_path(repo, head_ref, path);
let i18n: LocalizationFragment =
from_bytes(i18n_blob.content()).unwrap_or_else(|e| {
panic!(
"Could not parse {} RON file, skipping: {}",
i18n_file.path().to_string_lossy(),
e
)
});
i18n_key_versions.extend(generate_key_version(repo, &i18n, path, &i18n_blob));
} else if file_type.is_dir() {
// If it's a directory, recursively check it
complete_key_versions(
repo,
head_ref,
i18n_key_versions,
root_dir,
&i18n_file.path(),
RawLanguage::<LocalizationEntryState> {
manifest,
fragments,
}
}
/// fills in the `state`
/// Fills in the `state` of every entry in `current_i18n` by comparing it
/// against the reference language:
/// - `Outdated` when the reference key's commit is newer (i.e. not an
///   ancestor of the current commit),
/// - `UpToDate` otherwise,
/// - `NotFound` for reference keys missing from the current language
///   (a placeholder entry is inserted).
///
/// NOTE(review): marking entries as `Unused` is not done in this function —
/// confirm where that happens.
fn compare_lang_with_reference(
    current_i18n: &mut RawLanguage<LocalizationEntryState>,
    i18n_references: &RawLanguage<LocalizationEntryState>,
    repo: &git2::Repository,
) {
    // git graph_descendant_of is slow, so we cache results keyed on the
    // (commit, ancestor) pair.
    let mut graph_decendent_of_cache = HashMap::new();
    let mut cached_graph_descendant_of = |commit, ancestor| -> bool {
        let key = (commit, ancestor);
        match graph_decendent_of_cache.entry(key) {
            Entry::Occupied(entry) => {
                return *entry.get();
            },
            Entry::Vacant(entry) => {
                // Treat lookup failures as "not a descendant".
                let value = repo.graph_descendant_of(commit, ancestor).unwrap_or(false);
                *entry.insert(value)
            },
        }
    };
    // match files
    for (ref_path, ref_fragment) in i18n_references.fragments.iter() {
        let cur_fragment = match current_i18n.fragments.get_mut(ref_path) {
            Some(c) => c,
            None => {
                // The whole fragment file is missing from this language.
                eprintln!(
                    "language {} is missing file: {:?}",
                    current_i18n.manifest.metadata.language_identifier, ref_path
                );
                continue;
            },
        };
        for (ref_key, ref_state) in ref_fragment.string_map.iter() {
            match cur_fragment.string_map.get_mut(ref_key) {
                Some(state) => {
                    // Both sides need a commit id to be comparable.
                    let commit_id = match state.commit_id {
                        Some(c) => c,
                        None => {
                            eprintln!(
                                "Commit ID of key {} in i18n file {} is missing! Skipping key.",
                                ref_key,
                                ref_path.to_string_lossy()
                            );
                            continue;
                        },
                    };
                    let ref_commit_id = match ref_state.commit_id {
                        Some(c) => c,
                        None => {
                            eprintln!(
                                "Commit ID of key {} in reference i18n file is missing! Skipping \
                                 key.",
                                ref_key
                            );
                            continue;
                        },
                    };
                    // Outdated unless our commit is (or descends from) the
                    // reference commit.
                    if commit_id != ref_commit_id
                        && !cached_graph_descendant_of(commit_id, ref_commit_id)
                    {
                        state.state = Some(LocalizationState::Outdated);
                    } else {
                        state.state = Some(LocalizationState::UpToDate);
                    }
                },
                None => {
                    // Reference key missing entirely: record it as NotFound.
                    cur_fragment
                        .string_map
                        .insert(ref_key.to_owned(), LocalizationEntryState {
                            key_line: None,
                            chuck_line_range: None,
                            commit_id: None,
                            state: Some(LocalizationState::NotFound),
                        });
                },
            }
        }
    }
}
/// Gather the `LocalizationEntryState` for every key of one language.
///
/// First processes the manifest blob itself via `generate_key_version`,
/// then walks the language directory via `complete_key_versions` to cover
/// the remaining fragment files.
fn gather_state(
    loc: &RawLocalization,
    i18n_blob: &git2::Blob,
    ref_manifest: &Path,
    root_dir: &Path,
    lang_dir: &Path,
    repo: &git2::Repository,
    head_ref: &git2::Reference,
) -> HashMap<String, LocalizationEntryState> {
    // Generate map
    let mut i18n_map = generate_key_version(
        repo,
        &LocalizationFragment::from(loc.clone()),
        ref_manifest,
        i18n_blob,
    );
    // Gathering info about keys from language
    complete_key_versions(repo, head_ref, &mut i18n_map, root_dir, lang_dir);
    i18n_map
}
// Helper function to test localization directory
// `lang_dir` - path to localization directory. Relative from root of the
// repo.
// `root_dir` - absolute path to repo
// `ref_manifest` - path to reference manifest
// `i18n_references` - keys from reference language
// `repo` - git object for main repo
// `head_ref` - HEAD
/// Analyse one localization directory and return its statistics.
///
/// Returns `None` when `lang_dir` is the reference language itself
/// (its manifest path equals `ref_manifest`).
///
/// NOTE(review): `be_verbose` and `csv_enabled` are currently unused here
/// and CSV output is printed unconditionally — confirm this is intended.
fn test_localization_directory(
    lang_dir: &Path,
    root_dir: &Path,
    ref_manifest: &Path,
    i18n_references: &HashMap<String, LocalizationEntryState>,
    be_verbose: bool,
    csv_enabled: bool,
    repo: &git2::Repository,
    head_ref: &git2::Reference,
) -> Option<LocalizationStats> {
    let relfile = lang_dir.join(&(LANG_MANIFEST_FILE.to_string() + ".ron"));
    // Skip the reference language: comparing it with itself is pointless.
    if relfile == ref_manifest {
        return None;
    }
    println!("\n-----------------------------------");
    println!("{:?}", relfile);
    println!("-----------------------------------");
    // Find the localization entry state
    let current_blob = read_file_from_path(repo, head_ref, &relfile);
    let current_loc: RawLocalization = from_bytes(current_blob.content()).unwrap_or_else(|e| {
        panic!(
            "Could not parse {} RON file, skipping: {}",
            relfile.to_string_lossy(),
            e
        )
    });
    // Gather state of current localization
    let mut current_i18n = gather_state(
        &current_loc,
        &current_blob,
        ref_manifest,
        root_dir,
        lang_dir,
        repo,
        head_ref,
    );
    // Comparing with reference localization
    fill_info(&mut current_i18n, i18n_references, repo, &relfile);
    let mut state_map = LocalizationAnalysis::default();
    let result = gather_results(current_i18n, &mut state_map);
    print_csv_file(&mut state_map, relfile);
    Some(result)
}
fn fill_info(
current_i18n: &mut HashMap<String, LocalizationEntryState>,
i18n_references: &HashMap<String, LocalizationEntryState>,
repo: &git2::Repository,
relfile: &Path,
) {
for (ref_key, ref_state) in i18n_references.iter() {
match current_i18n.get_mut(ref_key) {
Some(state) => {
let commit_id = match state.commit_id {
Some(c) => c,
None => {
eprintln!(
"Commit ID of key {} in i18n file {} is missing! Skipping key.",
ref_key,
relfile.to_string_lossy()
);
continue;
},
};
let ref_commit_id = match ref_state.commit_id {
Some(c) => c,
None => {
eprintln!(
"Commit ID of key {} in reference i18n file is missing! Skipping key.",
ref_key
);
continue;
},
};
if commit_id != ref_commit_id
&& !repo
.graph_descendant_of(commit_id, ref_commit_id)
.unwrap_or(false)
{
state.state = LocalizationState::Outdated;
} else {
state.state = LocalizationState::UpToDate;
}
},
None => {
current_i18n.insert(ref_key.to_owned(), LocalizationEntryState {
key_line: None,
chuck_line_range: None,
commit_id: None,
state: LocalizationState::NotFound,
});
},
let ref_keys: HashSet<&String> = ref_fragment.string_map.keys().collect();
for (_, state) in cur_fragment
.string_map
.iter_mut()
.filter(|&(k, _)| !ref_keys.contains(k))
{
state.state = Some(LocalizationState::Unused);
}
}
let ref_keys: HashSet<&String> = i18n_references.keys().collect();
for (_, state) in current_i18n
.iter_mut()
.filter(|&(k, _)| !ref_keys.contains(k))
{
state.state = LocalizationState::Unused;
}
}
fn gather_results(
current_i18n: HashMap<String, LocalizationEntryState>,
state_map: &mut LocalizationAnalysis,
) -> LocalizationStats {
let mut uptodate_entries = 0;
let mut outdated_entries = 0;
let mut unused_entries = 0;
let mut notfound_entries = 0;
let mut unknown_entries = 0;
current_i18n: &RawLanguage<LocalizationEntryState>,
) -> (LocalizationAnalysis, LocalizationStats) {
let mut state_map =
LocalizationAnalysis::new(&current_i18n.manifest.metadata.language_identifier);
let mut stats = LocalizationStats::default();
let keys: Vec<&String> = current_i18n.keys().collect();
for key in keys {
let entry = current_i18n.get(key).unwrap();
match entry.state {
LocalizationState::Outdated => outdated_entries += 1,
LocalizationState::NotFound => notfound_entries += 1,
LocalizationState::Unknown => unknown_entries += 1,
LocalizationState::Unused => unused_entries += 1,
LocalizationState::UpToDate => uptodate_entries += 1,
};
if entry.state != LocalizationState::UpToDate {
let state_keys = state_map
.get_mut(entry.state)
.expect("vectors must be added");
state_keys.push((key.to_owned(), entry.commit_id));
for (file, fragments) in &current_i18n.fragments {
for (key, entry) in &fragments.string_map {
match entry.state {
Some(LocalizationState::Outdated) => stats.outdated_entries += 1,
Some(LocalizationState::NotFound) => stats.notfound_entries += 1,
None => stats.errors += 1,
Some(LocalizationState::Unused) => stats.unused_entries += 1,
Some(LocalizationState::UpToDate) => stats.uptodate_entries += 1,
};
if entry.state != Some(LocalizationState::UpToDate) {
let state_keys = state_map.data.get_mut(&entry.state).expect("prefiled");
state_keys.push((file.clone(), key.to_owned(), entry.commit_id));
}
}
}
// Calculate key count that actually matter for the status of the translation
// Unused entries don't break the game
let current_i18n_entry_count = current_i18n.len();
let real_entry_count = current_i18n_entry_count - unused_entries;
LocalizationStats {
uptodate_entries,
unused_entries,
outdated_entries,
notfound_entries,
errors: unknown_entries,
real_entry_count,
for (_, entries) in state_map.data.iter_mut() {
entries.sort();
}
(state_map, stats)
}
fn print_translation_stats(
ref_i18n_map: &HashMap<String, LocalizationEntryState>,
stats: &LocalizationStats,
state_map: &mut LocalizationAnalysis,
be_verbose: bool,
relfile: PathBuf,
ref_manifest: &Path,
/// completely analysis multiple languages without printing
fn complete_analysis(
language_identifiers: &[&str],
root_path: &Path,
relative_i18n_root_path: &Path,
) -> (
HashMap<String, (LocalizationAnalysis, LocalizationStats)>,
/* ref lang */ RawLanguage<LocalizationEntryState>,
) {
let uptodate_percent =
(stats.uptodate_entries as f32 / stats.real_entry_count as f32) * 100_f32;
let outdated_percent =
(stats.outdated_entries as f32 / stats.real_entry_count as f32) * 100_f32;
let untranslated_percent =
((stats.errors + stats.errors) as f32 / stats.real_entry_count as f32) * 100_f32;
let mut result = HashMap::new();
// Initialize Git objects
let repo = git2::Repository::discover(&root_path)
.unwrap_or_else(|_| panic!("Failed to open the Git repository at {:?}", &root_path));
let head_ref = repo.head().expect("Impossible to get the HEAD reference");
// Display
if be_verbose {
println!(
"\n{:60}| {:40} | {:40}",
"Key name",
relfile.to_str().unwrap(),
ref_manifest.to_str().unwrap(),
// Read Reference Language
let ref_language = gather_entry_state(
&repo,
&head_ref,
REFERENCE_LANG,
root_path,
relative_i18n_root_path,
);
for &language_identifier in language_identifiers {
let mut cur_language = gather_entry_state(
&repo,
&head_ref,
language_identifier,
root_path,
relative_i18n_root_path,
);
} else {
println!("\nKey name");
compare_lang_with_reference(&mut cur_language, &ref_language, &repo);
let (state_map, stats) = gather_results(&cur_language);
result.insert(language_identifier.to_owned(), (state_map, stats));
}
state_map.show(LocalizationState::NotFound, be_verbose, ref_i18n_map);
state_map.show(LocalizationState::Unused, be_verbose, ref_i18n_map);
state_map.show(LocalizationState::Outdated, be_verbose, ref_i18n_map);
state_map.show(LocalizationState::Unknown, be_verbose, ref_i18n_map);
println!(
"\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries",
stats.uptodate_entries,
stats.outdated_entries,
stats.unused_entries,
stats.notfound_entries,
stats.errors,
);
println!(
"{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n",
uptodate_percent, outdated_percent, untranslated_percent,
);
}
fn print_csv_file(state_map: &mut LocalizationAnalysis, relfile: PathBuf) {
println!("country_code,file_name,translation_code,status,git_commit");
state_map.csv(LocalizationState::UpToDate);
state_map.csv(LocalizationState::NotFound);
state_map.csv(LocalizationState::Unused);
state_map.csv(LocalizationState::Outdated);
state_map.csv(LocalizationState::Unknown);
(result, ref_language)
}
/// Test one language
/// `code` - name of the directory in assets (de_DE for example)
/// `root_dir` - absolute path to main repo
/// `assets_path` - relative path to asset directory (right now it is
/// 'assets/voxygen/i18n')
/// be_verbose -
/// csv_enabled - generate csv files in target folder
pub fn test_specific_localization(
code: &str,
root_dir: &Path,
assets_path: &Path,
/// - `code`: name of the directory in assets (de_DE for example)
/// - `root_path`: absolute path to main repo
/// - `relative_i18n_root_path`: relative path to asset directory (right now it
/// is 'assets/voxygen/i18n')
/// - be_verbose: print extra info
/// - csv_enabled: generate csv files in target folder
pub fn test_specific_localizations(
language_identifiers: &[&str],
root_path: &Path,
relative_i18n_root_path: &Path,
be_verbose: bool,
csv_enabled: bool,
) {
// Relative paths from root of repo to assets
let ref_lang_dir = assets_path.join(REFERENCE_LANG);
let ref_manifest = ref_lang_dir.join(LANG_MANIFEST_FILE.to_string() + ".ron");
// Initialize Git objects
let repo = git2::Repository::discover(&root_dir)
.unwrap_or_else(|_| panic!("Failed to open the Git repository at {:?}", &root_dir));
let head_ref = repo.head().expect("Impossible to get the HEAD reference");
// Read HEAD for the reference language manifest
let ref_manifest_blob = read_file_from_path(&repo, &head_ref, &ref_manifest);
let loc: RawLocalization = from_bytes(ref_manifest_blob.content())
.expect("Expect to parse reference i18n RON file, can't proceed without it");
// Gathering info about keys from reference language
let reference_i18n = gather_state(
&loc,
&ref_manifest_blob,
&ref_manifest,
root_dir,
&ref_lang_dir,
&repo,
&head_ref,
);
// Testing how specific language is localized
let dir = assets_path.join(code);
test_localization_directory(
&dir,
root_dir,
&ref_manifest,
&reference_i18n,
be_verbose,
csv_enabled,
&repo,
&head_ref,
);
let (analysis, reference_language) =
complete_analysis(language_identifiers, root_path, relative_i18n_root_path);
for (language_identifier, (state_map, stats)) in &analysis {
if csv_enabled {
print_csv_file(state_map);
} else {
print_translation_stats(
language_identifier,
&reference_language,
stats,
state_map,
be_verbose,
);
}
}
if analysis.len() > 1 {
print_overall_stats(analysis);
}
}
/// Test all localizations
/// `root_dir` - absolute path to main repo
/// `assets_path` - relative path to asset directory (right now it is
/// 'assets/voxygen/i18n')
/// csv_enabled - generate csv files in target folder
pub fn test_all_localizations(
root_dir: &Path,
assets_path: &Path,
root_path: &Path,
relative_i18n_root_path: &Path,
be_verbose: bool,
csv_enabled: bool,
) {
let ref_lang_dir = assets_path.join(REFERENCE_LANG);
let ref_manifest = ref_lang_dir.join(LANG_MANIFEST_FILE.to_string() + ".ron");
if !root_dir.join(&ref_lang_dir).is_dir() {
panic!("Reference language folder not found {:?}", &ref_lang_dir)
}
if !root_dir.join(&ref_manifest).is_file() {
panic!("Reference language file not found {:?}", &ref_manifest)
}
// Initialize Git objects
let repo = git2::Repository::discover(&root_dir)
.unwrap_or_else(|_| panic!("Failed to open the Git repository at {:?}", &root_dir));
let head_ref = repo.head().expect("Impossible to get the HEAD reference");
// Read HEAD for the reference language file
let ref_manifest_blob = read_file_from_path(&repo, &head_ref, &ref_manifest);
let loc: RawLocalization = from_bytes(ref_manifest_blob.content())
.expect("Expect to parse reference i18n RON file, can't proceed without it");
// Gathering info about keys from reference language
let reference_i18n = gather_state(
&loc,
&ref_manifest_blob,
&ref_manifest,
root_dir,
&ref_lang_dir,
&repo,
&head_ref,
);
let i18n_root_path = root_path.join(relative_i18n_root_path);
// Compare to other reference files
let i18n_directories = i18n_directories(&root_dir.join(assets_path));
let mut i18n_entry_counts: HashMap<PathBuf, LocalizationStats> = HashMap::new();
for dir in &i18n_directories {
let rel_dir = dir.strip_prefix(root_dir).unwrap();
let result = test_localization_directory(
rel_dir,
root_dir,
&ref_manifest,
&reference_i18n,
be_verbose,
csv_enabled,
&repo,
&head_ref,
);
if let Some(values) = result {
i18n_entry_counts.insert(dir.clone(), values);
}
}
print_overall_stats(i18n_entry_counts);
}
fn print_overall_stats(i18n_entry_counts: HashMap<PathBuf, LocalizationStats>) {
let mut overall_uptodate_entry_count = 0;
let mut overall_outdated_entry_count = 0;
let mut overall_untranslated_entry_count = 0;
let mut overall_real_entry_count = 0;
println!("-----------------------------------------------------------------------------");
println!("Overall Translation Status");
println!("-----------------------------------------------------------------------------");
println!(
"{:12}| {:8} | {:8} | {:8} | {:8} | {:8}",
"", "up-to-date", "outdated", "untranslated", "unused", "errors",
let language_identifiers = i18n_directories(&i18n_root_path)
.into_iter()
.map(|p| {
p.strip_prefix(&i18n_root_path)
.unwrap()
.to_str()
.unwrap()
.to_owned()
})
.collect::<Vec<_>>();
test_specific_localizations(
&language_identifiers
.iter()
.map(|s| s.as_str())
.collect::<Vec<_>>(),
root_path,
relative_i18n_root_path,
be_verbose,
csv_enabled,
);
let mut i18n_stats: Vec<(&PathBuf, &LocalizationStats)> = i18n_entry_counts.iter().collect();
i18n_stats.sort_by_key(|(_, result)| result.notfound_entries);
for (path, test_result) in i18n_stats {
let LocalizationStats {
uptodate_entries: uptodate,
outdated_entries: outdated,
unused_entries: unused,
notfound_entries: untranslated,
errors,
real_entry_count: real,
} = test_result;
overall_uptodate_entry_count += uptodate;
overall_outdated_entry_count += outdated;
overall_untranslated_entry_count += untranslated;
overall_real_entry_count += real;
println!(
"{:12}|{:8} |{:6} |{:8} |{:6} |{:8}",
path.file_name().unwrap().to_string_lossy(),
uptodate,
outdated,
untranslated,
unused,
errors,
);
}
println!(
"\n{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated",
(overall_uptodate_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
(overall_outdated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
(overall_untranslated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
);
println!("-----------------------------------------------------------------------------\n");
}

View File

@ -1,4 +1,3 @@
/*
use clap::{App, Arg};
use std::path::Path;
use veloren_i18n::{analysis, verification};
@ -37,31 +36,29 @@ fn main() {
.get_matches();
// Generate paths
let root = common_assets::find_root().expect("Failed to find root of repository");
let asset_path = Path::new("assets/voxygen/i18n/");
let root_path = common_assets::find_root().expect("Failed to find root of repository");
let relative_i18n_root_path = Path::new("assets/voxygen/i18n/");
let be_verbose = matches.is_present("verbose");
let csv_enabled = matches.is_present("csv");
if let Some(code) = matches.value_of("CODE") {
analysis::test_specific_localization(
code,
&root,
&asset_path,
matches.is_present("verbose"),
analysis::test_specific_localizations(
&[code],
&root_path,
relative_i18n_root_path,
be_verbose,
csv_enabled,
);
}
if matches.is_present("test") {
analysis::test_all_localizations(
&root,
&asset_path,
matches.is_present("verbose"),
&root_path,
relative_i18n_root_path,
be_verbose,
csv_enabled,
);
}
if matches.is_present("verify") {
verification::verify_all_localizations(&root, &asset_path);
verification::verify_all_localizations(&root_path, relative_i18n_root_path);
}
}*/
fn main() {}
}

View File

@ -1,172 +1,157 @@
//! fragment attached with git versioning information
use hashbrown::{HashMap};
use std::path::{Path, PathBuf};
use std::sync::RwLock;
use std::sync::Arc;
use crate::raw::{RawFragment};
use crate::raw::RawFragment;
use hashbrown::HashMap;
use std::path::Path;
struct GitCache<'a> {
pub root_dir: PathBuf,
pub blobs: RwLock<HashMap<PathBuf, Arc<git2::Blob<'a>>>>,
pub repo: git2::Repository,
//pub head_ref: git2::Reference<'a>,
#[derive(Copy, Clone, Eq, Hash, Debug, PartialEq)]
pub(crate) enum LocalizationState {
UpToDate,
NotFound,
Outdated,
Unused,
}
impl<'a> GitCache<'a> {
pub fn new(root_dir: &Path) -> Self {
let repo = git2::Repository::discover(&root_dir)
.unwrap_or_else(|_| panic!("Failed to open the Git repository at {:?}", &root_dir));
//let head_ref = repo.head().expect("Impossible to get the HEAD reference");
pub(crate) const ALL_LOCALIZATION_STATES: [Option<LocalizationState>; 5] = [
Some(LocalizationState::UpToDate),
Some(LocalizationState::NotFound),
Some(LocalizationState::Outdated),
Some(LocalizationState::Unused),
None,
];
let root_dir = root_dir.to_path_buf();
let blobs = RwLock::new(HashMap::new());
Self {
root_dir,
blobs,
repo,
//head_ref,
#[derive(Copy, Clone, Debug)]
pub(crate) struct LocalizationEntryState {
pub(crate) key_line: Option<usize>,
pub(crate) chuck_line_range: Option<(usize, usize)>,
pub(crate) commit_id: Option<git2::Oid>,
pub(crate) state: Option<LocalizationState>,
}
impl LocalizationState {
pub(crate) fn print(this: &Option<Self>) -> String {
match this {
Some(LocalizationState::UpToDate) => "UpToDate",
Some(LocalizationState::NotFound) => "NotFound",
Some(LocalizationState::Outdated) => "Outdated",
Some(LocalizationState::Unused) => "Unused",
None => "Unknown",
}
}
/// Returns the Git blob associated with the given reference and path
fn read_file_from_path(
&'a self,
reference: &git2::Reference,
path: &std::path::Path,
) -> Arc<git2::Blob<'a>> {
// return from cache
let lock = self.blobs.read().unwrap();
if let Some(blob) = lock.get(path) {
return blob.clone();
}
drop(lock);
// load file not in cache
let tree = reference
.peel_to_tree()
.expect("Impossible to peel HEAD to a tree object");
let blob = Arc::new(tree.get_path(path)
.unwrap_or_else(|_| {
panic!(
"Impossible to find the file {:?} in reference {:?}",
path,
reference.name()
)
})
.to_object(&self.repo)
.unwrap()
.peel_to_blob()
.expect("Impossible to fetch the Git object"));
let mut lock = self.blobs.write().unwrap();
let pathbuf = path.to_path_buf();
lock.insert(pathbuf, blob.clone());
blob
.to_owned()
}
}
/*
impl LocalizationEntryState {
fn new(key_line: Option<usize>) -> LocalizationEntryState {
LocalizationEntryState {
key_line,
chuck_line_range: None,
commit_id: None,
state: None,
}
}
}
/// Returns the Git blob associated with the given reference and path
pub(crate) fn read_file_from_path<'a>(
repo: &'a git2::Repository,
reference: &git2::Reference,
path: &std::path::Path,
) -> git2::Blob<'a> {
let tree = reference
.peel_to_tree()
.expect("Impossible to peel HEAD to a tree object");
tree.get_path(path)
.unwrap_or_else(|_| {
panic!(
"Impossible to find the file {:?} in reference {:?}",
path,
reference.name()
)
})
.to_object(repo)
.unwrap()
.peel_to_blob()
.expect("Impossible to fetch the Git object")
}
/// Extend a Fragment with historical git data
/// The actual translation gets dropped
fn generate_key_version<'a>(
repo: &'a GitCache,
path: &Path,
fragment: RawFragment<String>,
) -> RawFragment<LocalizationEntryState> {
let file_blob = repo.read_file_from_path(path);
// Find key start lines
let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file");
let mut to_process: HashSet<&String> = localization.string_map.keys().collect();
for (line_nb, line) in file_content.lines().enumerate() {
let mut found_key = None;
for key in to_process.iter() {
if correspond(line, key) {
found_key = Some(key.to_owned());
}
}
if let Some(key) = found_key {
keys.get_mut(key).unwrap().key_line = Some(line_nb);
to_process.remove(key);
};
}
}*/
/*
fn generate_key_version<'a>(
/// TODO: transform vector_map too
pub(crate) fn transform_fragment<'a>(
repo: &'a git2::Repository,
fragment: &RawFragment<String>,
path: &std::path::Path,
fragment: (&Path, RawFragment<String>),
file_blob: &git2::Blob,
) -> HashMap<String, LocalizationEntryState> {
let mut keys: HashMap<String, LocalizationEntryState> = localization
.string_map
.keys()
.map(|k| (k.to_owned(), LocalizationEntryState::new()))
.collect();
// Find key start lines
) -> RawFragment<LocalizationEntryState> {
let (path, fragment) = fragment;
// Find key start lines by searching all lines which have `:` in them (as they
// are probably keys) and getting the first part of such line trimming
// whitespace and quotes. Quite buggy heuristic
let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file");
let mut to_process: HashSet<&String> = localization.string_map.keys().collect();
for (line_nb, line) in file_content.lines().enumerate() {
let mut found_key = None;
// we only need the key part of the file to process
let file_content_keys = file_content.lines().enumerate().filter_map(|(no, line)| {
line.split_once(':').map(|(key, _)| {
let mut key = key.trim().chars();
key.next();
key.next_back();
(no, key.as_str())
})
});
//speed up the search by sorting all keys!
let mut file_content_keys_sorted = file_content_keys.into_iter().collect::<Vec<_>>();
file_content_keys_sorted.sort_by_key(|(_, key)| *key);
for key in to_process.iter() {
if correspond(line, key) {
found_key = Some(key.to_owned());
}
}
let mut result = RawFragment::<LocalizationEntryState> {
string_map: HashMap::new(),
vector_map: HashMap::new(),
};
if let Some(key) = found_key {
keys.get_mut(key).unwrap().key_line = Some(line_nb);
to_process.remove(key);
};
for (original_key, _) in fragment.string_map {
let line_nb = file_content_keys_sorted
.binary_search_by_key(&original_key.as_str(), |(_, key)| *key)
.map_or_else(
|_| {
eprintln!(
"Key {} does not have a git line in it's state!",
original_key
);
None
},
|id| Some(file_content_keys_sorted[id].0),
);
result
.string_map
.insert(original_key, LocalizationEntryState::new(line_nb));
}
let mut error_check_set: Vec<String> = vec![];
// Find commit for each keys
repo.blame_file(path, None)
// Find commit for each keys, THIS PART IS SLOW (2s/4s)
for e in repo
.blame_file(path, None)
.expect("Impossible to generate the Git blame")
.iter()
.for_each(|e: git2::BlameHunk| {
for (key, state) in keys.iter_mut() {
let line = match state.key_line {
Some(l) => l,
None => {
if !error_check_set.contains(key) {
eprintln!(
"Key {} does not have a git line in it's state! Skipping key.",
key
);
error_check_set.push(key.clone());
}
continue;
},
};
if line + 1 >= e.final_start_line()
&& line + 1 < e.final_start_line() + e.lines_in_hunk()
{
state.chuck_line_range = Some((
e.final_start_line(),
e.final_start_line() + e.lines_in_hunk(),
));
state.commit_id = match state.commit_id {
Some(existing_commit) => {
match repo.graph_descendant_of(e.final_commit_id(), existing_commit) {
Ok(true) => Some(e.final_commit_id()),
Ok(false) => Some(existing_commit),
Err(err) => panic!("{}", err),
}
{
for (_, state) in result.string_map.iter_mut() {
if let Some(line) = state.key_line {
let range = (
e.final_start_line(),
e.final_start_line() + e.lines_in_hunk(),
);
if line + 1 >= range.0 && line + 1 < range.1 {
state.chuck_line_range = Some(range);
state.commit_id = state.commit_id.map_or_else(
|| Some(e.final_commit_id()),
|existing_commit| match repo
.graph_descendant_of(e.final_commit_id(), existing_commit)
{
Ok(true) => Some(e.final_commit_id()),
Ok(false) => Some(existing_commit),
Err(err) => panic!("{}", err),
},
None => Some(e.final_commit_id()),
};
);
}
}
});
}
}
keys
result
}
*/

View File

@ -1,19 +1,20 @@
#[cfg(any(feature = "bin", test))]
pub mod analysis;
#[cfg(any(feature = "bin", test))]
pub mod gitfragments;
//#[cfg(any(feature = "bin", test))]
//pub mod analysis;
pub mod raw;
#[cfg(any(feature = "bin", test))] pub mod stats;
pub mod verification;
use common_assets::{self, source::DirEntry, AssetExt, AssetGuard, AssetHandle};
use hashbrown::{HashMap, HashSet};
use raw::{RawFragment, RawLanguage, RawManifest};
use serde::{Deserialize, Serialize};
use std::{
fs, io,
path::{Path, PathBuf},
};
use tracing::warn;
use raw::{RawManifest, RawFragment, RawLanguage};
/// The reference language, aka the more up-to-date localization data.
/// Also the default language at first startup.
@ -118,11 +119,18 @@ impl common_assets::Compound for Language {
.load_dir::<RawFragment<String>>(asset_key, true)?
.iter()
{
let id = fragment_asset.id();
// Activate this once ._manifest is fully transformed and only contains metadata
// or regex: "<veloren\.\w+\._manifest"
/*
if id.starts_with("voxygen.") && id.ends_with("._manifest") {
continue;
}*/
let read = fragment_asset.read();
fragments.insert(PathBuf::from(fragment_asset.id()), read.clone());
fragments.insert(PathBuf::from(id), read.clone());
}
Ok(Language::from(RawLanguage{
Ok(Language::from(RawLanguage {
manifest,
fragments,
}))
@ -266,7 +274,10 @@ impl LocalizationHandle {
struct FindManifests;
impl common_assets::Compound for FindManifests {
fn load<S: common_assets::Source>(_: &common_assets::AssetCache<S>, _: &str) -> Result<Self, common_assets::Error> {
fn load<S: common_assets::Source>(
_: &common_assets::AssetCache<S>,
_: &str,
) -> Result<Self, common_assets::Error> {
Ok(Self)
}
}
@ -328,7 +339,6 @@ pub fn i18n_directories(i18n_dir: &Path) -> Vec<PathBuf> {
#[cfg(test)]
mod tests {
use std::path::Path;
use common_assets;
// Test that localization list is loaded (not empty)
#[test]
@ -357,11 +367,9 @@ mod tests {
#[test]
#[ignore]
fn test_all_localizations() {
// Options
let be_verbose = true;
// Generate paths
let i18n_root_path = Path::new("assets/voxygen/i18n/");
let root_dir = common_assets::find_root().expect("Failed to discover repository root");
crate::analysis::test_all_localizations(&root_dir, i18n_root_path, be_verbose);
crate::analysis::test_all_localizations(&root_dir, i18n_root_path, true, false);
}
}

View File

@ -1,12 +1,24 @@
//! handle the loading of a `Language`
use hashbrown::hash_map::HashMap;
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use std::fs;
use ron::de::from_reader;
//! Paths:
//! - `root_path`: repo part, git main folder
//! - `language_identifier`: `en`, `de_DE`, `fr_FR`, etc..
//! - `relative_i18n_root_path`: relative path to i18n path which contains
//! `language_identifier` folders from `root_path`
//! - `i18n_root_path`: absolute path to `relative_i18n_root_path`
//! - `i18n_path`: absolute path to `i18n_root_path` + `language_identifier`
//! - `subfolder`: all folders in `i18n_path`
//!
//! wherever possible we use relative paths only. So expect 1 absolute
//! `root_path` or `i18n_root_path` to be required and all others be relative.
use crate::{Fonts, Language, LanguageMetadata, LANG_EXTENSION, LANG_MANIFEST_FILE};
use deunicode::deunicode;
use crate::{Fonts, LanguageMetadata, LANG_MANIFEST_FILE, LANG_EXTENSION};
use crate::Language;
use hashbrown::hash_map::HashMap;
use ron::de::from_reader;
use serde::{Deserialize, Serialize};
use std::{
fs,
path::{Path, PathBuf},
};
/// Raw localization metadata from LANG_MANIFEST_FILE file
/// See `Language` for more info on each attributes
@ -27,7 +39,7 @@ pub(crate) struct RawFragment<T> {
pub(crate) struct RawLanguage<T> {
pub(crate) manifest: RawManifest,
pub(crate) fragments: HashMap<PathBuf, RawFragment<T>>,
pub(crate) fragments: HashMap</* relative to i18n_path */ PathBuf, RawFragment<T>>,
}
#[derive(Debug)]
@ -35,48 +47,52 @@ pub(crate) enum RawError {
RonError(ron::Error),
}
/// `i18n_root_path` - absolute path to i18n path which contains `en`, `de_DE`, `fr_FR` folders
pub(crate) fn load_manifest(i18n_root_path: &Path, language_identifier: &str) -> Result<RawManifest, common_assets::Error> {
let manifest_file = i18n_root_path.join(language_identifier).join(format!("{}.{}", LANG_MANIFEST_FILE, LANG_EXTENSION));
println!("file , {:?}", manifest_file);
/// `i18n_root_path` - absolute path to i18n path which contains `en`, `de_DE`,
/// `fr_FR` folders
pub(crate) fn load_manifest(
i18n_root_path: &Path,
language_identifier: &str,
) -> Result<RawManifest, common_assets::Error> {
let manifest_file = i18n_root_path
.join(language_identifier)
.join(format!("{}.{}", LANG_MANIFEST_FILE, LANG_EXTENSION));
tracing::debug!(?manifest_file, "manifest loaded");
let f = fs::File::open(&manifest_file)?;
Ok(from_reader(f).map_err(RawError::RonError)?)
let manifest: RawManifest = from_reader(f).map_err(RawError::RonError)?;
// verify that the folder name `de_DE` matches the value inside the metadata!
assert_eq!(manifest.metadata.language_identifier, language_identifier);
Ok(manifest)
}
/// `i18n_root_path` - absolute path to i18n path which contains `en`, `de_DE`, `fr_FR` files
pub(crate) fn load_raw_language(i18n_root_path: &Path, manifest: RawManifest) -> Result<RawLanguage<String>, common_assets::Error> {
/// `i18n_root_path` - absolute path to i18n path which contains `en`, `de_DE`,
/// `fr_FR` files
pub(crate) fn load_raw_language(
i18n_root_path: &Path,
manifest: RawManifest,
) -> Result<RawLanguage<String>, common_assets::Error> {
let language_identifier = &manifest.metadata.language_identifier;
let fragments = recursive_load_raw_language(i18n_root_path, language_identifier, Path::new(""))?;
Ok(RawLanguage{
let i18n_path = i18n_root_path.join(language_identifier);
//get List of files
let files = fragments_pathes_in_language(i18n_root_path, language_identifier)?;
// Walk through each file in the directory
let mut fragments = HashMap::new();
for fragment_file in &files {
let relative_path = fragment_file.strip_prefix(&i18n_path).unwrap();
let f = fs::File::open(fragment_file)?;
let fragment = from_reader(f).map_err(RawError::RonError)?;
fragments.insert(relative_path.to_path_buf(), fragment);
}
Ok(RawLanguage {
manifest,
fragments,
})
}
fn recursive_load_raw_language(i18n_root_path: &Path, language_identifier: &str, subfolder: &Path) -> Result<HashMap<PathBuf,RawFragment<String>>, common_assets::Error> {
// Walk through each file in the directory
let mut fragments = HashMap::new();
let search_dir = i18n_root_path.join(language_identifier).join(subfolder);
for fragment_file in search_dir.read_dir().unwrap().flatten() {
let file_type = fragment_file.file_type()?;
if file_type.is_dir() {
let full_path = fragment_file.path();
let relative_path = full_path.strip_prefix(&search_dir).unwrap();
fragments.extend(recursive_load_raw_language(i18n_root_path, language_identifier, relative_path)?);
} else if file_type.is_file() {
let full_path = fragment_file.path();
let relative_path = full_path.strip_prefix(&i18n_root_path).unwrap();
let f = fs::File::open(&full_path)?;
let fragment = from_reader(f).map_err(RawError::RonError)?;
fragments.insert(relative_path.to_path_buf(), fragment);
}
}
Ok(fragments)
}
impl From<RawLanguage<String>> for Language {
fn from(raw: RawLanguage<String>) -> Self {
let mut string_map = HashMap::new();
let mut vector_map = HashMap::new();
@ -105,11 +121,56 @@ impl From<RawLanguage<String>> for Language {
vector_map,
convert_utf8_to_ascii,
fonts: raw.manifest.fonts,
metadata: metadata,
metadata,
}
}
}
pub(crate) fn fragments_pathes_in_language(
i18n_root_path: &Path,
language_identifier: &str,
) -> Result<Vec</* relative to i18n_path */ PathBuf>, std::io::Error> {
let mut result = vec![];
recursive_fragments_paths_in_language(
i18n_root_path,
language_identifier,
Path::new(""),
&mut result,
)?;
Ok(result)
}
/// i18n_path = i18n_root_path.join(REFERENCE_LANG);
fn recursive_fragments_paths_in_language(
i18n_root_path: &Path,
language_identifier: &str,
subfolder: &Path,
result: &mut Vec<PathBuf>,
) -> Result<(), std::io::Error> {
let i18n_path = i18n_root_path.join(language_identifier);
let search_dir = i18n_path.join(subfolder);
for fragment_file in search_dir.read_dir().unwrap().flatten() {
let file_type = fragment_file.file_type()?;
if file_type.is_dir() {
let full_path = fragment_file.path();
let relative_path = full_path.strip_prefix(&i18n_path).unwrap();
recursive_fragments_paths_in_language(
i18n_root_path,
language_identifier,
relative_path,
result,
)?;
} else if file_type.is_file() {
let full_path = fragment_file.path();
let relative_path = full_path.strip_prefix(&i18n_path).unwrap();
if relative_path != Path::new(&format!("{}.{}", LANG_MANIFEST_FILE, LANG_EXTENSION)) {
result.push(relative_path.to_path_buf());
}
}
}
Ok(())
}
impl core::fmt::Display for RawError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
@ -120,14 +181,10 @@ impl core::fmt::Display for RawError {
impl std::error::Error for RawError {}
impl From<RawError> for common_assets::Error {
fn from(e: RawError) -> Self {
Self::Conversion(Box::new(e))
}
fn from(e: RawError) -> Self { Self::Conversion(Box::new(e)) }
}
impl common_assets::Asset for RawManifest {
type Loader = common_assets::RonLoader;
@ -138,4 +195,4 @@ impl common_assets::Asset for RawFragment<String> {
type Loader = common_assets::RonLoader;
const EXTENSION: &'static str = LANG_EXTENSION;
}
}

205
voxygen/i18n/src/stats.rs Normal file
View File

@ -0,0 +1,205 @@
use crate::{
gitfragments::{LocalizationEntryState, LocalizationState, ALL_LOCALIZATION_STATES},
raw::RawLanguage,
};
use hashbrown::HashMap;
use std::path::PathBuf;
/// Per-language tally of translation-key states, produced by comparing a
/// translation's git history against the reference language.
#[derive(Default, Debug, PartialEq)]
pub(crate) struct LocalizationStats {
    /// keys counted in the `UpToDate` state
    pub(crate) uptodate_entries: usize,
    /// keys counted in the `NotFound` state (present in reference, missing here)
    pub(crate) notfound_entries: usize,
    /// keys counted in the `Unused` state (present here, gone from reference)
    pub(crate) unused_entries: usize,
    /// keys counted in the `Outdated` state
    pub(crate) outdated_entries: usize,
    /// keys whose state could not be determined
    /// (NOTE(review): presumably the `None` state bucket — confirm in analysis code)
    pub(crate) errors: usize,
}
/// Per-language listing of every `(fragment path, key, commit)` triple,
/// grouped by localization state. `None` as a map key collects entries whose
/// state could not be classified (see `LocalizationAnalysis::new`, which
/// pre-seeds the map).
#[allow(clippy::type_complexity)]
pub(crate) struct LocalizationAnalysis {
    // language this analysis describes, e.g. "de_DE"; used for CSV output
    language_identifier: String,
    pub(crate) data: HashMap<Option<LocalizationState>, Vec<(PathBuf, String, Option<git2::Oid>)>>,
}
impl LocalizationStats {
    /// Number of keys that actually matter for the status of the translation.
    ///
    /// `unused_entries` is deliberately excluded: unused keys don't break the
    /// game, so they don't count towards the "real" total.
    pub(crate) fn get_real_entry_count(&self) -> usize {
        let relevant = [
            self.uptodate_entries,
            self.outdated_entries,
            self.notfound_entries,
            self.errors,
        ];
        relevant.iter().sum()
    }
}
impl LocalizationAnalysis {
    /// Create an empty analysis for one language, pre-seeding `data` with an
    /// empty bucket for each state that `show`/`csv` may be queried with.
    ///
    /// NOTE(review): `Some(LocalizationState::Unknown)` is *not* seeded here,
    /// so `show`/`csv` will panic if ever called with it — confirm that
    /// callers iterating `ALL_LOCALIZATION_STATES` never pass it.
    pub(crate) fn new(language_identifier: &str) -> Self {
        let mut data = HashMap::new();
        data.insert(Some(LocalizationState::UpToDate), vec![]);
        data.insert(Some(LocalizationState::NotFound), vec![]);
        data.insert(Some(LocalizationState::Unused), vec![]);
        data.insert(Some(LocalizationState::Outdated), vec![]);
        // `None` collects entries whose state could not be classified
        data.insert(None, vec![]);
        Self {
            language_identifier: language_identifier.to_owned(),
            data,
        }
    }

    /// Print all entries recorded for `state` to stdout.
    ///
    /// In verbose mode each line also shows this language's commit for the key
    /// next to the reference language's commit (looked up by fragment path and
    /// key). Panics if `state` has no bucket in `data` (see `new`).
    fn show(
        &self,
        state: Option<LocalizationState>,
        reference_language: &RawLanguage<LocalizationEntryState>,
        be_verbose: bool,
    ) {
        let entries = self.data.get(&state).unwrap_or_else(|| {
            panic!(
                "called on invalid state: {}",
                LocalizationState::print(&state)
            )
        });
        // nothing to report for this state
        if entries.is_empty() {
            return;
        }
        println!("\n\t[{}]", LocalizationState::print(&state));
        for (path, key, commit_id) in entries {
            if be_verbose {
                let our_commit = LocalizationAnalysis::print_commit(commit_id);
                // reference commit for the same key, or "None" if the key/path
                // is absent from the reference language
                let ref_commit = reference_language
                    .fragments
                    .get(path)
                    .and_then(|entry| entry.string_map.get(key))
                    .and_then(|s| s.commit_id)
                    .map(|s| format!("{}", s))
                    .unwrap_or_else(|| "None".to_owned());
                println!("{:60}| {:40} | {:40}", key, our_commit, ref_commit,);
            } else {
                println!("{}", key);
            }
        }
    }

    /// Print all entries recorded for `state` as CSV rows
    /// (`language,path,key,state,commit`) to stdout.
    /// Panics if `state` has no bucket in `data` (see `new`).
    fn csv(&self, state: Option<LocalizationState>) {
        let entries = self
            .data
            .get(&state)
            .unwrap_or_else(|| panic!("called on invalid state: {:?}", state));
        for (path, key, commit_id) in entries {
            let our_commit = LocalizationAnalysis::print_commit(commit_id);
            println!(
                "{},{:?},{},{},{}",
                self.language_identifier,
                path,
                key,
                LocalizationState::print(&state),
                our_commit
            );
        }
    }

    /// Format an optional commit id for display; `None` becomes the literal
    /// string "None".
    fn print_commit(commit_id: &Option<git2::Oid>) -> String {
        commit_id
            .map(|s| format!("{}", s))
            .unwrap_or_else(|| "None".to_owned())
    }
}
/// Print a per-language status report to stdout: every non-up-to-date key
/// (grouped by state, via `state_map.show`), followed by absolute counts and
/// percentages relative to `stats.get_real_entry_count()`.
///
/// `be_verbose` additionally shows our commit vs. the reference language's
/// commit per key.
pub(crate) fn print_translation_stats(
    language_identifier: &str,
    reference_language: &RawLanguage<LocalizationEntryState>,
    stats: &LocalizationStats,
    state_map: &LocalizationAnalysis,
    be_verbose: bool,
) {
    // NOTE: if there are no relevant entries at all this is 0.0 and the
    // percentages below print as NaN.
    let real_entry_count = stats.get_real_entry_count() as f32;
    let uptodate_percent = (stats.uptodate_entries as f32 / real_entry_count) * 100_f32;
    let outdated_percent = (stats.outdated_entries as f32 / real_entry_count) * 100_f32;
    // BUG FIX: was `(stats.errors + stats.errors)`, double-counting errors.
    // "Untranslated" covers keys not found in this language plus keys whose
    // state could not be determined (matching `get_real_entry_count`).
    let untranslated_percent =
        ((stats.notfound_entries + stats.errors) as f32 / real_entry_count) * 100_f32;

    // Display
    if be_verbose {
        println!(
            "\n{:60}| {:40} | {:40}",
            "Key name",
            language_identifier,
            reference_language.manifest.metadata.language_identifier,
        );
    } else {
        println!("\nKey name");
    }

    // up-to-date keys are the uninteresting case, skip them in the listing
    for state in &ALL_LOCALIZATION_STATES {
        if state == &Some(LocalizationState::UpToDate) {
            continue;
        }
        state_map.show(*state, reference_language, be_verbose);
    }

    println!(
        "\n{} up-to-date, {} outdated, {} unused, {} not found, {} unknown entries",
        stats.uptodate_entries,
        stats.outdated_entries,
        stats.unused_entries,
        stats.notfound_entries,
        stats.errors,
    );

    println!(
        "{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated\n",
        uptodate_percent, outdated_percent, untranslated_percent,
    );
}
/// Dump the analysis as CSV to stdout: a header row followed by one row per
/// key that is not up to date.
pub(crate) fn print_csv_file(state_map: &LocalizationAnalysis) {
    println!("country_code,file_name,translation_code,status,git_commit");

    // up-to-date keys carry no actionable information, so they are omitted
    ALL_LOCALIZATION_STATES
        .iter()
        .filter(|state| **state != Some(LocalizationState::UpToDate))
        .for_each(|state| state_map.csv(*state));
}
pub(crate) fn print_overall_stats(
analysis: HashMap<String, (LocalizationAnalysis, LocalizationStats)>,
) {
let mut overall_uptodate_entry_count = 0;
let mut overall_outdated_entry_count = 0;
let mut overall_untranslated_entry_count = 0;
let mut overall_real_entry_count = 0;
println!("-----------------------------------------------------------------------------");
println!("Overall Translation Status");
println!("-----------------------------------------------------------------------------");
println!(
"{:12}| {:8} | {:8} | {:8} | {:8} | {:8}",
"", "up-to-date", "outdated", "untranslated", "unused", "errors",
);
let mut i18n_stats: Vec<(&String, &(_, LocalizationStats))> = analysis.iter().collect();
i18n_stats.sort_by_key(|(_, (_, v))| v.notfound_entries);
for (path, (_, test_result)) in i18n_stats {
let LocalizationStats {
uptodate_entries: uptodate,
outdated_entries: outdated,
unused_entries: unused,
notfound_entries: untranslated,
errors,
} = test_result;
overall_uptodate_entry_count += uptodate;
overall_outdated_entry_count += outdated;
overall_untranslated_entry_count += untranslated;
overall_real_entry_count += test_result.get_real_entry_count();
println!(
"{:12}|{:8} |{:6} |{:8} |{:6} |{:8}",
path, uptodate, outdated, untranslated, unused, errors,
);
}
println!(
"\n{:.2}% up-to-date, {:.2}% outdated, {:.2}% untranslated",
(overall_uptodate_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
(overall_outdated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
(overall_untranslated_entry_count as f32 / overall_real_entry_count as f32) * 100_f32,
);
println!("-----------------------------------------------------------------------------\n");
}

View File

@ -1,23 +1,23 @@
use std::{path::Path};
use std::path::Path;
use crate::{i18n_directories, LANG_MANIFEST_FILE, REFERENCE_LANG};
use crate::raw;
use crate::{i18n_directories, raw, LANG_MANIFEST_FILE, REFERENCE_LANG};
/// Test to verify all languages that they are VALID and loadable, without
/// need of git just on the local assets folder
/// `root_dir` - absolute path to main repo
/// `asset_path` - relative path to asset directory (right now it is
/// 'assets/voxygen/i18n')
pub fn verify_all_localizations(root_dir: &Path, asset_path: &Path) {
let i18n_root_path = root_dir.join(asset_path);
/// `root_path` - absolute path to main repo
/// `relative_i18n_root_path` - relative path to asset directory (right now it
/// is 'assets/voxygen/i18n')
pub fn verify_all_localizations(root_path: &Path, relative_i18n_root_path: &Path) {
let i18n_root_path = root_path.join(relative_i18n_root_path);
let ref_i18n_path = i18n_root_path.join(REFERENCE_LANG);
let ref_i18n_manifest_path = ref_i18n_path.join(LANG_MANIFEST_FILE.to_string() + "." + crate::LANG_EXTENSION);
let ref_i18n_manifest_path =
ref_i18n_path.join(LANG_MANIFEST_FILE.to_string() + "." + crate::LANG_EXTENSION);
assert!(
root_dir.join(&ref_i18n_path).is_dir(),
root_path.join(&ref_i18n_path).is_dir(),
"Reference language folder doesn't exist, something is wrong!"
);
assert!(
root_dir.join(&ref_i18n_manifest_path).is_file(),
root_path.join(&ref_i18n_manifest_path).is_file(),
"Reference language manifest file doesn't exist, something is wrong!"
);
let i18n_directories = i18n_directories(&i18n_root_path);
@ -31,19 +31,29 @@ pub fn verify_all_localizations(root_dir: &Path, asset_path: &Path) {
folder is empty?"
);
for i18n_directory in i18n_directories {
let display_language_identifier = i18n_directory.strip_prefix(&root_dir).unwrap().to_str().unwrap();
let language_identifier = i18n_directory.strip_prefix(&i18n_root_path).unwrap().to_str().unwrap();
println!(
"verifying {:?}",
display_language_identifier
);
let display_language_identifier = i18n_directory
.strip_prefix(&root_path)
.unwrap()
.to_str()
.unwrap();
let language_identifier = i18n_directory
.strip_prefix(&i18n_root_path)
.unwrap()
.to_str()
.unwrap();
println!("verifying {:?}", display_language_identifier);
// Walk through each files and try to load them
verify_localization_directory(root_dir, &asset_path, language_identifier);
verify_localization_directory(root_path, relative_i18n_root_path, language_identifier);
}
}
fn verify_localization_directory(root_dir: &Path, asset_path: &Path, language_identifier: &str) {
let i18n_path = root_dir.join(asset_path);
let manifest = raw::load_manifest(&i18n_path, language_identifier).expect("error accessing manifest file");
fn verify_localization_directory(
root_path: &Path,
relative_i18n_root_path: &Path,
language_identifier: &str,
) {
let i18n_path = root_path.join(relative_i18n_root_path);
let manifest =
raw::load_manifest(&i18n_path, language_identifier).expect("error accessing manifest file");
raw::load_raw_language(&i18n_path, manifest).expect("error accessing fragment file");
}
}