experiment with a git cache

This commit is contained in:
Marcel Märtens 2021-07-27 20:07:08 +02:00
parent c501b2eb70
commit a5696e83a9
5 changed files with 208 additions and 25 deletions

View File

@ -1,3 +1,4 @@
/*
use clap::{App, Arg};
use std::path::Path;
use veloren_i18n::{analysis, verification};
@ -60,4 +61,7 @@ fn main() {
if matches.is_present("verify") {
verification::verify_all_localizations(&root, &asset_path);
}
}
}*/
// Stub entry point: the previous CLI implementation is commented out above.
// NOTE(review): presumably kept so the binary target still builds while the
// analysis code is being reworked — confirm before re-enabling the old body.
fn main() {}

View File

@ -0,0 +1,172 @@
//! Fragments enriched with Git versioning information.
use hashbrown::{HashMap};
use std::path::{Path, PathBuf};
use std::sync::RwLock;
use std::sync::Arc;
use crate::raw::{RawFragment};
/// A Git repository handle bundled with a cache of blobs read from it.
///
/// Rust drops struct fields in declaration order, so `blobs` is dropped
/// before `repo`.
/// NOTE(review): the cached blobs carry the lifetime `'a` and appear to
/// borrow from `repo`, so this field order looks load-bearing — confirm
/// before reordering.
struct GitCache<'a> {
/// Directory the cache was created for; repository discovery starts here.
pub root_dir: PathBuf,
/// Blobs that were already loaded, keyed by the path they were requested with.
pub blobs: RwLock<HashMap<PathBuf, Arc<git2::Blob<'a>>>>,
/// Repository found by `git2::Repository::discover` starting at `root_dir`.
pub repo: git2::Repository,
//pub head_ref: git2::Reference<'a>,
}
impl<'a> GitCache<'a> {
    /// Creates a cache for the Git repository discovered from `root_dir`.
    ///
    /// The blob cache starts out empty.
    ///
    /// # Panics
    ///
    /// Panics if no Git repository can be discovered from `root_dir`.
    pub fn new(root_dir: &Path) -> Self {
        let repo = match git2::Repository::discover(&root_dir) {
            Ok(repo) => repo,
            Err(_) => panic!("Failed to open the Git repository at {:?}", &root_dir),
        };
        //let head_ref = repo.head().expect("Impossible to get the HEAD reference");

        Self {
            root_dir: root_dir.to_path_buf(),
            blobs: RwLock::new(HashMap::new()),
            repo,
            //head_ref,
        }
    }

    /// Returns the Git blob stored at `path` in the tree that `reference`
    /// points to, loading it from the repository on a cache miss.
    ///
    /// The cache is keyed by `path` only: once a blob has been cached for a
    /// path, it is returned for every later call with that path, whatever
    /// `reference` is passed.
    ///
    /// # Panics
    ///
    /// Panics if a cache lock is poisoned, if `reference` cannot be peeled to
    /// a tree, if `path` is not present in that tree, or if the tree entry
    /// cannot be resolved to a blob.
    fn read_file_from_path(
        &'a self,
        reference: &git2::Reference,
        path: &std::path::Path,
    ) -> Arc<git2::Blob<'a>> {
        // Cache hit: hand out another handle to the stored blob. The read
        // guard lives only inside this scope, so it is released before the
        // write lock is requested further down.
        {
            let cached = self.blobs.read().unwrap();
            if let Some(hit) = cached.get(path) {
                return Arc::clone(hit);
            }
        }

        // Cache miss: resolve reference -> tree -> entry -> object -> blob.
        let tree = reference
            .peel_to_tree()
            .expect("Impossible to peel HEAD to a tree object");
        let entry = match tree.get_path(path) {
            Ok(entry) => entry,
            Err(_) => panic!(
                "Impossible to find the file {:?} in reference {:?}",
                path,
                reference.name()
            ),
        };
        let object = entry.to_object(&self.repo).unwrap();
        let loaded = Arc::new(
            object
                .peel_to_blob()
                .expect("Impossible to fetch the Git object"),
        );

        // Remember the blob for later lookups of the same path.
        self.blobs
            .write()
            .unwrap()
            .insert(path.to_path_buf(), Arc::clone(&loaded));
        loaded
    }
}
/*
/// Extend a Fragment with historical git data
/// The actual translation gets dropped
fn generate_key_version<'a>(
repo: &'a GitCache,
path: &Path,
fragment: RawFragment<String>,
) -> RawFragment<LocalizationEntryState> {
let file_blob = repo.read_file_from_path(path);
// Find key start lines
let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file");
let mut to_process: HashSet<&String> = localization.string_map.keys().collect();
for (line_nb, line) in file_content.lines().enumerate() {
let mut found_key = None;
for key in to_process.iter() {
if correspond(line, key) {
found_key = Some(key.to_owned());
}
}
if let Some(key) = found_key {
keys.get_mut(key).unwrap().key_line = Some(line_nb);
to_process.remove(key);
};
}
}*/
/*
fn generate_key_version<'a>(
repo: &'a git2::Repository,
fragment: &RawFragment<String>,
path: &std::path::Path,
file_blob: &git2::Blob,
) -> HashMap<String, LocalizationEntryState> {
let mut keys: HashMap<String, LocalizationEntryState> = localization
.string_map
.keys()
.map(|k| (k.to_owned(), LocalizationEntryState::new()))
.collect();
// Find key start lines
let file_content = std::str::from_utf8(file_blob.content()).expect("Got non UTF-8 file");
let mut to_process: HashSet<&String> = localization.string_map.keys().collect();
for (line_nb, line) in file_content.lines().enumerate() {
let mut found_key = None;
for key in to_process.iter() {
if correspond(line, key) {
found_key = Some(key.to_owned());
}
}
if let Some(key) = found_key {
keys.get_mut(key).unwrap().key_line = Some(line_nb);
to_process.remove(key);
};
}
let mut error_check_set: Vec<String> = vec![];
// Find commit for each keys
repo.blame_file(path, None)
.expect("Impossible to generate the Git blame")
.iter()
.for_each(|e: git2::BlameHunk| {
for (key, state) in keys.iter_mut() {
let line = match state.key_line {
Some(l) => l,
None => {
if !error_check_set.contains(key) {
eprintln!(
"Key {} does not have a git line in it's state! Skipping key.",
key
);
error_check_set.push(key.clone());
}
continue;
},
};
if line + 1 >= e.final_start_line()
&& line + 1 < e.final_start_line() + e.lines_in_hunk()
{
state.chuck_line_range = Some((
e.final_start_line(),
e.final_start_line() + e.lines_in_hunk(),
));
state.commit_id = match state.commit_id {
Some(existing_commit) => {
match repo.graph_descendant_of(e.final_commit_id(), existing_commit) {
Ok(true) => Some(e.final_commit_id()),
Ok(false) => Some(existing_commit),
Err(err) => panic!("{}", err),
}
},
None => Some(e.final_commit_id()),
};
}
}
});
keys
}
*/

View File

@ -1,5 +1,7 @@
#[cfg(any(feature = "bin", test))]
pub mod analysis;
pub mod gitfragments;
//#[cfg(any(feature = "bin", test))]
//pub mod analysis;
pub mod raw;
pub mod verification;
@ -113,7 +115,7 @@ impl common_assets::Compound for Language {
// inside the asked_localization
let mut fragments = HashMap::new();
for fragment_asset in cache
.load_dir::<RawFragment>(asset_key, true)?
.load_dir::<RawFragment<String>>(asset_key, true)?
.iter()
{
let read = fragment_asset.read();

View File

@ -20,14 +20,14 @@ pub(crate) struct RawManifest {
/// Raw localization data from one specific file
/// These structs are meant to be merged into a Language
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
pub(crate) struct RawFragment {
pub(crate) string_map: HashMap<String, String>,
pub(crate) vector_map: HashMap<String, Vec<String>>,
pub(crate) struct RawFragment<T> {
pub(crate) string_map: HashMap<String, T>,
pub(crate) vector_map: HashMap<String, Vec<T>>,
}
pub(crate) struct RawLanguage {
pub(crate) struct RawLanguage<T> {
pub(crate) manifest: RawManifest,
pub(crate) fragments: HashMap<PathBuf, RawFragment>,
pub(crate) fragments: HashMap<PathBuf, RawFragment<T>>,
}
#[derive(Debug)]
@ -44,18 +44,26 @@ pub(crate) fn load_manifest(i18n_root_path: &Path, language_identifier: &str) ->
}
/// `i18n_root_path` - absolute path to i18n path which contains `en`, `de_DE`, `fr_FR` files
pub(crate) fn load_raw_language(i18n_root_path: &Path, manifest: RawManifest) -> Result<RawLanguage, common_assets::Error> {
pub(crate) fn load_raw_language(i18n_root_path: &Path, manifest: RawManifest) -> Result<RawLanguage<String>, common_assets::Error> {
let language_identifier = &manifest.metadata.language_identifier;
let fragments = recursive_load_raw_language(i18n_root_path, language_identifier, Path::new(""))?;
Ok(RawLanguage{
manifest,
fragments,
})
}
fn recursive_load_raw_language(i18n_root_path: &Path, language_identifier: &str, subfolder: &Path) -> Result<HashMap<PathBuf,RawFragment<String>>, common_assets::Error> {
// Walk through each file in the directory
let mut fragments = HashMap::new();
let language_identifier = &manifest.metadata.language_identifier;
let language_dir = i18n_root_path.join(language_identifier);
for fragment_file in language_dir.read_dir().unwrap().flatten() {
let search_dir = i18n_root_path.join(language_identifier).join(subfolder);
for fragment_file in search_dir.read_dir().unwrap().flatten() {
let file_type = fragment_file.file_type()?;
if file_type.is_dir() {
// TODO: recursive
continue;
}
if file_type.is_file() {
let full_path = fragment_file.path();
let relative_path = full_path.strip_prefix(&search_dir).unwrap();
fragments.extend(recursive_load_raw_language(i18n_root_path, language_identifier, relative_path)?);
} else if file_type.is_file() {
let full_path = fragment_file.path();
let relative_path = full_path.strip_prefix(&i18n_root_path).unwrap();
let f = fs::File::open(&full_path)?;
@ -63,14 +71,11 @@ pub(crate) fn load_raw_language(i18n_root_path: &Path, manifest: RawManifest) ->
fragments.insert(relative_path.to_path_buf(), fragment);
}
}
Ok(RawLanguage{
manifest,
fragments,
})
Ok(fragments)
}
impl From<RawLanguage> for Language {
fn from(raw: RawLanguage) -> Self {
impl From<RawLanguage<String>> for Language {
fn from(raw: RawLanguage<String>) -> Self {
let mut string_map = HashMap::new();
let mut vector_map = HashMap::new();
@ -129,7 +134,7 @@ impl common_assets::Asset for RawManifest {
const EXTENSION: &'static str = LANG_EXTENSION;
}
impl common_assets::Asset for RawFragment {
impl common_assets::Asset for RawFragment<String> {
type Loader = common_assets::RonLoader;
const EXTENSION: &'static str = LANG_EXTENSION;

View File

@ -31,8 +31,8 @@ pub fn verify_all_localizations(root_dir: &Path, asset_path: &Path) {
folder is empty?"
);
for i18n_directory in i18n_directories {
let display_language_identifier = i18n_directory.strip_prefix(&root_dir).unwrap().as_os_str().to_str().unwrap();
let language_identifier = i18n_directory.strip_prefix(&i18n_root_path).unwrap().as_os_str().to_str().unwrap();
let display_language_identifier = i18n_directory.strip_prefix(&root_dir).unwrap().to_str().unwrap();
let language_identifier = i18n_directory.strip_prefix(&i18n_root_path).unwrap().to_str().unwrap();
println!(
"verifying {:?}",
display_language_identifier