veloren/voxygen/i18n/src/lib.rs

511 lines
16 KiB
Rust
Raw Normal View History

mod error;
2021-07-29 09:29:15 +00:00
mod raw;
use error::ResourceErr;
#[cfg(any(feature = "bin", feature = "stat"))]
pub mod analysis;
use fluent_bundle::{bundle::FluentBundle, FluentResource};
use intl_memoizer::concurrent::IntlLangMemoizer;
use unic_langid::LanguageIdentifier;
2021-07-29 09:29:15 +00:00
2022-08-18 21:09:39 +00:00
use deunicode::deunicode;
use hashbrown::HashMap;
2021-07-23 11:32:00 +00:00
use serde::{Deserialize, Serialize};
use std::{borrow::Cow, io};
use assets::{source::DirEntry, AssetExt, AssetGuard, AssetHandle, ReloadWatcher};
2021-07-23 11:32:00 +00:00
use tracing::warn;
// Re-export because I don't like prefix
use common_assets as assets;
// Re-export for argument creation
pub use fluent::fluent_args;
pub use fluent_bundle::FluentArgs;
2021-07-23 11:32:00 +00:00
/// The reference language, aka the more up-to-date localization data.
/// Also the default language at first startup.
pub const REFERENCE_LANG: &str = "en";
/// How a language can be described
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct LanguageMetadata {
/// A human friendly language name (e.g. "English (US)")
pub language_name: String,
/// A short text identifier for this language (e.g. "en_US")
///
/// On the opposite of `language_name` that can change freely,
/// `language_identifier` value shall be stable in time as it
/// is used by setting components to store the language
/// selected by the user.
pub language_identifier: String,
}
/// Store font metadata
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Font {
/// Key to retrieve the font in the asset system
pub asset_key: String,
/// Scale ratio to resize the UI text dynamically
scale_ratio: f32,
}
impl Font {
/// Scale input size to final UI size
#[must_use]
2021-07-23 11:32:00 +00:00
pub fn scale(&self, value: u32) -> u32 { (value as f32 * self.scale_ratio).round() as u32 }
}
/// Store font metadata
pub type Fonts = HashMap<String, Font>;
/// Store internationalization data
struct Language {
/// The bundle storing all localized texts
pub(crate) bundle: FluentBundle<FluentResource, IntlLangMemoizer>,
2021-07-23 11:32:00 +00:00
/// Font configuration is stored here
pub(crate) fonts: Fonts,
pub(crate) metadata: LanguageMetadata,
}
impl Language {
2022-08-07 12:04:32 +00:00
fn try_msg<'a>(&'a self, key: &str, args: Option<&'a FluentArgs>) -> Option<Cow<'a, str>> {
let bundle = &self.bundle;
let msg = bundle.get_message(key)?;
let mut errs = Vec::new();
let msg = bundle.format_pattern(msg.value()?, args, &mut errs);
for err in errs {
tracing::error!("err: {err} for {key}");
}
2021-07-23 11:32:00 +00:00
Some(msg)
}
fn try_variation<'a>(
&'a self,
key: &str,
seed: u16,
args: Option<&'a FluentArgs>,
) -> Option<Cow<'a, str>> {
let bundle = &self.bundle;
let msg = bundle.get_message(key)?;
let mut attrs = msg.attributes();
if attrs.len() != 0 {
let idx = usize::from(seed) % attrs.len();
// unwrap is ok here, because idx is bound to attrs.len()
// by using modulo operator.
//
// For example:
// (I)
// * attributes = [.x = 5, .y = 7, z. = 4]
// * len = 3
// * seed can be 12, 50, 1
// 12 % 3 = 0, attrs.skip(0) => first element
// 50 % 3 = 2, attrs.skip(2) => third element
// 1 % 3 = 1, attrs.skip(1) => second element
// (II)
// * attributes = []
// * len = 0
// * no matter what seed is, we return None in code above
let variation = attrs.nth(idx).unwrap();
let mut errs = Vec::new();
let msg = bundle.format_pattern(variation.value(), args, &mut errs);
for err in errs {
tracing::error!("err: {err} for {key}");
}
Some(msg)
} else {
None
}
2021-07-23 11:32:00 +00:00
}
}
impl assets::Compound for Language {
fn load(cache: assets::AnyCache, path: &str) -> Result<Self, assets::BoxedError> {
2021-07-23 11:32:00 +00:00
let manifest = cache
.load::<raw::Manifest>(&[path, ".", "_manifest"].concat())?
2021-07-23 11:32:00 +00:00
.cloned();
let raw::Manifest {
convert_utf8_to_ascii,
fonts,
metadata,
} = manifest;
let lang_id: LanguageIdentifier = metadata.language_identifier.parse()?;
let mut bundle = FluentBundle::new_concurrent(vec![lang_id]);
// Here go dragons
for id in cache.load_dir::<raw::Resource>(path, true)?.ids() {
if id.ends_with("_manifest") {
continue;
}
match cache.load(id) {
Ok(handle) => {
let source: &raw::Resource = &*handle.read();
2022-08-18 21:09:39 +00:00
let src = source.src.clone();
let src = if convert_utf8_to_ascii {
deunicode(&src)
} else {
src
};
let resource = FluentResource::try_new(src).map_err(|(_ast, errs)| {
ResourceErr::parsing_error(errs, id.to_owned(), &source.src)
})?;
bundle
.add_resource(resource)
.map_err(|e| ResourceErr::BundleError(format!("{e:?}")))?;
},
Err(err) => {
// TODO: shouldn't we just panic here?
warn!("Unable to load asset {id}, error={err:?}");
},
}
2021-07-23 11:32:00 +00:00
}
Ok(Self {
bundle,
fonts,
metadata,
})
2021-07-23 11:32:00 +00:00
}
}
/// The central data structure to handle localization in Veloren
// inherit Copy + Clone from AssetHandle (what?)
#[derive(Copy, Clone)]
2021-07-23 11:32:00 +00:00
pub struct LocalizationHandle {
active: AssetHandle<Language>,
2021-12-12 19:01:52 +00:00
watcher: ReloadWatcher,
2021-07-23 11:32:00 +00:00
fallback: Option<AssetHandle<Language>>,
pub use_english_fallback: bool,
}
2022-08-07 15:09:30 +00:00
/// Read [LocalizationGuard]
// arbitrary choice to minimize changing all of veloren
pub type Localization = LocalizationGuard;
2022-08-07 15:09:30 +00:00
/// RAII guard returned from [LocalizationHandle::read()], resembles
/// [AssetGuard]
2021-07-23 11:32:00 +00:00
pub struct LocalizationGuard {
active: AssetGuard<Language>,
fallback: Option<AssetGuard<Language>>,
}
impl LocalizationGuard {
2022-08-07 12:04:32 +00:00
/// !!!DEPRECATED!!!
///
/// Get a localized text from the given key
///
/// First lookup is done in the active language, second in
/// the fallback (if present).
/// If the key is not present in the localization object
/// then the key itself is returned.
///
2022-08-07 12:04:32 +00:00
/// # NOTE:
/// This function shouldn't be used in new code.
/// It is kept for compatibility with old code that uses
/// old style dot-separated keys and this function internally
/// replaces them with dashes.
// FIXME (i18n old style keys):
// this is deprecated, fix all usages of this asap
pub fn get(&self, key: &str) -> Cow<str> {
// Fluent uses `-` as informal separator, while in the past with our
// RON based system we used `.` for that purpose.
self.get_msg(&key.replace('.', "-"))
}
2021-07-23 11:32:00 +00:00
/// Get a localized text from the given key
///
/// First lookup is done in the active language, second in
/// the fallback (if present).
pub fn try_msg(&self, key: &str) -> Option<Cow<str>> {
self.active
.try_msg(key, None)
.or_else(|| self.fallback.as_ref().and_then(|fb| fb.try_msg(key, None)))
}
/// Get a localized text from the given key
///
/// First lookup is done in the active language, second in
/// the fallback (if present).
/// If the key is not present in the localization object
/// then the key itself is returned.
pub fn get_msg(&self, key: &str) -> Cow<str> {
// NOTE: we clone the key if translation was missing
// We could use borrowed version, but it would mean that
// `key`, `self`, and result should have the same lifetime.
2022-08-07 15:09:30 +00:00
// Which would make it way more awkward to use with runtime generated keys.
self.try_msg(key)
.unwrap_or_else(|| Cow::Owned(key.to_owned()))
2021-07-23 11:32:00 +00:00
}
/// Get a localized text from the given key using given arguments
2021-07-23 11:32:00 +00:00
///
/// First lookup is done in the active language, second in
/// the fallback (if present).
pub fn try_msg_ctx<'a>(&'a self, key: &str, args: &'a FluentArgs) -> Option<Cow<'static, str>> {
// NOTE: as after using args we get our result owned (because you need
// to clone pattern during forming value from args), this conversion
2022-08-07 12:04:32 +00:00
// to Cow::Owned is no-op.
// We could use String here, but using Cow everywhere in i18n API is
// prefered for consistency.
self.active
.try_msg(key, Some(args))
.or_else(|| {
self.fallback
.as_ref()
.and_then(|fb| fb.try_msg(key, Some(args)))
})
.map(|x| {
// NOTE:
// Hack. Remove Unicode Directionality Marks, conrod doesn't support them.
let res = x.replace('\u{2068}', "").replace('\u{2069}', "");
Cow::Owned(res)
})
}
/// Get a localized text from the given key using given arguments
2021-07-23 11:32:00 +00:00
///
/// First lookup is done in the active language, second in
/// the fallback (if present).
2021-07-23 11:32:00 +00:00
/// If the key is not present in the localization object
/// then the key itself is returned.
pub fn get_msg_ctx<'a>(&'a self, key: &str, args: &'a FluentArgs) -> Cow<'static, str> {
self.try_msg_ctx(key, args)
.unwrap_or_else(|| Cow::Owned(key.to_owned()))
}
pub fn try_variation(&self, key: &str, seed: u16) -> Option<Cow<str>> {
self.active.try_variation(key, seed, None).or_else(|| {
2021-07-23 11:32:00 +00:00
self.fallback
.as_ref()
.and_then(|fb| fb.try_variation(key, seed, None))
2021-07-23 11:32:00 +00:00
})
}
pub fn get_variation(&self, key: &str, seed: u16) -> Cow<str> {
self.try_variation(key, seed)
.unwrap_or_else(|| Cow::Owned(key.to_owned()))
2021-07-23 11:32:00 +00:00
}
pub fn try_variation_ctx<'a>(
&'a self,
key: &str,
seed: u16,
args: &'a FluentArgs,
) -> Option<Cow<str>> {
self.active
.try_variation(key, seed, Some(args))
.or_else(|| {
self.fallback
.as_ref()
.and_then(|fb| fb.try_variation(key, seed, Some(args)))
})
.map(|x| {
// NOTE:
// Hack. Remove Unicode Directionality Marks, conrod doesn't support them.
let res = x.replace('\u{2068}', "").replace('\u{2069}', "");
Cow::Owned(res)
})
}
pub fn get_variation_ctx<'a>(&'a self, key: &str, seed: u16, args: &'a FluentArgs) -> Cow<str> {
self.try_variation_ctx(key, seed, args)
.unwrap_or_else(|| Cow::Owned(key.to_owned()))
2021-07-23 11:32:00 +00:00
}
#[must_use]
2021-07-23 11:32:00 +00:00
pub fn fonts(&self) -> &Fonts { &self.active.fonts }
#[must_use]
2021-07-23 11:32:00 +00:00
pub fn metadata(&self) -> &LanguageMetadata { &self.active.metadata }
}
impl LocalizationHandle {
pub fn set_english_fallback(&mut self, use_english_fallback: bool) {
self.use_english_fallback = use_english_fallback;
}
#[must_use]
2021-07-23 11:32:00 +00:00
pub fn read(&self) -> LocalizationGuard {
LocalizationGuard {
active: self.active.read(),
fallback: if self.use_english_fallback {
self.fallback.map(|f| f.read())
} else {
None
},
}
}
/// # Errors
2022-08-07 15:09:30 +00:00
/// Returns error if active of fallback language can't be loaded
pub fn load(specifier: &str) -> Result<Self, assets::Error> {
2021-07-23 11:32:00 +00:00
let default_key = ["voxygen.i18n.", REFERENCE_LANG].concat();
let language_key = ["voxygen.i18n.", specifier].concat();
let is_default = language_key == default_key;
2021-12-12 19:01:52 +00:00
let active = Language::load(&language_key)?;
2021-07-23 11:32:00 +00:00
Ok(Self {
2021-12-12 19:01:52 +00:00
active,
watcher: active.reload_watcher(),
2021-07-23 11:32:00 +00:00
fallback: if is_default {
None
} else {
Some(Language::load(&default_key)?)
2021-07-23 11:32:00 +00:00
},
use_english_fallback: false,
})
}
#[must_use]
2021-07-23 11:32:00 +00:00
pub fn load_expect(specifier: &str) -> Self {
Self::load(specifier).expect("Can't load language files")
}
2021-12-12 19:01:52 +00:00
pub fn reloaded(&mut self) -> bool { self.watcher.reloaded() }
2021-07-23 11:32:00 +00:00
}
struct FindManifests;
impl assets::DirLoadable for FindManifests {
fn select_ids<S: assets::Source + ?Sized>(
2021-07-23 11:32:00 +00:00
source: &S,
specifier: &str,
) -> io::Result<Vec<assets::SharedString>> {
2021-07-23 11:32:00 +00:00
let mut specifiers = Vec::new();
source.read_dir(specifier, &mut |entry| {
if let DirEntry::Directory(spec) = entry {
let manifest_spec = [spec, ".", "_manifest"].concat();
if source.exists(DirEntry::File(&manifest_spec, "ron")) {
2021-07-23 11:32:00 +00:00
specifiers.push(manifest_spec.into());
}
}
})?;
Ok(specifiers)
}
}
#[derive(Clone, Debug)]
struct LocalizationList(Vec<LanguageMetadata>);
impl assets::Compound for LocalizationList {
fn load(cache: assets::AnyCache, specifier: &str) -> Result<Self, assets::BoxedError> {
2021-07-23 11:32:00 +00:00
// List language directories
let languages = assets::load_dir::<FindManifests>(specifier, false)
2021-07-23 11:32:00 +00:00
.unwrap_or_else(|e| panic!("Failed to get manifests from {}: {:?}", specifier, e))
.ids()
.filter_map(|spec| cache.load::<raw::Manifest>(spec).ok())
2021-07-23 11:32:00 +00:00
.map(|localization| localization.read().metadata.clone())
.collect();
Ok(LocalizationList(languages))
}
}
/// Load all the available languages located in the voxygen asset directory
#[must_use]
2021-07-23 11:32:00 +00:00
pub fn list_localizations() -> Vec<LanguageMetadata> {
let LocalizationList(list) = LocalizationList::load_expect_cloned("voxygen.i18n");
list
2021-07-23 11:32:00 +00:00
}
#[cfg(test)]
mod tests {
use super::*;
2021-07-23 11:32:00 +00:00
#[test]
2022-08-07 12:04:32 +00:00
// Test that localization list is loaded (not empty)
fn check_localization_list() {
let list = list_localizations();
2021-07-23 11:32:00 +00:00
assert!(!list.is_empty());
}
#[test]
2022-08-07 12:04:32 +00:00
// Test that reference language can be loaded
fn validate_reference_language() { let _ = LocalizationHandle::load_expect(REFERENCE_LANG); }
2021-07-23 11:32:00 +00:00
#[test]
2022-08-07 12:04:32 +00:00
// Test to verify that all languages are valid and loadable
fn validate_all_localizations() {
let list = list_localizations();
for meta in list {
let _ = LocalizationHandle::load_expect(&meta.language_identifier);
}
2021-07-23 11:32:00 +00:00
}
#[test]
#[ignore]
#[cfg(feature = "stat")]
// Generate translation stats
2021-07-23 11:32:00 +00:00
fn test_all_localizations() {
use analysis::{Language, ReferenceLanguage};
use assets::find_root;
use std::{fs, io::Write, path::Path};
let root = find_root().unwrap();
let output = root.join("translation_analysis.csv");
let mut f = fs::File::create(output).expect("couldn't write csv file");
writeln!(
f,
"country_code,file_name,translation_key,status,git_commit"
)
.unwrap();
let i18n_directory = root.join("assets/voxygen/i18n");
let reference = ReferenceLanguage::at(&i18n_directory.join(REFERENCE_LANG));
let list = list_localizations();
let file = |filename: Option<String>| {
let file = filename
.as_ref()
.map(|s| Path::new(s))
.and_then(|p| p.file_name())
.and_then(|s| s.to_str())
.unwrap_or("None");
format!("{file}")
};
for meta in list {
let code = meta.language_identifier;
let lang = Language {
code: code.clone(),
path: i18n_directory.join(code.clone()),
};
let stats = reference.compare_with(&lang);
for key in stats.up_to_date {
let code = &code;
let filename = &file(key.file);
let key = &key.key;
writeln!(f, "{code},{filename},{key},UpToDate,None").unwrap();
}
for key in stats.not_found {
let code = &code;
let filename = &file(key.file);
let key = &key.key;
writeln!(f, "{code},{filename},{key},NotFound,None").unwrap();
}
for key in stats.unused {
let code = &code;
let filename = &file(key.file);
let key = &key.key;
writeln!(f, "{code},{filename},{key},Unused,None").unwrap();
}
}
2021-07-23 11:32:00 +00:00
}
}