summaryrefslogtreecommitdiffstats
path: root/core/taxonomy.py
diff options
context:
space:
mode:
authorDanilo M. <danix@danix.xyz>2026-05-03 09:55:27 +0200
committerDanilo M. <danix@danix.xyz>2026-05-03 09:55:27 +0200
commitcf1c58081d68fe7f8833bc32589b4feb7210165f (patch)
tree0dbeb997296640395cfe6e484812b98bfd674e76 /core/taxonomy.py
parentb4d3b526e232804e06b84d93628244cecd035df5 (diff)
downloadpublisher-cf1c58081d68fe7f8833bc32589b4feb7210165f.tar.gz
publisher-cf1c58081d68fe7f8833bc32589b4feb7210165f.zip
feat: taxonomy load/save with IT/EN pair alignment
Diffstat (limited to 'core/taxonomy.py')
-rw-r--r--core/taxonomy.py23
1 files changed, 23 insertions, 0 deletions
diff --git a/core/taxonomy.py b/core/taxonomy.py
new file mode 100644
index 0000000..9d71a91
--- /dev/null
+++ b/core/taxonomy.py
@@ -0,0 +1,23 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+from pathlib import Path
+
# Taxonomy files are always read and written as UTF-8 so that accented
# terms survive regardless of the platform's default locale encoding.
_TAXONOMY_ENCODING = "utf-8"


@dataclass
class TaxonomyModel:
    """Italian/English taxonomy terms paired by their position in two files.

    ``it_to_en`` maps each Italian term to the English term found at the
    same position. When one list is longer than the other, the unpaired
    tail is kept in ``orphans_it`` or ``orphans_en``.
    """

    it_to_en: dict[str, str]
    # IT terms with no EN pair
    orphans_it: list[str] = field(default_factory=list)
    # EN terms with no IT pair
    orphans_en: list[str] = field(default_factory=list)


def _read_terms(path: Path) -> list[str]:
    """Return the stripped, non-blank lines of *path* ([] if it is missing)."""
    if not path.exists():
        return []
    stripped = (line.strip() for line in path.read_text(encoding=_TAXONOMY_ENCODING).splitlines())
    return [term for term in stripped if term]


def load_taxonomy(it_path: Path, en_path: Path) -> TaxonomyModel:
    """Load the Italian and English term lists and align them by position.

    Each file holds one term per line; surrounding whitespace is stripped
    and blank lines are ignored. A path that does not exist is treated as
    an empty list. The n-th Italian term is paired with the n-th English
    term; if an Italian term occurs more than once, the last pairing wins.

    Args:
        it_path: File containing the Italian terms.
        en_path: File containing the English terms.

    Returns:
        A ``TaxonomyModel`` with the paired terms and whatever terms were
        left over at the end of the longer list.
    """
    it_terms = _read_terms(it_path)
    en_terms = _read_terms(en_path)
    paired = min(len(it_terms), len(en_terms))
    return TaxonomyModel(
        # zip() stops at the shorter list, i.e. after exactly `paired` items.
        it_to_en=dict(zip(it_terms, en_terms)),
        orphans_it=it_terms[paired:],
        orphans_en=en_terms[paired:],
    )


def save_taxonomy(model: TaxonomyModel, it_path: Path, en_path: Path) -> None:
    """Write the paired terms of *model* to the two term files.

    Pairs are sorted by Italian term and written one term per line, so
    line n of ``it_path`` corresponds to line n of ``en_path``. Both files
    are overwritten. Only ``model.it_to_en`` is written; ``orphans_it`` and
    ``orphans_en`` are not persisted.

    Args:
        model: Taxonomy whose paired terms are saved.
        it_path: Destination for the Italian terms.
        en_path: Destination for the English terms.
    """
    pairs = sorted(model.it_to_en.items(), key=lambda pair: pair[0])
    it_lines = [it_term for it_term, _ in pairs]
    en_lines = [en_term for _, en_term in pairs]
    it_path.write_text("\n".join(it_lines) + "\n", encoding=_TAXONOMY_ENCODING)
    en_path.write_text("\n".join(en_lines) + "\n", encoding=_TAXONOMY_ENCODING)