]> danix's work - publisher.git/commitdiff
feat: taxonomy load/save with IT/EN pair alignment
author Danilo M. <redacted>
Sun, 3 May 2026 07:55:27 +0000 (09:55 +0200)
committer Danilo M. <redacted>
Sun, 3 May 2026 07:55:27 +0000 (09:55 +0200)
core/taxonomy.py [new file with mode: 0644]
tests/test_taxonomy.py [new file with mode: 0644]

diff --git a/core/taxonomy.py b/core/taxonomy.py
new file mode 100644 (file)
index 0000000..9d71a91
--- /dev/null
@@ -0,0 +1,23 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+from pathlib import Path
+
+@dataclass
+class TaxonomyModel:
+    """Italian/English taxonomy terms, paired where a counterpart exists.
+
+    Every field defaults to a fresh empty container, so ``TaxonomyModel()``
+    builds an empty model; passing the fields explicitly works as before.
+    """
+
+    # Italian term -> the English term it is paired with.
+    it_to_en: dict[str, str] = field(default_factory=dict)
+    # IT terms with no EN pair.
+    orphans_it: list[str] = field(default_factory=list)
+    # EN terms with no IT pair.
+    orphans_en: list[str] = field(default_factory=list)
+
+def load_taxonomy(it_path: Path, en_path: Path) -> TaxonomyModel:
+    """Load the IT and EN term files and pair their terms line by line.
+
+    Each file holds one term per line. Lines are stripped and blank lines are
+    skipped, then the n-th IT term is paired with the n-th EN term. Terms left
+    over in the longer file are returned as orphans. A path that does not
+    exist is treated as an empty file.
+
+    Files are decoded as UTF-8 regardless of the platform locale, so accented
+    terms load the same way everywhere.
+
+    If the same IT term appears on several paired lines, the last pairing
+    wins, because the pairs are stored in a dict.
+    """
+    def _read_terms(path: Path) -> list[str]:
+        # A file that has not been created yet simply contributes no terms.
+        if not path.exists():
+            return []
+        lines = path.read_text(encoding="utf-8").splitlines()
+        return [line.strip() for line in lines if line.strip()]
+
+    it_terms = _read_terms(it_path)
+    en_terms = _read_terms(en_path)
+    # zip() stops at the shorter list; everything past that point is an orphan.
+    paired = min(len(it_terms), len(en_terms))
+    return TaxonomyModel(
+        it_to_en=dict(zip(it_terms, en_terms)),
+        orphans_it=it_terms[paired:],
+        orphans_en=en_terms[paired:],
+    )
+
+def save_taxonomy(model: TaxonomyModel, it_path: Path, en_path: Path) -> None:
+    """Write the paired terms of *model* to the IT and EN files.
+
+    Pairs are ordered by IT term. Each file gets one newline-terminated term
+    per line, so line n of the IT file pairs with line n of the EN file. Both
+    files are written as UTF-8 regardless of the platform locale. An empty
+    mapping produces two empty files.
+
+    Only ``model.it_to_en`` is written; ``orphans_it`` and ``orphans_en`` are
+    not persisted.
+    """
+    # Dict keys are unique, so sorting the keys gives the same order as
+    # sorting the (it, en) pairs by their IT term.
+    it_terms = sorted(model.it_to_en)
+    en_terms = [model.it_to_en[term] for term in it_terms]
+    it_path.write_text("".join(f"{term}\n" for term in it_terms), encoding="utf-8")
+    en_path.write_text("".join(f"{term}\n" for term in en_terms), encoding="utf-8")
diff --git a/tests/test_taxonomy.py b/tests/test_taxonomy.py
new file mode 100644 (file)
index 0000000..a10531b
--- /dev/null
@@ -0,0 +1,43 @@
+# tests/test_taxonomy.py
+import pytest
+from pathlib import Path
+from core.taxonomy import TaxonomyModel, load_taxonomy, save_taxonomy
+
+def _write_pair(tmp_path, it_lines, en_lines):
+    """Create the IT and EN term files inside *tmp_path* and return their paths.
+
+    Each list is written one term per line with a trailing newline, so an
+    empty list produces a file holding a single blank line. Files are written
+    as UTF-8 so the fixture does not depend on the platform locale.
+
+    Returns the tuple ``(it_file, en_file)``.
+    """
+    it_file = tmp_path / "tags-it.txt"
+    en_file = tmp_path / "tags-en.txt"
+    it_file.write_text("\n".join(it_lines) + "\n", encoding="utf-8")
+    en_file.write_text("\n".join(en_lines) + "\n", encoding="utf-8")
+    return it_file, en_file
+
+def test_load_taxonomy_basic(tmp_path):
+    """Files of equal length pair their terms line by line."""
+    italian = ["linux", "vita"]
+    english = ["linux", "life"]
+    it_file, en_file = _write_pair(tmp_path, italian, english)
+    mapping = load_taxonomy(it_file, en_file).it_to_en
+    assert mapping["linux"] == "linux"
+    assert mapping["vita"] == "life"
+
+def test_load_taxonomy_orphan_it(tmp_path):
+    """An IT term past the end of the EN file is reported as an IT orphan."""
+    paths = _write_pair(tmp_path, ["linux", "vita", "extra"], ["linux", "life"])
+    loaded = load_taxonomy(*paths)
+    assert "extra" in loaded.orphans_it
+
+def test_load_taxonomy_orphan_en(tmp_path):
+    """An EN term past the end of the IT file is reported as an EN orphan."""
+    paths = _write_pair(tmp_path, ["linux"], ["linux", "extra-en"])
+    loaded = load_taxonomy(*paths)
+    assert "extra-en" in loaded.orphans_en
+
+def test_save_taxonomy_round_trips(tmp_path):
+    """A pair added to a loaded model is still present after save and reload."""
+    it_file, en_file = _write_pair(tmp_path, ["linux"], ["linux"])
+    original = load_taxonomy(it_file, en_file)
+    original.it_to_en["vita"] = "life"
+    save_taxonomy(original, it_file, en_file)
+    reloaded = load_taxonomy(it_file, en_file)
+    assert reloaded.it_to_en["vita"] == "life"
+
+def test_save_taxonomy_sorted(tmp_path):
+    """Saving orders pairs by IT term and keeps each EN term on its pair's line."""
+    it_f, en_f = _write_pair(tmp_path, [], [])
+    # The EN values sort in the opposite order to the IT keys, so an EN file
+    # that was sorted on its own (instead of following the IT order) fails.
+    model = TaxonomyModel(it_to_en={"zzz": "alpha", "aaa": "omega"}, orphans_it=[], orphans_en=[])
+    save_taxonomy(model, it_f, en_f)
+    # Exact contents rather than `lines == sorted(lines)`: the latter also
+    # passes when nothing was written at all.
+    assert it_f.read_text().splitlines() == ["aaa", "zzz"]
+    assert en_f.read_text().splitlines() == ["omega", "alpha"]