# core/taxonomy.py
"""Load and save IT/EN taxonomy term lists that are aligned line by line."""
from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path

# Term files are always read and written as UTF-8 so that non-ASCII terms do
# not depend on the platform's locale encoding.
_ENCODING = "utf-8"


@dataclass
class TaxonomyModel:
    """In-memory view of a pair of position-aligned IT/EN term lists."""

    it_to_en: dict[str, str]  # IT term -> EN term found at the same position
    orphans_it: list[str]  # IT terms with no EN pair
    orphans_en: list[str]  # EN terms with no IT pair


def _read_terms(path: Path) -> list[str]:
    """Return the stripped, non-blank lines of *path*; ``[]`` if it does not exist."""
    if not path.exists():
        return []
    lines = path.read_text(encoding=_ENCODING).splitlines()
    return [term for term in (line.strip() for line in lines) if term]


def load_taxonomy(it_path: Path, en_path: Path) -> TaxonomyModel:
    """Build a TaxonomyModel by pairing the two term files position by position.

    Blank lines are skipped and surrounding whitespace is stripped before
    pairing. The n-th remaining IT term is mapped to the n-th remaining EN
    term; whatever is left over in the longer list is reported as orphans.
    A missing file is treated as an empty list.

    If the same IT term occurs more than once, the last occurrence wins in
    ``it_to_en`` (plain dict semantics).
    """
    it_terms = _read_terms(it_path)
    en_terms = _read_terms(en_path)
    paired = min(len(it_terms), len(en_terms))
    return TaxonomyModel(
        # zip() stops at the shorter list, i.e. after exactly `paired` items.
        it_to_en=dict(zip(it_terms, en_terms)),
        orphans_it=it_terms[paired:],
        orphans_en=en_terms[paired:],
    )


def save_taxonomy(model: TaxonomyModel, it_path: Path, en_path: Path) -> None:
    """Write the IT/EN pairs of *model* to the two term files, sorted by IT term.

    Both files are written in the same order so that line n of the IT file
    still corresponds to line n of the EN file. Each file ends with a newline.

    Only ``model.it_to_en`` is written; ``orphans_it`` and ``orphans_en`` are
    not persisted.
    NOTE(review): confirm that dropping orphans on save is intentional.
    """
    pairs = sorted(model.it_to_en.items(), key=lambda pair: pair[0])
    it_path.write_text(
        "\n".join(it_term for it_term, _ in pairs) + "\n", encoding=_ENCODING
    )
    en_path.write_text(
        "\n".join(en_term for _, en_term in pairs) + "\n", encoding=_ENCODING
    )
# tests/test_taxonomy.py
import pytest
from pathlib import Path
from core.taxonomy import TaxonomyModel, load_taxonomy, save_taxonomy


def _write_pair(tmp_path, it_lines, en_lines):
    """Write the IT and EN lines to a pair of term files and return both paths."""
    it_file = tmp_path / "tags-it.txt"
    en_file = tmp_path / "tags-en.txt"
    it_file.write_text("\n".join(it_lines) + "\n")
    en_file.write_text("\n".join(en_lines) + "\n")
    return it_file, en_file


def test_load_taxonomy_basic(tmp_path):
    """Terms at the same position in both files are paired."""
    it_f, en_f = _write_pair(tmp_path, ["linux", "vita"], ["linux", "life"])
    model = load_taxonomy(it_f, en_f)
    assert model.it_to_en["linux"] == "linux"
    assert model.it_to_en["vita"] == "life"


def test_load_taxonomy_orphan_it(tmp_path):
    """A surplus IT term is reported as an IT orphan."""
    it_f, en_f = _write_pair(tmp_path, ["linux", "vita", "extra"], ["linux", "life"])
    model = load_taxonomy(it_f, en_f)
    assert model.orphans_it == ["extra"]
    assert model.orphans_en == []


def test_load_taxonomy_orphan_en(tmp_path):
    """A surplus EN term is reported as an EN orphan."""
    it_f, en_f = _write_pair(tmp_path, ["linux"], ["linux", "extra-en"])
    model = load_taxonomy(it_f, en_f)
    assert model.orphans_en == ["extra-en"]
    assert model.orphans_it == []


def test_save_taxonomy_round_trips(tmp_path):
    """A pair added to the model survives a save followed by a load."""
    it_f, en_f = _write_pair(tmp_path, ["linux"], ["linux"])
    model = load_taxonomy(it_f, en_f)
    model.it_to_en["vita"] = "life"
    save_taxonomy(model, it_f, en_f)
    model2 = load_taxonomy(it_f, en_f)
    assert model2.it_to_en["vita"] == "life"


def test_save_taxonomy_sorted(tmp_path):
    """Saving sorts by IT term and keeps the EN file in the same order."""
    it_f, en_f = _write_pair(tmp_path, [], [])
    # Distinct IT/EN values so a misaligned EN file would be detected.
    model = TaxonomyModel(
        it_to_en={"zzz": "last", "aaa": "first"}, orphans_it=[], orphans_en=[]
    )
    save_taxonomy(model, it_f, en_f)
    assert it_f.read_text().splitlines() == ["aaa", "zzz"]
    assert en_f.read_text().splitlines() == ["first", "last"]