from __future__ import annotations from dataclasses import dataclass, field from pathlib import Path @dataclass class TaxonomyModel: it_to_en: dict[str, str] orphans_it: list[str] # IT terms with no EN pair orphans_en: list[str] # EN terms with no IT pair def load_taxonomy(it_path: Path, en_path: Path) -> TaxonomyModel: it_terms = [l.strip() for l in it_path.read_text().splitlines() if l.strip()] if it_path.exists() else [] en_terms = [l.strip() for l in en_path.read_text().splitlines() if l.strip()] if en_path.exists() else [] min_len = min(len(it_terms), len(en_terms)) it_to_en = {it_terms[i]: en_terms[i] for i in range(min_len)} orphans_it = it_terms[min_len:] orphans_en = en_terms[min_len:] return TaxonomyModel(it_to_en=it_to_en, orphans_it=orphans_it, orphans_en=orphans_en) def save_taxonomy(model: TaxonomyModel, it_path: Path, en_path: Path) -> None: pairs = sorted(model.it_to_en.items(), key=lambda x: x[0]) it_path.write_text("\n".join(k for k, _ in pairs) + "\n") en_path.write_text("\n".join(v for _, v in pairs) + "\n")