summaryrefslogtreecommitdiffstats
path: root/core/taxonomy.py
blob: e72d4866b61572dc3f315e02e43fbb2a060f5edc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path

@dataclass
class TaxonomyModel:
    """In-memory view of a bilingual (IT/EN) taxonomy.

    ``load_taxonomy`` pairs the IT and EN term lists position by position;
    whatever is left over in the longer list ends up in the matching
    ``orphans_*`` field. ``save_taxonomy`` persists only ``it_to_en`` and
    ignores both orphan lists.
    """

    # Paired terms: IT term -> EN term.
    it_to_en: dict[str, str]
    orphans_it: list[str]   # IT terms with no EN pair
    orphans_en: list[str]   # EN terms with no IT pair

def _read_terms(path: Path) -> list[str]:
    """Return the stripped, non-blank lines of *path*; ``[]`` if it is missing."""
    if not path.exists():
        return []
    # Decode explicitly as UTF-8: the platform default encoding is not UTF-8
    # everywhere and would mangle (or reject) accented terms.
    stripped = (line.strip() for line in path.read_text(encoding="utf-8").splitlines())
    return [term for term in stripped if term]


def load_taxonomy(it_path: Path, en_path: Path) -> TaxonomyModel:
    """Load the IT and EN term files and pair their terms position by position.

    Each file holds one term per line. Lines are stripped and blank lines are
    dropped before pairing, so the n-th non-blank IT line is matched with the
    n-th non-blank EN line. A file that does not exist counts as empty.

    If the same IT term occurs more than once among the paired lines, the
    last occurrence wins (plain ``dict`` semantics).

    Args:
        it_path: UTF-8 text file with one IT term per line.
        en_path: UTF-8 text file with one EN term per line.

    Returns:
        A ``TaxonomyModel`` whose ``it_to_en`` maps each paired IT term to its
        EN term, and whose ``orphans_it`` / ``orphans_en`` hold the surplus
        terms of whichever list is longer (both empty when lengths match).
    """
    it_terms = _read_terms(it_path)
    en_terms = _read_terms(en_path)

    # zip() stops at the shorter list, which is exactly the paired prefix.
    it_to_en = dict(zip(it_terms, en_terms))

    paired = min(len(it_terms), len(en_terms))
    return TaxonomyModel(
        it_to_en=it_to_en,
        orphans_it=it_terms[paired:],
        orphans_en=en_terms[paired:],
    )


def save_taxonomy(model: TaxonomyModel, it_path: Path, en_path: Path) -> None:
    """Write paired terms to disk, sorted by IT term.

    Line n of ``it_path`` and line n of ``en_path`` form one pair. Orphans are
    NOT written — callers must resolve them first.

    Args:
        model: Taxonomy whose ``it_to_en`` mapping is persisted.
        it_path: Destination for the IT terms (overwritten, UTF-8).
        en_path: Destination for the EN terms (overwritten, UTF-8).
    """
    # Dict keys are unique, so ordering the keys fixes the order of the pairs.
    it_terms = sorted(model.it_to_en)
    en_terms = [model.it_to_en[term] for term in it_terms]

    # Same explicit encoding as the reader, so a save/load round trip does not
    # depend on the platform's locale.
    it_path.write_text("\n".join(it_terms) + "\n", encoding="utf-8")
    en_path.write_text("\n".join(en_terms) + "\n", encoding="utf-8")

def load_categories(path: Path) -> list[str]:
    """Read the category file and return its entries in sorted order.

    Args:
        path: UTF-8 text file with one category per line.

    Returns:
        The stripped, non-blank lines of ``path``, sorted; an empty list when
        the file does not exist.
    """
    if not path.exists():
        return []
    # Decode explicitly as UTF-8 so accented names do not depend on the
    # platform's default encoding.
    stripped = (line.strip() for line in path.read_text(encoding="utf-8").splitlines())
    return sorted(name for name in stripped if name)


def save_categories(path: Path, categories: list[str]) -> None:
    """Write ``categories`` to ``path``, one per line, in sorted order.

    Args:
        path: Destination file (overwritten, UTF-8).
        categories: Category names to persist; the list itself is not modified.
    """
    # Same explicit encoding as load_categories, so the round trip is stable
    # regardless of locale.
    path.write_text("\n".join(sorted(categories)) + "\n", encoding="utf-8")