Spaces:
Sleeping
Sleeping
File size: 1,237 Bytes
bcb314a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# src/merge_features.py
from pathlib import Path
import pandas as pd
from src.semantic_features import add_semantic_similarity
from src.features_q4 import q4_slot_features # <- берём функцию с признаками для Q4
# Directory one level above the folder that contains this file. Per the header
# comment this file lives in ``src/``, which would make ROOT the project root.
ROOT = Path(__file__).resolve().parents[1]

# Input CSV read by ``main``.
FEAT_PATH = ROOT.joinpath("data", "processed", "features_baseline.csv")

# Output CSV written by ``main``.
OUT_PATH = ROOT.joinpath("data", "processed", "features_with_semantics_q4.csv")
def main():
    """Build the merged feature table and write it to ``OUT_PATH``.

    Steps, in order:

    1. Read the CSV at ``FEAT_PATH``.
    2. Pass the frame through ``add_semantic_similarity``.
    3. Pass the result through ``q4_slot_features``.
    4. Write the result to ``OUT_PATH`` without the index column.
    5. Print the first rows of a fixed set of preview columns.

    Progress messages are printed to stdout and flushed immediately so they
    show up even when output is buffered.
    """
    print(f"🔹 Читаю базовые фичи: {FEAT_PATH}", flush=True)
    # "utf-8-sig" transparently drops a leading BOM if the file has one.
    df = pd.read_csv(FEAT_PATH, encoding="utf-8-sig")

    print("🔹 Добавляю семантическую близость (ruSBERT)...", flush=True)
    # Original author's note: this call will use the cache.
    df = add_semantic_similarity(df, batch_size=64)

    print("🔹 Добавляю rule-based признаки для Q4...", flush=True)
    df = q4_slot_features(df)

    print(f"🔹 Сохраняю итог: {OUT_PATH}", flush=True)
    df.to_csv(OUT_PATH, index=False, encoding="utf-8-sig")

    print("✅ Готово. Превью:", flush=True)
    preview_cols = [
        "question_number",
        "semantic_sim",
        "q4_slots_covered",
        "q4_answered_personal",
        "score",
    ]
    # The preview is purely informational and runs after the file is saved,
    # so a missing column must not turn a successful run into a KeyError.
    present = [col for col in preview_cols if col in df.columns]
    missing = [col for col in preview_cols if col not in df.columns]
    if missing:
        print(f"⚠️ В превью нет колонок: {missing}", flush=True)
    # With none of the preview columns available, show the whole frame's head.
    preview = df[present] if present else df
    print(preview.head(), flush=True)
# Run the pipeline only when this file is executed as a script; importing the
# module must not trigger any file I/O.
if __name__ == "__main__":
    main()
|