Spaces:

devusman
/

analysis_tool

Sleeping

File size: 16,598 Bytes

6058f1c
 
 
42693f7
05d3a8d
42693f7
 
05d3a8d
6058f1c
05d3a8d
 
 
 
42693f7
7abf422
6058f1c
7abf422
6058f1c
 
42693f7
7abf422
6058f1c
7abf422
e90e953
 
 
 
 
 
 
 
 
6058f1c
 
e90e953
 
7abf422
5a3f6ab
 
7abf422
e8fa023
7abf422
 
e8fa023
 
 
 
 
 
 
5a3f6ab
7abf422
05d3a8d
 
7abf422
 
 
6058f1c
7abf422
96e4672
31edf0b
05d3a8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42693f7
 
7abf422
6058f1c
7abf422
6058f1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
699c193
6058f1c
 
 
 
 
05d3a8d
6058f1c
 
 
 
e8fa023
96e4672
05d3a8d
6058f1c
 
e8fa023
6058f1c
e8fa023
6058f1c
 
05d3a8d
 
 
6058f1c
 
7abf422
05d3a8d
6058f1c
 
 
 
 
 
 
 
 
 
 
 
05d3a8d
 
 
6058f1c
05d3a8d
 
 
 
 
 
6058f1c
 
05d3a8d
 
6058f1c
 
 
e8fa023
 
6058f1c
05d3a8d
e8fa023
05d3a8d
6058f1c
 
05d3a8d
6058f1c
 
 
 
 
 
 
 
e8fa023
6058f1c
 
 
e8fa023
6058f1c
e8fa023
 
6058f1c
 
e8fa023
 
6058f1c
e8fa023
6058f1c
 
 
 
699c193
e8fa023
 
6058f1c
e8fa023
6058f1c
 
 
96e4672
699c193
05d3a8d
6058f1c
 
 
 
4f5a1e9
6058f1c
e8fa023
6058f1c
05d3a8d
e8fa023
96e4672
05d3a8d
 
31edf0b
6058f1c
 
 
 
 
 
e8fa023
6058f1c
05d3a8d
7abf422
6058f1c
e8fa023
05d3a8d
6058f1c
05d3a8d
4f5a1e9
6058f1c
e8fa023
 
6058f1c
e8fa023
6058f1c
 
 
 
 
 
 
e8fa023
 
6058f1c
e8fa023
6058f1c
 
7abf422
e8fa023
 
 
4f5a1e9
7abf422
6058f1c
7abf422
6058f1c
 
 
42693f7
 
e8fa023
7abf422
6058f1c
 
 
 
7abf422
 
42693f7
96e4672
6058f1c
e8fa023
05d3a8d
 
42693f7
7abf422
 
8153da1
7abf422
6058f1c
 
8153da1
96e4672
699c193
6058f1c
 
 
 
42693f7
e8fa023
7abf422
e8fa023
 
 
 
 
 
 
 
4f5a1e9
6058f1c
 
 
 
7abf422
 
 
 
e8fa023
7abf422
6058f1c
 
 
7abf422
8153da1
96e4672
6058f1c
 
4f5a1e9
6058f1c
7abf422
4f5a1e9
96e4672
7abf422
4f5a1e9
699c193
96e4672
42693f7
 
6058f1c
4f5a1e9
8153da1
4f5a1e9
 
7abf422
6058f1c

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import traceback
from flask import Flask, request, jsonify
from flask_cors import CORS

# Import spaCy optionally: if the library is missing, keep this module importable and report the problem later
try:
    import spacy
except Exception:
    spacy = None

# ------------------------------
# Config
# ------------------------------
# Upper bound, in characters, on the sentence accepted by the API (avoids huge inputs).
MAX_SENTENCE_LENGTH = 2000

# Dependency labels whose token subtree is reported as a subordinate clause.
SUBORD_DEPS = {
    "acl:relcl",
    "advcl",
    "ccomp",
    "csubj",
    "xcomp",
    "acl",
    "parataxis",
}

# ------------------------------
# Utility dictionaries (Italian)
# ------------------------------
# Part-of-speech tag -> Italian name shown to the user.
# Tags missing from this table are displayed unchanged by spiega_in_italiano().
SPIEGAZIONI_POS_IT = {
    "ADJ": "Aggettivo",
    "ADP": "Preposizione",
    "ADV": "Avverbio",
    "AUX": "Ausiliare",
    "CONJ": "Congiunzione",
    "CCONJ": "Congiunzione Coordinante",
    "SCONJ": "Congiunzione Subordinante",
    "DET": "Determinante",
    "INTJ": "Interiezione",
    "NOUN": "Sostantivo",
    "NUM": "Numerale",
    "PART": "Particella",
    "PRON": "Pronome",
    "PROPN": "Nome Proprio",
    "PUNCT": "Punteggiatura",
    "SPACE": "Spazio",
    "SYM": "Simbolo",
    "VERB": "Verbo",
    "X": "Altro",
}

# Named-entity label -> Italian name.
# Only these four labels are translated; any other label is shown as-is
# by spiega_in_italiano().
SPIEGAZIONI_ENT_IT = {
    "PER": "Persona",
    "LOC": "Luogo",
    "ORG": "Organizzazione",
    "MISC": "Miscellanea",
}

# Morphological feature name -> Italian name (used by traduci_morfologia()).
KEY_MAP = {
    "Gender": "Genere",
    "Number": "Numero",
    "Mood": "Modo",
    "Tense": "Tempo",
    "Person": "Persona",
    "VerbForm": "Forma del Verbo",
    "PronType": "Tipo di Pronome",
    "Clitic": "Clitico",
    "Definite": "Definitezza",
    "Degree": "Grado",
    "Case": "Caso",
    "Poss": "Possessivo",
    "Reflex": "Riflessivo",
    "Aspect": "Aspetto",
    "Voice": "Voce",
}
# Morphological feature value -> Italian name (used by traduci_morfologia()).
# This table is keyed by value only; values whose meaning depends on the
# feature they belong to (e.g. "Imp", "Ind") are overridden in PAIR_VALUE_MAP.
VALUE_MAP = {
    "Masc": "Maschile", "Fem": "Femminile", "Sing": "Singolare", "Plur": "Plurale", "Cnd": "Condizionale",
    "Sub": "Congiuntivo", "Ind": "Indicativo", "Imp": "Imperfetto", "Inf": "Infinito", "Part": "Participio",
    "Ger": "Gerundio", "Fin": "Finita", "Pres": "Presente", "Past": "Passato", "Fut": "Futuro", "Pqp": "Trapassato",
    "1": "1ª", "2": "2ª", "3": "3ª", "Prs": "Personale", "Rel": "Relativo", "Int": "Interrogativo", "Dem": "Dimostrativo",
    # PronType=Art marks an article: the Italian term is "Articolo"
    # ("Articolativo" is not an Italian word).
    "Art": "Articolo", "Yes": "Sì", "No": "No", "Def": "Determinato", "Indef": "Indefinito", "Abs": "Assoluto",
    "Cmp": "Comparativo", "Sup": "Superlativo", "Nom": "Nominativo", "Acc": "Accusativo", "Gen": "Genitivo",
    "Dat": "Dativo", "Perf": "Perfetto", "Prog": "Progressivo", "Act": "Attiva", "Pass": "Passiva",
}
# (feature, value) -> Italian name, for values whose translation depends on
# the feature. traduci_morfologia() consults this table before VALUE_MAP.
PAIR_VALUE_MAP = {
    ("Mood", "Imp"): "Imperativo",
    ("Tense", "Imp"): "Imperfetto",
    ("Mood", "Ind"): "Indicativo",
    ("Definite", "Ind"): "Indeterminato",
    # PronType=Ind is an indefinite pronoun/determiner; without this entry it
    # would fall back to VALUE_MAP["Ind"] and be shown as "Indicativo".
    ("PronType", "Ind"): "Indefinito",
}

# ------------------------------
# Map dependency labels to Italian labels and explanations
# ------------------------------
# (dependency label, Italian label, Italian description) rows, in table order.
_VOCI_MAPPA_DEP = (
    ("nsubj", "Soggetto", "Indica chi o cosa compie l'azione o si trova in un certo stato."),
    ("nsubj:pass", "Soggetto (Passivo)", "Soggetto di una frase in forma passiva."),
    ("ROOT", "Predicato Verbale", "Esprime l'azione, l'esistenza o lo stato del soggetto."),
    ("obj", "Complemento Oggetto", "Indica l'oggetto diretto dell'azione. Risponde alla domanda: chi? / che cosa?"),
    ("iobj", "Complemento di Termine", "Indica a chi o a cosa è destinata l'azione. Risponde alla domanda: a chi? / a che cosa?"),
    ("obl", "Complemento Indiretto", "Fornisce informazioni aggiuntive (luogo, tempo, modo, causa, ecc.)."),
    ("obl:agent", "Complemento d'Agente", "Indica chi compie l'azione in una frase passiva. Risponde alla domanda: da chi?"),
    ("nmod", "Complemento di Specificazione", "Specifica o definisce un altro nome. Risponde alla domanda: di chi? / di che cosa?"),
    ("amod", "Attributo", "Aggettivo che qualifica o descrive un nome a cui si riferisce."),
    ("advmod", "Complemento Avverbiale", "Modifica o precisa il significato di un verbo, aggettivo o altro avverbio."),
    ("appos", "Apposizione", "Sostantivo che si affianca a un altro per meglio identificarlo."),
    ("acl:relcl", "Proposizione Subordinata Relativa", "Frase introdotta da un pronome relativo che espande un nome."),
    ("advcl", "Proposizione Subordinata Avverbiale", "Frase che funziona come un complemento avverbiale per la principale."),
    ("ccomp", "Proposizione Subordinata Oggettiva", "Frase che funge da complemento oggetto del verbo della principale."),
    ("csubj", "Proposizione Subordinata Soggettiva", "Frase che funge da soggetto del verbo della principale."),
    ("xcomp", "Complemento Predicativo", "Completa il significato del verbo riferendosi al soggetto o all'oggetto."),
    ("conj", "Elemento Coordinato", "Elemento collegato a un altro con la stessa funzione logica."),
    ("cc", "Congiunzione Coordinante", "Congiunzione (es. e, ma, o) che collega elementi con la stessa funzione."),
    ("cop", "Copula", "Verbo 'essere' che collega il soggetto a un nome o aggettivo (parte nominale)."),
)

# Dependency label -> {"label": ..., "description": ...} shown to the user.
MAPPA_DEP = {
    dep: {"label": etichetta, "description": descrizione}
    for dep, etichetta, descrizione in _VOCI_MAPPA_DEP
}

# ------------------------------
# Model load helper (non-fatal: load failures are returned as a message, not raised)
# ------------------------------
def load_it_model():
    """
    Load the first available Italian spaCy model, trying lg, then md, then sm.

    Returns a tuple (nlp, model_name, error_message):
      * on success: (loaded pipeline, name of the model, None);
      * on failure: (None, None, Italian message describing what to install).
    Failures are never raised to the caller.
    """
    if spacy is None:
        return None, None, "La libreria spaCy non è installata. Esegui: pip install spacy"

    ultimo_errore = None
    for nome_modello in ("it_core_news_lg", "it_core_news_md", "it_core_news_sm"):
        try:
            pipeline = spacy.load(nome_modello)
        except Exception as exc:
            # Remember the failure and fall through to the next candidate.
            ultimo_errore = exc
        else:
            return pipeline, nome_modello, None

    # No candidate could be loaded: report the last error together with install hints.
    messaggio = (
        "Impossibile caricare un modello italiano spaCy. "
        "Installa almeno uno tra: it_core_news_lg / it_core_news_md / it_core_news_sm.\n"
        "Esempio: python -m spacy download it_core_news_lg\n"
        f"Dettagli ultimo errore: {ultimo_errore}"
    )
    return None, None, messaggio

# Load the model once, at import time. On failure nlp and IT_MODEL are None and
# MODEL_LOAD_ERROR holds the message returned by load_it_model().
nlp, IT_MODEL, MODEL_LOAD_ERROR = load_it_model()

# ------------------------------
# Small helper converters
# ------------------------------
def spiega_in_italiano(tag, tipo='pos'):
    """
    Translate a tag into its Italian name.

    tipo selects the table: 'pos' uses SPIEGAZIONI_POS_IT, 'ent' uses
    SPIEGAZIONI_ENT_IT. The tag itself is returned when it is not in the
    selected table or when tipo is neither 'pos' nor 'ent'.
    """
    if tipo == 'pos':
        tabella = SPIEGAZIONI_POS_IT
    elif tipo == 'ent':
        tabella = SPIEGAZIONI_ENT_IT
    else:
        return tag
    return tabella.get(tag, tag)

def traduci_morfologia(morph_str: str) -> str:
    """
    Translate a "Key=Value|Key=Value" morphology string into Italian.

    Each pair becomes "<key>: <value>", with the key looked up in KEY_MAP and
    the value in PAIR_VALUE_MAP (by (key, value)) and then in VALUE_MAP;
    unknown keys and values are kept as they are. Segments without '=' are
    ignored. Returns "Non disponibile" for an empty input, for "___", or when
    no pair could be extracted.
    """
    non_disponibile = "Non disponibile"
    if not morph_str or morph_str == "___":
        return non_disponibile

    coppie_tradotte = []
    for segmento in morph_str.split('|'):
        chiave, separatore, valore = segmento.partition('=')
        if not separatore:
            # No '=' in this segment: nothing to translate.
            continue
        try:
            # Feature-specific translation wins over the generic one.
            valore_it = PAIR_VALUE_MAP[(chiave, valore)]
        except KeyError:
            valore_it = VALUE_MAP.get(valore, valore)
        coppie_tradotte.append(f"{KEY_MAP.get(chiave, chiave)}: {valore_it}")

    return ", ".join(coppie_tradotte) or non_disponibile

# Articulated / elided forms of the simple prepositions (preposition fused with
# the article, e.g. di + la = "della"), listed without any trailing apostrophe.
_PREPOSIZIONI_ARTICOLATE = {
    "di": ("d", "del", "dello", "della", "dell", "dei", "degli", "delle"),
    "a": ("ad", "al", "allo", "alla", "all", "ai", "agli", "alle"),
    "da": ("dal", "dallo", "dalla", "dall", "dai", "dagli", "dalle"),
    "in": ("nel", "nello", "nella", "nell", "nei", "negli", "nelle"),
    "con": ("col", "collo", "colla", "coll", "coi", "cogli", "colle"),
    "su": ("sul", "sullo", "sulla", "sull", "sui", "sugli", "sulle"),
}
# Reverse index: articulated form -> simple preposition.
_BASE_PER_FORMA = {
    forma: base
    for base, forme in _PREPOSIZIONI_ARTICOLATE.items()
    for forma in forme
}

# Simple preposition -> (complement label, description).
_COMPLEMENTI_PER_PREPOSIZIONE = {
    "di": ("Complemento di Specificazione", "Risponde alla domanda: di chi? / di che cosa?"),
    "a": ("Complemento di Termine", "Risponde alla domanda: a chi? / a che cosa?"),
    "da": ("Complemento di Moto da Luogo / Origine", "Risponde alla domanda: da dove?"),
    "in": ("Complemento di Stato in Luogo", "Risponde alla domanda: dove?"),
    "con": ("Complemento di Compagnia o Mezzo", "Risponde alla domanda: con chi? / con che cosa?"),
    "su": ("Complemento di Argomento o Luogo", "Risponde alla domanda: su chi? / su che cosa? / dove?"),
    "per": ("Complemento di Fine o Causa", "Risponde alla domanda: per quale fine? / per quale causa?"),
    "tra": ("Complemento Partitivo / Luogo", "Risponde alla domanda: tra chi? / tra cosa?"),
    "fra": ("Complemento Partitivo / Luogo", "Risponde alla domanda: fra chi? / fra cosa?"),
    "sopra": ("Complemento di Luogo", "Risponde alla domanda: dove?"),
    "sotto": ("Complemento di Luogo", "Risponde alla domanda: dove?"),
}


def ottieni_tipo_complemento_con_dettagli(token):
    """
    Refine the complement label of a token from the preposition that introduces it.

    The first child of `token` whose dep_ is 'case' is taken as the
    preposition. Its text is lower-cased, stripped of a trailing apostrophe and
    reduced to the simple preposition when it is an articulated form
    ("della" -> "di", "nell'" -> "in", "col" -> "con").

    Returns a dict {"label": ..., "description": ...}:
      * the entry for the preposition, when it is one of the known ones;
      * "Complemento d'Agente" when the preposition is "da" and the head of
        `token` has a child whose dep_ ends with 'agent' or equals 'aux:pass';
      * the generic MAPPA_DEP["obl"] entry when there is no 'case' child or the
        preposition is not recognised.
    """
    case_token = next((c for c in token.children if c.dep_ == 'case'), None)
    if case_token is None:
        return MAPPA_DEP.get("obl", {"label": "Complemento", "description": "Complemento non specificato."})

    # Normalise the surface form: case-insensitive, without the elision
    # apostrophe (straight or typographic).
    prepo = case_token.text.lower().rstrip("'’")
    base = _BASE_PER_FORMA.get(prepo, prepo)

    # Exact lookup on purpose: prefix matching would miss "del"/"nel"/"col"
    # and would wrongly treat e.g. "attraverso" as "a" or "dietro" as "di".
    voce = _COMPLEMENTI_PER_PREPOSIZIONE.get(base)
    if voce is None:
        return MAPPA_DEP.get("obl", {"label": "Complemento", "description": "Complemento non specificato."})

    # 'da' next to a passive auxiliary (or an agent relation) introduces the agent.
    if base == "da" and any(
        c.dep_.endswith('agent') or c.dep_ == 'aux:pass' for c in token.head.children
    ):
        return {"label": "Complemento d'Agente", "description": "Indica da chi è compiuta l'azione in una frase passiva."}

    label, desc = voce
    return {"label": label, "description": desc}

def get_full_phrase_for_token(token):
    """
    Assemble the compact phrase headed by `token`.

    Starting from `token`, follow (transitively) the children attached with
    det, amod, case, compound, nummod, appos, fixed, flat or advmod. Direct
    'conj' children of `token` are expanded the same way, and the first 'cc'
    child of each of them is added as well.

    Returns (text, indices): the collected tokens' texts joined by single
    spaces in document order, and the set of their token indices.
    """
    deps_interni = ('det', 'amod', 'case', 'compound', 'nummod', 'appos', 'fixed', 'flat', 'advmod')
    indici = set()

    def raccogli(radice):
        # Depth-first walk with an explicit stack; already-seen tokens are skipped.
        pila = [radice]
        while pila:
            corrente = pila.pop()
            if corrente.i in indici:
                continue
            indici.add(corrente.i)
            pila.extend(figlio for figlio in corrente.children if figlio.dep_ in deps_interni)

    raccogli(token)

    # Coordinated elements hang off the head as 'conj'; keep them in the phrase
    # together with the conjunction that introduces them.
    for coordinato in token.children:
        if coordinato.dep_ != 'conj':
            continue
        raccogli(coordinato)
        congiunzione = next(filter(lambda n: n.dep_ == 'cc', coordinato.children), None)
        if congiunzione:
            indici.add(congiunzione.i)

    ordinati = sorted(indici)
    documento = token.doc
    testo = " ".join(documento[i].text for i in ordinati)
    return testo, set(ordinati)

def costruisci_sintagmi_con_dettagli(tokens_proposizione):
    """
    Produce the structured analysis of a clause, one entry per phrase.

    tokens_proposizione is the list of tokens of the clause (the caller is
    expected to have removed punctuation and whitespace). Each entry is a dict
    with "text" (the phrase), "label_info" (label and description of the
    syntactic role), "token_details" (lemma, pos, tag, morph of the head token)
    and "token_index". Entries are returned sorted by token index.
    """
    deps_modificatori = {'det', 'amod', 'case', 'aux', 'aux:pass', 'cop', 'mark', 'cc', 'compound', 'appos', 'punct'}
    deps_complemento = ('obl', 'obl:agent', 'nmod')

    def dettagli(tok):
        # Grammatical details of a single token, with Italian explanations.
        pos = tok.pos_
        return {
            "lemma": getattr(tok, "lemma_", tok.text),
            "pos": f"{pos}: {spiega_in_italiano(pos, 'pos')}",
            "tag": getattr(tok, "tag_", ""),
            "morph": traduci_morfologia(str(getattr(tok, "morph", ""))),
        }

    voci = []
    gia_coperti = set()

    # First pass: every token that heads a phrase gets its own entry.
    for tok in tokens_proposizione:
        if tok.i in gia_coperti:
            continue
        # Modifiers are reported inside the phrase of their head, not on their own.
        e_modificatore = tok.dep_ in deps_modificatori and tok.head.i != tok.i
        if e_modificatore:
            continue

        testo, indici_sintagma = get_full_phrase_for_token(tok)

        relazione = tok.dep_
        if relazione in deps_complemento:
            etichetta = ottieni_tipo_complemento_con_dettagli(tok)
        else:
            etichetta = MAPPA_DEP.get(
                relazione,
                {"label": relazione.capitalize(), "description": "Relazione non mappata."},
            )

        voci.append({
            "text": testo,
            "label_info": etichetta,
            "token_details": dettagli(tok),
            "token_index": tok.i,
        })
        gia_coperti |= indici_sintagma

    # Second pass: copulas and coordinating conjunctions not absorbed by any phrase.
    for tok in tokens_proposizione:
        if tok.i in gia_coperti or tok.dep_ not in ('cop', 'cc'):
            continue
        voci.append({
            "text": tok.text,
            "label_info": MAPPA_DEP.get(tok.dep_, {"label": tok.dep_, "description": ""}),
            "token_details": dettagli(tok),
            "token_index": tok.i,
        })
        gia_coperti.add(tok.i)

    return sorted(voci, key=lambda voce: voce['token_index'])

def analizza_proposizione_con_dettagli(tokens):
    """
    Analyse a clause, ignoring punctuation and whitespace tokens.

    Returns whatever costruisci_sintagmi_con_dettagli() produces for the
    remaining tokens.
    """
    tokens_validi = []
    for tok in tokens:
        if tok.is_punct or tok.is_space:
            continue
        tokens_validi.append(tok)
    return costruisci_sintagmi_con_dettagli(tokens_validi)

# ------------------------------
# Flask app
# ------------------------------
# Create the Flask application and apply flask_cors.CORS to it.
# No options are passed to CORS, so the extension's defaults apply.
app = Flask(__name__)
CORS(app)

@app.route("/")
def home():
    """
    Status endpoint: report whether the service is up and which model is loaded.

    The JSON body carries a fixed message, the model name ("Nessuno" when no
    model was loaded), "model_status" ("ok" or "model_missing") and the model
    load error message, if any.
    """
    stato = {
        "messaggio": "API analisi logica in esecuzione",
        "modello_spacy": IT_MODEL or "Nessuno",
        "model_status": "ok" if nlp else "model_missing",
        "model_error": MODEL_LOAD_ERROR,
    }
    return jsonify(stato)

@app.route('/api/analyze', methods=['POST'])
def analizza_frase():
    """
    Analyse the sentence sent as JSON ({"sentence": "..."}) and return its
    logical analysis.

    Responses:
      * 503 when no spaCy model is loaded;
      * 400 when the body is not a JSON object, when 'sentence' is missing,
        not a string or blank, or when it exceeds MAX_SENTENCE_LENGTH;
      * 200 with "full_sentence", "model", "main_clause",
        "subordinate_clauses" and "named_entities";
      * 500 on any unexpected error (the traceback is printed to the log).
    """
    # Basic checks
    if not nlp:
        return jsonify({"errore": "Modello spaCy non caricato.", "dettagli": MODEL_LOAD_ERROR}), 503

    try:
        dati = request.get_json(silent=True)
        # get_json() can return any JSON value (list, string, number, ...):
        # only an object can carry the 'sentence' field.
        if not isinstance(dati, dict):
            dati = {}

        frase = dati.get('sentence')
        # A non-string 'sentence' (number, list, ...) is a client error, not a
        # server fault, so it is rejected here instead of failing on .strip().
        if not isinstance(frase, str) or not frase.strip():
            return jsonify({"errore": "Frase non fornita o vuota."}), 400
        frase = frase.strip()
        if len(frase) > MAX_SENTENCE_LENGTH:
            return jsonify({"errore": "Frase troppo lunga.", "max_length": MAX_SENTENCE_LENGTH}), 400

        doc = nlp(frase)

        proposizioni_subordinate = []
        indici_subordinate = set()

        # Detect subordinate clauses: each token whose dependency is in
        # SUBORD_DEPS heads one, unless it already belongs to a clause found earlier.
        for token in doc:
            if token.dep_ in SUBORD_DEPS and token.i not in indici_subordinate:
                subtree = list(token.subtree)
                indici_subordinate.update(t.i for t in subtree)
                info_tipo = MAPPA_DEP.get(
                    token.dep_,
                    {"label": "Proposizione Subordinata", "description": "Frase che dipende da un'altra."},
                )
                proposizioni_subordinate.append({
                    "type_info": info_tipo,
                    # Same filter as the main clause text: no punctuation, no whitespace tokens.
                    "text": " ".join(
                        t.text for t in subtree if not t.is_punct and not t.is_space
                    ).strip(),
                    "analysis": analizza_proposizione_con_dettagli(subtree)
                })

        # The main clause is made of the tokens outside every subordinate subtree.
        token_principale = [
            t for t in doc
            if t.i not in indici_subordinate and not t.is_punct and not t.is_space
        ]

        # Named entities, keeping only the first occurrence of each text.
        entita_nominate = []
        visti = set()
        for ent in doc.ents:
            if ent.text not in visti:
                visti.add(ent.text)
                entita_nominate.append({
                    "text": ent.text,
                    "label": ent.label_,
                    "explanation": spiega_in_italiano(ent.label_, 'ent')
                })

        analisi_finale = {
            "full_sentence": frase,
            "model": IT_MODEL,
            "main_clause": {
                "text": " ".join(t.text for t in token_principale).strip(),
                "analysis": analizza_proposizione_con_dettagli(token_principale)
            },
            "subordinate_clauses": proposizioni_subordinate,
            "named_entities": entita_nominate
        }

        return jsonify(analisi_finale)

    except Exception as e:
        # Print the traceback to the server log for debugging.
        # NOTE(review): "dettagli" exposes the exception text to the client;
        # kept for backward compatibility, consider dropping it in production.
        traceback.print_exc()
        return jsonify({"errore": "Si è verificato un errore interno.", "dettagli": str(e)}), 500

if __name__ == '__main__':
    # Listening port comes from the PORT environment variable (default 8080).
    port = int(os.getenv("PORT", 8080))
    # debug stays off for production; turn it on only during development.
    app.run(
        host="0.0.0.0",
        port=port,
        debug=False,
        threaded=True,
    )