import re
from datetime import datetime

import dateparser
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    AutoModelForTokenClassification,
)

app = FastAPI()

# Zero-shot classification model for labelling the input text
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Seq2seq model for summarization
summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")

# General-purpose English NER model (swap in an Indic NER checkpoint if needed)
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

# Labels for classification
labels = [
    "task", "event", "reminder", "meeting", "relationship", "note", "journal", "memory", "status_update",
    "sick_notice", "out_of_office", "travel_plan", "celebration", "emotion", "other"
]


class TextInput(BaseModel):
    text: str

def extract_dates(text):
    # Find Hindi/English time expressions and parse them with dateparser
    time_expressions = re.findall(
        r'\b(kal|aaj|parso|raat|subah|shaam|dopahar|[0-9]{1,2} baje|next week|tomorrow|today|yesterday|'
        r'Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|[\d]{1,2}/[\d]{1,2}/[\d]{2,4})\b',
        text, flags=re.IGNORECASE)
    # Parse each expression once, keeping only those dateparser understands
    parsed = [str(dt) for expr in time_expressions if (dt := dateparser.parse(expr))]
    return list(set(parsed)), list(set(time_expressions))

def detect_tense(parsed_dates):
    # Compare each parsed date against "now" to tag past/present/future
    now = datetime.now()
    tenses = set()
    for d in parsed_dates:
        dt = dateparser.parse(d)
        if not dt:
            continue
        if dt < now:
            tenses.add("past")
        elif dt > now:
            tenses.add("future")
        else:
            tenses.add("present")
    return list(tenses) if tenses else ["unknown"]

def generate_summary(text):
    # FLAN-T5-style prompt: prefix the text with a "summarize:" instruction
    input_ids = summarizer_tokenizer("summarize: " + text, return_tensors="pt").input_ids
    output_ids = summarizer_model.generate(input_ids, max_length=60, num_beams=4, early_stopping=True)
    return summarizer_tokenizer.decode(output_ids[0], skip_special_tokens=True)

def extract_people(text):
    # Keep only PER entities from the aggregated NER output
    ner_results = ner_pipeline(text)
    return list(set(ent['word'] for ent in ner_results if ent['entity_group'] == 'PER'))

def estimate_mood(text):
    # Simple keyword lookup; returns the first mood whose keywords appear in the text
    text_lower = text.lower()
    mood_map = {
        "happy": ["happy", "excited", "joy", "grateful"],
        "sad": ["sad", "upset", "crying", "lonely"],
        "angry": ["angry", "annoyed", "frustrated", "irritated"],
        "nervous": ["nervous", "anxious", "scared"],
        "unwell": ["sick", "unwell", "not feeling well", "fever", "cold", "headache"],
        "neutral": []
    }
    for mood, keywords in mood_map.items():
        for kw in keywords:
            if kw in text_lower:
                return mood
    return "neutral"

def generate_tags(label, text):
    # Tags = predicted label + a few forced keywords + every word of 4+ letters
    base_tags = [label]
    keywords = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
    force_tags = []
    if any(w in text.lower() for w in ["sick", "unwell", "not feeling well", "fever"]):
        force_tags += ["sick", "leave"]
    if "work" in text.lower():
        force_tags.append("work")
    return list(set(base_tags + force_tags + keywords))

# Expose as a POST endpoint (route path assumed to be /analyze)
@app.post("/analyze")
async def analyze(input: TextInput):
    text = input.text

    # Zero-shot classification over the candidate labels
    classification = classifier(text, labels)
    best_label = classification['labels'][0]
    scores = dict(zip(classification['labels'], classification['scores']))

    parsed_dates, time_mentions = extract_dates(text)
    tenses = detect_tense(parsed_dates)
    summary = generate_summary(text)
    people = extract_people(text)
    mood = estimate_mood(text)
    tags = generate_tags(best_label, text)

    return {
        "type": best_label,
        "confidence_scores": scores,
        "time_mentions": time_mentions,
        "parsed_dates": parsed_dates,
        "tense": tenses,
        "summary": summary,
        "people": people,
        "mood": mood,
        "tags": tags
    }
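
As a quick sanity check, here is a minimal client-side sketch for exercising the endpoint. It assumes the code above is saved as app.py and served locally with uvicorn app:app --host 0.0.0.0 --port 7860 (the usual Spaces port), and that the route is the /analyze path declared above; adjust the URL to match your own deployment.

# client.py -- hypothetical example request against the running service
import requests

response = requests.post(
    "http://localhost:7860/analyze",
    json={"text": "Kal subah 10 baje meeting with Ravi, I am not feeling well today"},
)
response.raise_for_status()
result = response.json()

# The JSON mirrors the dict returned by analyze(): predicted type, per-label
# confidence scores, raw time mentions, parsed dates, tense, summary, people,
# mood, and tags.
print(result["type"], result["mood"])
print(result["people"], result["time_mentions"])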