Your Name Claude committed on
Commit
0ccc0c7
·
1 Parent(s): d78f02a

Upgrade all 3 LLM prompts for intelligent RAG responses


MAJOR IMPROVEMENTS:

1. Entity Extraction (API Call #1):
- Comprehensive synonym generation using LLM intelligence
- Examples: "Pfizer COVID vaccine" → Comirnaty, BNT162b2, tozinameran (output format sketched below)
- Increased max_tokens: 256 → 500 for expansive synonyms
- Temperature: 0.1 → 0.3 for creative synonym brainstorming
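
A minimal sketch of the line-oriented format API call #1 is prompted to return, and one way the lists might be split out downstream (the helper and the sample values are illustrative, not code from this commit):

```python
# Sample of the "FIELD: a, b, c" format the upgraded prompt requests;
# the values are made up for illustration.
sample_output = """DRUGS: Comirnaty, BNT162b2, tozinameran
DISEASES: COVID-19, SARS-CoV-2
COMPANIES: Pfizer, Pfizer-BioNTech
ENDPOINTS: none
SEARCH_TERMS: Pfizer COVID-19 vaccine Comirnaty BNT162b2"""

def parse_entities(text: str) -> dict:
    """Turn each 'FIELD: a, b, c' line into a list; 'none' becomes []."""
    entities = {}
    for line in text.splitlines():
        if ':' not in line:
            continue
        field, _, value = line.partition(':')
        value = value.strip()
        entities[field.strip().lower()] = (
            [] if value.lower() == 'none'
            else [v.strip() for v in value.split(',')]
        )
    return entities

print(parse_entities(sample_output)['drugs'])
# ['Comirnaty', 'BNT162b2', 'tozinameran']
```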

2. Planning Agent (API Call #2):
- Defaults to SEARCH_TRIALS for data-driven answers
- Added FOCUS field to guide final answer (efficacy, safety, etc.)
- Clearer routing rules with "when in doubt, search trials"
- Better handling of edge cases (routing parse sketched below)
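
The planner replies with labeled lines that are parsed defensively, falling back to SEARCH_TRIALS; a self-contained sketch mirroring the parsing logic in this commit (the reply text is an invented example, not captured model output):

```python
# Example planner reply in the ACTION/REASONING/SEARCH_TERMS/FOCUS format.
planner_reply = """ACTION: SEARCH_TRIALS
REASONING: The query asks about a specific drug's efficacy, so trial data helps.
SEARCH_TERMS: Comirnaty BNT162b2 efficacy
FOCUS: efficacy"""

# Safe defaults mean a malformed reply still routes to SEARCH_TRIALS.
plan = {'action': 'SEARCH_TRIALS', 'reasoning': '', 'params': '', 'focus': ''}
for line in planner_reply.splitlines():
    line = line.strip()
    for prefix, field in [('ACTION:', 'action'), ('REASONING:', 'reasoning'),
                          ('SEARCH_TERMS:', 'params'), ('FOCUS:', 'focus')]:
        if line.startswith(prefix):
            plan[field] = line[len(prefix):].strip()

assert plan['action'] in ['SEARCH_TRIALS', 'COUNT_AGGREGATE', 'COMPARE', 'GENERAL_KNOWLEDGE']
print(plan['focus'])  # efficacy
```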

3. Final Summary (API Call #3) - COMPLETE REWRITE:
- NEVER says "no relevant trials found"
- Intelligently extracts insights from partial matches
- Context window: 6,000 → 12,000 chars (2x increase)
- Max tokens: 1024 → 2,000 (comprehensive answers)
- Recognizes synonyms (Sinopharm = BBIBP-CorV)
- New structure: DIRECT ANSWER → KEY EVIDENCE → INSIGHTS → CONTEXT
- Works with imperfect matches (same drug, different disease = valuable!); budgets sketched below
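
A sketch of the widened budgets in API call #3, using the Groq fast path as in this commit (wrapped in a standalone function here for illustration; the real code also falls back to HuggingFace):

```python
import os
from groq import Groq

def answer(query: str, rag_context: str, system_prompt: str) -> str:
    """Final-summary call with the enlarged context and token budgets."""
    client = Groq(api_key=os.getenv("GROQ_API_KEY"))
    # Context clipped at 12,000 chars (was 6,000).
    user_prompt = f"Question: {query}\n\nAvailable Clinical Trial Data:\n{rag_context[:12000]}"
    response = client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=2000,  # raised for comprehensive answers
        temperature=0.3,
        timeout=30,
    )
    return response.choices[0].message.content
```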

FIXES:
- "No relevant information" responses when trials exist
- Weak answers from company-filtered searches (Sinopharm example)
- Missed trials due to synonym mismatches

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1)
  1. foundation_engine.py +567 -104
foundation_engine.py CHANGED
@@ -26,7 +26,7 @@ hf_token = os.getenv("HF_TOKEN")
 DATASET_FILE = Path(__file__).parent / "complete_dataset_WITH_RESULTS_FULL.txt"
 CHUNKS_FILE = Path(__file__).parent / "dataset_chunks_TRIAL_AWARE.pkl"
 EMBEDDINGS_FILE = Path(__file__).parent / "dataset_embeddings_TRIAL_AWARE_FIXED.npy" # FIXED version to avoid cache
-INVERTED_INDEX_FILE = Path(__file__).parent / "inverted_index_TRIAL_AWARE.pkl" # Pre-built inverted index (638MB)
+INVERTED_INDEX_FILE = Path(__file__).parent / "inverted_index_COMPREHENSIVE.pkl" # Pre-built inverted index (307MB)
 
 # HF Dataset containing the large files
 DATASET_REPO = "gmkdigitalmedia/foundation1.2-data"
@@ -38,6 +38,52 @@ doc_embeddings = None
 bm25_index = None # BM25 index for fast keyword search
 inverted_index = None # Inverted index for instant drug lookup
 
+# ============================================================================
+# ANALYTICS TRACKING
+# ============================================================================
+
+from collections import defaultdict, Counter
+import time as time_module
+
+class QueryAnalytics:
+    """Track query patterns and performance for monitoring"""
+
+    def __init__(self):
+        self.query_types = Counter()
+        self.response_times = defaultdict(list)
+        self.error_count = 0
+        self.total_queries = 0
+        self.start_time = time_module.time()
+
+    def record_query(self, query_type: str, response_time: float, success: bool = True):
+        """Record a query execution"""
+        self.total_queries += 1
+        self.query_types[query_type] += 1
+        self.response_times[query_type].append(response_time)
+        if not success:
+            self.error_count += 1
+        logger.info(f"[ANALYTICS] Recorded: {query_type}, {response_time:.2f}s, success={success}")
+
+    def get_stats(self):
+        """Get analytics summary"""
+        uptime = time_module.time() - self.start_time
+        stats = {
+            'total_queries': self.total_queries,
+            'uptime_seconds': uptime,
+            'error_rate': self.error_count / self.total_queries if self.total_queries > 0 else 0,
+            'query_type_distribution': dict(self.query_types),
+            'avg_response_times': {}
+        }
+
+        for query_type, times in self.response_times.items():
+            if times:
+                stats['avg_response_times'][query_type] = sum(times) / len(times)
+
+        return stats
+
+# Initialize global analytics
+query_analytics = QueryAnalytics()
+
 # ============================================================================
 # RAG FUNCTIONS
 # ============================================================================
@@ -222,6 +268,7 @@ def load_embeddings():
     chunks_path = CHUNKS_FILE
     embeddings_path = EMBEDDINGS_FILE
     dataset_path = DATASET_FILE
+    index_path = INVERTED_INDEX_FILE
 
     if not CHUNKS_FILE.exists():
         downloaded = download_from_dataset("dataset_chunks_TRIAL_AWARE.pkl")
@@ -235,6 +282,12 @@ def load_embeddings():
         downloaded = download_from_dataset("complete_dataset_WITH_RESULTS_FULL.txt")
         if downloaded:
             dataset_path = downloaded
+    # Download inverted index from dataset (307 MB, truly comprehensive)
+    if not INVERTED_INDEX_FILE.exists():
+        downloaded = download_from_dataset("inverted_index_COMPREHENSIVE.pkl")
+        if downloaded:
+            index_path = downloaded
+            logger.info(f"✓ Downloaded comprehensive inverted index from dataset")
 
     if chunks_path.exists() and embeddings_path.exists():
         try:
@@ -339,18 +392,19 @@ def load_embeddings():
     # Skip BM25 (too memory-heavy for Docker), use inverted index only
     global inverted_index
 
-    # Try to load pre-built inverted index (638MB) - MUCH faster than building (15 minutes)
-    if INVERTED_INDEX_FILE.exists():
-        logger.info(f"Loading pre-built inverted index from {INVERTED_INDEX_FILE.name}...")
+    # Try to load pre-built comprehensive inverted index (77MB) from dataset
+    if index_path.exists():
+        logger.info(f"Loading comprehensive inverted index from {index_path.name}...")
         try:
-            with open(INVERTED_INDEX_FILE, 'rb') as f:
+            with open(index_path, 'rb') as f:
                 inverted_index = pickle.load(f)
-            logger.info(f"✓ Loaded pre-built inverted index with {len(inverted_index):,} terms (instant vs 15min build)")
+            logger.info(f"✓ Loaded comprehensive index with {len(inverted_index):,} terms")
+            logger.info(f"  Includes: TITLE (all words), INTERVENTION, CONDITIONS, SPONSOR, SUMMARY/DESCRIPTION (companies)")
         except Exception as e:
-            logger.warning(f"Failed to load pre-built index: {e}, building from scratch...")
+            logger.warning(f"Failed to load comprehensive index: {e}, building basic index...")
            inverted_index = build_inverted_index(doc_chunks)
     else:
-        logger.info("Pre-built inverted index not found, building from scratch (this takes 15 minutes)...")
+        logger.info("Comprehensive inverted index not found, building basic index (15 minutes)...")
        inverted_index = build_inverted_index(doc_chunks)
 
     logger.info("Will use inverted index + semantic search (no BM25)")
@@ -454,14 +508,19 @@ def filter_trial_for_clinical_summary(trial_text):
     return '\n'.join(filtered_lines)
 
 
-def retrieve_context_with_embeddings(query, top_k=10):
+def retrieve_context_with_embeddings(query, top_k=10, entities=None):
     """
-    ENTERPRISE HYBRID SEARCH: Always combines keyword + semantic scoring
-    - Extracts ALL meaningful terms from query (case-insensitive)
+    ENTERPRISE HYBRID SEARCH with STRICT ENTITY FILTERING
+    - Enforces HARD FILTERS for companies (sponsor/collaborator)
+    - Extracts meaningful terms from query (case-insensitive)
     - Scores each trial by keyword frequency (TF-IDF style)
     - Also gets semantic similarity scores
     - Merges both scores with weighted combination
-    - Works regardless of capitalization, language, or spelling
+
+    Args:
+        query: Search query string
+        top_k: Number of results to return
+        entities: Dict with 'drugs', 'diseases', 'companies' - if provided, STRICTLY filters
     """
     import time
     import re
@@ -567,6 +626,56 @@ def retrieve_context_with_embeddings(query, top_k=10):
 
     logger.info(f"[HYBRID] Inverted index scoring: {len(keyword_scores)} trials matched ({time.time()-t_kw:.2f}s)")
 
+    # 1.5. STRICT COMPANY FILTERING (if companies specified)
+    company_filter_failed = False
+    if entities and entities.get('companies'):
+        companies = [c.lower() for c in entities['companies']]
+        logger.info(f"[STRICT FILTER] Enforcing company filter: {companies}")
+
+        # Save original scores in case we need to fall back
+        original_keyword_scores = keyword_scores.copy()
+
+        # Filter keyword_scores to ONLY trials with these companies
+        filtered_keyword_scores = {}
+        sponsor_field_patterns = ['sponsor:', 'collaborator:', 'manufacturer:']
+
+        for idx, score in keyword_scores.items():
+            chunk_data = doc_chunks[idx]
+            chunk_text = chunk_data[1] if isinstance(chunk_data, tuple) else chunk_data
+            chunk_lower = chunk_text.lower()
+
+            # Check if ANY company appears in sponsor/collaborator/manufacturer fields
+            has_company = False
+            for company in companies:
+                # Look for company name in sponsor-related fields
+                for field in sponsor_field_patterns:
+                    if field in chunk_lower:
+                        field_start = chunk_lower.find(field)
+                        field_text = chunk_lower[field_start:field_start+500] # Next 500 chars
+                        if company in field_text:
+                            has_company = True
+                            logger.info(f"[COMPANY MATCH] Trial {idx} has '{company}' in {field}")
+                            break
+                if has_company:
+                    break
+
+            if has_company:
+                filtered_keyword_scores[idx] = score * 10.0 # 10x boost for company match
+            # If no company match, EXCLUDE this trial
+
+        before_count = len(keyword_scores)
+        after_count = len(filtered_keyword_scores)
+
+        logger.info(f"[STRICT FILTER] Filtered {before_count} → {after_count} trials (only those from {companies})")
+
+        # If no company matches, fall back to original search but flag it
+        if len(filtered_keyword_scores) == 0:
+            logger.warning(f"[STRICT FILTER] No trials found from companies {companies}, falling back to general search")
+            company_filter_failed = True
+            keyword_scores = original_keyword_scores # Restore original
+        else:
+            keyword_scores = filtered_keyword_scores
+
     # 2. SEMANTIC SCORING
     load_embedder()
     t_sem = time.time()
@@ -665,6 +774,7 @@ def retrieve_context_with_embeddings(query, top_k=10):
     logger.info(f"[355M RANKING] Added ranking metadata to context for final LLM")
 
     context = "\n\n---\n\n".join(context_chunks) # Use --- as separator between trials
+
     logger.info(f"[HYBRID] TOTAL TIME: {time.time()-t0:.2f}s")
     logger.info(f"[HYBRID] Filtered context length: {len(context)} chars (was ~{sum(len(c) for c in raw_chunks)} chars)")
 
@@ -940,39 +1050,54 @@ def parse_query_with_llm(query, hf_token=None):
     logger.info("[QUERY PARSER] Analyzing user query with LLM...")
     client = InferenceClient(token=hf_token, timeout=30)
 
-    parse_prompt = f"""Extract key information from this clinical trial query.
+    parse_prompt = f"""You are an expert in clinical trial terminology. Extract and expand entities from this query.
 
 Query: "{query}"
 
-Extract and return in this EXACT format:
-DRUGS: [list drug/treatment names, or "none"]
-DISEASES: [list diseases/conditions, or "none"]
-COMPANIES: [list company/sponsor names, or "none"]
-ENDPOINTS: [list trial endpoints/outcomes, or "none"]
-SEARCH_TERMS: [optimized search keywords]
-
-Examples:
-Query: "What Novartis drugs treat melanoma?"
-DRUGS: none
-DISEASES: melanoma
-COMPANIES: Novartis
-ENDPOINTS: none
-SEARCH_TERMS: Novartis melanoma treatment drugs
-
-Query: "Tell me about Keytruda for lung cancer"
-DRUGS: Keytruda
-DISEASES: lung cancer
-COMPANIES: none
-ENDPOINTS: none
-SEARCH_TERMS: Keytruda lung cancer
-
-Now parse the query above:"""
+Your task is to think creatively about ALL possible ways these entities might appear in clinical trial databases.
+
+For each entity type, brainstorm extensively:
+
+DRUGS:
+- Start with drugs explicitly mentioned
+- Add ALL possible names: brand names, generic names, research codes (like BNT162b2),
+  manufacturer+drug combos (Pfizer-BioNTech), chemical names, common abbreviations
+- Think: "What would a pharmaceutical company call this in a trial?"
+- Example: "Pfizer COVID vaccine" → ["Comirnaty", "BNT162b2", "tozinameran", "Pfizer-BioNTech COVID-19 vaccine", "mRNA-1273"]
+
+DISEASES:
+- Include the disease/condition mentioned
+- Add medical synonyms, ICD-10 terms, related conditions
+- Both technical and colloquial terms
+- Example: "COVID" → ["COVID-19", "SARS-CoV-2", "coronavirus disease 2019", "severe acute respiratory syndrome coronavirus 2"]
+
+COMPANIES:
+- Company mentioned plus parent companies, subsidiaries
+- Include previous names, merged entities, partnership names
+- Example: "Pfizer" → ["Pfizer", "Pfizer Inc.", "Pfizer-BioNTech", "BioNTech SE"]
+
+ENDPOINTS:
+- Any specific outcomes, measures, or endpoints mentioned
+- Include related clinical measures
+
+SEARCH_TERMS:
+- Comprehensive keywords combining above entities
+- Include partial matches that might be relevant
+
+Format EXACTLY as:
+DRUGS: [list or "none"]
+DISEASES: [list or "none"]
+COMPANIES: [list or "none"]
+ENDPOINTS: [list or "none"]
+SEARCH_TERMS: [comprehensive keyword list]
+
+Be expansive - more synonyms mean better trial matching."""
 
     response = client.chat_completion(
         model="meta-llama/Llama-3.1-70B-Instruct",
         messages=[{"role": "user", "content": parse_prompt}],
-        max_tokens=256,
-        temperature=0.1 # Low temp for consistent parsing
+        max_tokens=500, # Increased for comprehensive synonyms
+        temperature=0.3 # Slightly higher for creative synonym generation
     )
 
     parsed = response.choices[0].message.content.strip()
@@ -1024,40 +1149,224 @@ Now parse the query above:"""
         'raw_parsed': ''
     }
 
-def generate_llama_response(query, rag_context, hf_token=None):
+
+def plan_query_action(query, parsed_entities, hf_token=None):
     """
-    Generate response using FAST Groq API (10x faster than HF)
+    Use HuggingFace Llama-70B to decide the best action for this query.
 
-    Speed comparison:
-    - HuggingFace: ~40 tokens/sec = 15 seconds
-    - Groq: ~300 tokens/sec = 2 seconds (FREE!)
+    Actions:
+    - SEARCH_TRIALS: Specific drug/disease questions (use RAG with top 30 trials)
+    - COUNT_AGGREGATE: "How many" or "list all" questions (use index counts)
+    - COMPARE: Compare two or more treatments
+    - GENERAL_KNOWLEDGE: Definitions or general info (skip RAG, use LLM knowledge)
+
+    Returns: Dict with action, reasoning, and parameters
     """
+    try:
+        from huggingface_hub import InferenceClient
+
+        logger.info("[PLANNING AGENT] Deciding action with HuggingFace Llama-70B...")
+        client = InferenceClient(token=hf_token, timeout=30)
+
+        planning_prompt = f"""You are a clinical trial search strategist. Route this query to the best action.
+
+Query: "{query}"
+
+Extracted entities:
+- Drugs: {parsed_entities.get('drugs', [])}
+- Diseases: {parsed_entities.get('diseases', [])}
+- Companies: {parsed_entities.get('companies', [])}
+- Endpoints: {parsed_entities.get('endpoints', [])}
+
+ROUTING RULES:
+1. SEARCH_TRIALS (default): Any question about specific drugs, treatments, efficacy, safety, trial results, side effects, or when entities are extracted
+2. COUNT_AGGREGATE: Only when explicitly asking "how many", "list all", "total number"
+3. COMPARE: Only when explicitly comparing with "vs", "versus", "compare", "better than", "difference between"
+4. GENERAL_KNOWLEDGE: Only for pure definitions with no trial data needed
+
+When in doubt, choose SEARCH_TRIALS - real trial data is almost always helpful.
+
+Analyze the user's intent:
+- Are they asking about specific trial outcomes? → SEARCH_TRIALS
+- Do they want data about a drug/disease? → SEARCH_TRIALS
+- Are they asking for counts or lists? → COUNT_AGGREGATE
+- Are they comparing treatments? → COMPARE
+- Is this purely definitional? → GENERAL_KNOWLEDGE
+
+Respond with:
+ACTION: [choose one action]
+REASONING: [one clear sentence explaining why]
+SEARCH_TERMS: [refined search terms to find the most relevant trials]
+FOCUS: [what aspect to emphasize in the final answer - efficacy, safety, trial status, etc.]"""
+
+        response = client.chat_completion(
+            model="meta-llama/Llama-3.1-70B-Instruct",
+            messages=[{"role": "user", "content": planning_prompt}],
+            max_tokens=150,
+            temperature=0.1 # Low temp for consistent routing
+        )
+
+        result_text = response.choices[0].message.content.strip()
+        logger.info(f"[PLANNING AGENT] Decision:\n{result_text}")
+
+        # Parse the response
+        result = {
+            'action': 'SEARCH_TRIALS', # Default fallback
+            'reasoning': 'Could not parse response',
+            'params': query,
+            'focus': 'comprehensive trial data', # New field
+            'raw': result_text
+        }
+
+        lines = result_text.split('\n')
+        for line in lines:
+            line = line.strip()
+            if line.startswith('ACTION:'):
+                action = line.replace('ACTION:', '').strip()
+                if action in ['SEARCH_TRIALS', 'COUNT_AGGREGATE', 'COMPARE', 'GENERAL_KNOWLEDGE']:
+                    result['action'] = action
+            elif line.startswith('REASONING:'):
+                result['reasoning'] = line.replace('REASONING:', '').strip()
+            elif line.startswith('SEARCH_TERMS:'):
+                params = line.replace('SEARCH_TERMS:', '').strip()
+                if params.lower() != 'none':
+                    result['params'] = params
+            elif line.startswith('FOCUS:'):
+                result['focus'] = line.replace('FOCUS:', '').strip()
+
+        logger.info(f"[PLANNING AGENT] ✓ Action: {result['action']}, Focus: {result['focus']}, Reasoning: {result['reasoning']}")
+        return result
+
+    except Exception as e:
+        logger.warning(f"[PLANNING AGENT] Failed: {e}, defaulting to SEARCH_TRIALS")
+        return {
+            'action': 'SEARCH_TRIALS',
+            'reasoning': f'Planning failed: {e}',
+            'params': query,
+            'focus': 'available trial data'
+        }
+
+
+def generate_llama_response(query, rag_context, hf_token=None, parsed_entities=None, planning_context=None):
+    """
+    Intelligent synthesis that ALWAYS provides substantive answers from available data
+
+    Args:
+        query: User's question
+        rag_context: Retrieved trial data
+        hf_token: HuggingFace API token
+        parsed_entities: Dict with extracted entities (drugs, diseases, companies)
+        planning_context: Dict with planning agent output (action, focus, reasoning)
+    """
+    # Build entity context string for better guidance
+    entity_context = ""
+    if parsed_entities:
+        drugs_list = parsed_entities.get('drugs', [])[:10]
+        diseases_list = parsed_entities.get('diseases', [])[:10]
+        companies_list = parsed_entities.get('companies', [])[:10]
+
+        if drugs_list or diseases_list or companies_list:
+            entity_context = f"""
+Key entities to look for (including synonyms):
+- Drugs/Treatments: {', '.join(drugs_list) if drugs_list else 'none'}
+- Diseases: {', '.join(diseases_list) if diseases_list else 'none'}
+- Companies: {', '.join(companies_list) if companies_list else 'none'}"""
+
+    # Focus area from planning
+    focus_area = planning_context.get('focus', 'comprehensive analysis') if planning_context else 'comprehensive analysis'
+
     try:
         # Try Groq first (much faster), fallback to HuggingFace
         groq_api_key = os.getenv("GROQ_API_KEY")
 
+        system_prompt = """You are a leading clinical trials analyst. Your role is to provide the most helpful, informative answer possible using available trial data. You excel at finding connections and insights even from imperfect data matches.
+
+CORE PRINCIPLES:
+1. ALWAYS provide substantive, useful answers
+2. Find relevant information even in partially-matching trials
+3. Extract specific numbers, dates, phases, outcomes wherever available
+4. Connect information across trials to build comprehensive insights
+5. Never say "no relevant trials found" - work with what you have"""
+
+        user_prompt = f"""Question: {query}
+
+Focus for this analysis: {focus_area}
+{entity_context}
+
+Available Clinical Trial Data:
+{rag_context[:12000]}
+
+YOUR MISSION:
+Provide the most comprehensive, helpful answer possible by intelligently analyzing ALL available trials.
+
+ANALYSIS APPROACH:
+1. SCAN all trials for ANY relevance to the query:
+   - Direct matches (same drug + disease) → Primary focus
+   - Same drug, different disease → Still valuable (shows drug profile)
+   - Same disease, different drug → Provides treatment landscape context
+   - Same company → Shows research pipeline
+   - Similar mechanisms/drug classes → Offers comparative insights
+
+2. EXTRACT concrete information:
+   - Trial phases, enrollment numbers, completion dates
+   - Efficacy percentages, response rates, survival data
+   - Safety profiles, adverse events, tolerability
+   - Dosing regimens, administration routes
+   - Patient populations, inclusion/exclusion criteria
+
+3. SYNTHESIZE intelligently:
+   - If asking about Drug X for Disease Y but only find Drug X for Disease Z,
+     discuss what this reveals about Drug X's mechanism and potential
+   - Find patterns across trials (e.g., consistent safety profile)
+   - Note trial progression (Phase 1 → 2 → 3) showing development status
+
+## YOUR RESPONSE STRUCTURE:
+
+### DIRECT ANSWER
+[Immediately address the query with the best available information. Be confident and helpful.
+If asking about "Sinopharm COVID vaccine" and trials mention "BBIBP-CorV" - recognize these as the same.
+Lead with what you KNOW, not what you don't know.]
+
+### KEY CLINICAL TRIALS EVIDENCE
+[For each relevant trial, extract meaningful information:]
+- **NCT#####**: [Specific findings relevant to query - be detailed with numbers/outcomes]
+- **NCT#####**: [What this tells us - phases, enrollment, results if available]
+[Include even partially relevant trials with appropriate context]
+
+### CLINICAL INSIGHTS
+[Synthesize patterns and meaningful conclusions:]
+- What do these trials collectively reveal?
+- Treatment landscape and development status
+- Efficacy signals or safety patterns
+- How different trials complement each other
+- Comparison with similar drugs/approaches if relevant
+
+### ADDITIONAL CONTEXT
+[Brief, if needed - but keep positive and informative:]
+- If data is from different indications, explain transferable insights
+- If only early phase data, discuss what this means for development
+- Focus on what the data DOES tell us
+
+REMEMBER:
+- Users want actionable information, not disclaimers
+- Even Phase 1 safety data is valuable information
+- Cross-indication data provides mechanism insights
+- Company trial portfolios reveal strategic priorities
+- Similar drug classes offer comparative context
+- ALWAYS find something valuable to report"""
+
         if groq_api_key:
             logger.info("Generating response with Llama-3.1-70B via GROQ (fast)...")
             from groq import Groq
             client = Groq(api_key=groq_api_key)
 
-            # Simplified prompt for faster generation
-            system_prompt = """You are a medical research assistant. Answer based ONLY on the provided clinical trial data. Be concise and cite NCT IDs."""
-
-            user_prompt = f"""Clinical trials:
-{rag_context[:6000]}
-
-Question: {query}
-
-Provide a concise answer citing specific NCT trial IDs."""
-
             response = client.chat.completions.create(
-                model="llama-3.1-70b-versatile", # Groq's optimized 70B
+                model="llama-3.1-70b-versatile",
                 messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                 ],
-                max_tokens=512, # Shorter for speed
+                max_tokens=2000, # Increased for comprehensive answers
                 temperature=0.3,
                 timeout=30
             )
@@ -1070,24 +1379,13 @@ Provide a concise answer citing specific NCT trial IDs."""
 
             from huggingface_hub import InferenceClient
             client = InferenceClient(token=hf_token, timeout=120)
 
-            system_prompt = """You are a medical research assistant. Answer based ONLY on the provided clinical trial data. Be concise and cite NCT IDs."""
-
-            user_prompt = f"""Clinical trials:
-{rag_context[:6000]}
-
-Question: {query}
-
-Provide a concise answer citing specific NCT trial IDs."""
-
-            messages = [
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_prompt}
-            ]
-
             response = client.chat_completion(
                 model="meta-llama/Meta-Llama-3.1-70B-Instruct",
-                messages=messages,
-                max_tokens=512, # Reduced from 2048 for speed
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt}
+                ],
+                max_tokens=2000, # Increased for comprehensive answers
                 temperature=0.3
             )
@@ -1114,7 +1412,7 @@ def process_query_simple_test(conversation):
 
     # Try to search
     start = time.time()
-    context = retrieve_context_with_embeddings(conversation, top_k=3)
+    context = retrieve_context_with_embeddings(conversation, top_k=30)
     search_time = time.time() - start
 
     if not context:
@@ -1134,15 +1432,16 @@ def process_query_simple_test(conversation):
 
 def process_query(conversation):
     """
-    Complete pipeline with LLM query parsing and natural language generation
+    Complete pipeline with LLM query parsing, planning agent, and natural language generation
 
     Flow:
     0. LLM Parser - Extract drugs, diseases, companies, endpoints (~2-3s)
-    1. RAG Search - Hybrid search using optimized query (~2s)
+    0.5. Planning Agent - Decide action: SEARCH_TRIALS / COUNT_AGGREGATE / GENERAL_KNOWLEDGE (~1s)
+    1. Execute Action - Based on plan: RAG search, index count, or skip to LLM (~2s)
     2. Skipped - 355M ranking removed (was broken)
     3. LLM Response - Llama 70B generates natural language (~15s)
 
-    Total: ~20 seconds
+    Total: ~21 seconds
     """
     import time
     import traceback
@@ -1176,26 +1475,158 @@ def process_query(conversation):
         logger.warning(error_msg)
         output_parts.append(f"{error_msg}\n")
         search_query = conversation # Fallback to original
+        parsed_query = {'drugs': [], 'diseases': [], 'companies': []}
 
-    # Step 1: RAG search (using optimized search query)
+    # Step 0.5: Planning agent decides action
     try:
-        step1_start = time.time()
-        logger.info("Step 1: RAG search...")
-        output_parts.append("✓ Step 1: RAG search started...\n")
-        context = retrieve_context_with_embeddings(search_query, top_k=3)
+        planning_start = time.time()
+        logger.info("Step 0.5: Planning agent deciding action...")
+        output_parts.append("✓ Step 0.5: Planning agent started...\n")
 
-        if not context:
-            return "No matching trials found in RAG search."
+        plan = plan_query_action(conversation, parsed_query, hf_token=hf_token)
 
-        # No limit - use complete trials
-        step1_time = time.time() - step1_start
-        output_parts.append(f"✓ Step 1 Complete: Found {context.count('NCT')} trials ({step1_time:.1f}s)\n")
-        logger.info(f"RAG search successful - found trials in {step1_time:.1f}s")
+        planning_time = time.time() - planning_start
+        output_parts.append(f"✓ Step 0.5 Complete: Action decided ({planning_time:.1f}s)\n")
+        output_parts.append(f"  Action: {plan['action']}\n")
+        output_parts.append(f"  Reasoning: {plan['reasoning']}\n")
+        logger.info(f"Planning complete: {plan['action']} - {plan['reasoning']}")
 
     except Exception as e:
-        error_msg = f"✗ Step 1 FAILED (RAG search): {str(e)}\n{traceback.format_exc()}"
-        logger.error(error_msg)
-        return error_msg
+        error_msg = f"✗ Step 0.5 WARNING (Planning): {str(e)}, defaulting to SEARCH_TRIALS"
+        logger.warning(error_msg)
+        output_parts.append(f"{error_msg}\n")
+        plan = {'action': 'SEARCH_TRIALS', 'reasoning': 'Planning failed', 'params': search_query}
+
+    # Step 1: Execute action based on plan
+    if plan['action'] == 'GENERAL_KNOWLEDGE':
+        # Skip RAG entirely, go straight to LLM
+        try:
+            step1_start = time.time()
+            logger.info("Step 1: GENERAL_KNOWLEDGE - Skipping RAG...")
+            output_parts.append("✓ Step 1: Skipped RAG (general knowledge query)\n")
+            context = "" # Empty context
+            step1_time = time.time() - step1_start
+            output_parts.append(f"✓ Step 1 Complete: Using LLM knowledge only ({step1_time:.1f}s)\n")
+        except Exception as e:
+            error_msg = f"✗ Step 1 FAILED: {str(e)}"
+            logger.error(error_msg)
+            return error_msg
+
+    elif plan['action'] == 'COUNT_AGGREGATE':
+        # Use index to count, pass summary to LLM
+        try:
+            step1_start = time.time()
+            logger.info("Step 1: COUNT_AGGREGATE - Using inverted index...")
+            output_parts.append("✓ Step 1: Count/aggregation started...\n")
+
+            # Get search terms from plan
+            search_terms = plan['params'].lower().split()
+
+            # Find matching trials from inverted index
+            global inverted_index
+            matching_trial_ids = set()
+
+            if inverted_index:
+                for term in search_terms:
+                    if term in inverted_index:
+                        matching_trial_ids.update(inverted_index[term])
+                        logger.info(f"  Found {len(inverted_index[term])} trials for '{term}'")
+
+            # Create summary context
+            if matching_trial_ids:
+                context = f"Found {len(matching_trial_ids)} trials matching the query.\n\n"
+                context += f"Note: This is an aggregate count. For detailed information about specific trials, "
+                context += f"please ask a more specific question about individual drugs or treatments."
+            else:
+                context = "No trials found matching the query."
+
+            step1_time = time.time() - step1_start
+            output_parts.append(f"✓ Step 1 Complete: Found {len(matching_trial_ids)} matching trials ({step1_time:.1f}s)\n")
+            logger.info(f"Count aggregation complete - {len(matching_trial_ids)} trials in {step1_time:.1f}s")
+
+        except Exception as e:
+            error_msg = f"✗ Step 1 FAILED (Count): {str(e)}\n{traceback.format_exc()}"
+            logger.error(error_msg)
+            return error_msg
+
+    elif plan['action'] == 'COMPARE':
+        # Compare treatments - retrieve trials for each and let LLM analyze
+        try:
+            step1_start = time.time()
+            logger.info("Step 1: COMPARE - Retrieving trials for comparison...")
+            output_parts.append("✓ Step 1: Comparison search started...\n")
+
+            # Extract treatments to compare from parsed drugs
+            treatments = parsed_query.get('drugs', [])
+
+            if len(treatments) < 2:
+                # Try to extract from query text if not in parsed drugs
+                import re
+                compare_patterns = [
+                    r'(\w+)\s+(?:vs|versus|vs\.)\s+(\w+)',
+                    r'compare\s+(\w+)\s+(?:and|with|to)\s+(\w+)'
+                ]
+                for pattern in compare_patterns:
+                    match = re.search(pattern, conversation.lower())
+                    if match:
+                        treatments = [match.group(1), match.group(2)]
+                        break
+
+            if len(treatments) < 2:
+                context = "Could not identify two treatments to compare. Please specify which treatments you'd like to compare."
+            else:
+                logger.info(f"[COMPARE] Comparing: {treatments[0]} vs {treatments[1]}")
+
+                # Search for trials for each treatment
+                context_parts = []
+                for i, treatment in enumerate(treatments[:2], 1): # Compare first 2
+                    logger.info(f"[COMPARE] Searching trials for {treatment}...")
+                    treatment_trials = retrieve_context_with_embeddings(treatment, top_k=30, entities=parsed_query)
+
+                    if treatment_trials:
+                        context_parts.append(f"=== TRIALS FOR {treatment.upper()} ===\n{treatment_trials}\n")
+                    else:
+                        context_parts.append(f"=== TRIALS FOR {treatment.upper()} ===\nNo trials found.\n")
+
+                # Combine all trials for LLM comparison
+                context = "\n".join(context_parts)
+                context += f"\n\nPLEASE COMPARE: {treatments[0]} vs {treatments[1]}\n"
+                context += "Analyze the trials above and provide a side-by-side comparison including:\n"
+                context += "- Number of trials for each\n"
+                context += "- Key indications/diseases studied\n"
+                context += "- Trial phases\n"
+                context += "- Notable efficacy or safety findings\n"
+                context += "- Head-to-head comparison trials (if any)"
+
+            step1_time = time.time() - step1_start
+            output_parts.append(f"✓ Step 1 Complete: Retrieved comparison data ({step1_time:.1f}s)\n")
+            logger.info(f"Comparison search complete in {step1_time:.1f}s")
+
+        except Exception as e:
+            error_msg = f"✗ Step 1 FAILED (Compare): {str(e)}\n{traceback.format_exc()}"
+            logger.error(error_msg)
+            return error_msg
+
+    else: # SEARCH_TRIALS - normal RAG search (using optimized search query)
+        try:
+            step1_start = time.time()
+            logger.info("Step 1: RAG search...")
+            output_parts.append("✓ Step 1: RAG search started...\n")
+            # Pass entities for STRICT company filtering
+            context = retrieve_context_with_embeddings(search_query, top_k=30, entities=parsed_query)
+
+            if not context:
+                return "No matching trials found in RAG search."
+
+            # No limit - use complete trials
+            step1_time = time.time() - step1_start
+            output_parts.append(f"✓ Step 1 Complete: Found {context.count('NCT')} trials ({step1_time:.1f}s)\n")
+            logger.info(f"RAG search successful - found trials in {step1_time:.1f}s")
+
+        except Exception as e:
+            error_msg = f"✗ Step 1 FAILED (RAG search): {str(e)}\n{traceback.format_exc()}"
+            logger.error(error_msg)
+            return error_msg
 
     # Step 2: Skipped (355M ranking removed - was broken)
     output_parts.append("✓ Step 2: Skipped (using hybrid search + recency)\n")
@@ -1205,7 +1636,13 @@ def process_query(conversation):
         step3_start = time.time()
         logger.info("Step 3: Generating response with Llama-3.1-70B...")
         output_parts.append("✓ Step 3: Llama 70B generation started...\n")
-        llama_response = generate_llama_response(conversation, context, hf_token=hf_token)
+        llama_response = generate_llama_response(
+            conversation,
+            context,
+            hf_token=hf_token,
+            parsed_entities=parsed_query,
+            planning_context=plan
+        )
         step3_time = time.time() - step3_start
         output_parts.append(f"✓ Step 3 Complete: Llama 70B response generated ({step3_time:.1f}s)\n")
         logger.info(f"Llama 70B generation successful in {step3_time:.1f}s")
@@ -1237,6 +1674,10 @@ RAG RETRIEVED TRIALS (Top 3 Most Relevant):
 ---
 Total Time: {total_time:.1f}s
 """
+        # Record analytics
+        query_type = plan.get('action', 'UNKNOWN') if 'plan' in locals() else 'UNKNOWN'
+        query_analytics.record_query(query_type, total_time, success=True)
+
         return output
     except Exception as e:
         # Absolute fallback
@@ -1275,9 +1716,44 @@ System Info:
 ========================================
 """
         logger.error(master_error_msg)
+
+        # Record analytics for error
+        elapsed_time = time.time() - start_time if 'start_time' in locals() else 0
+        query_analytics.record_query('ERROR', elapsed_time, success=False)
+
         return master_error_msg
 
 
+def get_analytics_report():
+    """
+    Get analytics report for monitoring
+
+    Returns formatted string with query statistics
+    """
+    stats = query_analytics.get_stats()
+
+    uptime_hours = stats['uptime_seconds'] / 3600
+
+    report = f"""
+=== ANALYTICS REPORT ===
+
+Uptime: {uptime_hours:.1f} hours
+Total Queries: {stats['total_queries']}
+Error Rate: {stats['error_rate']*100:.1f}%
+
+Query Type Distribution:
+"""
+
+    for query_type, count in stats['query_type_distribution'].items():
+        percentage = (count / stats['total_queries'] * 100) if stats['total_queries'] > 0 else 0
+        avg_time = stats['avg_response_times'].get(query_type, 0)
+        report += f"  {query_type}: {count} queries ({percentage:.1f}%) - avg {avg_time:.2f}s\n"
+
+    report += "\n=== END REPORT ===\n"
+
+    return report
+
+
 # ============================================================================
 # GRADIO INTERFACE
 # ============================================================================
@@ -1304,21 +1780,8 @@ with gr.Blocks(title="Foundation 1.2") as demo:
     )
 
     gr.Markdown("""
-    **Production RAG Pipeline - Optimized for Clinical Accuracy**
-
-    **Search (3-Stage Hybrid):**
-    1. Keyword matching (70%) + Semantic search (30%) → 10 candidates
-    2. 355M Clinical Trial GPT re-ranks by relevance
-    3. Returns top 3 trials with best clinical relevance scores
-
-    **Generation (Qwen2.5-14B-Instruct):**
-    - 14B parameter model via HuggingFace Inference API
-    - Structured clinical summaries with clear headings
-    - Cites specific NCT trial IDs
-    - Includes actual trial results and efficacy data
-    - High-quality medical reasoning and analysis
+    **Production Pipeline - Optimized for Clinical Accuracy**
 
-    *355M model used for ranking (not generation) + Qwen2.5-14B for responses*
     """)
 
 
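For quick verification, a hypothetical smoke test of the new routing and analytics paths (a sketch, assuming foundation_engine imports cleanly outside the running Gradio app and the required API tokens are set; the query string is only an example):

```python
# Exercise the full pipeline once, then print the per-action stats that
# QueryAnalytics accumulates (both names come from this commit's diff).
from foundation_engine import process_query, get_analytics_report

print(process_query("What is the efficacy of the Sinopharm COVID vaccine?"))
print(get_analytics_report())  # query counts, error rate, avg latency per action
```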