Bi Yoo committed
Commit aae1ac3 · 1 Parent(s): 588ba9b

adding context window

Files changed (3):
  1. app.py +35 -13
  2. config.py +1 -1
  3. cv_data.json +56 -89
app.py CHANGED
@@ -120,8 +120,13 @@ app.add_middleware(
 )
 
 # Pydantic models
+class ChatMessage(BaseModel):
+    role: str  # "user" or "assistant"
+    content: str
+
 class ChatRequest(BaseModel):
     message: str
+    history: List[ChatMessage] = []  # Optional chat history (sliding window)
 
 class ChatResponse(BaseModel):
     response: str
@@ -278,6 +283,11 @@ def load_cv_data(file_path: str = "cv_data.json") -> str:
     if "summary" in data:
         text_parts.append(f"Professional Summary: {data['summary']}")
 
+    # Capabilities (pre-defined chunks for direct embedding)
+    if "capabilities" in data:
+        for cap in data["capabilities"]:
+            text_parts.append(f"Capability: {cap['text']}")
+
     # Skills
     if "skills" in data:
         for category, items in data["skills"].items():
@@ -477,7 +487,7 @@ def retrieve_relevant_chunks(query: str, top_k: int = TOP_K_RESULTS) -> List[str
     return relevant_chunks
 
 
-def generate_response_beam(system_prompt: str, user_prompt: str) -> str:
+def generate_response_beam(system_prompt: str, user_prompt: str, history: List[Dict] = None) -> str:
     """Generate response using Beam vLLM service (Qwen3 4B Instruct on GPU)."""
     import requests
 
@@ -486,13 +496,16 @@ def generate_response_beam(system_prompt: str, user_prompt: str) -> str:
 
     api_url = f"{BEAM_API_URL.rstrip('/')}/v1/chat/completions"
 
+    # Build messages array with history
+    messages = [{"role": "system", "content": system_prompt}]
+    if history:
+        messages.extend(history)
+    messages.append({"role": "user", "content": user_prompt})
+
     payload = {
         "model": "Qwen/Qwen3-4B-Instruct-2507",
-        "messages": [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ],
-        "max_tokens": 100,
+        "messages": messages,
+        "max_tokens": 200,  # Increased for complete responses on GPU
         "temperature": 0.3,
         "top_p": 0.7,
     }
@@ -560,7 +573,7 @@ def generate_response_huggingface(prompt: str) -> str:
         raise HTTPException(status_code=500, detail=f"HuggingFace API error: {str(e)}")
 
 
-def generate_response_local(system_prompt: str, user_prompt: str) -> str:
+def generate_response_local(system_prompt: str, user_prompt: str, history: List[Dict] = None) -> str:
     """Generate response using a locally hosted quantized model."""
     global llm_client
 
@@ -572,11 +585,15 @@ def generate_response_local(system_prompt: str, user_prompt: str) -> str:
     if os.getenv("DEBUG_LOCAL_PROMPT", "0") == "1":
         preview = user_prompt if len(user_prompt) < 400 else user_prompt[:400] + "..."
         print("Local prompt =>", preview)
+
+    # Build messages array with history
+    messages = [{"role": "system", "content": system_prompt}]
+    if history:
+        messages.extend(history)
+    messages.append({"role": "user", "content": user_prompt})
+
     completion = llm_client.create_chat_completion(
-        messages=[
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ],
+        messages=messages,
         max_tokens=LOCAL_MODEL_MAX_OUTPUT_TOKENS,
         temperature=0.3,
         top_p=0.7,
@@ -605,6 +622,7 @@ def generate_response(
     question: str,
     original_question: str | None = None,
     assistant_query: bool = False,
+    history: List[Dict] = None,
 ) -> str:
     """Generate response using configured LLM provider"""
     system_prompt = SYSTEM_PROMPT.strip()
@@ -619,11 +637,11 @@ Answer:"""
     combined_prompt = f"{system_prompt}\n\n{user_prompt}"
 
     if LLM_PROVIDER == "beam":
-        return generate_response_beam(system_prompt, user_prompt)
+        return generate_response_beam(system_prompt, user_prompt, history)
     elif LLM_PROVIDER == "huggingface":
         return generate_response_huggingface(combined_prompt)
     elif LLM_PROVIDER == "local":
-        return generate_response_local(system_prompt, user_prompt)
+        return generate_response_local(system_prompt, user_prompt, history)
     else:
         raise ValueError(f"Unsupported LLM provider: {LLM_PROVIDER}")
 
@@ -667,11 +685,15 @@ async def chat(request: ChatRequest, _: None = Depends(verify_client_access)):
     # Build context from chunks
     context = "\n\n".join(relevant_chunks)
 
+    # Convert history to dict format
+    history_dicts = [{"role": msg.role, "content": msg.content} for msg in request.history] if request.history else None
+
     # Generate response
     response = generate_response(
         context,
         request.message,
        original_question=request.message,
+        history=history_dicts,
     )
 
     return ChatResponse(
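
The same messages-assembly block is now duplicated in generate_response_beam and generate_response_local. A small helper would keep the two providers in sync; this is a sketch of a possible refactor, not part of the commit:

from typing import Dict, List, Optional

def build_messages(system_prompt: str, user_prompt: str,
                   history: Optional[List[Dict]] = None) -> List[Dict]:
    """Assemble the chat messages: system prompt, optional history window, then the user turn."""
    messages = [{"role": "system", "content": system_prompt}]
    if history:
        messages.extend(history)  # prior turns, oldest first
    messages.append({"role": "user", "content": user_prompt})
    return messages

Also note the server forwards request.history as-is and nothing in this diff trims it, so the "sliding window" in the ChatRequest comment is the caller's job. A minimal client sketch, assuming a local dev server at http://localhost:8000 and ignoring whatever credentials verify_client_access expects:

import requests

BASE_URL = "http://localhost:8000"  # assumption: local dev server
WINDOW = 6  # assumption: keep only the last 3 user/assistant exchanges

def ask(message: str, history: list) -> str:
    """POST a question plus a trimmed history window to /chat."""
    payload = {"message": message, "history": history[-WINDOW:]}
    resp = requests.post(f"{BASE_URL}/chat", json=payload, timeout=60)
    resp.raise_for_status()
    return resp.json()["response"]

history = []
answer = ask("What does Bi do at Insticator?", history)
history += [{"role": "user", "content": "What does Bi do at Insticator?"},
            {"role": "assistant", "content": answer}]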
config.py CHANGED
@@ -41,7 +41,7 @@ SESSION_TOKEN_TTL_SECONDS = int(os.getenv("SESSION_TOKEN_TTL_SECONDS", "600"))
 EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # Fast, lightweight
 CHUNK_SIZE = 300  # Characters per chunk (reduced for faster inference)
 CHUNK_OVERLAP = 30  # Overlap between chunks
-TOP_K_RESULTS = 1  # Fewer chunks lowers prompt size on small CPU tiers
+TOP_K_RESULTS = 3  # Retrieve top 3 most relevant chunks (more context for GPU inference)
 
 # System prompt for the chatbot
 SYSTEM_PROMPT = """Answer questions about Bi using the provided context. Keep answers short and direct. Always refer to Bi by name."""
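
For a rough sense of what the TOP_K_RESULTS bump costs, retrieved context grows linearly with top_k at a fixed CHUNK_SIZE. A back-of-envelope sketch, assuming the common ~4-characters-per-token heuristic:

CHUNK_SIZE = 300
CHARS_PER_TOKEN = 4  # assumption: rough average for English prose

for top_k in (1, 3):
    chars = top_k * CHUNK_SIZE + (top_k - 1) * 2  # chunks joined with "\n\n"
    print(f"top_k={top_k}: ~{chars} chars, ~{chars // CHARS_PER_TOKEN} tokens of context")

That is roughly 75 versus 226 tokens of retrieved context, which is small for the GPU tier this comment targets.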
cv_data.json CHANGED
@@ -2,7 +2,7 @@
   "personal_info": {
     "name": "Bi Yoo",
     "title": "Lead Software Engineer & Technical Lead",
-    "bio": "Seasoned full-stack and machine learning-focused tech lead building revenue-driving ad tech platforms, data products, and developer tooling.",
+    "bio": "Senior engineer delivering production AI systems, large-scale ad tech platforms, and data pipelines with measurable revenue impact.",
     "location": "Minnesota, USA",
     "email": "yoobi.dev@gmail.com",
     "phone": "952-567-3505",
@@ -13,15 +13,23 @@
     "gender": "male"
   },
 
-  "summary": "Tech lead with a decade of experience shipping large-scale ad tech, data, and ML systems. Drives architecture across Java, Go, and Python services, mentors multi-disciplinary teams, and delivers measurable revenue impact through experimentation and applied machine learning.",
+  "summary": "Senior software engineer and technical lead with deep experience building revenue-focused ad tech systems, high-throughput backend services, data pipelines, and production-ready machine learning applications. Strong end-to-end execution: architecture, implementation, deployment, and product integration. Proven track record in raising system reliability, improving developer velocity, and shipping pragmatic AI-powered features into real products.",
+
+  "core_competencies": [
+    "Applied Machine Learning (RAG pipelines, embeddings, vector search, latency optimization)",
+    "Backend & Distributed Systems (high-throughput services, microservices, streaming data)",
+    "Ad Tech Systems (header bidding, forecasting, payout reconciliation, analytics surfaces)",
+    "MLOps & Data Engineering (Airflow, Kafka, Snowflake, Druid, feature generation)",
+    "Technical Leadership (mentoring, architecture direction, cross-team collaboration)"
+  ],
 
   "skills": {
-    "programming_languages": ["Python", "JavaScript", "TypeScript", "Java", "Go", "SQL"],
-    "frameworks": ["React", "React Native", "Vue", "Angular", "Spring Boot", "Express", "FastAPI", "Django"],
-    "ml_and_data": ["RAG pipelines", "Forecasting models", "TTS/STT", "Vector search", "Feature engineering"],
-    "datastores": ["Snowflake", "Apache Druid", "MongoDB", "PostgreSQL", "MySQL", "OracleSQL"],
-    "tools": ["AWS", "Kubernetes", "Docker", "Airflow", "Kafka", "CircleCI", "Jenkins", "Git", "Terraform"],
-    "soft_skills": ["Technical leadership", "Cross-functional collaboration", "Mentoring", "Strategic planning", "Stakeholder communication"]
+    "programming_languages": ["Python", "Go", "Java", "TypeScript", "JavaScript", "SQL", "Rust", "Swift"],
+    "ml_and_data": ["RAG architectures", "Vector search (Faiss/Milvus/pgvector)", "Embedding evaluation", "Whisper/TTS", "Forecasting models", "Feature engineering"],
+    "frameworks": ["FastAPI", "Django", "Spring Boot", "React", "React Native"],
+    "datastores": ["Snowflake", "Apache Druid", "PostgreSQL", "MongoDB"],
+    "tools": ["AWS", "Kubernetes", "Docker", "Terraform", "Kafka", "Airflow", "CircleCI", "Jenkins", "Git"],
+    "soft_skills": ["Technical leadership", "Mentoring", "Roadmapping", "Stakeholder alignment"]
   },
 
   "experience": [
@@ -30,15 +38,15 @@
       "company": "Insticator",
       "location": "Remote, USA",
       "duration": "Dec 2021 - Present",
-      "description": "Tech lead overseeing ad monetization platforms, ML initiatives, and full-stack delivery for publisher revenue products.",
+      "description": "Tech lead for ad monetization and AI-driven revenue intelligence platforms.",
       "achievements": [
-        "Architected ML wrappers that power interactive site experiences, including multimodal RAG pipelines for content generation and campaign insights.",
-        "Delivered ad performance forecasting models that inform bidding strategies and revenue planning across 2,000+ publisher properties.",
-        "Built and productionized Go-based services for ad exchange bidding and real-time pixel tracking, integrating with existing Java services.",
-        "Designed analytics workflows that combine Snowflake and Apache Druid to surface revenue, engagement, and latency KPI dashboards with sub-second query times.",
-        "Authored and maintained Airflow DAGs and Kafka streaming jobs that ingest SSP and ad server payout data, automating onboarding and reconciliation tasks.",
-        "Drove engineering excellence by mentoring a distributed team of developers, reviewing architecture, and increasing sprint throughput by ~20% through codebase modernization.",
-        "Partnered with product and revenue stakeholders to prioritize experimentation, including AWS Lambda@Edge-based A/B testing for header bidding clients that lifted revenue per ad unit by ~30%."
+        "Designed and shipped multiple RAG pipelines, including embedding stores, retrieval logic, prompt orchestration, and evaluation harnesses.",
+        "Built high-throughput Go and Java services for ad exchange bidding and real-time pixel/event collection.",
+        "Delivered revenue forecasting models informing bidding and inventory planning across thousands of publisher properties.",
+        "Built combined Snowflake + Druid analytics layers enabling sub-second revenue and engagement queries.",
+        "Developed streaming ingestion pipelines using Kafka and Airflow to automate payout reconciliation and reporting.",
+        "Mentored distributed engineering team and raised delivery velocity through architecture workflows and refactors.",
+        "Led experimentation frameworks using Lambda@Edge to run A/B header bidding strategies, increasing revenue per ad unit."
       ]
     },
     {
@@ -46,13 +54,13 @@
       "company": "Protenus",
       "location": "Baltimore, MD (Remote)",
       "duration": "Aug 2020 - Dec 2021",
-      "description": "Module lead for healthcare compliance analytics platform spanning UI, API, and data pipelines.",
+      "description": "Module lead for healthcare compliance analytics platform.",
       "achievements": [
-        "Led development of mission-critical React and Spring Boot features that processed high-volume EHR data from Epic and Cerner systems.",
-        "Raised average automated test coverage from near-zero to 80% across front-end and API codebases through tooling, reviews, and mentoring.",
-        "Architected hospital workforce analytics dashboards, surfacing ETL pipeline health and anomaly detection insights for compliance teams.",
-        "Optimized MongoDB-backed services to reduce response times and improve reliability for clinical operations users.",
-        "Collaborated with data science teams to productionize ML features and delivered developer tooling that accelerated release cadence."
+        "Led development of React + Spring Boot features processing large-scale EHR data streams.",
+        "Increased automated test coverage to ~80% across UI and API systems.",
+        "Shipped workforce analytics dashboards surfacing anomaly detection and ETL health signals.",
+        "Improved MongoDB-backed services for performance and reliability in clinical operations workflows.",
+        "Integrated ML-driven features into product surfaces through repeatable deployment patterns."
       ]
     },
     {
@@ -60,89 +68,48 @@
       "company": "PreciseTarget",
       "location": "Washington, D.C.",
       "duration": "Jan 2018 - Aug 2020",
-      "description": "Full-stack engineer building retail recommendation systems and large-scale data processing pipelines.",
+      "description": "Full-stack engineer working on retail recommendation systems and data ingestion.",
       "achievements": [
-        "Developed React and Vue applications surfacing >50M SKUs with advanced filtering, analytics, and personalization.",
-        "Implemented Node.js and Python services for catalog ingestion, event tracking, and data validation.",
-        "Created end-to-end integration test frameworks within CircleCI to safeguard complex merchandising logic.",
-        "Refined PostgreSQL middleware to improve query speed, data integrity, and resilience for retail data pipelines.",
-        "Mentored junior engineers and codified best practices for front-end architecture and deployment workflows."
-      ]
-    },
-    {
-      "title": "Full-stack Engineer & Consultant (Various Contracts)",
-      "company": "Meaningful Gigs, SL Technology, Brivo, The Washington Post, AList Magazine",
-      "location": "Washington, D.C. Metro Area",
-      "duration": "Apr 2014 - Jan 2019",
-      "description": "Delivered end-to-end web and mobile solutions across media, design, and manufacturing clients.",
-      "achievements": [
-        "Shipped responsive web applications using React, Laravel, AWS Lambda, and MongoDB to modernize content workflows.",
-        "Designed reusable component libraries, testing frameworks, and CI/CD pipelines to accelerate delivery for client teams.",
-        "Built internal tooling in Objective-C, PHP, and Python to automate content publishing and analytics.",
-        "Partnered with stakeholders to define product strategy, manage releases, and mentor cross-functional contributors."
+        "Built React/Vue analytics surfaces surfacing large SKU sets and personalization insights.",
+        "Implemented ingestion, event tracking, and validation services in Node.js and Python.",
+        "Improved PostgreSQL efficiency and reliability for merchandising data pipelines.",
+        "Established CI-driven integration testing frameworks to safeguard core algorithms."
       ]
     }
   ],
 
-  "education": [
-    {
-      "degree": "Master of Science, Computer Science (Software Engineering)",
-      "institution": "University of Maryland Global Campus",
-      "location": "Maryland, USA"
-    },
-    {
-      "degree": "Bachelor of Arts, Digital Communication (Cum Laude)",
-      "institution": "University of Missouri",
-      "location": "Missouri, USA"
-    },
-    {
-      "degree": "Bachelor of Fine Arts, Product Design",
-      "institution": "Hongik University",
-      "location": "Seoul, South Korea"
-    }
-  ],
-
   "projects": [
     {
-      "name": "SaladDays (Mobile App)",
-      "description": "A health and nutrition companion app using computer vision and vector embeddings to provide calorie estimates, alongside an LLM-powered coaching chat experience.",
-      "technologies": ["React Native", "Python", "Vision AI", "Vector embeddings", "LLM"],
-      "link": "",
+      "name": "SaladDays",
+      "description": "Mobile nutrition companion with computer vision calorie estimation and LLM coaching.",
+      "technologies": ["React Native", "Python", "Vision models", "Vector embeddings", "LLM"],
       "highlights": [
-        "Applies multimodal inference to improve food recognition accuracy and calorie estimation.",
-        "Integrates conversational coaching that adapts to user goals and nutrition insights.",
-        "Currently in App Store review with launch-ready onboarding and retention flows."
+        "Uses multimodal inference to improve food recognition accuracy.",
+        "Includes embedded coaching tuned for long-term nutritional adherence."
      ]
    },
    {
      "name": "ML Benchmarking Portal",
-      "description": "In-progress internal site to evaluate emerging ML models and track performance across ad optimization workloads.",
-      "technologies": ["FastAPI", "React", "Faiss", "LLM evaluation tooling"],
-      "link": "",
+      "description": "System for evaluating emerging ML models against ad optimization workloads.",
+      "technologies": ["FastAPI", "React", "Faiss", "Evaluation harnesses"],
       "highlights": [
-        "Aggregates dataset benchmarks, latency metrics, and cost curves for rapid model comparison.",
-        "Supports plug-and-play evaluation harnesses for new third-party and in-house models."
-      ]
-    },
-    {
-      "name": "Speech Applications (TTS/STT)",
-      "description": "Side projects experimenting with text-to-speech and speech-to-text pipelines for accessibility and creative tooling.",
-      "technologies": ["Python", "Hugging Face Transformers", "Whisper", "Tacotron"],
-      "link": "",
-      "highlights": [
-        "Built custom wrappers and deployment patterns to streamline multimodal experimentation.",
-        "Evaluated latency vs. quality trade-offs for productionizing voice-driven experiences."
+        "Tracks latency, cost, and quality metrics for drop-in model comparisons."
      ]
    }
  ],
 
-  "certifications": [],
-
-  "interests": [
-    "Applied machine learning for ad tech",
-    "Developer mentorship and leadership",
-    "Data visualization and storytelling",
-    "Digital health and wellness products",
-    "Scaling high-throughput platforms"
+  "education": [
+    {
+      "degree": "M.S. Computer Science (Software Engineering)",
+      "institution": "University of Maryland Global Campus"
+    },
+    {
+      "degree": "B.A. Digital Communication (Cum Laude)",
+      "institution": "University of Missouri"
+    },
+    {
+      "degree": "B.F.A. Product Design",
+      "institution": "Hongik University"
+    }
   ]
 }
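
Note that the new load_cv_data branch in app.py reads data["capabilities"], but no capabilities key appears in the cv_data.json hunks above, so that branch is currently a no-op. A sketch of the shape it expects; only the {"text": ...} structure comes from the app.py hunk, and the entry text here is illustrative:

import json

# Hypothetical capabilities entry matching the load_cv_data branch in app.py.
data = json.loads('{"capabilities": [{"text": "Ships RAG pipelines end to end."}]}')

text_parts = []
if "capabilities" in data:
    for cap in data["capabilities"]:
        text_parts.append(f"Capability: {cap['text']}")

print(text_parts)  # ['Capability: Ships RAG pipelines end to end.']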