Bi Yoo committed
Commit aae1ac3 · 1 Parent(s): 588ba9b

adding context window

Files changed (3):
  1. app.py +35 -13
  2. config.py +1 -1
  3. cv_data.json +56 -89
app.py CHANGED
@@ -120,8 +120,13 @@ app.add_middleware(
 )
 
 # Pydantic models
+class ChatMessage(BaseModel):
+    role: str  # "user" or "assistant"
+    content: str
+
 class ChatRequest(BaseModel):
     message: str
+    history: List[ChatMessage] = []  # Optional chat history (sliding window)
 
 class ChatResponse(BaseModel):
     response: str
@@ -278,6 +283,11 @@ def load_cv_data(file_path: str = "cv_data.json") -> str:
     if "summary" in data:
         text_parts.append(f"Professional Summary: {data['summary']}")
 
+    # Capabilities (pre-defined chunks for direct embedding)
+    if "capabilities" in data:
+        for cap in data["capabilities"]:
+            text_parts.append(f"Capability: {cap['text']}")
+
     # Skills
     if "skills" in data:
         for category, items in data["skills"].items():
@@ -477,7 +487,7 @@ def retrieve_relevant_chunks(query: str, top_k: int = TOP_K_RESULTS) -> List[str
     return relevant_chunks
 
 
-def generate_response_beam(system_prompt: str, user_prompt: str) -> str:
+def generate_response_beam(system_prompt: str, user_prompt: str, history: List[Dict] = None) -> str:
     """Generate response using Beam vLLM service (Qwen3 4B Instruct on GPU)."""
     import requests
 
@@ -486,13 +496,16 @@ def generate_response_beam(system_prompt: str, user_prompt: str) -> str:
 
     api_url = f"{BEAM_API_URL.rstrip('/')}/v1/chat/completions"
 
+    # Build messages array with history
+    messages = [{"role": "system", "content": system_prompt}]
+    if history:
+        messages.extend(history)
+    messages.append({"role": "user", "content": user_prompt})
+
     payload = {
         "model": "Qwen/Qwen3-4B-Instruct-2507",
-        "messages": [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ],
-        "max_tokens": 100,
+        "messages": messages,
+        "max_tokens": 200,  # Increased for complete responses on GPU
         "temperature": 0.3,
         "top_p": 0.7,
     }
@@ -560,7 +573,7 @@ def generate_response_huggingface(prompt: str) -> str:
         raise HTTPException(status_code=500, detail=f"HuggingFace API error: {str(e)}")
 
 
-def generate_response_local(system_prompt: str, user_prompt: str) -> str:
+def generate_response_local(system_prompt: str, user_prompt: str, history: List[Dict] = None) -> str:
     """Generate response using a locally hosted quantized model."""
     global llm_client
 
@@ -572,11 +585,15 @@ def generate_response_local(system_prompt: str, user_prompt: str) -> str:
     if os.getenv("DEBUG_LOCAL_PROMPT", "0") == "1":
         preview = user_prompt if len(user_prompt) < 400 else user_prompt[:400] + "..."
         print("Local prompt =>", preview)
+
+    # Build messages array with history
+    messages = [{"role": "system", "content": system_prompt}]
+    if history:
+        messages.extend(history)
+    messages.append({"role": "user", "content": user_prompt})
+
     completion = llm_client.create_chat_completion(
-        messages=[
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ],
+        messages=messages,
         max_tokens=LOCAL_MODEL_MAX_OUTPUT_TOKENS,
         temperature=0.3,
         top_p=0.7,
@@ -605,6 +622,7 @@ def generate_response(
     question: str,
     original_question: str | None = None,
     assistant_query: bool = False,
+    history: List[Dict] = None,
 ) -> str:
     """Generate response using configured LLM provider"""
     system_prompt = SYSTEM_PROMPT.strip()
@@ -619,11 +637,11 @@ Answer:"""
     combined_prompt = f"{system_prompt}\n\n{user_prompt}"
 
     if LLM_PROVIDER == "beam":
-        return generate_response_beam(system_prompt, user_prompt)
+        return generate_response_beam(system_prompt, user_prompt, history)
     elif LLM_PROVIDER == "huggingface":
         return generate_response_huggingface(combined_prompt)
     elif LLM_PROVIDER == "local":
-        return generate_response_local(system_prompt, user_prompt)
+        return generate_response_local(system_prompt, user_prompt, history)
     else:
         raise ValueError(f"Unsupported LLM provider: {LLM_PROVIDER}")
 
@@ -667,11 +685,15 @@ async def chat(request: ChatRequest, _: None = Depends(verify_client_access)):
     # Build context from chunks
     context = "\n\n".join(relevant_chunks)
 
+    # Convert history to dict format
+    history_dicts = [{"role": msg.role, "content": msg.content} for msg in request.history] if request.history else None
+
     # Generate response
     response = generate_response(
         context,
         request.message,
        original_question=request.message,
+        history=history_dicts,
     )
 
     return ChatResponse(
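
The same messages-assembly block is now duplicated in generate_response_beam and generate_response_local. A small helper would keep the two providers in sync; this is a sketch of a possible refactor, not part of the commit:

from typing import Dict, List, Optional

def build_messages(system_prompt: str, user_prompt: str,
                   history: Optional[List[Dict]] = None) -> List[Dict]:
    """Assemble the chat messages: system prompt, optional history window, then the user turn."""
    messages = [{"role": "system", "content": system_prompt}]
    if history:
        messages.extend(history)  # prior turns, oldest first
    messages.append({"role": "user", "content": user_prompt})
    return messages

Also note the server forwards request.history as-is and nothing in this diff trims it, so the "sliding window" in the ChatRequest comment is the caller's job. A minimal client sketch, assuming a local dev server at http://localhost:8000 and ignoring whatever credentials verify_client_access expects:

import requests

BASE_URL = "http://localhost:8000"  # assumption: local dev server
WINDOW = 6  # assumption: keep only the last 3 user/assistant exchanges

def ask(message: str, history: list) -> str:
    """POST a question plus a trimmed history window to /chat."""
    payload = {"message": message, "history": history[-WINDOW:]}
    resp = requests.post(f"{BASE_URL}/chat", json=payload, timeout=60)
    resp.raise_for_status()
    return resp.json()["response"]

history = []
answer = ask("What does Bi do at Insticator?", history)
history += [{"role": "user", "content": "What does Bi do at Insticator?"},
            {"role": "assistant", "content": answer}]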
config.py CHANGED
@@ -41,7 +41,7 @@ SESSION_TOKEN_TTL_SECONDS = int(os.getenv("SESSION_TOKEN_TTL_SECONDS", "600"))
 EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # Fast, lightweight
 CHUNK_SIZE = 300  # Characters per chunk (reduced for faster inference)
 CHUNK_OVERLAP = 30  # Overlap between chunks
-TOP_K_RESULTS = 1  # Fewer chunks lowers prompt size on small CPU tiers
+TOP_K_RESULTS = 3  # Retrieve top 3 most relevant chunks (more context for GPU inference)
 
 # System prompt for the chatbot
 SYSTEM_PROMPT = """Answer questions about Bi using the provided context. Keep answers short and direct. Always refer to Bi by name."""
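
For a rough sense of what the TOP_K_RESULTS bump costs, retrieved context grows linearly with top_k at a fixed CHUNK_SIZE. A back-of-envelope sketch, assuming the common ~4-characters-per-token heuristic:

CHUNK_SIZE = 300
CHARS_PER_TOKEN = 4  # assumption: rough average for English prose

for top_k in (1, 3):
    chars = top_k * CHUNK_SIZE + (top_k - 1) * 2  # chunks joined with "\n\n"
    print(f"top_k={top_k}: ~{chars} chars, ~{chars // CHARS_PER_TOKEN} tokens of context")

That is roughly 75 versus 226 tokens of retrieved context, which is small for the GPU tier this comment targets.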
cv_data.json CHANGED
@@ -2,7 +2,7 @@
   "personal_info": {
     "name": "Bi Yoo",
     "title": "Lead Software Engineer & Technical Lead",
-    "bio": "Seasoned full-stack and machine learning-focused tech lead building revenue-driving ad tech platforms, data products, and developer tooling.",
+    "bio": "Senior engineer delivering production AI systems, large-scale ad tech platforms, and data pipelines with measurable revenue impact.",
     "location": "Minnesota, USA",
     "email": "yoobi.dev@gmail.com",
     "phone": "952-567-3505",
@@ -13,15 +13,23 @@
     "gender": "male"
   },
 
-  "summary": "Tech lead with a decade of experience shipping large-scale ad tech, data, and ML systems. Drives architecture across Java, Go, and Python services, mentors multi-disciplinary teams, and delivers measurable revenue impact through experimentation and applied machine learning.",
+  "summary": "Senior software engineer and technical lead with deep experience building revenue-focused ad tech systems, high-throughput backend services, data pipelines, and production-ready machine learning applications. Strong end-to-end execution: architecture, implementation, deployment, and product integration. Proven track record in raising system reliability, improving developer velocity, and shipping pragmatic AI-powered features into real products.",
+
+  "core_competencies": [
+    "Applied Machine Learning (RAG pipelines, embeddings, vector search, latency optimization)",
+    "Backend & Distributed Systems (high-throughput services, microservices, streaming data)",
+    "Ad Tech Systems (header bidding, forecasting, payout reconciliation, analytics surfaces)",
+    "MLOps & Data Engineering (Airflow, Kafka, Snowflake, Druid, feature generation)",
+    "Technical Leadership (mentoring, architecture direction, cross-team collaboration)"
+  ],
 
   "skills": {
-    "programming_languages": ["Python", "JavaScript", "TypeScript", "Java", "Go", "SQL"],
-    "frameworks": ["React", "React Native", "Vue", "Angular", "Spring Boot", "Express", "FastAPI", "Django"],
-    "ml_and_data": ["RAG pipelines", "Forecasting models", "TTS/STT", "Vector search", "Feature engineering"],
-    "datastores": ["Snowflake", "Apache Druid", "MongoDB", "PostgreSQL", "MySQL", "OracleSQL"],
-    "tools": ["AWS", "Kubernetes", "Docker", "Airflow", "Kafka", "CircleCI", "Jenkins", "Git", "Terraform"],
-    "soft_skills": ["Technical leadership", "Cross-functional collaboration", "Mentoring", "Strategic planning", "Stakeholder communication"]
+    "programming_languages": ["Python", "Go", "Java", "TypeScript", "JavaScript", "SQL", "Rust", "Swift"],
+    "ml_and_data": ["RAG architectures", "Vector search (Faiss/Milvus/pgvector)", "Embedding evaluation", "Whisper/TTS", "Forecasting models", "Feature engineering"],
+    "frameworks": ["FastAPI", "Django", "Spring Boot", "React", "React Native"],
+    "datastores": ["Snowflake", "Apache Druid", "PostgreSQL", "MongoDB"],
+    "tools": ["AWS", "Kubernetes", "Docker", "Terraform", "Kafka", "Airflow", "CircleCI", "Jenkins", "Git"],
+    "soft_skills": ["Technical leadership", "Mentoring", "Roadmapping", "Stakeholder alignment"]
   },
 
   "experience": [
@@ -30,15 +38,15 @@
       "company": "Insticator",
       "location": "Remote, USA",
       "duration": "Dec 2021 - Present",
-      "description": "Tech lead overseeing ad monetization platforms, ML initiatives, and full-stack delivery for publisher revenue products.",
+      "description": "Tech lead for ad monetization and AI-driven revenue intelligence platforms.",
       "achievements": [
-        "Architected ML wrappers that power interactive site experiences, including multimodal RAG pipelines for content generation and campaign insights.",
-        "Delivered ad performance forecasting models that inform bidding strategies and revenue planning across 2,000+ publisher properties.",
-        "Built and productionized Go-based services for ad exchange bidding and real-time pixel tracking, integrating with existing Java services.",
-        "Designed analytics workflows that combine Snowflake and Apache Druid to surface revenue, engagement, and latency KPI dashboards with sub-second query times.",
-        "Authored and maintained Airflow DAGs and Kafka streaming jobs that ingest SSP and ad server payout data, automating onboarding and reconciliation tasks.",
-        "Drove engineering excellence by mentoring a distributed team of developers, reviewing architecture, and increasing sprint throughput by ~20% through codebase modernization.",
-        "Partnered with product and revenue stakeholders to prioritize experimentation, including AWS Lambda@Edge-based A/B testing for header bidding clients that lifted revenue per ad unit by ~30%."
+        "Designed and shipped multiple RAG pipelines, including embedding stores, retrieval logic, prompt orchestration, and evaluation harnesses.",
+        "Built high-throughput Go and Java services for ad exchange bidding and real-time pixel/event collection.",
+        "Delivered revenue forecasting models informing bidding and inventory planning across thousands of publisher properties.",
+        "Built combined Snowflake + Druid analytics layers enabling sub-second revenue and engagement queries.",
+        "Developed streaming ingestion pipelines using Kafka and Airflow to automate payout reconciliation and reporting.",
+        "Mentored distributed engineering team and raised delivery velocity through architecture workflows and refactors.",
+        "Led experimentation frameworks using Lambda@Edge to run A/B header bidding strategies, increasing revenue per ad unit."
       ]
     },
     {
@@ -46,13 +54,13 @@
       "company": "Protenus",
       "location": "Baltimore, MD (Remote)",
       "duration": "Aug 2020 - Dec 2021",
-      "description": "Module lead for healthcare compliance analytics platform spanning UI, API, and data pipelines.",
+      "description": "Module lead for healthcare compliance analytics platform.",
       "achievements": [
-        "Led development of mission-critical React and Spring Boot features that processed high-volume EHR data from Epic and Cerner systems.",
-        "Raised average automated test coverage from near-zero to 80% across front-end and API codebases through tooling, reviews, and mentoring.",
-        "Architected hospital workforce analytics dashboards, surfacing ETL pipeline health and anomaly detection insights for compliance teams.",
-        "Optimized MongoDB-backed services to reduce response times and improve reliability for clinical operations users.",
-        "Collaborated with data science teams to productionize ML features and delivered developer tooling that accelerated release cadence."
+        "Led development of React + Spring Boot features processing large-scale EHR data streams.",
+        "Increased automated test coverage to ~80% across UI and API systems.",
+        "Shipped workforce analytics dashboards surfacing anomaly detection and ETL health signals.",
+        "Improved MongoDB-backed services for performance and reliability in clinical operations workflows.",
+        "Integrated ML-driven features into product surfaces through repeatable deployment patterns."
       ]
     },
     {
@@ -60,89 +68,48 @@
       "company": "PreciseTarget",
       "location": "Washington, D.C.",
       "duration": "Jan 2018 - Aug 2020",
-      "description": "Full-stack engineer building retail recommendation systems and large-scale data processing pipelines.",
+      "description": "Full-stack engineer working on retail recommendation systems and data ingestion.",
       "achievements": [
-        "Developed React and Vue applications surfacing >50M SKUs with advanced filtering, analytics, and personalization.",
-        "Implemented Node.js and Python services for catalog ingestion, event tracking, and data validation.",
-        "Created end-to-end integration test frameworks within CircleCI to safeguard complex merchandising logic.",
-        "Refined PostgreSQL middleware to improve query speed, data integrity, and resilience for retail data pipelines.",
-        "Mentored junior engineers and codified best practices for front-end architecture and deployment workflows."
-      ]
-    },
-    {
-      "title": "Full-stack Engineer & Consultant (Various Contracts)",
-      "company": "Meaningful Gigs, SL Technology, Brivo, The Washington Post, AList Magazine",
-      "location": "Washington, D.C. Metro Area",
-      "duration": "Apr 2014 - Jan 2019",
-      "description": "Delivered end-to-end web and mobile solutions across media, design, and manufacturing clients.",
-      "achievements": [
-        "Shipped responsive web applications using React, Laravel, AWS Lambda, and MongoDB to modernize content workflows.",
-        "Designed reusable component libraries, testing frameworks, and CI/CD pipelines to accelerate delivery for client teams.",
-        "Built internal tooling in Objective-C, PHP, and Python to automate content publishing and analytics.",
-        "Partnered with stakeholders to define product strategy, manage releases, and mentor cross-functional contributors."
+        "Built React/Vue analytics surfaces surfacing large SKU sets and personalization insights.",
+        "Implemented ingestion, event tracking, and validation services in Node.js and Python.",
+        "Improved PostgreSQL efficiency and reliability for merchandising data pipelines.",
+        "Established CI-driven integration testing frameworks to safeguard core algorithms."
       ]
     }
   ],
 
-  "education": [
-    {
-      "degree": "Master of Science, Computer Science (Software Engineering)",
-      "institution": "University of Maryland Global Campus",
-      "location": "Maryland, USA"
-    },
-    {
-      "degree": "Bachelor of Arts, Digital Communication (Cum Laude)",
-      "institution": "University of Missouri",
-      "location": "Missouri, USA"
-    },
-    {
-      "degree": "Bachelor of Fine Arts, Product Design",
-      "institution": "Hongik University",
-      "location": "Seoul, South Korea"
-    }
-  ],
-
   "projects": [
     {
-      "name": "SaladDays (Mobile App)",
-      "description": "A health and nutrition companion app using computer vision and vector embeddings to provide calorie estimates, alongside an LLM-powered coaching chat experience.",
-      "technologies": ["React Native", "Python", "Vision AI", "Vector embeddings", "LLM"],
-      "link": "",
+      "name": "SaladDays",
+      "description": "Mobile nutrition companion with computer vision calorie estimation and LLM coaching.",
+      "technologies": ["React Native", "Python", "Vision models", "Vector embeddings", "LLM"],
       "highlights": [
-        "Applies multimodal inference to improve food recognition accuracy and calorie estimation.",
-        "Integrates conversational coaching that adapts to user goals and nutrition insights.",
-        "Currently in App Store review with launch-ready onboarding and retention flows."
+        "Uses multimodal inference to improve food recognition accuracy.",
+        "Includes embedded coaching tuned for long-term nutritional adherence."
      ]
    },
    {
      "name": "ML Benchmarking Portal",
-      "description": "In-progress internal site to evaluate emerging ML models and track performance across ad optimization workloads.",
-      "technologies": ["FastAPI", "React", "Faiss", "LLM evaluation tooling"],
-      "link": "",
+      "description": "System for evaluating emerging ML models against ad optimization workloads.",
+      "technologies": ["FastAPI", "React", "Faiss", "Evaluation harnesses"],
       "highlights": [
-        "Aggregates dataset benchmarks, latency metrics, and cost curves for rapid model comparison.",
-        "Supports plug-and-play evaluation harnesses for new third-party and in-house models."
-      ]
-    },
-    {
-      "name": "Speech Applications (TTS/STT)",
-      "description": "Side projects experimenting with text-to-speech and speech-to-text pipelines for accessibility and creative tooling.",
-      "technologies": ["Python", "Hugging Face Transformers", "Whisper", "Tacotron"],
-      "link": "",
-      "highlights": [
-        "Built custom wrappers and deployment patterns to streamline multimodal experimentation.",
-        "Evaluated latency vs. quality trade-offs for productionizing voice-driven experiences."
+        "Tracks latency, cost, and quality metrics for drop-in model comparisons."
      ]
    }
  ],
 
-  "certifications": [],
-
-  "interests": [
-    "Applied machine learning for ad tech",
-    "Developer mentorship and leadership",
-    "Data visualization and storytelling",
-    "Digital health and wellness products",
-    "Scaling high-throughput platforms"
+  "education": [
+    {
+      "degree": "M.S. Computer Science (Software Engineering)",
+      "institution": "University of Maryland Global Campus"
+    },
+    {
+      "degree": "B.A. Digital Communication (Cum Laude)",
+      "institution": "University of Missouri"
+    },
+    {
+      "degree": "B.F.A. Product Design",
+      "institution": "Hongik University"
+    }
   ]
 }
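
Note that the new load_cv_data branch in app.py reads data["capabilities"], but no capabilities key appears in the cv_data.json hunks above, so that branch is currently a no-op. A sketch of the shape it expects; only the {"text": ...} structure comes from the app.py hunk, and the entry text here is illustrative:

import json

# Hypothetical capabilities entry matching the load_cv_data branch in app.py.
data = json.loads('{"capabilities": [{"text": "Ships RAG pipelines end to end."}]}')

text_parts = []
if "capabilities" in data:
    for cap in data["capabilities"]:
        text_parts.append(f"Capability: {cap['text']}")

print(text_parts)  # ['Capability: Ships RAG pipelines end to end.']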