llm evals
Browse files
app.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
|
| 3 |
-
from tool import rival_product
|
| 4 |
-
from graphrag import marketingPlan
|
| 5 |
-
from knowledge import graph
|
| 6 |
-
from pii import derisk
|
| 7 |
from classify import judge
|
| 8 |
from entity import resolve
|
|
|
|
| 9 |
from human import email, feedback
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# Define the Google Analytics script
|
| 12 |
head = """
|
|
@@ -94,11 +95,12 @@ Other Links:
|
|
| 94 |
|
| 95 |
gr.Examples(
|
| 96 |
[
|
| 97 |
-
[
|
|
|
|
| 98 |
],
|
| 99 |
[in_verbatim]
|
| 100 |
)
|
| 101 |
-
btn_recommend=gr.Button("Recommend")
|
| 102 |
btn_recommend.click(fn=rival_product, inputs=in_verbatim, outputs=out_product)
|
| 103 |
|
| 104 |
gr.Markdown("""
|
|
@@ -254,7 +256,7 @@ Representative: "Confirmed. Your next payment of $200 will process May 1st. A co
|
|
| 254 |
|
| 255 |
Customer: "No, thank you."
|
| 256 |
"""
|
| 257 |
-
|
| 258 |
],
|
| 259 |
[in_verbatim]
|
| 260 |
)
|
|
@@ -262,7 +264,6 @@ Customer: "No, thank you."
|
|
| 262 |
btn_clear = gr.ClearButton(components=[out_product])
|
| 263 |
btn_recommend.click(fn=graph, inputs=[in_verbatim, out_product], outputs=out_product)
|
| 264 |
|
| 265 |
-
|
| 266 |
gr.Markdown("""
|
| 267 |
Example of Customer Profile in Graph
|
| 268 |
=================
|
|
@@ -306,15 +307,15 @@ Once created, knowledge graphs can be repurposed across multiple use cases (e.g.
|
|
| 306 |
gr.Examples(
|
| 307 |
[
|
| 308 |
[
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
],
|
| 319 |
[in_verbatim]
|
| 320 |
)
|
|
@@ -333,7 +334,6 @@ Removes noise (e.g., irrelevant names or addresses) to make datasets cleaner and
|
|
| 333 |
Allows downstream tasks (like sentiment analysis or topic modeling) to focus on content rather than personal identifiers.
|
| 334 |
""")
|
| 335 |
|
| 336 |
-
|
| 337 |
with gr.Tab("Segmentation"):
|
| 338 |
gr.Markdown("""
|
| 339 |
Objective: Streamline Customer Insights: Auto-Classify Feedback for Product Optimization
|
|
@@ -353,14 +353,14 @@ Allows downstream tasks (like sentiment analysis or topic modeling) to focus on
|
|
| 353 |
gr.Examples(
|
| 354 |
[
|
| 355 |
[
|
| 356 |
-
|
| 357 |
-
"The online portal makes managing my mortgage payments so convenient.";
|
| 358 |
-
"RBC offer great mortgage for my home with competitive rate thank you";
|
| 359 |
-
"Low interest rate compared to other cards I’ve used. Highly recommend for responsible spenders.";
|
| 360 |
-
"The mobile check deposit feature saves me so much time. Banking made easy!";
|
| 361 |
-
"Affordable premiums with great coverage. Switched from my old provider and saved!"
|
| 362 |
-
|
| 363 |
-
|
| 364 |
],
|
| 365 |
[in_verbatim]
|
| 366 |
)
|
|
@@ -444,7 +444,7 @@ Customer: "No, thank you."
|
|
| 444 |
],
|
| 445 |
[in_verbatim]
|
| 446 |
)
|
| 447 |
-
btn_recommend=gr.Button("Resolve")
|
| 448 |
btn_recommend.click(fn=resolve, inputs=in_verbatim, outputs=out_product)
|
| 449 |
|
| 450 |
gr.Markdown("""
|
|
@@ -483,7 +483,9 @@ For example, Comcast reduced repeat service calls by 17% after deploying entity
|
|
| 483 |
|
| 484 |
gr.Examples(
|
| 485 |
[
|
| 486 |
-
[
|
|
|
|
|
|
|
| 487 |
["my credit card limit is too low, I need a card with bigger limit and low fee", "CARD"]
|
| 488 |
],
|
| 489 |
[in_verbatim, in_campaign]
|
|
@@ -541,50 +543,50 @@ For example, Comcast reduced repeat service calls by 17% after deploying entity
|
|
| 541 |
btn_recommend.click(fn=rbc_product, inputs=in_verbatim, outputs=out_product)
|
| 542 |
|
| 543 |
gr.Markdown("""
|
| 544 |
-
Companies pour millions into product catalogs, marketing funnels, and user acquisition—yet many still face the same challenge:
|
| 545 |
-
==================
|
| 546 |
-
### 📉 Pain points:
|
| 547 |
-
- High bounce rates and low conversion despite heavy traffic
|
| 548 |
-
- Customers struggle to find relevant products on their own
|
| 549 |
-
- One-size-fits-all promotions result in wasted ad spend and poor ROI
|
| 550 |
-
|
| 551 |
-
### 🧩 The real question:
|
| 552 |
-
What if your product catalog could *adapt itself* to each user in real time—just like your best salesperson would?
|
| 553 |
-
|
| 554 |
-
### 🎯 The customer need:
|
| 555 |
-
Businesses need a way to dynamically personalize product discovery, so every customer sees the most relevant items—without manually configuring hundreds of rules.
|
| 556 |
-
|
| 557 |
-
## ✅ Enter: Product Recommender Systems
|
| 558 |
-
|
| 559 |
-
By analyzing behavioral data, preferences, and historical purchases, a recommender engine surfaces what each user is most likely to want—boosting engagement and revenue.
|
| 560 |
-
|
| 561 |
-
### 📌 Real-world use cases:
|
| 562 |
-
- **Amazon** attributes up to 35% of its revenue to its recommender system, which tailors the home page, emails, and checkout cross-sells per user.
|
| 563 |
-
- **Netflix** leverages personalized content recommendations to reduce churn and increase watch time—saving the company over $1B annually in retention value.
|
| 564 |
-
- **Stitch Fix** uses machine learning-powered recommendations to curate clothing boxes tailored to individual style profiles—scaling personal styling.
|
| 565 |
-
|
| 566 |
-
### 💡 Business benefits:
|
| 567 |
-
- Higher conversion rates through relevant discovery
|
| 568 |
-
- Increased average order value (AOV) via cross-sell and upsell
|
| 569 |
-
- Improved retention and lower customer acquisition cost (CAC)
|
| 570 |
-
|
| 571 |
-
If your product discovery experience isn’t working as hard as your marketing budget, it’s time to make your catalog intelligent—with recommendations that convert.
|
| 572 |
""")
|
| 573 |
|
| 574 |
-
with gr.Tab("
|
| 575 |
gr.Markdown("""
|
| 576 |
-
🏦 LLMs for Application Security in Personal Banking
|
| 577 |
-
====================
|
| 578 |
-
What happens when your generative AI exposes customer data before you even launch?
|
| 579 |
-
|
| 580 |
-
LLM evals reduce security risks in generative AI banking apps by identifying vulnerabilities and guiding secure fixes.
|
| 581 |
-
|
| 582 |
-
Personal banking apps increasingly rely on generative AI—but insecure logic and hallucinations expose sensitive customer data. LLM evals help assess code and AI-generated responses for correctness, task completion, hallucination risk, and safety—enabling proactive guardrails against vulnerabilities before deployment.
|
| 583 |
-
|
| 584 |
-
I’ve led cross-functional model risk initiatives, building pipelines that transform LLM evaluations into automated alerts and remediation workflows—strengthening regulatory compliance and protecting customer trust.
|
| 585 |
-
|
| 586 |
-
Using open-source frameworks, I identify flaws in LLM prompt and translate risks into explainable insights for business, risk, and engineering stakeholders.
|
| 587 |
-
https://postimg.cc/3WtG4ZK2
|
| 588 |
""")
|
| 589 |
|
| 590 |
-
demo.launch(allowed_paths=["."])
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from classify import judge
|
| 4 |
from entity import resolve
|
| 5 |
+
from graphrag import marketingPlan
|
| 6 |
from human import email, feedback
|
| 7 |
+
from knowledge import graph
|
| 8 |
+
from pii import derisk
|
| 9 |
+
from rag import rbc_product
|
| 10 |
+
from tool import rival_product
|
| 11 |
|
| 12 |
# Define the Google Analytics script
|
| 13 |
head = """
|
|
|
|
| 95 |
|
| 96 |
gr.Examples(
|
| 97 |
[
|
| 98 |
+
[
|
| 99 |
+
"Low APR and great customer service. I would highly recommend if you’re looking for a great credit card company and looking to rebuild your credit. I have had my credit limit increased annually and the annual fee is very low."]
|
| 100 |
],
|
| 101 |
[in_verbatim]
|
| 102 |
)
|
| 103 |
+
btn_recommend = gr.Button("Recommend")
|
| 104 |
btn_recommend.click(fn=rival_product, inputs=in_verbatim, outputs=out_product)
|
| 105 |
|
| 106 |
gr.Markdown("""
|
|
|
|
| 256 |
|
| 257 |
Customer: "No, thank you."
|
| 258 |
"""
|
| 259 |
+
]
|
| 260 |
],
|
| 261 |
[in_verbatim]
|
| 262 |
)
|
|
|
|
| 264 |
btn_clear = gr.ClearButton(components=[out_product])
|
| 265 |
btn_recommend.click(fn=graph, inputs=[in_verbatim, out_product], outputs=out_product)
|
| 266 |
|
|
|
|
| 267 |
gr.Markdown("""
|
| 268 |
Example of Customer Profile in Graph
|
| 269 |
=================
|
|
|
|
| 307 |
gr.Examples(
|
| 308 |
[
|
| 309 |
[
|
| 310 |
+
"""
|
| 311 |
+
He Hua (Hua Hua) Director
|
| 312 |
+
hehua@chengdu.com
|
| 313 |
+
+86-28-83505513
|
| 314 |
+
|
| 315 |
+
Alternative Address Format:
|
| 316 |
+
Xiongmao Ave West Section, Jinniu District (listed in some records as 610016 postcode)
|
| 317 |
+
"""
|
| 318 |
+
]
|
| 319 |
],
|
| 320 |
[in_verbatim]
|
| 321 |
)
|
|
|
|
| 334 |
Allows downstream tasks (like sentiment analysis or topic modeling) to focus on content rather than personal identifiers.
|
| 335 |
""")
|
| 336 |
|
|
|
|
| 337 |
with gr.Tab("Segmentation"):
|
| 338 |
gr.Markdown("""
|
| 339 |
Objective: Streamline Customer Insights: Auto-Classify Feedback for Product Optimization
|
|
|
|
| 353 |
gr.Examples(
|
| 354 |
[
|
| 355 |
[
|
| 356 |
+
"""
|
| 357 |
+
"The online portal makes managing my mortgage payments so convenient.";
|
| 358 |
+
"RBC offer great mortgage for my home with competitive rate thank you";
|
| 359 |
+
"Low interest rate compared to other cards I’ve used. Highly recommend for responsible spenders.";
|
| 360 |
+
"The mobile check deposit feature saves me so much time. Banking made easy!";
|
| 361 |
+
"Affordable premiums with great coverage. Switched from my old provider and saved!"
|
| 362 |
+
"""
|
| 363 |
+
]
|
| 364 |
],
|
| 365 |
[in_verbatim]
|
| 366 |
)
|
|
|
|
| 444 |
],
|
| 445 |
[in_verbatim]
|
| 446 |
)
|
| 447 |
+
btn_recommend = gr.Button("Resolve")
|
| 448 |
btn_recommend.click(fn=resolve, inputs=in_verbatim, outputs=out_product)
|
| 449 |
|
| 450 |
gr.Markdown("""
|
|
|
|
| 483 |
|
| 484 |
gr.Examples(
|
| 485 |
[
|
| 486 |
+
[
|
| 487 |
+
"""My mortgage was assumed by Bank of America when Countrywide mortgages ceased to do business. My mortgage increased without any explanation. When I inquired, they stumbled and gave me the run around. I’d NEVER do business with Bank of America again""",
|
| 488 |
+
"MORT"],
|
| 489 |
["my credit card limit is too low, I need a card with bigger limit and low fee", "CARD"]
|
| 490 |
],
|
| 491 |
[in_verbatim, in_campaign]
|
|
|
|
| 543 |
btn_recommend.click(fn=rbc_product, inputs=in_verbatim, outputs=out_product)
|
| 544 |
|
| 545 |
gr.Markdown("""
|
| 546 |
+
Companies pour millions into product catalogs, marketing funnels, and user acquisition—yet many still face the same challenge:
|
| 547 |
+
==================
|
| 548 |
+
### 📉 Pain points:
|
| 549 |
+
- High bounce rates and low conversion despite heavy traffic
|
| 550 |
+
- Customers struggle to find relevant products on their own
|
| 551 |
+
- One-size-fits-all promotions result in wasted ad spend and poor ROI
|
| 552 |
+
|
| 553 |
+
### 🧩 The real question:
|
| 554 |
+
What if your product catalog could *adapt itself* to each user in real time—just like your best salesperson would?
|
| 555 |
+
|
| 556 |
+
### 🎯 The customer need:
|
| 557 |
+
Businesses need a way to dynamically personalize product discovery, so every customer sees the most relevant items—without manually configuring hundreds of rules.
|
| 558 |
+
|
| 559 |
+
## ✅ Enter: Product Recommender Systems
|
| 560 |
+
|
| 561 |
+
By analyzing behavioral data, preferences, and historical purchases, a recommender engine surfaces what each user is most likely to want—boosting engagement and revenue.
|
| 562 |
+
|
| 563 |
+
### 📌 Real-world use cases:
|
| 564 |
+
- **Amazon** attributes up to 35% of its revenue to its recommender system, which tailors the home page, emails, and checkout cross-sells per user.
|
| 565 |
+
- **Netflix** leverages personalized content recommendations to reduce churn and increase watch time—saving the company over $1B annually in retention value.
|
| 566 |
+
- **Stitch Fix** uses machine learning-powered recommendations to curate clothing boxes tailored to individual style profiles—scaling personal styling.
|
| 567 |
+
|
| 568 |
+
### 💡 Business benefits:
|
| 569 |
+
- Higher conversion rates through relevant discovery
|
| 570 |
+
- Increased average order value (AOV) via cross-sell and upsell
|
| 571 |
+
- Improved retention and lower customer acquisition cost (CAC)
|
| 572 |
+
|
| 573 |
+
If your product discovery experience isn’t working as hard as your marketing budget, it’s time to make your catalog intelligent—with recommendations that convert.
|
| 574 |
""")
|
| 575 |
|
| 576 |
+
with gr.Tab("Eval"):
|
| 577 |
gr.Markdown("""
|
| 578 |
+
🏦 LLMs for Application Security in Personal Banking
|
| 579 |
+
====================
|
| 580 |
+
What happens when your generative AI exposes customer data before you even launch?
|
| 581 |
+
|
| 582 |
+
LLM evals reduce security risks in generative AI banking apps by identifying vulnerabilities and guiding secure fixes.
|
| 583 |
+
|
| 584 |
+
Personal banking apps increasingly rely on generative AI—but insecure logic and hallucinations expose sensitive customer data. LLM evals help assess code and AI-generated responses for correctness, task completion, hallucination risk, and safety—enabling proactive guardrails against vulnerabilities before deployment.
|
| 585 |
+
|
| 586 |
+
I’ve led cross-functional model risk initiatives, building pipelines that transform LLM evaluations into automated alerts and remediation workflows—strengthening regulatory compliance and protecting customer trust.
|
| 587 |
+
|
| 588 |
+
Using open-source frameworks, I identify flaws in LLM prompts and translate risks into explainable insights for business, risk, and engineering stakeholders.
|
| 589 |
+
https://postimg.cc/3WtG4ZK2
|
| 590 |
""")
|
| 591 |
|
| 592 |
+
demo.launch(allowed_paths=["."])
|