Tulitula committed on
Commit
db76cb7
·
verified ·
1 Parent(s): 8dac605

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +282 -0
app.py CHANGED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import ast
4
+ import json
5
+ import math
6
+ import time
7
+ import faiss
8
+ import numpy as np
9
+ import pandas as pd
10
+ import matplotlib.pyplot as plt
11
+ from typing import List, Tuple
12
+ from sklearn.preprocessing import StandardScaler
13
+ from sentence_transformers import SentenceTransformer
14
+ import gradio as gr
15
+
16
+ # --------------------
17
+ # Finance parameters
18
+ # --------------------
19
# Asset universe. BETAS and SIGMAS are positional companions of TICKERS:
# entry i of each list is combined with the weight of TICKERS[i] by
# portfolio_beta / portfolio_sigma.
TICKERS = [
    "VOO", "QQQ", "VNQ", "IEF", "HYG",
    "GLD", "EEM", "XLP", "XLK", "XLE",
]
BETAS = [
    1.00, 1.25, 0.60, 0.10, 0.40,
    0.10, 1.10, 0.70, 1.20, 1.10,
]
SIGMAS = [
    0.16, 0.25, 0.18, 0.05, 0.10,
    0.14, 0.22, 0.12, 0.20, 0.22,
]

# Defaults for the CAPM inputs and for the single pairwise correlation that
# portfolio_sigma applies to every off-diagonal covariance entry.
DEFAULT_RF = 0.03
DEFAULT_MKT_PREM = 0.05
DEFAULT_CORR = 0.2

# Location of the cached portfolio dataset.
DATA_PATH = "data"
CSV_PATH = os.path.join(DATA_PATH, "portfolios.csv")
28
+
29
+ # --------------------
30
+ # Helpers
31
+ # --------------------
32
def ensure_data_dir():
    """Create the ``DATA_PATH`` directory; do nothing if it already exists."""
    os.makedirs(name=DATA_PATH, exist_ok=True)
34
+
35
def normalize_weights(w: np.ndarray) -> np.ndarray:
    """Return non-negative weights rescaled so that they sum to one.

    Args:
        w: Array-like of raw weights.

    Returns:
        A float array with the same shape as ``w``. Negative and non-finite
        (NaN / +-inf) entries are treated as zero before rescaling. When
        nothing positive remains, the weight is spread equally over all
        entries. An empty input yields an empty array.
    """
    arr = np.array(w, dtype=float)
    # A single NaN would otherwise poison the sum and turn every weight into
    # NaN; an infinite entry would produce inf/inf = NaN.
    arr = np.where(np.isfinite(arr), arr, 0.0)
    arr = np.clip(arr, 0.0, None)
    if arr.size == 0:
        return arr
    total = arr.sum()
    if total <= 0:
        # Nothing usable was supplied: fall back to equal weights.
        return np.full_like(arr, 1.0 / arr.size)
    return arr / total
41
+
42
def portfolio_sigma(weights: np.ndarray, sigmas: List[float], corr: float = DEFAULT_CORR) -> float:
    """Return the portfolio standard deviation under one shared correlation.

    The covariance matrix has ``sigma_i ** 2`` on the diagonal and
    ``corr * sigma_i * sigma_j`` everywhere else.

    Args:
        weights: Portfolio weights, one per asset.
        sigmas: Per-asset standard deviations, in the same order as ``weights``.
        corr: Correlation applied to every pair of distinct assets.

    Returns:
        ``sqrt(w' * Cov * w)`` as a Python float.
    """
    vol = np.array(sigmas, dtype=float)
    alloc = np.array(weights, dtype=float)
    covariance = corr * np.outer(vol, vol)
    # Overwrite the diagonal: an asset's covariance with itself is its variance.
    covariance[np.diag_indices_from(covariance)] = vol ** 2
    variance = float(alloc @ covariance @ alloc)
    return math.sqrt(variance)
49
+
50
def portfolio_beta(weights: np.ndarray, betas: List[float]) -> float:
    """Return the weighted sum of ``betas`` using ``weights`` (a dot product)."""
    beta_vec = np.array(betas, dtype=float)
    return float(np.dot(weights, beta_vec))
52
+
53
def capm_expected_return(beta: float, rf: float, mkt_prem: float) -> float:
    """Return ``rf + beta * mkt_prem`` as a Python float."""
    risk_premium = beta * mkt_prem
    return float(rf + risk_premium)
55
+
56
def synth_profile(seed: int) -> str:
    """Build a synthetic investor-profile sentence that is deterministic in ``seed``.

    A generator seeded with ``seed`` picks one risk attitude, one horizon and
    one goal, in that order, and the three picks are formatted into a single
    sentence.
    """
    risk_levels = ["cautious", "balanced", "moderate", "growth", "aggressive"]
    horizons = ["three years", "five years", "seven years", "ten years", "fifteen years"]
    goals = [
        "retirement savings",
        "first home",
        "education fund",
        "wealth building",
        "travel fund",
        "emergency buffer",
    ]

    rng = np.random.default_rng(seed)
    # The draw order (risk, horizon, goal) determines which text a given seed
    # produces, so it must not be rearranged.
    risk, horizon, goal = [rng.choice(options) for options in (risk_levels, horizons, goals)]
    return f"{risk} investor, {horizon} horizon, goal is {goal}."
69
+
70
def make_one_row(pid: int, rf: float, mkt_prem: float, corr: float) -> dict:
    """Build one synthetic portfolio record for the dataset.

    Args:
        pid: Row identifier; also determines the random weights and the
            profile text, so the same ``pid`` always yields the same row.
        rf: Risk-free rate passed to the CAPM expected-return calculation.
        mkt_prem: Market premium passed to the CAPM expected-return calculation.
        corr: Pairwise correlation passed to ``portfolio_sigma``.

    Returns:
        A dict with keys ``id``, ``profile_text``, ``tickers``, ``weights``
        (comma-separated, 4 decimals), ``beta_p``, ``er_p`` and ``sigma_p``.
    """
    # Seed the weight draw from pid, as the profile text already is, so the
    # generated dataset is identical on every run instead of depending on the
    # global NumPy random state. The extra entropy word keeps this stream
    # distinct from the one synth_profile builds from 1000 + pid.
    rng = np.random.default_rng([1000 + pid, 1])
    w = rng.dirichlet(np.ones(len(TICKERS)))
    b = portfolio_beta(w, BETAS)
    er = capm_expected_return(b, rf, mkt_prem)
    s = portfolio_sigma(w, SIGMAS, corr=corr)
    return {
        "id": pid,
        "profile_text": synth_profile(1000 + pid),
        "tickers": ",".join(TICKERS),
        "weights": ",".join(f"{x:.4f}" for x in w),
        "beta_p": b,
        "er_p": er,
        "sigma_p": s,
    }
84
+
85
def generate_small_dataset(n: int = 300,
                           rf: float = DEFAULT_RF,
                           mkt_prem: float = DEFAULT_MKT_PREM,
                           corr: float = DEFAULT_CORR) -> pd.DataFrame:
    """Return a DataFrame of ``n`` synthetic portfolios with ids ``0 .. n-1``.

    Each row is produced by ``make_one_row`` with the given ``rf``,
    ``mkt_prem`` and ``corr``.
    """
    records = []
    for pid in range(n):
        records.append(make_one_row(pid, rf, mkt_prem, corr))
    return pd.DataFrame(records)
91
+
92
def _legacy_weights_to_csv(cell) -> str:
    """Convert a weights cell written as list text into ``"w1,w2,..."`` with 4 decimals."""
    return ",".join(f"{float(x):.4f}" for x in ast.literal_eval(cell))


def load_or_build_csv() -> pd.DataFrame:
    """Load the cached portfolio dataset, generating it on first use.

    If ``CSV_PATH`` exists it is read and returned. Files whose first
    ``weights`` cell is list text (contains ``[``) are converted to the
    comma-separated format. Otherwise a small synthetic dataset is generated,
    written to ``CSV_PATH`` and returned.

    Returns:
        The portfolio DataFrame.
    """
    ensure_data_dir()
    if not os.path.exists(CSV_PATH):
        # Build a small dataset so the Space is usable without uploads.
        df = generate_small_dataset()
        df.to_csv(CSV_PATH, index=False)
        return df

    df = pd.read_csv(CSV_PATH)
    # Backward compatibility if weights were stored as list text. Guard on the
    # row count: a header-only file has no first row to inspect, and .iloc[0]
    # would raise IndexError.
    if "weights" in df.columns and len(df) > 0:
        first = df["weights"].iloc[0]
        if isinstance(first, str) and "[" in first:
            df["weights"] = df["weights"].apply(_legacy_weights_to_csv)
    return df
104
+
105
+ # --------------------
106
+ # Embeddings and index
107
+ # --------------------
108
class Recommender:
    """Nearest-neighbour lookup over portfolios using text and numeric features.

    Each row of the DataFrame becomes one vector: the sentence embedding of
    ``profile_text`` concatenated with the standardized ``er_p``, ``sigma_p``
    and ``beta_p`` columns, L2-normalized as a whole. Vectors are stored in a
    FAISS inner-product index, so search scores are cosine similarities of
    the combined vectors.
    """

    def __init__(self, df: pd.DataFrame):
        """Copy ``df``, load the embedding model and build the index."""
        self.df = df.copy()
        self.model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        self.scaler = StandardScaler()
        self.index = None
        self.feature_dim = None
        self.vecs = None
        self._build()

    def _text_embed(self, texts: List[str]) -> np.ndarray:
        """Encode ``texts`` with the sentence model, requesting normalized embeddings."""
        return self.model.encode(texts, normalize_embeddings=True)

    @staticmethod
    def _combine(text_vecs: np.ndarray, nums: np.ndarray) -> np.ndarray:
        """Stack text and numeric features into unit-length float32 rows."""
        # Stacking float32 embeddings with float64 scaler output yields
        # float64, which faiss.normalize_L2 does not accept. Convert to
        # contiguous float32 *before* normalizing in place.
        feats = np.ascontiguousarray(np.hstack([text_vecs, nums]), dtype="float32")
        faiss.normalize_L2(feats)
        return feats

    def _build(self):
        """Fit the scaler on the numeric columns and index every row."""
        texts = self.df["profile_text"].astype(str).tolist()
        text_vecs = self._text_embed(texts)
        nums = self.df[["er_p", "sigma_p", "beta_p"]].to_numpy(dtype=float)
        nums = self.scaler.fit_transform(nums)
        self.vecs = self._combine(text_vecs, nums)
        self.feature_dim = self.vecs.shape[1]
        self.index = faiss.IndexFlatIP(self.feature_dim)
        self.index.add(self.vecs)

    def query(self, profile_text: str, er_p: float, sigma_p: float, beta_p: float, topk: int = 3):
        """Return the rows most similar to the given profile and metrics.

        Args:
            profile_text: Free-text investor profile.
            er_p: Expected return of the query portfolio.
            sigma_p: Standard deviation of the query portfolio.
            beta_p: Beta of the query portfolio.
            topk: Maximum number of rows to return.

        Returns:
            A copy of the matching rows of ``self.df`` in the order returned
            by the index, with an added ``score`` column. Fewer than ``topk``
            rows are returned when the index holds fewer rows.
        """
        text_vec = self._text_embed([profile_text])
        nums = np.array([[er_p, sigma_p, beta_p]], dtype=float)
        nums = self.scaler.transform(nums)
        q = self._combine(text_vec, nums)

        # Never ask for more neighbours than exist: FAISS pads missing results
        # with label -1, which .iloc would silently read as "the last row".
        k = min(int(topk), self.index.ntotal)
        if k <= 0:
            out = self.df.iloc[[]].copy()
            out["score"] = []
            return out

        D, I = self.index.search(q, k)
        hits = [(i, s) for i, s in zip(I[0].tolist(), D[0].tolist()) if i >= 0]
        out = self.df.iloc[[i for i, _ in hits]].copy()
        out["score"] = [s for _, s in hits]
        return out
145
+
146
+ # --------------------
147
+ # Plot CML
148
+ # --------------------
149
def plot_cml(rf: float, mkt_prem: float, market_sigma: float, port_sigma: float, port_er: float):
    """Render the capital market line with the portfolio marked on it.

    Args:
        rf: Risk-free rate (intercept of the line).
        mkt_prem: Market premium; the market point sits at ``rf + mkt_prem``.
        market_sigma: Market standard deviation. The line is only drawn when
            this is positive, because its slope is ``mkt_prem / market_sigma``.
        port_sigma: Portfolio standard deviation (x of the portfolio point).
        port_er: Portfolio expected return (y of the portfolio point).

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that contains the PNG image.
    """
    # Use an explicit figure/axes pair rather than pyplot's implicit "current
    # figure", so the drawing calls cannot land on some other figure.
    fig, ax = plt.subplots(figsize=(5, 4), dpi=120)
    try:
        if market_sigma > 0:
            xs = np.linspace(0, max(market_sigma * 1.4, port_sigma * 1.2, 0.25), 50)
            cml = rf + (mkt_prem / market_sigma) * xs
            ax.plot(xs, cml, label="CML")
        ax.scatter([0.0], [rf], label="Risk free")
        ax.scatter([market_sigma], [rf + mkt_prem], label="Market")
        ax.scatter([port_sigma], [port_er], label="Your portfolio")
        ax.set_xlabel("Standard deviation")
        ax.set_ylabel("Expected return")
        ax.legend()
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        # Always release the figure, including when drawing or saving fails.
        plt.close(fig)
    buf.seek(0)
    return buf
166
+
167
+ # --------------------
168
+ # App state
169
+ # --------------------
170
# Module-level state read by the Gradio callbacks. upload_csv rebinds both
# names when a replacement dataset is loaded.
DF = load_or_build_csv()
RECO = Recommender(df=DF)
172
+
173
+ # --------------------
174
+ # Gradio logic
175
+ # --------------------
176
def sum_to_one(*w_list):
    """Normalize the given weights and return them as floats rounded to 4 decimals.

    Takes one positional argument per weight and returns a list of the same
    length.
    """
    raw = np.array(list(map(float, w_list)), dtype=float)
    scaled = normalize_weights(raw)
    return [float(format(value, ".4f")) for value in scaled]
180
+
181
def compute_and_recommend(goal_text: str,
                          rf: float,
                          mkt_prem: float,
                          mkt_sigma: float,
                          *weights) -> Tuple[str, pd.DataFrame, np.ndarray, str]:
    """Compute portfolio metrics, fetch similar portfolios and draw the CML.

    Args:
        goal_text: Free-text profile used for retrieval; a default sentence is
            substituted when it is empty or None.
        rf: Risk-free rate; ``DEFAULT_RF`` is used when None.
        mkt_prem: Market premium; ``DEFAULT_MKT_PREM`` is used when None.
        mkt_sigma: Market standard deviation, used only for the plot.
        *weights: One raw weight per entry of ``TICKERS``; they are normalized
            before use.

    Returns:
        ``(summary, table, image, query_text)``: a one-line text summary, a
        DataFrame with columns profile / er / sigma / beta / similarity, the
        plot as a uint8 image array, and the text actually used for retrieval.
    """
    # A cleared numeric field arrives as None; fall back to the defaults
    # rather than failing inside the arithmetic.
    rf = DEFAULT_RF if rf is None else float(rf)
    mkt_prem = DEFAULT_MKT_PREM if mkt_prem is None else float(mkt_prem)

    w = normalize_weights(np.array(weights, dtype=float))
    b = portfolio_beta(w, BETAS)
    er = capm_expected_return(b, rf, mkt_prem)
    s = portfolio_sigma(w, SIGMAS, corr=DEFAULT_CORR)

    # Query top 3
    q_text = (goal_text or "").strip() or "balanced investor, five years horizon, goal is retirement savings."
    recs = RECO.query(q_text, er, s, b, topk=3).reset_index(drop=True)

    # Prepare nice table
    show = recs[["profile_text", "er_p", "sigma_p", "beta_p", "score"]].copy()
    show.columns = ["profile", "er", "sigma", "beta", "similarity"]

    # Plot. The image output component accepts arrays, PIL images or file
    # paths, not a raw byte buffer, so decode the PNG into a pixel array.
    img_buf = plot_cml(rf, mkt_prem, mkt_sigma, s, er)
    img = plt.imread(img_buf, format="png")
    if np.issubdtype(img.dtype, np.floating):
        # PNGs are decoded to floats in [0, 1]; convert to 8-bit pixel values.
        img = (img * 255).round().astype(np.uint8)

    summary = (
        f"Expected return {er:.2%}. "
        f"Risk or sigma {s:.2%}. "
        f"Beta {b:.2f}. "
        f"Weights order {', '.join(TICKERS)}. "
        f"Weights {', '.join(f'{x:.2%}' for x in w)}."
    )

    return summary, show, img, q_text
211
+
212
def upload_csv(file):
    """Replace the active dataset and index with an uploaded CSV.

    Args:
        file: The upload handed over by the file component: either an object
            exposing the temporary path as ``.name`` or the path itself.

    Returns:
        A status message. On any failure the previously loaded dataset and
        index stay in place.
    """
    global DF, RECO
    if file is None:
        return "No file received."
    # Accept both a file wrapper with a .name attribute and a plain path string.
    path = getattr(file, "name", file)
    try:
        df = pd.read_csv(path)
        required = {"profile_text", "weights", "er_p", "sigma_p", "beta_p"}
        if not required.issubset(df.columns):
            return f"CSV must have columns {sorted(required)}"
        if df.empty:
            return "Failed to load CSV. The file contains no rows."
        new_df = df.copy()
        # Build the new index first and swap afterwards, so a failure here
        # cannot leave DF pointing at data that RECO has not indexed.
        new_reco = Recommender(new_df)
    except Exception as e:
        # UI boundary: report the problem to the user instead of raising.
        return f"Failed to load CSV. {e}"
    DF, RECO = new_df, new_reco
    return f"Loaded {len(DF)} rows and rebuilt index."
226
+
227
+ # --------------------
228
+ # UI
229
+ # --------------------
230
# NOTE(review): the layout nesting below (which components sit inside the Row
# and its two Columns) is reconstructed from the statement order; confirm it
# against the deployed app.
def _example_row(text, weights):
    """Return one examples row: goal text, rf, market premium, market sigma, then weights."""
    return [text, 0.03, 0.05, 0.17] + weights


with gr.Blocks(title="Personal Portfolio Risk Return Analyzer") as demo:
    gr.Markdown(
        "## Personal Portfolio Risk Return Analyzer\n"
        "Enter a goal sentence and set weights. The app computes expected return, risk, and beta, "
        "then shows three similar portfolios from the dataset."
    )

    with gr.Row():
        # Left column: every input control.
        with gr.Column(scale=1):
            goal = gr.Textbox(
                label="Goal or profile sentence",
                value="balanced investor, five years horizon, goal is retirement savings.",
            )
            rf_in = gr.Number(label="Risk free rate", value=DEFAULT_RF, precision=4)
            prem_in = gr.Number(label="Market premium", value=DEFAULT_MKT_PREM, precision=4)
            mkt_sigma_in = gr.Number(label="Market sigma for CML plot", value=0.17, precision=4)

            gr.Markdown("#### Weights, must sum to one")
            # One slider per ticker, created in TICKERS order.
            sliders = [
                gr.Slider(minimum=0.0, maximum=1.0, step=0.001, value=0.1, label=ticker)
                for ticker in TICKERS
            ]
            sum_btn = gr.Button("Normalize weights to one")

            upload = gr.File(label="Upload portfolios.csv to replace dataset", file_count="single")
            status = gr.Markdown()

        # Right column: every output.
        with gr.Column(scale=1):
            summary = gr.Textbox(label="Your portfolio summary", interactive=False)
            table = gr.Dataframe(headers=["profile", "er", "sigma", "beta", "similarity"], row_count=3)
            plot = gr.Image(label="Capital Market Line", type="pil")
            used_text = gr.Textbox(label="Query text used for retrieval", interactive=False)

    sum_btn.click(fn=sum_to_one, inputs=sliders, outputs=sliders)
    upload.upload(fn=upload_csv, inputs=upload, outputs=status)

    # The compute button and the examples take the same ordered inputs:
    # goal, rf, premium, market sigma, then one weight per slider.
    compute_btn = gr.Button("Compute and recommend")
    compute_btn.click(
        fn=compute_and_recommend,
        inputs=[goal, rf_in, prem_in, mkt_sigma_in] + sliders,
        outputs=[summary, table, plot, used_text],
    )

    gr.Examples(
        examples=[
            _example_row(
                "cautious investor, ten years horizon, goal is education fund.",
                [0.1] * 10,
            ),
            _example_row(
                "aggressive investor, seven years horizon, goal is wealth building.",
                [0.05, 0.15, 0.05, 0.05, 0.05, 0.05, 0.2, 0.1, 0.2, 0.1],
            ),
            _example_row(
                "balanced investor, five years horizon, goal is first home.",
                [0.12, 0.12, 0.10, 0.06, 0.08, 0.06, 0.12, 0.06, 0.16, 0.12],
            ),
        ],
        inputs=[goal, rf_in, prem_in, mkt_sigma_in] + sliders,
    )

if __name__ == "__main__":
    demo.launch()