Tulitula commited on
Commit
7785336
·
verified ·
1 Parent(s): 11b6164

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +395 -268
app.py CHANGED
@@ -1,5 +1,7 @@
1
- # app.py
2
- import os, io, math, warnings
 
 
3
  warnings.filterwarnings("ignore")
4
 
5
  from typing import List, Tuple, Dict, Optional
@@ -12,13 +14,16 @@ from PIL import Image
12
  import requests
13
  import yfinance as yf
14
 
 
 
 
15
  # ---------------- config ----------------
16
  DATA_DIR = "data"
17
  DATASET_PATH = os.path.join(DATA_DIR, "investor_profiles.csv")
18
 
19
  MAX_TICKERS = 30
20
  DEFAULT_LOOKBACK_YEARS = 5
21
- MARKET_TICKER = "VOO"
22
 
23
  POS_COLS = ["ticker", "amount_usd", "weight_exposure", "beta"]
24
  SUG_COLS = ["ticker", "suggested_weight_pct"]
@@ -35,16 +40,44 @@ FRED_MAP = [
35
  (100, "DGS30"),
36
  ]
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # ---------------- helpers ----------------
39
  def ensure_data_dir():
40
  os.makedirs(DATA_DIR, exist_ok=True)
41
 
 
42
  def empty_positions_df():
43
  return pd.DataFrame(columns=POS_COLS)
44
 
 
45
  def empty_suggest_df():
46
  return pd.DataFrame(columns=SUG_COLS)
47
 
 
 
 
 
 
48
  def fred_series_for_horizon(years: float) -> str:
49
  y = max(1.0, min(100.0, float(years)))
50
  for cutoff, code in FRED_MAP:
@@ -52,6 +85,7 @@ def fred_series_for_horizon(years: float) -> str:
52
  return code
53
  return "DGS30"
54
 
 
55
  def fetch_fred_yield_annual(code: str) -> float:
56
  url = f"https://fred.stlouisfed.org/graph/fredgraph.csv?id={code}"
57
  try:
@@ -63,31 +97,8 @@ def fetch_fred_yield_annual(code: str) -> float:
63
  except Exception:
64
  return 0.03
65
 
66
- def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
67
- start = pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=years, days=7)
68
- end = pd.Timestamp.today(tz="UTC")
69
- syms = [str(t).upper().strip() for t in dict.fromkeys(tickers)]
70
- df = yf.download(
71
- syms, start=start.date(), end=end.date(),
72
- interval="1mo", auto_adjust=True, progress=False
73
- )["Close"]
74
- if isinstance(df, pd.Series):
75
- df = df.to_frame()
76
- df = df.dropna(how="all").fillna(method="ffill")
77
- # columns become single Index if single ticker
78
- if isinstance(df.columns, pd.MultiIndex):
79
- df.columns = [c[1] if isinstance(c, tuple) else c for c in df.columns]
80
- return df
81
-
82
- def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
83
- return prices.pct_change().dropna()
84
-
85
- def annualize_mean(m):
86
- return np.asarray(m, dtype=float) * 12.0
87
-
88
- def annualize_sigma(s):
89
- return np.asarray(s, dtype=float) * math.sqrt(12.0)
90
 
 
91
  def yahoo_search(query: str):
92
  if not query or len(query.strip()) == 0:
93
  return []
@@ -111,30 +122,128 @@ def yahoo_search(query: str):
111
  except Exception:
112
  return [{"symbol": query.strip().upper(), "name": "typed symbol", "exchange": "n a"}]
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  def validate_tickers(symbols: List[str], years: int) -> List[str]:
115
- ok, df = [], fetch_prices_monthly(list(set(symbols)), years)
116
- for s in symbols:
117
- if s in df.columns:
118
- ok.append(s)
119
  return ok
120
 
 
121
  # -------------- aligned moments --------------
122
  def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
123
- uniq = [c for c in dict.fromkeys(symbols) if c != MARKET_TICKER]
124
- tickers = uniq + [MARKET_TICKER]
 
 
 
 
 
 
125
  px = fetch_prices_monthly(tickers, years)
 
 
 
 
 
 
 
 
 
 
126
  rets = monthly_returns(px)
127
- cols = [c for c in uniq if c in rets.columns] + ([MARKET_TICKER] if MARKET_TICKER in rets.columns else [])
128
- R = rets[cols].dropna(how="any")
 
 
 
 
129
  return R.loc[:, ~R.columns.duplicated()]
130
 
 
131
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
132
  R = get_aligned_monthly_returns(symbols, years)
133
- if MARKET_TICKER not in R.columns or R.shape[0] < 3:
134
- raise ValueError("Could not align data with market or not enough rows.")
135
- rf_m = rf_ann / 12.0
136
 
137
- m = R[MARKET_TICKER]
 
138
  if isinstance(m, pd.DataFrame):
139
  m = m.iloc[:, 0].squeeze()
140
 
@@ -144,23 +253,25 @@ def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
144
 
145
  ex_m = m - rf_m
146
  var_m = float(np.var(ex_m.values, ddof=1))
147
- var_m = max(var_m, 1e-6)
148
 
149
  betas: Dict[str, float] = {}
150
- for s in [c for c in R.columns if c != MARKET_TICKER]:
151
  ex_s = R[s] - rf_m
152
  betas[s] = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1] / var_m)
153
- betas[MARKET_TICKER] = 1.0
154
 
155
- asset_cols = [c for c in R.columns if c != MARKET_TICKER]
156
  cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
157
  covA = pd.DataFrame(cov_m * 12.0, index=asset_cols, columns=asset_cols)
158
 
159
- return {"betas": betas, "cov_ann": covA, "erp_ann": erp_ann, "sigma_m_ann": sigma_m_ann}
 
160
 
161
  def capm_er(beta: float, rf_ann: float, erp_ann: float) -> float:
162
  return float(rf_ann + beta * erp_ann)
163
 
 
164
  def portfolio_stats(weights: Dict[str, float],
165
  cov_ann: pd.DataFrame,
166
  betas: Dict[str, float],
@@ -180,20 +291,23 @@ def portfolio_stats(weights: Dict[str, float],
180
  sigma_p = math.sqrt(float(max(w_expo.T @ cov @ w_expo, 0.0)))
181
  return beta_p, er_p, sigma_p
182
 
183
- # -------------- CML helpers + plot (percent axes) --------------
 
184
  def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
185
  if sigma_mkt <= 1e-12:
186
  return 0.0, 1.0, rf_ann
187
  a = sigma_target / sigma_mkt
188
  return a, 1.0 - a, rf_ann + a * erp_ann
189
 
 
190
  def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
191
  if abs(erp_ann) <= 1e-12:
192
  return 0.0, 1.0, rf_ann
193
  a = (mu_target - rf_ann) / erp_ann
194
  return a, 1.0 - a, abs(a) * sigma_mkt
195
 
196
- def plot_cml_percent(
 
197
  rf_ann, erp_ann, sigma_mkt,
198
  pt_sigma, pt_mu,
199
  same_sigma_sigma, same_sigma_mu,
@@ -206,34 +320,29 @@ def plot_cml_percent(
206
  0.3,
207
  sigma_mkt * 2.0,
208
  pt_sigma * 1.4,
209
- same_mu_sigma * 1.4,
210
- same_sigma_sigma * 1.4,
211
  (targ_sigma or 0.0) * 1.4,
212
  )
213
  xs = np.linspace(0, xmax, 160)
214
  slope = erp_ann / max(sigma_mkt, 1e-12)
215
  cml = rf_ann + slope * xs
 
216
 
217
- def pct(x): return 100.0 * np.asarray(x)
 
 
 
218
 
219
- plt.plot(pct(xs), pct(cml), label="CML through VOO")
220
-
221
- plt.scatter([0.0], [pct(rf_ann)], label="Risk free")
222
- plt.scatter([pct(sigma_mkt)], [pct(rf_ann + erp_ann)], label="Market VOO")
223
- plt.scatter([pct(pt_sigma)], [pct(pt_mu)], label="Your portfolio")
224
- plt.scatter([pct(same_sigma_sigma)], [pct(same_sigma_mu)], label="Efficient same sigma")
225
- plt.scatter([pct(same_mu_sigma)], [pct(same_mu_mu)], label="Efficient same return")
226
  if targ_sigma is not None and targ_mu is not None:
227
- plt.scatter([pct(targ_sigma)], [pct(targ_mu)], label="Target suggestion")
228
-
229
- # Guides (keep simple)
230
- plt.plot([pct(pt_sigma), pct(same_sigma_sigma)], [pct(pt_mu), pct(same_sigma_mu)],
231
- linestyle="--", linewidth=1.0, alpha=0.7, color="gray")
232
- plt.plot([pct(pt_sigma), pct(same_mu_sigma)], [pct(pt_mu), pct(same_mu_mu)],
233
- linestyle="--", linewidth=1.0, alpha=0.7, color="gray")
234
 
235
- plt.xlabel("Standard deviation (%)")
236
- plt.ylabel("Expected return (%)")
237
  plt.legend(loc="best")
238
  plt.tight_layout()
239
 
@@ -243,128 +352,193 @@ def plot_cml_percent(
243
  buf.seek(0)
244
  return Image.open(buf)
245
 
246
- # -------------- dataset over *current* tickers --------------
247
- def dirichlet_mixture(n: int, k: int, allow_shorts: bool, rng: np.random.Generator) -> np.ndarray:
248
- """Return n weight vectors (exposures) across k assets; sum |w| = 1."""
249
- out = []
250
- n1 = int(n * 0.6) # diversified
251
- n2 = n - n1 # concentrated
252
- for _ in range(n1):
253
- w = rng.dirichlet(np.ones(k))
254
- if allow_shorts:
255
- signs = rng.choice([-1.0, 1.0], size=k, p=[0.25, 0.75])
256
- w = w * signs
257
- out.append(w)
258
- for _ in range(n2):
259
- hot = rng.integers(0, k)
260
- alpha = np.ones(k) * 0.3
261
- alpha[hot] = 3.0
262
- w = rng.dirichlet(alpha)
263
- if allow_shorts:
264
- signs = rng.choice([-1.0, 1.0], size=k, p=[0.35, 0.65])
265
- w = w * signs
266
- out.append(w)
267
- W = np.vstack(out)
268
- # normalize to exposure space (sum |w| = 1)
269
- denom = np.sum(np.abs(W), axis=1, keepdims=True)
270
- denom[denom == 0] = 1.0
271
- return W / denom
272
-
273
- def build_fixed_universe_dataset(
274
- symbols: List[str], years: int, rf_ann: float, erp_ann: float,
275
- covA: pd.DataFrame, betas: Dict[str, float],
276
- allow_shorts: bool, n_rows: int = 1000
277
- ) -> pd.DataFrame:
278
- rng = np.random.default_rng(12345)
279
- k = len(symbols)
280
- W = dirichlet_mixture(n_rows, k, allow_shorts, rng)
281
 
282
- rows = []
283
- for i in range(W.shape[0]):
284
- w = W[i]
285
- wmap = {symbols[j]: float(w[j]) for j in range(k)}
286
- beta_p, er_p, sigma_p = portfolio_stats(wmap, covA, betas, rf_ann, erp_ann)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  rows.append({
288
  "id": i,
289
- "tickers": ",".join(symbols),
290
  "weights": ",".join(f"{x:.6f}" for x in w),
291
- "beta_p": beta_p,
292
  "er_p": er_p,
293
- "sigma_p": sigma_p
 
294
  })
 
295
  return pd.DataFrame(rows)
296
 
297
- def save_dataset_csv(df: pd.DataFrame, path: str = DATASET_PATH):
 
298
  os.makedirs(os.path.dirname(path), exist_ok=True)
299
  df.to_csv(path, index=False)
300
 
301
- def _row_to_exposures(row: pd.Series, universe: List[str]) -> Optional[np.ndarray]:
302
- try:
303
- ts = [t.strip().upper() for t in str(row["tickers"]).split(",")]
304
- ws = [float(x) for x in str(row["weights"]).split(",")]
305
- wmap = {t: ws[i] for i, t in enumerate(ts) if i < len(ws)}
306
- x = np.array([wmap.get(t, 0.0) for t in universe], dtype=float)
307
- g = float(np.sum(np.abs(x)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  if g <= 1e-12:
309
- return None
310
- return x / g
311
- except Exception:
312
- return None
 
 
 
 
 
 
 
313
 
314
  def pick_low_med_high(csv_path: str, universe: List[str]):
315
  df = pd.read_csv(csv_path)
316
  rows = []
317
  for _, r in df.iterrows():
318
- x = _row_to_exposures(r, universe)
319
- if x is None:
 
 
 
 
320
  continue
 
321
  rows.append((x, float(r["er_p"]), float(r["sigma_p"]), float(r["beta_p"])))
322
  if not rows:
323
  return None
324
  rows_sorted = sorted(rows, key=lambda t: t[2]) # by sigma
325
- lo = rows_sorted[0]
326
- hi = rows_sorted[-1]
327
- med = rows_sorted[len(rows_sorted)//2]
328
- return {"low": lo, "medium": med, "high": hi}
329
 
330
- # -------------- summary builder --------------
331
- def fmt_pct(x: float) -> str:
332
- return f"{x*100:.2f}%"
333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  def build_summary_md(lookback, horizon, rf, rf_code, erp, sigma_mkt,
335
  beta_p, er_p, sigma_p,
336
  a_sigma, b_sigma, mu_eff_sigma,
337
- a_mu, b_mu, sigma_eff_mu,
338
- ds_info: str) -> str:
339
  lines = []
340
  lines.append("### Inputs")
341
- lines.append(f"- Lookback years: {lookback}")
342
- lines.append(f"- Horizon years: {int(round(horizon))}")
343
- lines.append(f"- Risk free: {fmt_pct(rf)} from {rf_code}")
344
- lines.append(f"- Market ERP: {fmt_pct(erp)}")
345
- lines.append(f"- Market sigma: {fmt_pct(sigma_mkt)}")
346
  lines.append("")
347
- lines.append("### Your portfolio")
348
- lines.append(f"- Beta: {beta_p:.2f}")
349
- lines.append(f"- Sigma: {fmt_pct(sigma_p)}")
350
- lines.append(f"- Expected return: {fmt_pct(er_p)}")
351
  lines.append("")
352
  lines.append("### Efficient alternatives on CML")
353
- lines.append(f"- Same sigma Market {a_sigma:.2f} , Bills {b_sigma:.2f} , ER {fmt_pct(mu_eff_sigma)}")
354
- lines.append(f"- Same return ⇒ Market {a_mu:.2f} , Bills {b_mu:.2f} , Sigma {fmt_pct(sigma_eff_mu)}")
 
355
  lines.append("")
356
- lines.append("### Dataset for risk suggestions")
357
- lines.append(ds_info)
 
358
  return "\n".join(lines)
359
 
360
- # -------------- globals to carry session state --------------
361
- LAST_MOMS = None
362
- LAST_BASE = None
363
- LAST_UNIVERSE = []
364
- LAST_DATASET_PATH = None
365
- HORIZON_YEARS = 5.0
366
- RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
367
- RF_ANN = fetch_fred_yield_annual(RF_CODE)
368
 
369
  # -------------- gradio callbacks --------------
370
  def search_tickers_cb(q: str):
@@ -374,6 +548,7 @@ def search_tickers_cb(q: str):
374
  opts = [f"{h['symbol']} | {h['name']} | {h['exchange']}" for h in hits]
375
  return "Select a symbol and click Add", opts
376
 
 
377
  def add_symbol(selection: str, table: pd.DataFrame):
378
  if not selection:
379
  return table, "Pick a row from Matches first"
@@ -395,6 +570,7 @@ def add_symbol(selection: str, table: pd.DataFrame):
395
  msg = f"Reached max of {MAX_TICKERS}"
396
  return new_table, msg
397
 
 
398
  def lock_ticker_column(tb: pd.DataFrame):
399
  if tb is None or len(tb) == 0:
400
  return pd.DataFrame(columns=["ticker", "amount_usd"])
@@ -405,6 +581,7 @@ def lock_ticker_column(tb: pd.DataFrame):
405
  amounts = amounts[:len(tickers)] + [0.0] * max(0, len(tickers) - len(amounts))
406
  return pd.DataFrame({"ticker": tickers, "amount_usd": amounts})
407
 
 
408
  def set_horizon(years: float):
409
  y = max(1.0, min(100.0, float(years)))
410
  code = fred_series_for_horizon(y)
@@ -413,67 +590,63 @@ def set_horizon(years: float):
413
  HORIZON_YEARS = y
414
  RF_CODE = code
415
  RF_ANN = rf
416
- return f"Risk free {fmt_pct(rf)} from {code}. Will be used on Compute."
 
417
 
418
  def compute(years_lookback: int, table: pd.DataFrame):
 
 
 
419
  df = table.dropna()
420
  df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
421
  df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
422
 
423
  symbols = [t for t in df["ticker"].tolist() if t]
424
  if len(symbols) == 0:
425
- return None, "Add at least one ticker", "Universe empty", empty_positions_df(), empty_suggest_df(), "", None
426
 
427
  symbols = validate_tickers(symbols, years_lookback)
428
  if len(symbols) == 0:
429
- return None, "Could not validate any tickers", "Universe invalid", empty_positions_df(), empty_suggest_df(), "", None
 
 
 
430
 
431
  df = df[df["ticker"].isin(symbols)].copy()
432
  amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
433
- allow_shorts = any(v < 0 for v in amounts.values())
434
  rf_ann = RF_ANN
435
 
436
- # moments
437
- moms = estimate_all_moments_aligned(symbols, years_lookback, rf_ann)
438
  betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
439
 
440
  gross = sum(abs(v) for v in amounts.values())
441
  if gross == 0:
442
- return None, "All amounts are zero", "Universe ok", empty_positions_df(), empty_suggest_df(), "", None
443
  weights = {k: v / gross for k, v in amounts.items()}
 
444
  beta_p, er_p, sigma_p = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
445
 
446
  a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
447
  a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_p, rf_ann, erp_ann, sigma_mkt)
448
 
449
- # dataset strictly over *these* symbols
450
- ensure_data_dir()
451
- ds = build_fixed_universe_dataset(
452
- symbols=symbols, years=years_lookback, rf_ann=rf_ann, erp_ann=erp_ann,
453
- covA=covA.loc[symbols, symbols], betas=betas, allow_shorts=allow_shorts, n_rows=1000
454
- )
455
- save_dataset_csv(ds, DATASET_PATH)
456
- ds_info = f"- Built {len(ds)} simulated mixes over current tickers ({'shorts allowed' if allow_shorts else 'long-only'})."
457
-
458
- # plot + summary
459
- img = plot_cml_percent(
460
  rf_ann, erp_ann, sigma_mkt,
461
  sigma_p, er_p,
462
  sigma_p, mu_eff_sigma,
463
  sigma_eff_mu, er_p,
464
  targ_sigma=None, targ_mu=None
465
  )
 
466
  info = build_summary_md(
467
  years_lookback, HORIZON_YEARS, rf_ann, RF_CODE, erp_ann, sigma_mkt,
468
  beta_p, er_p, sigma_p,
469
  a_sigma, b_sigma, mu_eff_sigma,
470
- a_mu, b_mu, sigma_eff_mu,
471
- ds_info=ds_info
472
  )
473
 
474
  rows = []
475
- for t in symbols:
476
- beta_val = 1.0 if t == MARKET_TICKER else betas.get(t, np.nan)
477
  rows.append({
478
  "ticker": t,
479
  "amount_usd": amounts.get(t, 0.0),
@@ -481,78 +654,38 @@ def compute(years_lookback: int, table: pd.DataFrame):
481
  "beta": beta_val,
482
  })
483
  pos_table = pd.DataFrame(rows, columns=POS_COLS)
484
- pos_table["weight_exposure"] = pos_table["weight_exposure"].astype(float)
485
-
486
- uni_msg = f"Universe set to {', '.join(symbols)}"
487
- # store globals for Suggest buttons
488
- global LAST_MOMS, LAST_BASE, LAST_UNIVERSE, LAST_DATASET_PATH
489
- LAST_MOMS = {"betas": betas, "covA": covA, "erp_ann": erp_ann, "sigma_mkt": sigma_mkt}
490
- LAST_BASE = {"rf_ann": rf_ann, "er_p": er_p, "sigma_p": sigma_p}
491
- LAST_UNIVERSE = list(symbols)
492
- LAST_DATASET_PATH = DATASET_PATH
493
 
494
- return img, info, uni_msg, pos_table, empty_suggest_df(), ds_info, DATASET_PATH
 
 
495
 
496
- def _overlay_plot_with_suggestion(sigma_s, er_s):
497
- if not LAST_MOMS or not LAST_BASE:
498
- return None
499
- rf_ann = LAST_BASE["rf_ann"]
500
- erp_ann = LAST_MOMS["erp_ann"]
501
- sigma_mkt = LAST_MOMS["sigma_mkt"]
502
- sigma_p = LAST_BASE["sigma_p"]
503
- er_p = LAST_BASE["er_p"]
504
- a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
505
- a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_p, rf_ann, erp_ann, sigma_mkt)
506
- return plot_cml_percent(
507
- rf_ann, erp_ann, sigma_mkt,
508
- sigma_p, er_p,
509
- sigma_p, mu_eff_sigma,
510
- sigma_eff_mu, er_p,
511
- targ_sigma=sigma_s, targ_mu=er_s
512
- )
513
 
514
- def suggest_level(level: str):
515
- if not LAST_DATASET_PATH or not os.path.exists(LAST_DATASET_PATH) or not LAST_UNIVERSE:
516
- return empty_suggest_df(), "Run Compute first.", None
517
- picks = pick_low_med_high(LAST_DATASET_PATH, LAST_UNIVERSE)
518
- if picks is None or level not in picks:
519
- return empty_suggest_df(), "No suggestion available.", None
520
- x, er_p, sig_p, beta_p = picks[level]
521
- # build table in percent
522
- rows = [{"ticker": LAST_UNIVERSE[i], "suggested_weight_pct": float(x[i]) * 100.0} for i in range(len(LAST_UNIVERSE))]
523
- df = pd.DataFrame(rows, columns=SUG_COLS)
524
- msg = f"{level.capitalize()} risk → ER {fmt_pct(er_p)}, Sigma {fmt_pct(sig_p)}, Beta {beta_p:.2f}"
525
- img = _overlay_plot_with_suggestion(sig_p, er_p)
526
- return df, msg, img
527
 
528
- def apply_suggestion_to_amounts(level: str, table: pd.DataFrame):
529
- if table is None or len(table) == 0:
530
- return table
531
- df_sug, _, _ = suggest_level(level)
532
- if df_sug is None or len(df_sug) == 0:
533
- return table
534
- # compute gross dollars (use total |amounts|; if zero, default to 10,000)
535
- t = table.copy()
536
- t["ticker"] = t["ticker"].astype(str).str.upper().str.strip()
537
- t["amount_usd"] = pd.to_numeric(t["amount_usd"], errors="coerce").fillna(0.0)
538
- gross = float(np.sum(np.abs(t["amount_usd"].values)))
539
- if gross <= 1e-9:
540
- gross = 10000.0
541
- w = {r["ticker"]: float(r["suggested_weight_pct"]) / 100.0 for _, r in df_sug.iterrows()}
542
- # map to amounts using current ticker order; missing → 0
543
- new_amounts = [gross * w.get(sym.upper(), 0.0) for sym in t["ticker"].tolist()]
544
- t["amount_usd"] = new_amounts
545
- return t
546
 
547
  # -------------- UI --------------
548
  ensure_data_dir()
 
 
 
 
549
 
550
  with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
551
  gr.Markdown(
552
  "## Efficient Portfolio Advisor\n"
553
  "Search symbols, enter dollar amounts, set your horizon. "
554
- "Prices: Yahoo Finance. Risk free: FRED. "
555
- "Suggestions (Low/Medium/High) come **only** from the 1,000-portfolio dataset built over your tickers."
556
  )
557
 
558
  with gr.Row():
@@ -560,11 +693,10 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
560
  q = gr.Textbox(label="Search symbol")
561
  search_note = gr.Markdown()
562
  matches = gr.Dropdown(choices=[], label="Matches")
563
- with gr.Row():
564
- search_btn = gr.Button("Search")
565
- add_btn = gr.Button("Add selected to portfolio")
566
 
567
- gr.Markdown("### Portfolio positions (type dollar amounts, negatives allowed for shorts)")
568
  table = gr.Dataframe(
569
  headers=["ticker", "amount_usd"],
570
  datatype=["str", "number"],
@@ -573,26 +705,20 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
573
  )
574
 
575
  horizon = gr.Number(label="Horizon in years (1–100)", value=5, precision=0)
576
- lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for beta & sigma")
577
 
578
- with gr.Row():
579
- run_btn = gr.Button("Compute (build dataset)", variant="primary")
580
 
581
- gr.Markdown("### Risk tolerance suggestions (dataset-based only)")
582
  with gr.Row():
583
- btn_low = gr.Button("Low risk")
584
- btn_med = gr.Button("Medium risk")
585
- btn_high = gr.Button("High risk")
586
- with gr.Row():
587
- apply_low = gr.Button("Apply Low → $")
588
- apply_med = gr.Button("Apply Medium → $")
589
- apply_high = gr.Button("Apply High → $")
590
 
591
  with gr.Column(scale=1):
592
- plot = gr.Image(label="Capital Market Line", type="pil")
593
  summary = gr.Markdown(label="Summary")
594
- universe_msg = gr.Textbox(label="Universe status", interactive=False)
595
- dataset_info = gr.Markdown(label="Dataset info", value="")
596
  positions = gr.Dataframe(
597
  label="Computed positions",
598
  headers=POS_COLS,
@@ -602,15 +728,15 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
602
  interactive=False
603
  )
604
  suggestions = gr.Dataframe(
605
- label="Suggested weights (percent of exposure)",
606
  headers=SUG_COLS,
607
  datatype=["str", "number"],
608
  col_count=(len(SUG_COLS), "fixed"),
609
  value=empty_suggest_df(),
610
  interactive=False
611
  )
612
- sugg_msg = gr.Markdown("")
613
- dl = gr.File(label="Session dataset CSV", value=None, visible=True)
614
 
615
  # wiring
616
  def do_search(query):
@@ -625,23 +751,24 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
625
  run_btn.click(
626
  fn=compute,
627
  inputs=[lookback, table],
628
- outputs=[plot, summary, universe_msg, positions, suggestions, dataset_info, dl]
629
  )
630
 
631
- # suggest buttons
632
- def wrap_suggest(level):
633
- df, msg, img = suggest_level(level)
634
- img_out = img if img is not None else gr.update()
635
- return df, msg, img_out
 
 
636
 
637
- btn_low.click(lambda: wrap_suggest("low"), outputs=[suggestions, sugg_msg, plot])
638
- btn_med.click(lambda: wrap_suggest("medium"), outputs=[suggestions, sugg_msg, plot])
639
- btn_high.click(lambda: wrap_suggest("high"), outputs=[suggestions, sugg_msg, plot])
640
 
641
- # apply buttons (only updates the table; user can hit Compute again)
642
- apply_low.click(lambda tb: apply_suggestion_to_amounts("low", tb), inputs=table, outputs=table)
643
- apply_med.click(lambda tb: apply_suggestion_to_amounts("medium", tb), inputs=table, outputs=table)
644
- apply_high.click(lambda tb: apply_suggestion_to_amounts("high", tb), inputs=table, outputs=table)
645
 
646
  if __name__ == "__main__":
647
  demo.launch()
 
1
+ import os
2
+ import io
3
+ import math
4
+ import warnings
5
  warnings.filterwarnings("ignore")
6
 
7
  from typing import List, Tuple, Dict, Optional
 
14
  import requests
15
  import yfinance as yf
16
 
17
+ # Embeddings
18
+ from sentence_transformers import SentenceTransformer, util
19
+
20
  # ---------------- config ----------------
21
  DATA_DIR = "data"
22
  DATASET_PATH = os.path.join(DATA_DIR, "investor_profiles.csv")
23
 
24
  MAX_TICKERS = 30
25
  DEFAULT_LOOKBACK_YEARS = 5
26
+ MARKET_TICKER = "VOO" # will auto-fallback to SPY if VOO missing
27
 
28
  POS_COLS = ["ticker", "amount_usd", "weight_exposure", "beta"]
29
  SUG_COLS = ["ticker", "suggested_weight_pct"]
 
40
  (100, "DGS30"),
41
  ]
42
 
43
+ # Embedding model cfg
44
+ EMB_MODEL_NAME = "FinLang/finance-embeddings-investopedia"
45
+
46
+ # ---------------- globals (runtime) ----------------
47
+ HORIZON_YEARS = 5.0
48
+ RF_CODE = "DGS5"
49
+ RF_ANN = 0.03
50
+
51
+ UNIVERSE: List[str] = [MARKET_TICKER, "QQQ", "XLK", "XLP", "XLE", "VNQ", "IEF", "HYG", "GLD", "EEM"]
52
+
53
+ LAST_DATASET_PATH: Optional[str] = None
54
+ LAST_UNIVERSE: Optional[List[str]] = None
55
+ LAST_PLOT_STATE: Optional[Dict[str, float]] = None
56
+
57
+ # embedding caches
58
+ _EMB_MODEL = None
59
+ _DS_TEXTS = None
60
+ _DS_EMBS = None
61
+ _DS_CACHE_KEY = None # (csv_path, tuple(universe))
62
+
63
+
64
  # ---------------- helpers ----------------
65
  def ensure_data_dir():
66
  os.makedirs(DATA_DIR, exist_ok=True)
67
 
68
+
69
  def empty_positions_df():
70
  return pd.DataFrame(columns=POS_COLS)
71
 
72
+
73
  def empty_suggest_df():
74
  return pd.DataFrame(columns=SUG_COLS)
75
 
76
+
77
+ def fmt_pct(x: float) -> str:
78
+ return f"{x*100:.2f}%"
79
+
80
+
81
  def fred_series_for_horizon(years: float) -> str:
82
  y = max(1.0, min(100.0, float(years)))
83
  for cutoff, code in FRED_MAP:
 
85
  return code
86
  return "DGS30"
87
 
88
+
89
  def fetch_fred_yield_annual(code: str) -> float:
90
  url = f"https://fred.stlouisfed.org/graph/fredgraph.csv?id={code}"
91
  try:
 
97
  except Exception:
98
  return 0.03
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
+ # -------- Yahoo symbol search ----------
102
  def yahoo_search(query: str):
103
  if not query or len(query.strip()) == 0:
104
  return []
 
122
  except Exception:
123
  return [{"symbol": query.strip().upper(), "name": "typed symbol", "exchange": "n a"}]
124
 
125
+
126
+ # --------- prices / returns ----------
127
+ def _extract_close(df: pd.DataFrame, tickers: List[str]) -> pd.DataFrame:
128
+ """
129
+ Robustly extract a (date x ticker) Close DataFrame regardless of yf's column layout.
130
+ """
131
+ if isinstance(df.columns, pd.MultiIndex):
132
+ lv0 = df.columns.get_level_values(0)
133
+ lv1 = df.columns.get_level_values(1)
134
+ if "Close" in lv0:
135
+ close = df["Close"]
136
+ elif "Adj Close" in lv0:
137
+ close = df["Adj Close"]
138
+ elif "Close" in lv1:
139
+ close = df.xs("Close", level=1, axis=1)
140
+ elif "Adj Close" in lv1:
141
+ close = df.xs("Adj Close", level=1, axis=1)
142
+ else:
143
+ # fallback: if first level are tickers
144
+ # try to select 'Close' under each
145
+ try:
146
+ close = df.xs("Close", level=1, axis=1)
147
+ except Exception:
148
+ close = df.copy()
149
+ else:
150
+ # Single ticker case
151
+ if "Close" in df.columns:
152
+ s = df["Close"].copy()
153
+ elif "Adj Close" in df.columns:
154
+ s = df["Adj Close"].copy()
155
+ else:
156
+ # last resort: take any one numeric column
157
+ s = df.select_dtypes(include=[np.number]).iloc[:, 0]
158
+ # ensure column named as ticker
159
+ name = tickers[0] if len(tickers) else "T0"
160
+ close = s.to_frame(name=name)
161
+
162
+ # Reindex columns to requested order where possible
163
+ # If some symbols missing, they simply won't be present
164
+ close = close.dropna(how="all").ffill()
165
+ # Keep only requested tickers, in order
166
+ cols = [c for c in tickers if c in close.columns]
167
+ if not cols: # if nothing matched, keep whatever is there
168
+ close = close.copy()
169
+ else:
170
+ close = close[cols]
171
+ return close
172
+
173
+
174
+ def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
175
+ start = pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=years, days=7)
176
+ end = pd.Timestamp.today(tz="UTC")
177
+ dl = yf.download(
178
+ list(dict.fromkeys(tickers)),
179
+ start=start.date(),
180
+ end=end.date(),
181
+ interval="1mo",
182
+ auto_adjust=True,
183
+ progress=False
184
+ )
185
+ close = _extract_close(dl, tickers)
186
+ return close
187
+
188
+
189
+ def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
190
+ return prices.pct_change().dropna(how="all")
191
+
192
+
193
+ def annualize_mean(m):
194
+ return np.asarray(m, dtype=float) * 12.0
195
+
196
+
197
+ def annualize_sigma(s):
198
+ return np.asarray(s, dtype=float) * math.sqrt(12.0)
199
+
200
+
201
  def validate_tickers(symbols: List[str], years: int) -> List[str]:
202
+ uniq = list(dict.fromkeys(symbols))
203
+ df = fetch_prices_monthly(uniq, years)
204
+ ok = [s for s in uniq if s in df.columns]
 
205
  return ok
206
 
207
+
208
  # -------------- aligned moments --------------
209
  def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
210
+ uniq = [c for c in dict.fromkeys(symbols) if c]
211
+ tickers = uniq.copy()
212
+
213
+ # Ensure market present (try MARKET_TICKER then fallback to SPY)
214
+ market_ok = MARKET_TICKER in tickers
215
+ if not market_ok:
216
+ tickers.append(MARKET_TICKER)
217
+
218
  px = fetch_prices_monthly(tickers, years)
219
+ if MARKET_TICKER not in px.columns:
220
+ # fallback to SPY if VOO missing
221
+ if "SPY" not in tickers:
222
+ tickers.append("SPY")
223
+ px2 = fetch_prices_monthly(tickers, years)
224
+ if "SPY" in px2.columns:
225
+ px = px2
226
+ else:
227
+ pass # keep px as-is
228
+
229
  rets = monthly_returns(px)
230
+ keep = [c for c in uniq if c in rets.columns]
231
+ if MARKET_TICKER in rets.columns:
232
+ keep += [MARKET_TICKER]
233
+ elif "SPY" in rets.columns:
234
+ keep += ["SPY"]
235
+ R = rets[keep].dropna(how="any")
236
  return R.loc[:, ~R.columns.duplicated()]
237
 
238
+
239
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
240
  R = get_aligned_monthly_returns(symbols, years)
241
+ mkt_col = MARKET_TICKER if MARKET_TICKER in R.columns else ("SPY" if "SPY" in R.columns else None)
242
+ if mkt_col is None or R.shape[0] < 3:
243
+ raise ValueError("Not enough aligned data including market")
244
 
245
+ rf_m = rf_ann / 12.0
246
+ m = R[mkt_col]
247
  if isinstance(m, pd.DataFrame):
248
  m = m.iloc[:, 0].squeeze()
249
 
 
253
 
254
  ex_m = m - rf_m
255
  var_m = float(np.var(ex_m.values, ddof=1))
256
+ var_m = max(var_m, 1e-8)
257
 
258
  betas: Dict[str, float] = {}
259
+ for s in [c for c in R.columns if c != mkt_col]:
260
  ex_s = R[s] - rf_m
261
  betas[s] = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1] / var_m)
262
+ betas[mkt_col] = 1.0 # definition
263
 
264
+ asset_cols = [c for c in R.columns if c != mkt_col]
265
  cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
266
  covA = pd.DataFrame(cov_m * 12.0, index=asset_cols, columns=asset_cols)
267
 
268
+ return {"betas": betas, "cov_ann": covA, "erp_ann": erp_ann, "sigma_m_ann": sigma_m_ann, "mkt_col": mkt_col}
269
+
270
 
271
  def capm_er(beta: float, rf_ann: float, erp_ann: float) -> float:
272
  return float(rf_ann + beta * erp_ann)
273
 
274
+
275
  def portfolio_stats(weights: Dict[str, float],
276
  cov_ann: pd.DataFrame,
277
  betas: Dict[str, float],
 
291
  sigma_p = math.sqrt(float(max(w_expo.T @ cov @ w_expo, 0.0)))
292
  return beta_p, er_p, sigma_p
293
 
294
+
295
+ # -------------- CML helpers --------------
296
  def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
297
  if sigma_mkt <= 1e-12:
298
  return 0.0, 1.0, rf_ann
299
  a = sigma_target / sigma_mkt
300
  return a, 1.0 - a, rf_ann + a * erp_ann
301
 
302
+
303
  def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
304
  if abs(erp_ann) <= 1e-12:
305
  return 0.0, 1.0, rf_ann
306
  a = (mu_target - rf_ann) / erp_ann
307
  return a, 1.0 - a, abs(a) * sigma_mkt
308
 
309
+
310
+ def plot_cml(
311
  rf_ann, erp_ann, sigma_mkt,
312
  pt_sigma, pt_mu,
313
  same_sigma_sigma, same_sigma_mu,
 
320
  0.3,
321
  sigma_mkt * 2.0,
322
  pt_sigma * 1.4,
323
+ (same_mu_sigma or 0.0) * 1.4,
324
+ (same_sigma_sigma or 0.0) * 1.4,
325
  (targ_sigma or 0.0) * 1.4,
326
  )
327
  xs = np.linspace(0, xmax, 160)
328
  slope = erp_ann / max(sigma_mkt, 1e-12)
329
  cml = rf_ann + slope * xs
330
+ plt.plot(xs, cml, label="CML via Market", linewidth=2.0)
331
 
332
+ # key points
333
+ plt.scatter([0.0], [rf_ann], label="Risk-free (FRED)")
334
+ plt.scatter([sigma_mkt], [rf_ann + erp_ann], label="Market")
335
+ plt.scatter([pt_sigma], [pt_mu], label="Your portfolio", marker="D")
336
 
337
+ if same_sigma_sigma is not None and same_sigma_mu is not None:
338
+ plt.scatter([same_sigma_sigma], [same_sigma_mu], label="Efficient same sigma", marker="o")
339
+ if same_mu_sigma is not None and same_mu_mu is not None:
340
+ plt.scatter([same_mu_sigma], [same_mu_mu], label="Efficient same return", marker="o")
 
 
 
341
  if targ_sigma is not None and targ_mu is not None:
342
+ plt.scatter([targ_sigma], [targ_mu], label="Suggestion", marker="X", s=70)
 
 
 
 
 
 
343
 
344
+ plt.xlabel("σ (annualized)")
345
+ plt.ylabel("Expected return (annual)")
346
  plt.legend(loc="best")
347
  plt.tight_layout()
348
 
 
352
  buf.seek(0)
353
  return Image.open(buf)
354
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
 
356
def _overlay_plot_with_suggestion(sigma_sugg: Optional[float], mu_sugg: Optional[float]) -> Optional[Image.Image]:
    """Re-render the last CML plot with an extra 'Suggestion' marker.

    Returns None when Compute has not populated LAST_PLOT_STATE yet.
    """
    state = LAST_PLOT_STATE
    if state is None:
        return None
    base_args = (
        state["rf_ann"], state["erp_ann"], state["sigma_mkt"],
        state["pt_sigma"], state["pt_mu"],
        state["pt_sigma"], state["mu_eff_sigma"],
        state["sigma_eff_mu"], state["pt_mu"],
    )
    return plot_cml(*base_args, targ_sigma=sigma_sugg, targ_mu=mu_sugg)
367
+
368
+
369
+ # -------------- synthetic dataset (1,000 rows over *current* universe) --------------
370
def build_synthetic_dataset(universe: List[str], years: int, rf_ann: float, erp_ann: float) -> pd.DataFrame:
    """Generate a 1,000-row dataset of random long/short mixes over `universe`.

    Betas and the annualized covariance are estimated once from history; each
    row records the picked tickers, their weights, and the mix's CAPM expected
    return, sigma and beta. The RNG is seeded so the dataset is reproducible.
    """
    # Deduplicate/sort and cap the universe, mirroring the app-wide limit.
    symbols = sorted({s for s in universe if s})[:MAX_TICKERS]
    moments = estimate_all_moments_aligned(symbols, years, rf_ann)
    cov_ann = moments["cov_ann"]
    betas = moments["betas"]

    rng = np.random.default_rng(123)  # fixed seed -> deterministic dataset
    records = []
    for idx in range(1000):
        # NOTE: the order of rng calls below is deliberate — changing it
        # would change the generated dataset for the same seed.
        k = rng.integers(low=min(2, len(symbols)), high=min(8, len(symbols)) + 1)
        picks = list(rng.choice(symbols, size=k, replace=False))
        signs = rng.choice([-1.0, 1.0], size=k, p=[0.20, 0.80])  # ~20% shorts
        raw = rng.dirichlet(np.ones(k))
        gross = 1.0 + float(rng.gamma(2.0, 0.5))  # gross exposure > 1
        w = gross * signs * raw
        # CAPM + covariance stats for this mix
        weight_map = {picks[j]: w[j] for j in range(k)}
        beta_p, er_p, sigma_p = portfolio_stats(weight_map, cov_ann, betas, rf_ann, erp_ann)
        records.append({
            "id": idx,
            "tickers": ",".join(picks),
            "weights": ",".join(f"{x:.6f}" for x in w),
            "er_p": er_p,
            "sigma_p": sigma_p,
            "beta_p": beta_p
        })

    return pd.DataFrame(records)
397
 
398
+
399
def save_synth_csv(df: pd.DataFrame, path: Optional[str] = None):
    """Write the synthetic dataset CSV, creating parent directories as needed.

    `path` defaults to DATASET_PATH (resolved at call time rather than at
    import time). Fix: a bare filename has an empty dirname, and
    os.makedirs("") raises FileNotFoundError — guard before creating.
    """
    if path is None:
        path = DATASET_PATH
    parent = os.path.dirname(path)
    if parent:  # skip makedirs for bare filenames in the cwd
        os.makedirs(parent, exist_ok=True)
    df.to_csv(path, index=False)
402
 
403
+
404
+ # ---------------- Embeddings helpers ----------------
405
def _get_emb_model():
    """Lazily construct and memoize the sentence-transformer embedding model."""
    global _EMB_MODEL
    model = _EMB_MODEL
    if model is None:
        model = SentenceTransformer(EMB_MODEL_NAME)
        _EMB_MODEL = model
    return model
410
+
411
+
412
+ def _weights_top_phrase(universe, w, top=4):
413
+ pairs = sorted([(universe[i], abs(float(w[i]))) for i in range(len(universe))],
414
+ key=lambda t: -t[1])[:top]
415
+ parts = [f"{t} {p*100:.1f}%" for t, p in pairs if p > 1e-4]
416
+ return ", ".join(parts)
417
+
418
+
419
def portfolio_to_sentence(universe, w, er, sigma, beta):
    """Render a portfolio's stats as a short natural-language sentence for embedding."""
    top_holdings = _weights_top_phrase(universe, w)
    return (
        f"portfolio with volatility {sigma*100:.2f} percent, "
        f"expected return {er*100:.2f} percent, beta {beta:.2f}, "
        f"weights mostly in {top_holdings}"
    )
423
+
424
+
425
def build_ds_embeddings(csv_path: str, universe: list):
    """Load the synthetic dataset, normalize each mix over `universe`, and
    embed one descriptive sentence per row.

    Results are memoized in module globals keyed by (csv_path, universe).
    Returns (list of (weight vector, er, sigma, beta) tuples, embedding matrix).
    """
    global _DS_TEXTS, _DS_EMBS, _DS_CACHE_KEY

    cache_key = (csv_path, tuple(universe))
    if _DS_EMBS is not None and _DS_CACHE_KEY == cache_key:
        return _DS_TEXTS, _DS_EMBS  # cache hit for this dataset + universe

    frame = pd.read_csv(csv_path)
    sentences = []
    records = []
    for _, row in frame.iterrows():
        weights = np.array([float(v) for v in str(row["weights"]).split(",")], dtype=float)
        tickers = [t.strip().upper() for t in str(row["tickers"]).split(",")]
        lookup = {tickers[i]: weights[i] for i in range(min(len(tickers), len(weights)))}
        # Project the row's mix onto the current universe (missing names -> 0).
        w = np.array([lookup.get(t, 0.0) for t in universe], dtype=float)
        gross = np.sum(np.abs(w))
        if gross <= 1e-12:
            continue  # nothing in this row maps onto the universe
        w = w / gross
        er = float(row["er_p"])
        sigma = float(row["sigma_p"])
        beta = float(row["beta_p"])
        sentences.append(portfolio_to_sentence(universe, w, er, sigma, beta))
        records.append((w, er, sigma, beta))

    embeddings = _get_emb_model().encode(sentences, normalize_embeddings=True, show_progress_bar=False)
    _DS_TEXTS, _DS_EMBS, _DS_CACHE_KEY = records, embeddings, cache_key
    return _DS_TEXTS, _DS_EMBS
451
+
452
 
453
def pick_low_med_high(csv_path: str, universe: List[str]):
    """Read the synthetic dataset and return its rows sorted by sigma.

    Each entry is (normalized weight vector over `universe`, er, sigma, beta).
    Rows whose gross exposure maps to ~zero on the universe are dropped;
    returns None when no usable rows remain.
    """
    frame = pd.read_csv(csv_path)
    parsed = []
    for _, row in frame.iterrows():
        weights = [float(v) for v in str(row["weights"]).split(",")]
        tickers = [t.strip().upper() for t in str(row["tickers"]).split(",")]
        lookup = {tickers[i]: weights[i] for i in range(min(len(tickers), len(weights)))}
        vec = np.array([lookup.get(t, 0.0) for t in universe], dtype=float)
        gross = float(np.sum(np.abs(vec)))
        if gross <= 1e-12:
            continue  # row has no exposure to the current universe
        parsed.append((vec / gross, float(row["er_p"]), float(row["sigma_p"]), float(row["beta_p"])))
    if not parsed:
        return None
    return sorted(parsed, key=lambda entry: entry[2])  # ascending sigma
 
 
 
470
 
 
 
 
471
 
472
+ def _band_indices(n, level):
473
+ if level == "low":
474
+ return range(0, max(1, int(0.25 * n)))
475
+ if level == "medium":
476
+ a, b = int(0.375 * n), int(0.625 * n)
477
+ return range(max(0, a), min(n, b))
478
+ return range(max(0, int(0.75 * n)), n) # high
479
+
480
+
481
def suggest_level(level: str):
    """Pick the dataset row in the requested risk band whose description is
    closest (cosine similarity of embeddings) to a natural-language prompt.

    Returns (suggestion dataframe, status message, optional overlay plot);
    requires Compute to have populated the dataset and universe globals.
    """
    if not LAST_DATASET_PATH or not os.path.exists(LAST_DATASET_PATH) or not LAST_UNIVERSE:
        return empty_suggest_df(), "Run Compute first.", None

    rows_texts, embs = build_ds_embeddings(LAST_DATASET_PATH, LAST_UNIVERSE)
    if not rows_texts:
        return empty_suggest_df(), "No dataset rows.", None

    band = list(_band_indices(len(rows_texts), level))
    if not band:
        return empty_suggest_df(), "No rows in band.", None

    prompts = {
        "low": "conservative low-risk portfolio with low volatility and low beta",
        "medium": "balanced moderate-risk portfolio with moderate volatility and beta around 1",
        "high": "aggressive high-risk growth portfolio with high volatility and beta above 1",
    }
    query = prompts.get(level, "balanced portfolio")
    q_emb = _get_emb_model().encode([query], normalize_embeddings=True)
    # Rank only rows inside the selected sigma band.
    sims = util.cos_sim(q_emb, embs[band]).cpu().numpy()[0]
    winner = band[int(np.argmax(sims))]
    x, er_p, sig_p, beta_p = rows_texts[winner]

    records = [
        {"ticker": LAST_UNIVERSE[i], "suggested_weight_pct": float(x[i]) * 100.0}
        for i in range(len(LAST_UNIVERSE))
    ]
    df = pd.DataFrame(records, columns=SUG_COLS).sort_values("suggested_weight_pct", ascending=False)
    msg = f"{level.capitalize()} risk (embedding-ranked) → ER {fmt_pct(er_p)}, Sigma {fmt_pct(sig_p)}, Beta {beta_p:.2f}"
    img = _overlay_plot_with_suggestion(sig_p, er_p)
    return df, msg, img
512
+
513
+
514
+ # -------------- summary builder --------------
515
def build_summary_md(lookback, horizon, rf, rf_code, erp, sigma_mkt,
                     beta_p, er_p, sigma_p,
                     a_sigma, b_sigma, mu_eff_sigma,
                     a_mu, b_mu, sigma_eff_mu) -> str:
    """Assemble the markdown summary shown next to the CML plot."""
    sections = [
        "### Inputs",
        f"- Lookback years **{lookback}**",
        f"- Horizon years **{int(round(horizon))}**",
        f"- Risk free **{fmt_pct(rf)}** from **{rf_code}**",
        f"- Market ERP **{fmt_pct(erp)}**",
        f"- Market σ **{fmt_pct(sigma_mkt)}**",
        "",
        "### Your portfolio (CAPM expectations)",
        f"- Beta **{beta_p:.2f}**",
        f"- σ **{fmt_pct(sigma_p)}**",
        f"- Expected return **{fmt_pct(er_p)}**",
        "",
        "### Efficient alternatives on CML",
        "**Same σ as your portfolio**",
        f"- Market weight **{a_sigma:.2f}**, Bills weight **{b_sigma:.2f}**",
        f"- Expected return **{fmt_pct(mu_eff_sigma)}**",
        "",
        "**Same expected return as your portfolio**",
        f"- Market weight **{a_mu:.2f}**, Bills weight **{b_mu:.2f}**",
        f"- σ **{fmt_pct(sigma_eff_mu)}**",
    ]
    return "\n".join(sections)
541
 
 
 
 
 
 
 
 
 
542
 
543
  # -------------- gradio callbacks --------------
544
  def search_tickers_cb(q: str):
 
548
  opts = [f"{h['symbol']} | {h['name']} | {h['exchange']}" for h in hits]
549
  return "Select a symbol and click Add", opts
550
 
551
+
552
  def add_symbol(selection: str, table: pd.DataFrame):
553
  if not selection:
554
  return table, "Pick a row from Matches first"
 
570
  msg = f"Reached max of {MAX_TICKERS}"
571
  return new_table, msg
572
 
573
+
574
  def lock_ticker_column(tb: pd.DataFrame):
575
  if tb is None or len(tb) == 0:
576
  return pd.DataFrame(columns=["ticker", "amount_usd"])
 
581
  amounts = amounts[:len(tickers)] + [0.0] * max(0, len(tickers) - len(amounts))
582
  return pd.DataFrame({"ticker": tickers, "amount_usd": amounts})
583
 
584
+
585
  def set_horizon(years: float):
586
  y = max(1.0, min(100.0, float(years)))
587
  code = fred_series_for_horizon(y)
 
590
  HORIZON_YEARS = y
591
  RF_CODE = code
592
  RF_ANN = rf
593
+ return f"Risk free series {code}. Latest annual rate {rf:.2%}. Will be used for CAPM and CML."
594
+
595
 
596
  def compute(years_lookback: int, table: pd.DataFrame):
597
+ if table is None or len(table) == 0:
598
+ return None, "Add at least one ticker", "Universe empty", empty_positions_df(), empty_suggest_df(), None
599
+
600
  df = table.dropna()
601
  df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
602
  df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
603
 
604
  symbols = [t for t in df["ticker"].tolist() if t]
605
  if len(symbols) == 0:
606
+ return None, "Add at least one ticker", "Universe empty", empty_positions_df(), empty_suggest_df(), None
607
 
608
  symbols = validate_tickers(symbols, years_lookback)
609
  if len(symbols) == 0:
610
+ return None, "Could not validate any tickers", "Universe invalid", empty_positions_df(), empty_suggest_df(), None
611
+
612
+ global UNIVERSE
613
+ UNIVERSE = list(sorted(set(symbols)))[:MAX_TICKERS]
614
 
615
  df = df[df["ticker"].isin(symbols)].copy()
616
  amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
 
617
  rf_ann = RF_ANN
618
 
619
+ moms = estimate_all_moments_aligned(UNIVERSE, years_lookback, rf_ann)
 
620
  betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
621
 
622
  gross = sum(abs(v) for v in amounts.values())
623
  if gross == 0:
624
+ return None, "All amounts are zero", "Universe ok", empty_positions_df(), empty_suggest_df(), None
625
  weights = {k: v / gross for k, v in amounts.items()}
626
+
627
  beta_p, er_p, sigma_p = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
628
 
629
  a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
630
  a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_p, rf_ann, erp_ann, sigma_mkt)
631
 
632
+ img = plot_cml(
 
 
 
 
 
 
 
 
 
 
633
  rf_ann, erp_ann, sigma_mkt,
634
  sigma_p, er_p,
635
  sigma_p, mu_eff_sigma,
636
  sigma_eff_mu, er_p,
637
  targ_sigma=None, targ_mu=None
638
  )
639
+
640
  info = build_summary_md(
641
  years_lookback, HORIZON_YEARS, rf_ann, RF_CODE, erp_ann, sigma_mkt,
642
  beta_p, er_p, sigma_p,
643
  a_sigma, b_sigma, mu_eff_sigma,
644
+ a_mu, b_mu, sigma_eff_mu
 
645
  )
646
 
647
  rows = []
648
+ for t in UNIVERSE:
649
+ beta_val = 1.0 if abs(betas.get(t, 0.0) - 1.0) < 1e-6 else betas.get(t, np.nan)
650
  rows.append({
651
  "ticker": t,
652
  "amount_usd": amounts.get(t, 0.0),
 
654
  "beta": beta_val,
655
  })
656
  pos_table = pd.DataFrame(rows, columns=POS_COLS)
 
 
 
 
 
 
 
 
 
657
 
658
+ # build 1,000-row dataset over CURRENT universe
659
+ synth_df = build_synthetic_dataset(UNIVERSE, years=DEFAULT_LOOKBACK_YEARS, rf_ann=rf_ann, erp_ann=erp_ann)
660
+ save_synth_csv(synth_df, DATASET_PATH)
661
 
662
+ # update globals for suggestion buttons
663
+ global LAST_DATASET_PATH, LAST_UNIVERSE, LAST_PLOT_STATE
664
+ LAST_DATASET_PATH = DATASET_PATH
665
+ LAST_UNIVERSE = UNIVERSE.copy()
666
+ LAST_PLOT_STATE = {
667
+ "rf_ann": rf_ann, "erp_ann": erp_ann, "sigma_mkt": sigma_mkt,
668
+ "pt_sigma": sigma_p, "pt_mu": er_p,
669
+ "mu_eff_sigma": mu_eff_sigma, "sigma_eff_mu": sigma_eff_mu
670
+ }
 
 
 
 
 
 
 
 
671
 
672
+ uni_msg = f"Universe set to: {', '.join(UNIVERSE)} — dataset generated with 1,000 mixes."
673
+ return img, info, uni_msg, pos_table, empty_suggest_df(), DATASET_PATH
 
 
 
 
 
 
 
 
 
 
 
674
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
675
 
676
  # -------------- UI --------------
677
  ensure_data_dir()
678
+ # initial RF based on default horizon
679
+ HORIZON_YEARS = 5.0
680
+ RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
681
+ RF_ANN = fetch_fred_yield_annual(RF_CODE)
682
 
683
  with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
684
  gr.Markdown(
685
  "## Efficient Portfolio Advisor\n"
686
  "Search symbols, enter dollar amounts, set your horizon. "
687
+ "Prices from Yahoo Finance. Risk-free from FRED. "
688
+ "Low/Medium/High suggestions use embeddings over a 1,000-mix dataset generated from your current universe."
689
  )
690
 
691
  with gr.Row():
 
693
  q = gr.Textbox(label="Search symbol")
694
  search_note = gr.Markdown()
695
  matches = gr.Dropdown(choices=[], label="Matches")
696
+ search_btn = gr.Button("Search")
697
+ add_btn = gr.Button("Add selected to portfolio")
 
698
 
699
+ gr.Markdown("### Portfolio positions type dollar amounts (negatives allowed for shorts)")
700
  table = gr.Dataframe(
701
  headers=["ticker", "amount_usd"],
702
  datatype=["str", "number"],
 
705
  )
706
 
707
  horizon = gr.Number(label="Horizon in years (1–100)", value=5, precision=0)
708
+ lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for beta and sigma")
709
 
710
+ run_btn = gr.Button("Compute (build dataset & plot)")
 
711
 
712
+ gr.Markdown("### Suggestions (dataset + embeddings)")
713
  with gr.Row():
714
+ btn_low = gr.Button("Suggest LOW risk")
715
+ btn_med = gr.Button("Suggest MEDIUM risk")
716
+ btn_high = gr.Button("Suggest HIGH risk")
 
 
 
 
717
 
718
  with gr.Column(scale=1):
719
+ plot = gr.Image(label="Capital Market Line (CML)", type="pil")
720
  summary = gr.Markdown(label="Summary")
721
+ universe_msg = gr.Textbox(label="Status", interactive=False)
 
722
  positions = gr.Dataframe(
723
  label="Computed positions",
724
  headers=POS_COLS,
 
728
  interactive=False
729
  )
730
  suggestions = gr.Dataframe(
731
+ label="Suggested portfolio (weights as % exposures)",
732
  headers=SUG_COLS,
733
  datatype=["str", "number"],
734
  col_count=(len(SUG_COLS), "fixed"),
735
  value=empty_suggest_df(),
736
  interactive=False
737
  )
738
+ sugg_msg = gr.Textbox(label="Suggestion detail", interactive=False)
739
+ dl = gr.File(label="Generated dataset (CSV)", value=None, visible=True)
740
 
741
  # wiring
742
  def do_search(query):
 
751
  run_btn.click(
752
  fn=compute,
753
  inputs=[lookback, table],
754
+ outputs=[plot, summary, universe_msg, positions, suggestions, dl]
755
  )
756
 
757
+ def do_low():
758
+ df, msg, img = suggest_level("low")
759
+ return df, msg, (img if img is not None else gr.update())
760
+
761
+ def do_med():
762
+ df, msg, img = suggest_level("medium")
763
+ return df, msg, (img if img is not None else gr.update())
764
 
765
+ def do_high():
766
+ df, msg, img = suggest_level("high")
767
+ return df, msg, (img if img is not None else gr.update())
768
 
769
+ btn_low.click(fn=do_low, inputs=None, outputs=[suggestions, sugg_msg, plot])
770
+ btn_med.click(fn=do_med, inputs=None, outputs=[suggestions, sugg_msg, plot])
771
+ btn_high.click(fn=do_high, inputs=None, outputs=[suggestions, sugg_msg, plot])
 
772
 
773
  if __name__ == "__main__":
774
  demo.launch()