Tulitula committed on
Commit
b746adc
Β·
verified Β·
1 Parent(s): 0e6cd4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +251 -310
app.py CHANGED
@@ -1,21 +1,17 @@
 
1
  import os, io, math, warnings
2
  warnings.filterwarnings("ignore")
3
 
4
  from typing import List, Tuple, Dict, Optional
5
- from functools import partial
6
 
7
  import numpy as np
8
  import pandas as pd
9
  import matplotlib.pyplot as plt
10
- from matplotlib.ticker import PercentFormatter
11
  import gradio as gr
12
  from PIL import Image
13
  import requests
14
  import yfinance as yf
15
 
16
- from sklearn.neighbors import KNeighborsRegressor
17
- from sklearn.preprocessing import StandardScaler
18
-
19
  # ---------------- config ----------------
20
  DATA_DIR = "data"
21
  DATASET_PATH = os.path.join(DATA_DIR, "investor_profiles.csv")
@@ -24,9 +20,8 @@ MAX_TICKERS = 30
24
  DEFAULT_LOOKBACK_YEARS = 5
25
  MARKET_TICKER = "VOO"
26
 
27
- # column schemas (weights shown in percent in UI tables)
28
- POS_COLS = ["ticker", "amount_usd", "weight_%", "beta"]
29
- SUG_RISK_COLS = ["ticker", "suggested_weight_%"]
30
 
31
  FRED_MAP = [
32
  (1, "DGS1"),
@@ -47,8 +42,8 @@ def ensure_data_dir():
47
  def empty_positions_df():
48
  return pd.DataFrame(columns=POS_COLS)
49
 
50
- def empty_risk_df():
51
- return pd.DataFrame(columns=SUG_RISK_COLS)
52
 
53
  def fred_series_for_horizon(years: float) -> str:
54
  y = max(1.0, min(100.0, float(years)))
@@ -71,28 +66,27 @@ def fetch_fred_yield_annual(code: str) -> float:
71
  def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
72
  start = pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=years, days=7)
73
  end = pd.Timestamp.today(tz="UTC")
 
74
  df = yf.download(
75
- list(dict.fromkeys(tickers)),
76
- start=start.date(),
77
- end=end.date(),
78
- interval="1mo",
79
- auto_adjust=True,
80
- progress=False
81
  )["Close"]
82
  if isinstance(df, pd.Series):
83
  df = df.to_frame()
84
- if isinstance(df.columns, pd.MultiIndex):
85
- df.columns = [c[-1] if isinstance(c, tuple) else str(c) for c in df.columns]
86
- else:
87
- df.columns = [str(c) for c in df.columns]
88
  df = df.dropna(how="all").fillna(method="ffill")
 
 
 
89
  return df
90
 
91
  def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
92
  return prices.pct_change().dropna()
93
 
94
- def annualize_mean(m): return np.asarray(m, dtype=float) * 12.0
95
- def annualize_sigma(s): return np.asarray(s, dtype=float) * math.sqrt(12.0)
 
 
 
96
 
97
  def yahoo_search(query: str):
98
  if not query or len(query.strip()) == 0:
@@ -109,7 +103,7 @@ def yahoo_search(query: str):
109
  sym = q.get("symbol")
110
  name = q.get("shortname") or q.get("longname") or ""
111
  exch = q.get("exchDisp") or ""
112
- if sym and isinstance(sym, str) and sym.isascii():
113
  out.append({"symbol": sym, "name": name, "exchange": exch})
114
  if not out:
115
  out = [{"symbol": query.strip().upper(), "name": "typed symbol", "exchange": "n a"}]
@@ -126,19 +120,18 @@ def validate_tickers(symbols: List[str], years: int) -> List[str]:
126
 
127
  # -------------- aligned moments --------------
128
  def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
129
- uniq = [c for c in dict.fromkeys(symbols)]
130
- if MARKET_TICKER not in uniq:
131
- uniq = uniq + [MARKET_TICKER]
132
- px = fetch_prices_monthly(uniq, years)
133
  rets = monthly_returns(px)
134
- cols = [c for c in uniq if c in rets.columns]
135
  R = rets[cols].dropna(how="any")
136
  return R.loc[:, ~R.columns.duplicated()]
137
 
138
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
139
  R = get_aligned_monthly_returns(symbols, years)
140
  if MARKET_TICKER not in R.columns or R.shape[0] < 3:
141
- raise ValueError("Not enough aligned data")
142
  rf_m = rf_ann / 12.0
143
 
144
  m = R[MARKET_TICKER]
@@ -154,15 +147,12 @@ def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
154
  var_m = max(var_m, 1e-6)
155
 
156
  betas: Dict[str, float] = {}
157
- for s in [c for c in R.columns]:
158
- ex_s = (R[s] - rf_m) if s in R.columns else None
159
- if s == MARKET_TICKER:
160
- betas[s] = 1.0
161
- elif ex_s is not None:
162
- betas[s] = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1] / var_m)
163
-
164
- # include market in covariance so risk is measured correctly when VOO is held
165
- asset_cols = list(R.columns)
166
  cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
167
  covA = pd.DataFrame(cov_m * 12.0, index=asset_cols, columns=asset_cols)
168
 
@@ -177,6 +167,8 @@ def portfolio_stats(weights: Dict[str, float],
177
  rf_ann: float,
178
  erp_ann: float) -> Tuple[float, float, float]:
179
  tickers = list(weights.keys())
 
 
180
  w = np.array([weights[t] for t in tickers], dtype=float)
181
  gross = float(np.sum(np.abs(w)))
182
  if gross == 0:
@@ -188,7 +180,7 @@ def portfolio_stats(weights: Dict[str, float],
188
  sigma_p = math.sqrt(float(max(w_expo.T @ cov @ w_expo, 0.0)))
189
  return beta_p, er_p, sigma_p
190
 
191
- # -------------- CML helpers --------------
192
  def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
193
  if sigma_mkt <= 1e-12:
194
  return 0.0, 1.0, rf_ann
@@ -201,7 +193,7 @@ def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma
201
  a = (mu_target - rf_ann) / erp_ann
202
  return a, 1.0 - a, abs(a) * sigma_mkt
203
 
204
- def plot_cml(
205
  rf_ann, erp_ann, sigma_mkt,
206
  pt_sigma, pt_mu,
207
  same_sigma_sigma, same_sigma_mu,
@@ -221,36 +213,25 @@ def plot_cml(
221
  xs = np.linspace(0, xmax, 160)
222
  slope = erp_ann / max(sigma_mkt, 1e-12)
223
  cml = rf_ann + slope * xs
224
- plt.plot(xs, cml, label="CML through VOO")
225
 
226
- plt.scatter([0.0], [rf_ann], label="Risk free")
227
- plt.scatter([sigma_mkt], [rf_ann + erp_ann], label="Market VOO")
228
- plt.scatter([pt_sigma], [pt_mu], label="Your portfolio")
229
- plt.scatter([same_sigma_sigma], [same_sigma_mu], label="Efficient same sigma")
230
- plt.scatter([same_mu_sigma], [same_mu_mu], label="Efficient same return")
 
 
 
 
231
  if targ_sigma is not None and targ_mu is not None:
232
- plt.scatter([targ_sigma], [targ_mu], label="Dataset suggestion")
233
-
234
- # Guides + annotations (in percent)
235
- plt.plot([pt_sigma, same_sigma_sigma], [pt_mu, same_sigma_mu],
236
- linestyle="--", linewidth=1.2, alpha=0.7, color="gray")
237
- d_ret = (same_sigma_mu - pt_mu) * 100.0
238
- plt.annotate(f"Return gain at same sigma {d_ret:+.2f}%",
239
- xy=(same_sigma_sigma, same_sigma_mu),
240
- xytext=(same_sigma_sigma + 0.02 * xmax, same_sigma_mu),
241
- arrowprops=dict(arrowstyle="->", lw=1.0), fontsize=9, va="center")
242
-
243
- plt.plot([pt_sigma, same_mu_sigma], [pt_mu, same_mu_mu],
244
- linestyle="--", linewidth=1.2, alpha=0.7, color="gray")
245
- d_sig = (same_mu_sigma - pt_sigma) * 100.0
246
- plt.annotate(f"Risk change at same return {d_sig:+.2f}%",
247
- xy=(same_mu_sigma, same_mu_mu),
248
- xytext=(same_mu_sigma, same_mu_mu + 0.03),
249
- arrowprops=dict(arrowstyle="->", lw=1.0), fontsize=9, ha="center")
250
-
251
- ax = plt.gca()
252
- ax.yaxis.set_major_formatter(PercentFormatter(1.0))
253
- ax.xaxis.set_major_formatter(PercentFormatter(1.0))
254
  plt.xlabel("Standard deviation (%)")
255
  plt.ylabel("Expected return (%)")
256
  plt.legend(loc="best")
@@ -262,209 +243,126 @@ def plot_cml(
262
  buf.seek(0)
263
  return Image.open(buf)
264
 
265
- # -------------- synthetic dataset (for predictor + risk buttons) --------------
266
- def synth_profile(seed: int) -> str:
267
- rng = np.random.default_rng(seed)
268
- risk = rng.choice(["cautious", "balanced", "moderate", "growth", "aggressive"])
269
- horizon = rng.choice(["three years", "five years", "seven years", "ten years", "fifteen years"])
270
- goal = rng.choice(["retirement savings", "first home", "education fund", "wealth building", "travel fund", "emergency buffer"])
271
- return f"{risk} investor, {horizon} horizon, goal is {goal}."
272
-
273
- def build_synthetic_dataset(universe: List[str], years: int, rf_ann: float, erp_ann: float) -> pd.DataFrame:
274
- symbols = list(sorted(set([s for s in universe if s != MARKET_TICKER] + [MARKET_TICKER])))[:MAX_TICKERS]
275
- moms = estimate_all_moments_aligned(symbols, years, rf_ann)
276
- covA, betas = moms["cov_ann"], moms["betas"]
277
- rows, rng = [], np.random.default_rng(123)
278
- for i in range(1000):
279
- k = rng.integers(low=min(2, len(symbols)), high=min(8, len(symbols)) + 1)
280
- picks = list(rng.choice(symbols, size=k, replace=False))
281
- signs = rng.choice([-1.0, 1.0], size=k, p=[0.25, 0.75])
282
- raw = rng.dirichlet(np.ones(k))
283
- gross = 1.0 + float(rng.gamma(2.0, 0.5))
284
- w = gross * signs * raw
285
- beta_p, er_p, sigma_p = portfolio_stats({picks[j]: w[j] for j in range(k)}, covA, betas, rf_ann, erp_ann)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  rows.append({
287
  "id": i,
288
- "profile_text": synth_profile(10_000 + i),
289
- "tickers": ",".join(picks),
290
- "weights": ",".join(f"{x:.4f}" for x in w),
291
  "beta_p": beta_p,
292
  "er_p": er_p,
293
  "sigma_p": sigma_p
294
  })
295
  return pd.DataFrame(rows)
296
 
297
- def save_synth_csv(df: pd.DataFrame, path: str = DATASET_PATH):
298
  os.makedirs(os.path.dirname(path), exist_ok=True)
299
  df.to_csv(path, index=False)
300
 
301
  def _row_to_exposures(row: pd.Series, universe: List[str]) -> Optional[np.ndarray]:
302
  try:
303
- ts = [t.strip() for t in str(row["tickers"]).split(",")]
304
  ws = [float(x) for x in str(row["weights"]).split(",")]
305
  wmap = {t: ws[i] for i, t in enumerate(ts) if i < len(ws)}
306
- w = np.array([wmap.get(t, 0.0) for t in universe], dtype=float)
307
- gross = float(np.sum(np.abs(w)))
308
- if gross <= 1e-12:
309
  return None
310
- return w / gross
311
  except Exception:
312
  return None
313
 
314
- def fit_surrogate_from_csv(csv_path: str, universe: List[str]):
315
- try:
316
- df = pd.read_csv(csv_path)
317
- except Exception:
318
- return None, None, 0
319
- X_list, Y_list = [], []
320
  for _, r in df.iterrows():
321
  x = _row_to_exposures(r, universe)
322
  if x is None:
323
  continue
324
- y = np.array([float(r["er_p"]), float(r["sigma_p"]), float(r["beta_p"])], dtype=float)
325
- X_list.append(x); Y_list.append(y)
326
- if not X_list:
327
- return None, None, 0
328
- X = np.vstack(X_list); Y = np.vstack(Y_list)
329
- scaler = StandardScaler().fit(X)
330
- Xn = scaler.transform(X)
331
- k = min(25, len(Xn))
332
- knn = KNeighborsRegressor(n_neighbors=k, weights="distance")
333
- knn.fit(Xn, Y)
334
- return scaler, knn, len(Xn)
335
-
336
- def predict_from_surrogate(amounts_map: Dict[str, float], universe: List[str],
337
- scaler: StandardScaler, knn: KNeighborsRegressor):
338
- gross = sum(abs(v) for v in amounts_map.values())
339
- if gross <= 1e-12:
340
- return None
341
- w = np.array([amounts_map.get(t, 0.0) for t in universe], dtype=float) / gross
342
- yhat = knn.predict(scaler.transform([w]))[0]
343
- er_hat, sigma_hat, beta_hat = float(yhat[0]), float(yhat[1]), float(yhat[2])
344
- return er_hat, sigma_hat, beta_hat
345
-
346
- # ---- dataset risk buttons helpers (purely CSV-based) ----
347
- def pick_row_by_risk(df: pd.DataFrame, level: str) -> Optional[pd.Series]:
348
- df = df.dropna(subset=["sigma_p"])
349
- if df.empty:
350
  return None
351
- if level == "low":
352
- return df.loc[df["sigma_p"].idxmin()]
353
- if level == "high":
354
- return df.loc[df["sigma_p"].idxmax()]
355
- # medium: closest to median sigma
356
- med = float(df["sigma_p"].median())
357
- idx = (df["sigma_p"] - med).abs().idxmin()
358
- return df.loc[idx]
359
-
360
- def row_to_suggestion(row: pd.Series, universe: List[str]) -> Optional[Dict]:
361
- x = _row_to_exposures(row, universe)
362
- if x is None:
363
- return None
364
- wmap = {universe[i]: float(x[i]) for i in range(len(universe)) if abs(float(x[i])) > 1e-4}
365
- # sort top exposures
366
- wmap = dict(sorted(wmap.items(), key=lambda kv: -abs(kv[1]))[:12])
367
- return {
368
- "weights": wmap,
369
- "er": float(row["er_p"]),
370
- "sigma": float(row["sigma_p"]),
371
- "beta": float(row["beta_p"]),
372
- }
373
-
374
- def suggest_by_risk(level: str, state: dict):
375
- # State must come from a previous "Compute"
376
- if not isinstance(state, dict) or not state.get("csv_path") or not os.path.exists(state["csv_path"]):
377
- return gr.update(), empty_risk_df(), "Run analysis first to build the dataset."
378
-
379
- try:
380
- df = pd.read_csv(state["csv_path"])
381
- except Exception:
382
- return gr.update(), empty_risk_df(), "Could not read dataset."
383
-
384
- row = pick_row_by_risk(df, {"low":"low","med":"med","high":"high"}[level])
385
- if row is None:
386
- return gr.update(), empty_risk_df(), "Dataset is empty."
387
-
388
- cand = row_to_suggestion(row, UNIVERSE)
389
- if cand is None:
390
- return gr.update(), empty_risk_df(), "No suggestion available."
391
-
392
- # Build table in percents
393
- rows = [{"ticker": k, "suggested_weight_%": v * 100.0} for k, v in cand["weights"].items()]
394
- risk_table = pd.DataFrame(rows, columns=SUG_RISK_COLS)
395
-
396
- # Overlay the dataset suggestion on the existing CML
397
- img = plot_cml(
398
- state["rf_ann"], state["erp_ann"], state["sigma_mkt"],
399
- state["pt_sigma"], state["pt_mu"],
400
- state["same_sigma_sigma"], state["same_sigma_mu"],
401
- state["same_mu_sigma"], state["same_mu_mu"],
402
- targ_sigma=cand["sigma"], targ_mu=cand["er"]
403
- )
404
-
405
- msg = (
406
- f"**Dataset suggestion ({'Lowest' if level=='low' else 'Medium' if level=='med' else 'Highest'} risk)** \n"
407
- f"- Predicted expected return: {fmt_pct(cand['er'])} \n"
408
- f"- Predicted sigma: {fmt_pct(cand['sigma'])} \n"
409
- f"- Predicted beta: {cand['beta']:.2f}"
410
- )
411
- return img, risk_table, msg
412
 
413
- # -------------- summary --------------
414
  def fmt_pct(x: float) -> str:
415
  return f"{x*100:.2f}%"
416
 
417
- def humanize_synth(er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta):
418
- close_mu = abs(dmu) <= 0.005
419
- close_sig = abs(dsig) <= 0.005
420
- close_beta = abs(dbeta) <= 0.05
421
- parts = []
422
- parts.append(f"- Predicted annual return {fmt_pct(er_hat)} , difference {fmt_pct(dmu)}")
423
- parts.append(f"- Predicted annual volatility {fmt_pct(sigma_hat)} , difference {fmt_pct(dsig)}")
424
- parts.append(f"- Predicted beta {beta_hat:.2f} , difference {dbeta:+.02f}")
425
- verdict = ("The synthetic model matches the historical calculation closely. "
426
- "You can trust these quick predictions for similar mixes."
427
- if (close_mu and close_sig and close_beta)
428
- else "The synthetic model is not very close here. Rely more on the historical calculation for this mix.")
429
- return "\n".join(parts + ["", f"**Verdict** {verdict}"])
430
-
431
  def build_summary_md(lookback, horizon, rf, rf_code, erp, sigma_mkt,
432
  beta_p, er_p, sigma_p,
433
  a_sigma, b_sigma, mu_eff_sigma,
434
  a_mu, b_mu, sigma_eff_mu,
435
- synth=None, synth_nrows: int = 0) -> str:
436
  lines = []
437
  lines.append("### Inputs")
438
- lines.append(f"- Lookback years {lookback}")
439
- lines.append(f"- Horizon years {int(round(horizon))}")
440
- lines.append(f"- Risk free {fmt_pct(rf)} from {rf_code}")
441
- lines.append(f"- Market ERP {fmt_pct(erp)}")
442
- lines.append(f"- Market sigma {fmt_pct(sigma_mkt)}")
443
  lines.append("")
444
  lines.append("### Your portfolio")
445
- lines.append(f"- Beta {beta_p:.2f}")
446
- lines.append(f"- Sigma {fmt_pct(sigma_p)}")
447
- lines.append(f"- Expected return {fmt_pct(er_p)}")
448
- if synth is not None:
449
- er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta = synth
450
- lines.append("")
451
- lines.append("### Synthetic prediction from data/investor_profiles.csv")
452
- lines.append(f"- Samples used {synth_nrows}")
453
- lines.append(humanize_synth(er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta))
454
  lines.append("")
455
  lines.append("### Efficient alternatives on CML")
456
- lines.append("Efficient same sigma")
457
- lines.append(f"- Market weight {a_sigma:.2f} , Bills weight {b_sigma:.2f}")
458
- lines.append(f"- Expected return {fmt_pct(mu_eff_sigma)}")
459
- lines.append("Efficient same return")
460
- lines.append(f"- Market weight {a_mu:.2f} , Bills weight {b_mu:.2f}")
461
- lines.append(f"- Sigma {fmt_pct(sigma_eff_mu)}")
462
  return "\n".join(lines)
463
 
464
- # -------------- app state on launch --------------
465
- ensure_data_dir()
466
- UNIVERSE = [MARKET_TICKER, "QQQ", "XLK", "XLP", "XLE", "VNQ", "IEF", "HYG", "GLD", "EEM"]
467
- HORIZON_YEARS = 5
 
 
468
  RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
469
  RF_ANN = fetch_fred_yield_annual(RF_CODE)
470
 
@@ -515,79 +413,62 @@ def set_horizon(years: float):
515
  HORIZON_YEARS = y
516
  RF_CODE = code
517
  RF_ANN = rf
518
- return f"Risk free series {code}. Latest annual rate {rf:.2%}. Dataset will use this rate on compute."
519
 
520
- def compute(years_lookback: int, table: pd.DataFrame, use_synth: bool):
521
  df = table.dropna()
522
  df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
523
  df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
524
 
525
  symbols = [t for t in df["ticker"].tolist() if t]
526
  if len(symbols) == 0:
527
- return None, "Add at least one ticker", "Universe empty", empty_positions_df(), None, {}
528
 
529
  symbols = validate_tickers(symbols, years_lookback)
530
  if len(symbols) == 0:
531
- return None, "Could not validate any tickers", "Universe invalid", empty_positions_df(), None, {}
532
-
533
- global UNIVERSE
534
- UNIVERSE = list(sorted(set([s for s in symbols if s != MARKET_TICKER] + [MARKET_TICKER])))[:MAX_TICKERS]
535
 
536
  df = df[df["ticker"].isin(symbols)].copy()
537
  amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
 
538
  rf_ann = RF_ANN
539
 
 
540
  moms = estimate_all_moments_aligned(symbols, years_lookback, rf_ann)
541
  betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
542
 
543
  gross = sum(abs(v) for v in amounts.values())
544
  if gross == 0:
545
- return None, "All amounts are zero", "Universe ok", empty_positions_df(), None, {}
546
  weights = {k: v / gross for k, v in amounts.items()}
547
-
548
  beta_p, er_p, sigma_p = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
549
 
550
  a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
551
  a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_p, rf_ann, erp_ann, sigma_mkt)
552
 
553
- # ensure synthetic dataset exists once (for predictor + risk buttons)
554
- if not os.path.exists(DATASET_PATH):
555
- synth_df = build_synthetic_dataset(
556
- universe=list(sorted(set(symbols + [MARKET_TICKER]))),
557
- years=DEFAULT_LOOKBACK_YEARS,
558
- rf_ann=rf_ann,
559
- erp_ann=erp_ann
560
- )
561
- save_synth_csv(synth_df)
562
- csv_path = DATASET_PATH if os.path.exists(DATASET_PATH) else None
563
-
564
- scaler, knn, nrows = None, None, 0
565
- synth_tuple = None
566
- if use_synth and csv_path:
567
- scaler, knn, nrows = fit_surrogate_from_csv(csv_path, UNIVERSE)
568
- if scaler is not None and knn is not None:
569
- pred = predict_from_surrogate(amounts, UNIVERSE, scaler, knn)
570
- if pred is not None:
571
- er_hat, sigma_hat, beta_hat = pred
572
- synth_tuple = (
573
- er_hat, sigma_hat, beta_hat,
574
- er_hat - er_p, sigma_hat - sigma_p, beta_hat - beta_p
575
- )
576
-
577
- img = plot_cml(
578
  rf_ann, erp_ann, sigma_mkt,
579
  sigma_p, er_p,
580
  sigma_p, mu_eff_sigma,
581
  sigma_eff_mu, er_p,
582
  targ_sigma=None, targ_mu=None
583
  )
584
-
585
  info = build_summary_md(
586
  years_lookback, HORIZON_YEARS, rf_ann, RF_CODE, erp_ann, sigma_mkt,
587
  beta_p, er_p, sigma_p,
588
  a_sigma, b_sigma, mu_eff_sigma,
589
  a_mu, b_mu, sigma_eff_mu,
590
- synth=synth_tuple, synth_nrows=nrows
591
  )
592
 
593
  rows = []
@@ -596,27 +477,72 @@ def compute(years_lookback: int, table: pd.DataFrame, use_synth: bool):
596
  rows.append({
597
  "ticker": t,
598
  "amount_usd": amounts.get(t, 0.0),
599
- "weight_%": weights.get(t, 0.0) * 100.0,
600
  "beta": beta_val,
601
  })
602
  pos_table = pd.DataFrame(rows, columns=POS_COLS)
 
 
 
 
 
 
 
 
 
603
 
604
- # Pack state for risk buttons
605
- state = {
606
- "csv_path": csv_path,
607
- "rf_ann": rf_ann,
608
- "erp_ann": erp_ann,
609
- "sigma_mkt": sigma_mkt,
610
- "pt_sigma": sigma_p,
611
- "pt_mu": er_p,
612
- "same_sigma_sigma": sigma_p,
613
- "same_sigma_mu": mu_eff_sigma,
614
- "same_mu_sigma": sigma_eff_mu,
615
- "same_mu_mu": er_p,
616
- }
617
-
618
- uni_msg = f"Universe set to {', '.join(UNIVERSE)}"
619
- return img, info, uni_msg, pos_table, csv_path, state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
620
 
621
  # -------------- UI --------------
622
  ensure_data_dir()
@@ -625,19 +551,18 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
625
  gr.Markdown(
626
  "## Efficient Portfolio Advisor\n"
627
  "Search symbols, enter dollar amounts, set your horizon. "
628
- "Prices come from Yahoo Finance. Risk free comes from FRED.\n\n"
629
- "**New:** Dataset-only risk suggestions (Low / Medium / High) from the 1,000-row synthetic CSV."
630
  )
631
 
632
- app_state = gr.State({})
633
-
634
  with gr.Row():
635
  with gr.Column(scale=1):
636
  q = gr.Textbox(label="Search symbol")
637
  search_note = gr.Markdown()
638
  matches = gr.Dropdown(choices=[], label="Matches")
639
- search_btn = gr.Button("Search")
640
- add_btn = gr.Button("Add selected to portfolio")
 
641
 
642
  gr.Markdown("### Portfolio positions (type dollar amounts, negatives allowed for shorts)")
643
  table = gr.Dataframe(
@@ -650,19 +575,24 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
650
  horizon = gr.Number(label="Horizon in years (1–100)", value=5, precision=0)
651
  lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for beta & sigma")
652
 
653
- use_synth = gr.Checkbox(label="Use synthetic predictor (fast check)", value=True)
654
-
655
- run_btn = gr.Button("Compute and suggest")
656
 
657
- gr.Markdown("### Dataset-based risk suggestions")
 
 
 
 
658
  with gr.Row():
659
- btn_low = gr.Button("Lowest risk (dataset)")
660
- btn_med = gr.Button("Medium risk (dataset)")
661
- btn_high = gr.Button("Highest risk (dataset)")
 
662
  with gr.Column(scale=1):
663
  plot = gr.Image(label="Capital Market Line", type="pil")
664
  summary = gr.Markdown(label="Summary")
665
  universe_msg = gr.Textbox(label="Universe status", interactive=False)
 
666
  positions = gr.Dataframe(
667
  label="Computed positions",
668
  headers=POS_COLS,
@@ -671,17 +601,18 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
671
  value=empty_positions_df(),
672
  interactive=False
673
  )
674
- risk_table = gr.Dataframe(
675
- label="Suggested portfolio from dataset",
676
- headers=SUG_RISK_COLS,
677
  datatype=["str", "number"],
678
- col_count=(len(SUG_RISK_COLS), "fixed"),
679
- value=empty_risk_df(),
680
  interactive=False
681
  )
682
- risk_msg = gr.Markdown(label="Suggestion metrics")
683
- dl = gr.File(label="Session CSV path (synthetic predictor data)", value=None, visible=True)
684
 
 
685
  def do_search(query):
686
  note, options = search_tickers_cb(query)
687
  return note, gr.update(choices=options)
@@ -693,14 +624,24 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
693
 
694
  run_btn.click(
695
  fn=compute,
696
- inputs=[lookback, table, use_synth],
697
- outputs=[plot, summary, universe_msg, positions, dl, app_state]
698
  )
699
 
700
- # Risk buttons (purely dataset-driven)
701
- btn_low.click(fn=partial(suggest_by_risk, "low"), inputs=[app_state], outputs=[plot, risk_table, risk_msg])
702
- btn_med.click(fn=partial(suggest_by_risk, "med"), inputs=[app_state], outputs=[plot, risk_table, risk_msg])
703
- btn_high.click(fn=partial(suggest_by_risk, "high"), inputs=[app_state], outputs=[plot, risk_table, risk_msg])
 
 
 
 
 
 
 
 
 
 
704
 
705
  if __name__ == "__main__":
706
  demo.launch()
 
1
+ # app.py
2
  import os, io, math, warnings
3
  warnings.filterwarnings("ignore")
4
 
5
  from typing import List, Tuple, Dict, Optional
 
6
 
7
  import numpy as np
8
  import pandas as pd
9
  import matplotlib.pyplot as plt
 
10
  import gradio as gr
11
  from PIL import Image
12
  import requests
13
  import yfinance as yf
14
 
 
 
 
15
  # ---------------- config ----------------
16
  DATA_DIR = "data"
17
  DATASET_PATH = os.path.join(DATA_DIR, "investor_profiles.csv")
 
20
  DEFAULT_LOOKBACK_YEARS = 5
21
  MARKET_TICKER = "VOO"
22
 
23
+ POS_COLS = ["ticker", "amount_usd", "weight_exposure", "beta"]
24
+ SUG_COLS = ["ticker", "suggested_weight_pct"]
 
25
 
26
  FRED_MAP = [
27
  (1, "DGS1"),
 
42
  def empty_positions_df():
43
  return pd.DataFrame(columns=POS_COLS)
44
 
45
+ def empty_suggest_df():
46
+ return pd.DataFrame(columns=SUG_COLS)
47
 
48
  def fred_series_for_horizon(years: float) -> str:
49
  y = max(1.0, min(100.0, float(years)))
 
66
  def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
67
  start = pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=years, days=7)
68
  end = pd.Timestamp.today(tz="UTC")
69
+ syms = [str(t).upper().strip() for t in dict.fromkeys(tickers)]
70
  df = yf.download(
71
+ syms, start=start.date(), end=end.date(),
72
+ interval="1mo", auto_adjust=True, progress=False
 
 
 
 
73
  )["Close"]
74
  if isinstance(df, pd.Series):
75
  df = df.to_frame()
 
 
 
 
76
  df = df.dropna(how="all").fillna(method="ffill")
77
+ # columns become single Index if single ticker
78
+ if isinstance(df.columns, pd.MultiIndex):
79
+ df.columns = [c[1] if isinstance(c, tuple) else c for c in df.columns]
80
  return df
81
 
82
  def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
83
  return prices.pct_change().dropna()
84
 
85
+ def annualize_mean(m):
86
+ return np.asarray(m, dtype=float) * 12.0
87
+
88
+ def annualize_sigma(s):
89
+ return np.asarray(s, dtype=float) * math.sqrt(12.0)
90
 
91
  def yahoo_search(query: str):
92
  if not query or len(query.strip()) == 0:
 
103
  sym = q.get("symbol")
104
  name = q.get("shortname") or q.get("longname") or ""
105
  exch = q.get("exchDisp") or ""
106
+ if sym and sym.isascii():
107
  out.append({"symbol": sym, "name": name, "exchange": exch})
108
  if not out:
109
  out = [{"symbol": query.strip().upper(), "name": "typed symbol", "exchange": "n a"}]
 
120
 
121
  # -------------- aligned moments --------------
122
  def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
123
+ uniq = [c for c in dict.fromkeys(symbols) if c != MARKET_TICKER]
124
+ tickers = uniq + [MARKET_TICKER]
125
+ px = fetch_prices_monthly(tickers, years)
 
126
  rets = monthly_returns(px)
127
+ cols = [c for c in uniq if c in rets.columns] + ([MARKET_TICKER] if MARKET_TICKER in rets.columns else [])
128
  R = rets[cols].dropna(how="any")
129
  return R.loc[:, ~R.columns.duplicated()]
130
 
131
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
132
  R = get_aligned_monthly_returns(symbols, years)
133
  if MARKET_TICKER not in R.columns or R.shape[0] < 3:
134
+ raise ValueError("Could not align data with market or not enough rows.")
135
  rf_m = rf_ann / 12.0
136
 
137
  m = R[MARKET_TICKER]
 
147
  var_m = max(var_m, 1e-6)
148
 
149
  betas: Dict[str, float] = {}
150
+ for s in [c for c in R.columns if c != MARKET_TICKER]:
151
+ ex_s = R[s] - rf_m
152
+ betas[s] = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1] / var_m)
153
+ betas[MARKET_TICKER] = 1.0
154
+
155
+ asset_cols = [c for c in R.columns if c != MARKET_TICKER]
 
 
 
156
  cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
157
  covA = pd.DataFrame(cov_m * 12.0, index=asset_cols, columns=asset_cols)
158
 
 
167
  rf_ann: float,
168
  erp_ann: float) -> Tuple[float, float, float]:
169
  tickers = list(weights.keys())
170
+ if len(tickers) == 0:
171
+ return 0.0, 0.0, 0.0
172
  w = np.array([weights[t] for t in tickers], dtype=float)
173
  gross = float(np.sum(np.abs(w)))
174
  if gross == 0:
 
180
  sigma_p = math.sqrt(float(max(w_expo.T @ cov @ w_expo, 0.0)))
181
  return beta_p, er_p, sigma_p
182
 
183
+ # -------------- CML helpers + plot (percent axes) --------------
184
  def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
185
  if sigma_mkt <= 1e-12:
186
  return 0.0, 1.0, rf_ann
 
193
  a = (mu_target - rf_ann) / erp_ann
194
  return a, 1.0 - a, abs(a) * sigma_mkt
195
 
196
+ def plot_cml_percent(
197
  rf_ann, erp_ann, sigma_mkt,
198
  pt_sigma, pt_mu,
199
  same_sigma_sigma, same_sigma_mu,
 
213
  xs = np.linspace(0, xmax, 160)
214
  slope = erp_ann / max(sigma_mkt, 1e-12)
215
  cml = rf_ann + slope * xs
 
216
 
217
+ def pct(x): return 100.0 * np.asarray(x)
218
+
219
+ plt.plot(pct(xs), pct(cml), label="CML through VOO")
220
+
221
+ plt.scatter([0.0], [pct(rf_ann)], label="Risk free")
222
+ plt.scatter([pct(sigma_mkt)], [pct(rf_ann + erp_ann)], label="Market VOO")
223
+ plt.scatter([pct(pt_sigma)], [pct(pt_mu)], label="Your portfolio")
224
+ plt.scatter([pct(same_sigma_sigma)], [pct(same_sigma_mu)], label="Efficient same sigma")
225
+ plt.scatter([pct(same_mu_sigma)], [pct(same_mu_mu)], label="Efficient same return")
226
  if targ_sigma is not None and targ_mu is not None:
227
+ plt.scatter([pct(targ_sigma)], [pct(targ_mu)], label="Target suggestion")
228
+
229
+ # Guides (keep simple)
230
+ plt.plot([pct(pt_sigma), pct(same_sigma_sigma)], [pct(pt_mu), pct(same_sigma_mu)],
231
+ linestyle="--", linewidth=1.0, alpha=0.7, color="gray")
232
+ plt.plot([pct(pt_sigma), pct(same_mu_sigma)], [pct(pt_mu), pct(same_mu_mu)],
233
+ linestyle="--", linewidth=1.0, alpha=0.7, color="gray")
234
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  plt.xlabel("Standard deviation (%)")
236
  plt.ylabel("Expected return (%)")
237
  plt.legend(loc="best")
 
243
  buf.seek(0)
244
  return Image.open(buf)
245
 
246
+ # -------------- dataset over *current* tickers --------------
247
+ def dirichlet_mixture(n: int, k: int, allow_shorts: bool, rng: np.random.Generator) -> np.ndarray:
248
+ """Return n weight vectors (exposures) across k assets; sum |w| = 1."""
249
+ out = []
250
+ n1 = int(n * 0.6) # diversified
251
+ n2 = n - n1 # concentrated
252
+ for _ in range(n1):
253
+ w = rng.dirichlet(np.ones(k))
254
+ if allow_shorts:
255
+ signs = rng.choice([-1.0, 1.0], size=k, p=[0.25, 0.75])
256
+ w = w * signs
257
+ out.append(w)
258
+ for _ in range(n2):
259
+ hot = rng.integers(0, k)
260
+ alpha = np.ones(k) * 0.3
261
+ alpha[hot] = 3.0
262
+ w = rng.dirichlet(alpha)
263
+ if allow_shorts:
264
+ signs = rng.choice([-1.0, 1.0], size=k, p=[0.35, 0.65])
265
+ w = w * signs
266
+ out.append(w)
267
+ W = np.vstack(out)
268
+ # normalize to exposure space (sum |w| = 1)
269
+ denom = np.sum(np.abs(W), axis=1, keepdims=True)
270
+ denom[denom == 0] = 1.0
271
+ return W / denom
272
+
273
+ def build_fixed_universe_dataset(
274
+ symbols: List[str], years: int, rf_ann: float, erp_ann: float,
275
+ covA: pd.DataFrame, betas: Dict[str, float],
276
+ allow_shorts: bool, n_rows: int = 1000
277
+ ) -> pd.DataFrame:
278
+ rng = np.random.default_rng(12345)
279
+ k = len(symbols)
280
+ W = dirichlet_mixture(n_rows, k, allow_shorts, rng)
281
+
282
+ rows = []
283
+ for i in range(W.shape[0]):
284
+ w = W[i]
285
+ wmap = {symbols[j]: float(w[j]) for j in range(k)}
286
+ beta_p, er_p, sigma_p = portfolio_stats(wmap, covA, betas, rf_ann, erp_ann)
287
  rows.append({
288
  "id": i,
289
+ "tickers": ",".join(symbols),
290
+ "weights": ",".join(f"{x:.6f}" for x in w),
 
291
  "beta_p": beta_p,
292
  "er_p": er_p,
293
  "sigma_p": sigma_p
294
  })
295
  return pd.DataFrame(rows)
296
 
297
def save_dataset_csv(df: pd.DataFrame, path: str = DATASET_PATH):
    """Persist the simulated-portfolio dataset as CSV, creating its directory."""
    target_dir = os.path.dirname(path)
    os.makedirs(target_dir, exist_ok=True)
    df.to_csv(path, index=False)
300
 
301
def _row_to_exposures(row: pd.Series, universe: List[str]) -> Optional[np.ndarray]:
    """Parse one dataset row into a normalized exposure vector over ``universe``.

    The row stores parallel comma-separated "tickers" and "weights" strings.
    Tickers missing from the row map to 0.0, and the vector is rescaled so
    that sum(|x|) == 1. Returns None for malformed rows or zero exposure.
    """
    try:
        tickers = [tok.strip().upper() for tok in str(row["tickers"]).split(",")]
        weights = [float(tok) for tok in str(row["weights"]).split(",")]
        # zip truncates to the shorter list, mirroring the index guard it replaces.
        exposure_by_ticker = dict(zip(tickers, weights))
        vec = np.fromiter(
            (exposure_by_ticker.get(sym, 0.0) for sym in universe),
            dtype=float, count=len(universe),
        )
        gross = float(np.abs(vec).sum())
        if gross <= 1e-12:
            return None
        return vec / gross
    except Exception:
        # Best-effort parser: any malformed row is simply skipped by callers.
        return None
313
 
314
def pick_low_med_high(csv_path: str, universe: List[str]):
    """Pick three representative portfolios from the session dataset CSV.

    Loads every parseable row, orders the portfolios by sigma, and returns
    the least-volatile, median, and most-volatile entries as
    ``{"low"|"medium"|"high": (exposures, er_p, sigma_p, beta_p)}``.
    Returns None when no row can be parsed.
    """
    frame = pd.read_csv(csv_path)
    candidates = []
    for _, record in frame.iterrows():
        exposures = _row_to_exposures(record, universe)
        if exposures is None:
            continue  # skip malformed / all-zero rows
        candidates.append((
            exposures,
            float(record["er_p"]),
            float(record["sigma_p"]),
            float(record["beta_p"]),
        ))
    if not candidates:
        return None
    candidates.sort(key=lambda entry: entry[2])  # ascending portfolio sigma
    return {
        "low": candidates[0],
        "medium": candidates[len(candidates) // 2],
        "high": candidates[-1],
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
 
330
+ # -------------- summary builder --------------
331
def fmt_pct(x: float) -> str:
    """Format a fractional rate (e.g. 0.0825) as a percent string ('8.25%')."""
    return "{:.2f}%".format(x * 100)
333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
def build_summary_md(lookback, horizon, rf, rf_code, erp, sigma_mkt,
                     beta_p, er_p, sigma_p,
                     a_sigma, b_sigma, mu_eff_sigma,
                     a_mu, b_mu, sigma_eff_mu,
                     ds_info: str) -> str:
    """Render the session summary as Markdown.

    Sections: the inputs used, the user's portfolio stats, the two
    Capital-Market-Line alternatives (same sigma / same return), and a note
    about the simulated dataset backing the risk suggestions.
    """
    sections = [
        "### Inputs",
        f"- Lookback years: {lookback}",
        f"- Horizon years: {int(round(horizon))}",
        f"- Risk free: {fmt_pct(rf)} from {rf_code}",
        f"- Market ERP: {fmt_pct(erp)}",
        f"- Market sigma: {fmt_pct(sigma_mkt)}",
        "",
        "### Your portfolio",
        f"- Beta: {beta_p:.2f}",
        f"- Sigma: {fmt_pct(sigma_p)}",
        f"- Expected return: {fmt_pct(er_p)}",
        "",
        "### Efficient alternatives on CML",
        f"- Same sigma β‡’ Market {a_sigma:.2f} , Bills {b_sigma:.2f} , ER {fmt_pct(mu_eff_sigma)}",
        f"- Same return β‡’ Market {a_mu:.2f} , Bills {b_mu:.2f} , Sigma {fmt_pct(sigma_eff_mu)}",
        "",
        "### Dataset for risk suggestions",
        ds_info,
    ]
    return "\n".join(sections)
359
 
360
+ # -------------- globals to carry session state --------------
361
+ LAST_MOMS = None  # market moments of the last Compute run (betas, covA, erp_ann, sigma_mkt)
362
+ LAST_BASE = None  # last computed portfolio baseline (rf_ann, er_p, sigma_p)
363
+ LAST_UNIVERSE = []  # ticker symbols used in the last Compute run
364
+ LAST_DATASET_PATH = None  # CSV path of the dataset built during the last Compute
365
+ HORIZON_YEARS = 5.0  # user horizon; selects which FRED series supplies the risk-free rate
366
# NOTE(review): the two calls below run (network fetch) at import time;
# they are re-assigned when the user changes the horizon.
RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
367
RF_ANN = fetch_fred_yield_annual(RF_CODE)
368
 
 
413
  HORIZON_YEARS = y
414
  RF_CODE = code
415
  RF_ANN = rf
416
+ return f"Risk free {fmt_pct(rf)} from {code}. Will be used on Compute."
417
 
418
+ def compute(years_lookback: int, table: pd.DataFrame):
# Compute-button pipeline: clean the positions table, validate tickers,
# estimate market moments, build the simulated dataset over exactly these
# tickers, and return (plot, summary, status, positions, suggestions,
# dataset info, dataset file path) for the UI outputs.
419
  df = table.dropna()
420
  df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
421
  df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
422

423
  symbols = [t for t in df["ticker"].tolist() if t]
424
  if len(symbols) == 0:
425
+ return None, "Add at least one ticker", "Universe empty", empty_positions_df(), empty_suggest_df(), "", None
426

427
  symbols = validate_tickers(symbols, years_lookback)
428
  if len(symbols) == 0:
429
+ return None, "Could not validate any tickers", "Universe invalid", empty_positions_df(), empty_suggest_df(), "", None



430

431
  df = df[df["ticker"].isin(symbols)].copy()
432
  amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
433
# Any negative dollar amount flags the session as long/short.
+ allow_shorts = any(v < 0 for v in amounts.values())
434
  rf_ann = RF_ANN
435

436
+ # moments
437
  moms = estimate_all_moments_aligned(symbols, years_lookback, rf_ann)
438
  betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
439

440
# Weights are exposures: dollars divided by gross (sum of absolute amounts).
  gross = sum(abs(v) for v in amounts.values())
441
  if gross == 0:
442
+ return None, "All amounts are zero", "Universe ok", empty_positions_df(), empty_suggest_df(), "", None
443
  weights = {k: v / gross for k, v in amounts.items()}

444
  beta_p, er_p, sigma_p = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
445

446
# Efficient CML alternatives: same risk (sigma) and same expected return.
  a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
447
  a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_p, rf_ann, erp_ann, sigma_mkt)
448

449
+ # dataset strictly over *these* symbols
450
+ ensure_data_dir()
451
+ ds = build_fixed_universe_dataset(
452
+ symbols=symbols, years=years_lookback, rf_ann=rf_ann, erp_ann=erp_ann,
453
+ covA=covA.loc[symbols, symbols], betas=betas, allow_shorts=allow_shorts, n_rows=1000
454
+ )
455
+ save_dataset_csv(ds, DATASET_PATH)
456
+ ds_info = f"- Built {len(ds)} simulated mixes over current tickers ({'shorts allowed' if allow_shorts else 'long-only'})."
457
+
458
+ # plot + summary
459
+ img = plot_cml_percent(














460
  rf_ann, erp_ann, sigma_mkt,
461
  sigma_p, er_p,
462
  sigma_p, mu_eff_sigma,
463
  sigma_eff_mu, er_p,
464
  targ_sigma=None, targ_mu=None
465
  )

466
  info = build_summary_md(
467
  years_lookback, HORIZON_YEARS, rf_ann, RF_CODE, erp_ann, sigma_mkt,
468
  beta_p, er_p, sigma_p,
469
  a_sigma, b_sigma, mu_eff_sigma,
470
  a_mu, b_mu, sigma_eff_mu,
471
+ ds_info=ds_info
472
  )
473

474
  rows = []

# NOTE(review): the loop header that binds `t` and `beta_val` (original
# lines 475-476) is elided from this diff hunk — confirm against the full file.
477
  rows.append({
478
  "ticker": t,
479
  "amount_usd": amounts.get(t, 0.0),
480
+ "weight_exposure": weights.get(t, 0.0),
481
  "beta": beta_val,
482
  })
483
  pos_table = pd.DataFrame(rows, columns=POS_COLS)
484
+ pos_table["weight_exposure"] = pos_table["weight_exposure"].astype(float)
485
+
486
+ uni_msg = f"Universe set to {', '.join(symbols)}"
487
+ # store globals for Suggest buttons
488
+ global LAST_MOMS, LAST_BASE, LAST_UNIVERSE, LAST_DATASET_PATH
489
+ LAST_MOMS = {"betas": betas, "covA": covA, "erp_ann": erp_ann, "sigma_mkt": sigma_mkt}
490
+ LAST_BASE = {"rf_ann": rf_ann, "er_p": er_p, "sigma_p": sigma_p}
491
+ LAST_UNIVERSE = list(symbols)
492
+ LAST_DATASET_PATH = DATASET_PATH
493

494
+ return img, info, uni_msg, pos_table, empty_suggest_df(), ds_info, DATASET_PATH
495
+
496
def _overlay_plot_with_suggestion(sigma_s, er_s):
    """Redraw the CML chart with a suggested portfolio marked at (sigma_s, er_s).

    Uses the moments cached by the last Compute run; returns None when no
    run has happened yet (the session globals are still empty).
    """
    if not LAST_MOMS or not LAST_BASE:
        return None
    base_rf = LAST_BASE["rf_ann"]
    base_sigma = LAST_BASE["sigma_p"]
    base_er = LAST_BASE["er_p"]
    market_erp = LAST_MOMS["erp_ann"]
    market_sigma = LAST_MOMS["sigma_mkt"]
    # Efficient-frontier companions of the user's portfolio; only the
    # ER / sigma components are needed for the plot, mix weights are discarded.
    _, _, er_same_sigma = efficient_same_sigma(base_sigma, base_rf, market_erp, market_sigma)
    _, _, sigma_same_er = efficient_same_return(base_er, base_rf, market_erp, market_sigma)
    return plot_cml_percent(
        base_rf, market_erp, market_sigma,
        base_sigma, base_er,
        base_sigma, er_same_sigma,
        sigma_same_er, base_er,
        targ_sigma=sigma_s, targ_mu=er_s
    )
513
+
514
def suggest_level(level: str):
    """Return (suggestion table, status message, overlay plot) for a risk level.

    ``level`` is "low", "medium" or "high"; the picks come exclusively from
    the dataset CSV built by the last Compute run.
    """
    ready = bool(LAST_DATASET_PATH) and os.path.exists(LAST_DATASET_PATH) and bool(LAST_UNIVERSE)
    if not ready:
        return empty_suggest_df(), "Run Compute first.", None
    picks = pick_low_med_high(LAST_DATASET_PATH, LAST_UNIVERSE)
    if picks is None or level not in picks:
        return empty_suggest_df(), "No suggestion available.", None
    exposures, er_p, sig_p, beta_p = picks[level]
    # Report exposure weights in percent, one row per universe ticker.
    records = [
        {"ticker": LAST_UNIVERSE[i], "suggested_weight_pct": float(exposures[i]) * 100.0}
        for i in range(len(LAST_UNIVERSE))
    ]
    table = pd.DataFrame(records, columns=SUG_COLS)
    msg = f"{level.capitalize()} risk β†’ ER {fmt_pct(er_p)}, Sigma {fmt_pct(sig_p)}, Beta {beta_p:.2f}"
    overlay = _overlay_plot_with_suggestion(sig_p, er_p)
    return table, msg, overlay
527
+
528
def apply_suggestion_to_amounts(level: str, table: pd.DataFrame):
    """Rewrite the dollar amounts in ``table`` to match a suggested risk level.

    Preserves the user's total gross exposure (sum of |amount_usd|),
    defaulting to $10,000 when the table holds no dollars yet. Tickers absent
    from the suggestion get 0. The input table itself is not mutated.
    """
    if table is None or len(table) == 0:
        return table
    suggestion, _, _ = suggest_level(level)
    if suggestion is None or len(suggestion) == 0:
        return table

    result = table.copy()
    result["ticker"] = result["ticker"].astype(str).str.upper().str.strip()
    result["amount_usd"] = pd.to_numeric(result["amount_usd"], errors="coerce").fillna(0.0)

    gross = float(np.abs(result["amount_usd"].values).sum())
    if gross <= 1e-9:
        gross = 10000.0  # sensible default when no amounts were typed

    target_weight = {
        row["ticker"]: float(row["suggested_weight_pct"]) / 100.0
        for _, row in suggestion.iterrows()
    }
    result["amount_usd"] = [
        gross * target_weight.get(sym.upper(), 0.0)
        for sym in result["ticker"].tolist()
    ]
    return result
546
 
547
  # -------------- UI --------------
548
  ensure_data_dir()
 
551
  gr.Markdown(
552
  "## Efficient Portfolio Advisor\n"
553
  "Search symbols, enter dollar amounts, set your horizon. "
554
+ "Prices: Yahoo Finance. Risk free: FRED. "
555
+ "Suggestions (Low/Medium/High) come **only** from the 1,000-portfolio dataset built over your tickers."
556
  )
557
 
 
 
558
  with gr.Row():
559
  with gr.Column(scale=1):
560
  q = gr.Textbox(label="Search symbol")
561
  search_note = gr.Markdown()
562
  matches = gr.Dropdown(choices=[], label="Matches")
563
+ with gr.Row():
564
+ search_btn = gr.Button("Search")
565
+ add_btn = gr.Button("Add selected to portfolio")
566
 
567
  gr.Markdown("### Portfolio positions (type dollar amounts, negatives allowed for shorts)")
568
  table = gr.Dataframe(
 
575
  horizon = gr.Number(label="Horizon in years (1–100)", value=5, precision=0)
576
  lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for beta & sigma")
577
 
578
+ with gr.Row():
579
+ run_btn = gr.Button("Compute (build dataset)", variant="primary")
 
580
 
581
+ gr.Markdown("### Risk tolerance suggestions (dataset-based only)")
582
+ with gr.Row():
583
+ btn_low = gr.Button("Low risk")
584
+ btn_med = gr.Button("Medium risk")
585
+ btn_high = gr.Button("High risk")
586
  with gr.Row():
587
+ apply_low = gr.Button("Apply Low β†’ $")
588
+ apply_med = gr.Button("Apply Medium β†’ $")
589
+ apply_high = gr.Button("Apply High β†’ $")
590
+
591
  with gr.Column(scale=1):
592
  plot = gr.Image(label="Capital Market Line", type="pil")
593
  summary = gr.Markdown(label="Summary")
594
  universe_msg = gr.Textbox(label="Universe status", interactive=False)
595
+ dataset_info = gr.Markdown(label="Dataset info", value="")
596
  positions = gr.Dataframe(
597
  label="Computed positions",
598
  headers=POS_COLS,
 
601
  value=empty_positions_df(),
602
  interactive=False
603
  )
604
+ suggestions = gr.Dataframe(
605
+ label="Suggested weights (percent of exposure)",
606
+ headers=SUG_COLS,
607
  datatype=["str", "number"],
608
+ col_count=(len(SUG_COLS), "fixed"),
609
+ value=empty_suggest_df(),
610
  interactive=False
611
  )
612
+ sugg_msg = gr.Markdown("")
613
+ dl = gr.File(label="Session dataset CSV", value=None, visible=True)
614
 
615
+ # wiring
616
  def do_search(query):
617
  note, options = search_tickers_cb(query)
618
  return note, gr.update(choices=options)
 
624
 
625
  run_btn.click(
626
  fn=compute,
627
+ inputs=[lookback, table],
628
+ outputs=[plot, summary, universe_msg, positions, suggestions, dataset_info, dl]
629
  )
630
 
631
+ # suggest buttons
632
+ def wrap_suggest(level):
633
+ df, msg, img = suggest_level(level)
634
+ img_out = img if img is not None else gr.update()
635
+ return df, msg, img_out
636
+
637
+ btn_low.click(lambda: wrap_suggest("low"), outputs=[suggestions, sugg_msg, plot])
638
+ btn_med.click(lambda: wrap_suggest("medium"), outputs=[suggestions, sugg_msg, plot])
639
+ btn_high.click(lambda: wrap_suggest("high"), outputs=[suggestions, sugg_msg, plot])
640
+
641
+ # apply buttons (only updates the table; user can hit Compute again)
642
+ apply_low.click(lambda tb: apply_suggestion_to_amounts("low", tb), inputs=table, outputs=table)
643
+ apply_med.click(lambda tb: apply_suggestion_to_amounts("medium", tb), inputs=table, outputs=table)
644
+ apply_high.click(lambda tb: apply_suggestion_to_amounts("high", tb), inputs=table, outputs=table)
645
 
646
# Script entry point: launch the Gradio UI when executed directly.
  if __name__ == "__main__":
647
  demo.launch()