Tulitula committed on
Commit
9e8b4dc
·
verified ·
1 Parent(s): 1d99074

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -657
app.py CHANGED
@@ -1,8 +1,7 @@
1
  import os, io, math, warnings
2
  warnings.filterwarnings("ignore")
3
 
4
- from typing import List, Tuple, Dict, Optional
5
-
6
  import numpy as np
7
  import pandas as pd
8
  import matplotlib.pyplot as plt
@@ -11,42 +10,23 @@ from PIL import Image
11
  import requests
12
  import yfinance as yf
13
 
14
- from sklearn.neighbors import KNeighborsRegressor
15
- from sklearn.preprocessing import StandardScaler
16
-
17
  # ---------------- config ----------------
18
  DATA_DIR = "data"
19
- DATASET_PATH = os.path.join(DATA_DIR, "investor_profiles.csv")
20
-
21
  MAX_TICKERS = 30
22
  DEFAULT_LOOKBACK_YEARS = 5
23
  MARKET_TICKER = "VOO"
24
-
25
- POS_COLS = ["ticker", "amount_usd", "weight_exposure", "beta"]
26
- SUG_COLS = ["ticker", "suggested_weight_exposure"]
27
 
28
  FRED_MAP = [
29
- (1, "DGS1"),
30
- (2, "DGS2"),
31
- (3, "DGS3"),
32
- (5, "DGS5"),
33
- (7, "DGS7"),
34
- (10, "DGS10"),
35
- (20, "DGS20"),
36
- (30, "DGS30"),
37
- (100, "DGS30"),
38
  ]
39
 
40
  # ---------------- helpers ----------------
41
  def ensure_data_dir():
42
  os.makedirs(DATA_DIR, exist_ok=True)
43
 
44
- def empty_positions_df():
45
- return pd.DataFrame(columns=POS_COLS)
46
-
47
- def empty_suggest_df():
48
- return pd.DataFrame(columns=SUG_COLS)
49
-
50
  def fred_series_for_horizon(years: float) -> str:
51
  y = max(1.0, min(100.0, float(years)))
52
  for cutoff, code in FRED_MAP:
@@ -55,7 +35,6 @@ def fred_series_for_horizon(years: float) -> str:
55
  return "DGS30"
56
 
57
  def fetch_fred_yield_annual(code: str) -> float:
58
- # FRED CSV endpoint
59
  url = f"https://fred.stlouisfed.org/graph/fredgraph.csv?id={code}"
60
  try:
61
  r = requests.get(url, timeout=10)
@@ -66,69 +45,23 @@ def fetch_fred_yield_annual(code: str) -> float:
66
  except Exception:
67
  return 0.03
68
 
69
- # ---------- offline fallback (synthetic prices) ----------
70
- def _offline_prices(tickers: List[str], years: int) -> pd.DataFrame:
71
- # Build a synthetic monthly price panel so the app remains usable offline.
72
- months = max(12 * int(max(1, years)), 6)
73
- idx = pd.date_range(end=pd.Timestamp.today(tz="UTC").normalize(), periods=months, freq="M")
74
-
75
- rng = np.random.default_rng(42)
76
- # Market process
77
- ann_mu_mkt, ann_vol_mkt = 0.08, 0.18
78
- mu_m = ann_mu_mkt / 12.0
79
- vol_m = ann_vol_mkt / (12.0 ** 0.5)
80
- mkt_rets = rng.normal(mu_m, vol_m, size=months)
81
- mkt_prices = 100.0 * np.cumprod(1.0 + mkt_rets)
82
-
83
- df = pd.DataFrame(index=idx)
84
- cols = list(dict.fromkeys(tickers))
85
- if MARKET_TICKER not in cols:
86
- cols.append(MARKET_TICKER)
87
-
88
- for t in cols:
89
- if t == MARKET_TICKER:
90
- df[t] = mkt_prices
91
- else:
92
- beta = float(rng.uniform(0.6, 1.4))
93
- idio_vol = float(rng.uniform(0.05, 0.20)) / (12.0 ** 0.5)
94
- rets = beta * mkt_rets + rng.normal(0.0, idio_vol, size=months)
95
- df[t] = 100.0 * np.cumprod(1.0 + rets)
96
- return df
97
-
98
  def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
99
- # Robust monthly downloader with per-ticker fetch and offline fallback
100
- tickers = list(dict.fromkeys([t for t in tickers if isinstance(t, str) and t.strip()]))
101
- if not tickers:
102
- return pd.DataFrame()
103
-
104
  start = pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=years, days=7)
105
  end = pd.Timestamp.today(tz="UTC")
106
-
107
  frames = []
108
  for t in tickers:
109
  try:
110
  s = yf.download(
111
- t,
112
- start=start.date(),
113
- end=end.date(),
114
- interval="1mo",
115
- auto_adjust=True,
116
- progress=False
117
  )["Close"]
118
  if isinstance(s, pd.Series) and s.dropna().size > 0:
119
  frames.append(s.rename(t))
120
  except Exception:
121
- # skip this ticker; will fallback if insufficient data
122
  pass
123
-
124
  if frames:
125
- df = pd.concat(frames, axis=1).sort_index().dropna(how="all").fillna(method="ffill")
126
- # If we have enough aligned data and market exists, use it
127
- if MARKET_TICKER in df.columns and df.dropna(how="any").shape[0] >= 3:
128
- return df
129
-
130
- # Fallback: synthetic panel ensures the app works even if Yahoo is down
131
- return _offline_prices(tickers, years)
132
 
133
  def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
134
  return prices.pct_change().dropna()
@@ -139,609 +72,107 @@ def annualize_mean(m):
139
  def annualize_sigma(s):
140
  return np.asarray(s, dtype=float) * math.sqrt(12.0)
141
 
142
- def yahoo_search(query: str):
143
- # Yahoo symbol search
144
- if not query or len(query.strip()) == 0:
145
- return []
146
- url = "https://query1.finance.yahoo.com/v1/finance/search"
147
- params = {"q": query.strip(), "quotesCount": 10, "newsCount": 0}
148
- headers = {"User-Agent": "Mozilla/5.0"}
149
- try:
150
- r = requests.get(url, params=params, headers=headers, timeout=10)
151
- r.raise_for_status()
152
- data = r.json()
153
- out = []
154
- for q in data.get("quotes", []):
155
- sym = q.get("symbol")
156
- name = q.get("shortname") or q.get("longname") or ""
157
- exch = q.get("exchDisp") or ""
158
- if sym and sym.isascii():
159
- out.append({"symbol": sym, "name": name, "exchange": exch})
160
- if not out:
161
- out = [{"symbol": query.strip().upper(), "name": "typed symbol", "exchange": "n a"}]
162
- return out[:10]
163
- except Exception:
164
- return [{"symbol": query.strip().upper(), "name": "typed symbol", "exchange": "n a"}]
165
-
166
- def validate_tickers(symbols: List[str], years: int) -> List[str]:
167
- # Pass-through validation to avoid network dependency during add/edit.
168
- uniq = [s.strip().upper() for s in symbols if s and isinstance(s, str)]
169
- return list(dict.fromkeys(uniq))[:MAX_TICKERS]
170
-
171
- # -------------- aligned moments --------------
172
- def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
173
- uniq = [c for c in dict.fromkeys(symbols) if c != MARKET_TICKER]
174
- tickers = uniq + [MARKET_TICKER]
175
- px = fetch_prices_monthly(tickers, years)
176
- rets = monthly_returns(px)
177
- cols = [c for c in uniq if c in rets.columns] + ([MARKET_TICKER] if MARKET_TICKER in rets.columns else [])
178
- R = rets[cols].dropna(how="any")
179
- return R.loc[:, ~R.columns.duplicated()]
180
-
181
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
182
- R = get_aligned_monthly_returns(symbols, years)
183
- if MARKET_TICKER not in R.columns or R.shape[0] < 3:
184
- raise ValueError("Not enough aligned data")
185
  rf_m = rf_ann / 12.0
186
-
187
- m = R[MARKET_TICKER]
188
- if isinstance(m, pd.DataFrame):
189
- m = m.iloc[:, 0].squeeze()
190
-
191
- mu_m_ann = float(annualize_mean(m.mean()))
192
- sigma_m_ann = float(annualize_sigma(m.std(ddof=1)))
193
- erp_ann = float(mu_m_ann - rf_ann)
194
-
195
- ex_m = m - rf_m
196
- var_m = float(np.var(ex_m.values, ddof=1))
197
- var_m = max(var_m, 1e-6)
198
-
199
- betas: Dict[str, float] = {}
200
- for s in [c for c in R.columns if c != MARKET_TICKER]:
201
- ex_s = R[s] - rf_m
202
- betas[s] = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1] / var_m)
203
-
204
- betas[MARKET_TICKER] = 1.0 # by definition
205
-
206
- asset_cols = [c for c in R.columns if c != MARKET_TICKER]
207
- cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
208
- covA = pd.DataFrame(cov_m * 12.0, index=asset_cols, columns=asset_cols)
209
-
210
- return {"betas": betas, "cov_ann": covA, "erp_ann": erp_ann, "sigma_m_ann": sigma_m_ann}
211
 
212
  def capm_er(beta: float, rf_ann: float, erp_ann: float) -> float:
213
  return float(rf_ann + beta * erp_ann)
214
 
215
- def portfolio_stats(weights: Dict[str, float],
216
- cov_ann: pd.DataFrame,
217
- betas: Dict[str, float],
218
- rf_ann: float,
219
- erp_ann: float) -> Tuple[float, float, float]:
220
  tickers = list(weights.keys())
221
- w = np.array([weights[t] for t in tickers], dtype=float)
222
- gross = float(np.sum(np.abs(w)))
223
- if gross == 0:
224
- return 0.0, 0.0, 0.0
225
- w_expo = w / gross
226
- beta_p = float(np.dot([betas.get(t, 0.0) for t in tickers], w_expo))
227
  er_p = capm_er(beta_p, rf_ann, erp_ann)
228
- cov = cov_ann.reindex(index=tickers, columns=tickers).fillna(0.0).to_numpy()
229
- sigma_p = math.sqrt(float(max(w_expo.T @ cov @ w_expo, 0.0)))
230
  return beta_p, er_p, sigma_p
231
 
232
- # -------------- CML helpers --------------
233
- def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
234
- if sigma_mkt <= 1e-12:
235
- return 0.0, 1.0, rf_ann
236
  a = sigma_target / sigma_mkt
237
- return a, 1.0 - a, rf_ann + a * erp_ann
238
 
239
- def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
240
- if abs(erp_ann) <= 1e-12:
241
- return 0.0, 1.0, rf_ann
242
  a = (mu_target - rf_ann) / erp_ann
243
- return a, 1.0 - a, abs(a) * sigma_mkt
244
-
245
- def plot_cml(
246
- rf_ann, erp_ann, sigma_mkt,
247
- pt_sigma, pt_mu,
248
- same_sigma_sigma, same_sigma_mu,
249
- same_mu_sigma, same_mu_mu,
250
- targ_sigma=None, targ_mu=None
251
- ) -> Image.Image:
252
- fig = plt.figure(figsize=(6, 4), dpi=120)
253
-
254
- xmax = max(
255
- 0.3,
256
- sigma_mkt * 2.0,
257
- pt_sigma * 1.4,
258
- same_mu_sigma * 1.4,
259
- same_sigma_sigma * 1.4,
260
- (targ_sigma or 0.0) * 1.4,
261
- )
262
- xs = np.linspace(0, xmax, 160)
263
- slope = erp_ann / max(sigma_mkt, 1e-12)
264
- cml = rf_ann + slope * xs
265
- plt.plot(xs, cml, label="CML through VOO")
266
-
267
- # Key points
268
- plt.scatter([0.0], [rf_ann], label="Risk free")
269
- plt.scatter([sigma_mkt], [rf_ann + erp_ann], label="Market VOO")
270
- plt.scatter([pt_sigma], [pt_mu], label="Your portfolio")
271
- plt.scatter([same_sigma_sigma], [same_sigma_mu], label="Efficient same sigma")
272
- plt.scatter([same_mu_sigma], [same_mu_mu], label="Efficient same return")
273
- if targ_sigma is not None and targ_mu is not None:
274
- plt.scatter([targ_sigma], [targ_mu], label="Target suggestion")
275
-
276
- # Dotted guides and annotations that show the gap
277
- # Same sigma guide, vertical gap in return
278
- plt.plot(
279
- [pt_sigma, same_sigma_sigma],
280
- [pt_mu, same_sigma_mu],
281
- linestyle="--",
282
- linewidth=1.2,
283
- alpha=0.7,
284
- color="gray",
285
- )
286
- d_ret = (same_sigma_mu - pt_mu) * 100.0
287
- plt.annotate(
288
- f"Return gain at same sigma {d_ret:+.2f}%",
289
- xy=(same_sigma_sigma, same_sigma_mu),
290
- xytext=(same_sigma_sigma + 0.02 * xmax, same_sigma_mu),
291
- arrowprops=dict(arrowstyle="->", lw=1.0),
292
- fontsize=9,
293
- va="center",
294
- )
295
-
296
- # Same return guide, horizontal gap in sigma
297
- plt.plot(
298
- [pt_sigma, same_mu_sigma],
299
- [pt_mu, same_mu_mu],
300
- linestyle="--",
301
- linewidth=1.2,
302
- alpha=0.7,
303
- color="gray",
304
- )
305
- d_sig = (same_mu_sigma - pt_sigma) * 100.0
306
- plt.annotate(
307
- f"Risk change at same return {d_sig:+.2f}%",
308
- xy=(same_mu_sigma, same_mu_mu),
309
- xytext=(same_mu_sigma, same_mu_mu + 0.03),
310
- arrowprops=dict(arrowstyle="->", lw=1.0),
311
- fontsize=9,
312
- ha="center",
313
- )
314
 
315
- plt.xlabel("Standard deviation")
316
- plt.ylabel("Expected return")
317
- plt.legend(loc="best")
318
- plt.tight_layout()
319
-
320
- buf = io.BytesIO()
321
- plt.savefig(buf, format="png")
322
- plt.close(fig)
323
- buf.seek(0)
324
- return Image.open(buf)
325
-
326
- # -------------- synthetic dataset --------------
327
- def synth_profile(seed: int) -> str:
328
- rng = np.random.default_rng(seed)
329
- risk = rng.choice(["cautious", "balanced", "moderate", "growth", "aggressive"])
330
- horizon = rng.choice(["three years", "five years", "seven years", "ten years", "fifteen years"])
331
- goal = rng.choice(["retirement savings", "first home", "education fund", "wealth building", "travel fund", "emergency buffer"])
332
- return f"{risk} investor, {horizon} horizon, goal is {goal}."
333
-
334
- def build_synthetic_dataset(universe: List[str], years: int, rf_ann: float, erp_ann: float) -> pd.DataFrame:
335
- symbols = list(sorted(set([s for s in universe if s != MARKET_TICKER] + [MARKET_TICKER])))[:MAX_TICKERS]
336
- moms = estimate_all_moments_aligned(symbols, years, rf_ann)
337
- covA, betas = moms["cov_ann"], moms["betas"]
338
- rows, rng = [], np.random.default_rng(123)
339
- for i in range(1000):
340
- k = rng.integers(low=min(2, len(symbols)), high=min(8, len(symbols)) + 1)
341
  picks = list(rng.choice(symbols, size=k, replace=False))
342
- signs = rng.choice([-1.0, 1.0], size=k, p=[0.25, 0.75])
343
  raw = rng.dirichlet(np.ones(k))
344
- gross = 1.0 + float(rng.gamma(2.0, 0.5))
345
- w = gross * signs * raw
346
- beta_p, er_p, sigma_p = portfolio_stats({picks[j]: w[j] for j in range(k)}, covA, betas, rf_ann, erp_ann)
347
  rows.append({
348
- "id": i,
349
- "profile_text": synth_profile(10_000 + i),
350
  "tickers": ",".join(picks),
351
  "weights": ",".join(f"{x:.4f}" for x in w),
352
- "beta_p": beta_p,
353
- "er_p": er_p,
354
- "sigma_p": sigma_p
355
  })
356
  return pd.DataFrame(rows)
357
 
358
- def save_synth_csv(df: pd.DataFrame, path: str = DATASET_PATH):
359
- os.makedirs(os.path.dirname(path), exist_ok=True)
360
- df.to_csv(path, index=False)
361
-
362
- # ----------- surrogate from saved CSV only -----------
363
- def _row_to_exposures(row: pd.Series, universe: List[str]) -> Optional[np.ndarray]:
364
- try:
365
- ts = [t.strip() for t in str(row["tickers"]).split(",")]
366
- ws = [float(x) for x in str(row["weights"]).split(",")]
367
- wmap = {t: ws[i] for i, t in enumerate(ts) if i < len(ws)}
368
- w = np.array([wmap.get(t, 0.0) for t in universe], dtype=float)
369
- gross = float(np.sum(np.abs(w)))
370
- if gross <= 1e-12:
371
- return None
372
- return w / gross
373
- except Exception:
374
- return None
375
-
376
- def fit_surrogate_from_csv(csv_path: str, universe: List[str]):
377
- try:
378
- df = pd.read_csv(csv_path)
379
- except Exception:
380
- return None, None, 0
381
- X_list, Y_list = [], []
382
- for _, r in df.iterrows():
383
- x = _row_to_exposures(r, universe)
384
- if x is None:
385
- continue
386
- y = np.array([float(r["er_p"]), float(r["sigma_p"]), float(r["beta_p"])], dtype=float)
387
- X_list.append(x); Y_list.append(y)
388
- if not X_list:
389
- return None, None, 0
390
- X = np.vstack(X_list); Y = np.vstack(Y_list)
391
- scaler = StandardScaler().fit(X)
392
- Xn = scaler.transform(X)
393
- k = min(25, len(Xn))
394
- knn = KNeighborsRegressor(n_neighbors=k, weights="distance")
395
- knn.fit(Xn, Y)
396
- return scaler, knn, len(Xn)
397
-
398
- def predict_from_surrogate(amounts_map: Dict[str, float], universe: List[str],
399
- scaler: StandardScaler, knn: KNeighborsRegressor):
400
- gross = sum(abs(v) for v in amounts_map.values())
401
- if gross <= 1e-12:
402
- return None
403
- w = np.array([amounts_map.get(t, 0.0) for t in universe], dtype=float) / gross
404
- yhat = knn.predict(scaler.transform([w]))[0]
405
- er_hat, sigma_hat, beta_hat = float(yhat[0]), float(yhat[1]), float(yhat[2])
406
- return er_hat, sigma_hat, beta_hat
407
-
408
- # ----------- target search over synthetic dataset -----------
409
- def target_best_from_synth(csv_path: str,
410
- universe: List[str],
411
- target_mu: Optional[float],
412
- target_sigma: Optional[float]):
413
- try:
414
- df = pd.read_csv(csv_path)
415
- except Exception:
416
- return None
417
-
418
- if target_mu is None and target_sigma is None:
419
- return None
420
-
421
- rows = []
422
- for _, r in df.iterrows():
423
- x = _row_to_exposures(r, universe)
424
- if x is None:
425
- continue
426
- rows.append((x, float(r["er_p"]), float(r["sigma_p"]), float(r["beta_p"]), r))
427
-
428
- if not rows:
429
- return None
430
-
431
- mu_w = 1.0
432
- sig_w = 1.0
433
- best = None
434
- best_d = float("inf")
435
- for x, er_p, sig_p, beta_p, r in rows:
436
- d = 0.0
437
- if target_mu is not None:
438
- d += mu_w * (er_p - target_mu) ** 2
439
- if target_sigma is not None:
440
- d += sig_w * (sig_p - target_sigma) ** 2
441
- if d < best_d:
442
- best_d = d
443
- best = (x, er_p, sig_p, beta_p, r)
444
-
445
- if best is None:
446
- return None
447
-
448
- x, er_p, sig_p, beta_p, r = best
449
- wmap = {t: float(x[i]) for i, t in enumerate(universe) if abs(float(x[i])) > 1e-4}
450
- top = sorted(wmap.items(), key=lambda kv: -abs(kv[1]))[:12]
451
- wmap_top = dict(top)
452
- return {"weights": wmap_top, "er": er_p, "sigma": sig_p, "beta": beta_p}
453
-
454
- # -------------- summary builder --------------
455
- def fmt_pct(x: float) -> str:
456
- return f"{x*100:.2f}%"
457
-
458
- def humanize_synth(er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta):
459
- close_mu = abs(dmu) <= 0.005
460
- close_sig = abs(dsig) <= 0.005
461
- close_beta = abs(dbeta) <= 0.05
462
- parts = []
463
- parts.append(f"- Predicted annual return {fmt_pct(er_hat)} , difference {fmt_pct(dmu)}")
464
- parts.append(f"- Predicted annual volatility {fmt_pct(sigma_hat)} , difference {fmt_pct(dsig)}")
465
- parts.append(f"- Predicted beta {beta_hat:.2f} , difference {dbeta:+.02f}")
466
- if close_mu and close_sig and close_beta:
467
- verdict = "The synthetic model matches the historical calculation closely. You can trust these quick predictions for similar mixes."
468
- else:
469
- verdict = "The synthetic model is not very close here. Rely more on the historical calculation for this mix."
470
- return "\n".join(parts + ["", f"**Verdict** {verdict}"])
471
-
472
- def build_summary_md(lookback, horizon, rf, rf_code, erp, sigma_mkt,
473
- beta_p, er_p, sigma_p,
474
- a_sigma, b_sigma, mu_eff_sigma,
475
- a_mu, b_mu, sigma_eff_mu,
476
- synth=None, synth_nrows: int = 0,
477
- targ=None) -> str:
478
- lines = []
479
- lines.append("### Inputs")
480
- lines.append(f"- Lookback years {lookback}")
481
- lines.append(f"- Horizon years {int(round(horizon))}")
482
- lines.append(f"- Risk free {fmt_pct(rf)} from {rf_code}")
483
- lines.append(f"- Market ERP {fmt_pct(erp)}")
484
- lines.append(f"- Market sigma {fmt_pct(sigma_mkt)}")
485
- lines.append("")
486
- lines.append("### Your portfolio")
487
- lines.append(f"- Beta {beta_p:.2f}")
488
- lines.append(f"- Sigma {fmt_pct(sigma_p)}")
489
- lines.append(f"- Expected return {fmt_pct(er_p)}")
490
- if synth is not None:
491
- er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta = synth
492
- lines.append("")
493
- lines.append("### Synthetic prediction from data slash investor underscore profiles dot csv")
494
- lines.append(f"- Samples used {synth_nrows}")
495
- lines.append(humanize_synth(er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta))
496
- if targ is not None:
497
- lines.append("")
498
- lines.append("### Target driven suggestion from synthetic dataset")
499
- lines.append(f"- Suggested expected return {fmt_pct(targ['er'])}")
500
- lines.append(f"- Suggested sigma {fmt_pct(targ['sigma'])}")
501
- lines.append(f"- Suggested beta {targ['beta']:.2f}")
502
- pretty = ", ".join([f"{k} {v:+.2f}" for k, v in targ["weights"].items()])
503
- lines.append(f"- Weights, exposure terms {pretty}")
504
- lines.append("")
505
- lines.append("### Efficient alternatives on CML")
506
- lines.append("Efficient same sigma")
507
- lines.append(f"- Market weight {a_sigma:.2f} , Bills weight {b_sigma:.2f}")
508
- lines.append(f"- Expected return {fmt_pct(mu_eff_sigma)}")
509
- lines.append("Efficient same return")
510
- lines.append(f"- Market weight {a_mu:.2f} , Bills weight {b_mu:.2f}")
511
- lines.append(f"- Sigma {fmt_pct(sigma_eff_mu)}")
512
- return "\n".join(lines)
513
-
514
- # -------------- app state on launch --------------
515
- ensure_data_dir()
516
- UNIVERSE = [MARKET_TICKER, "QQQ", "XLK", "XLP", "XLE", "VNQ", "IEF", "HYG", "GLD", "EEM"]
517
- HORIZON_YEARS = 5
518
- RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
519
- RF_ANN = fetch_fred_yield_annual(RF_CODE)
520
-
521
- # -------------- gradio callbacks --------------
522
- def search_tickers_cb(q: str):
523
- hits = yahoo_search(q)
524
- if not hits:
525
- return "No matches", []
526
- opts = [f"{h['symbol']} | {h['name']} | {h['exchange']}" for h in hits]
527
- return "Select a symbol and click Add", opts
528
-
529
- def add_symbol(selection: str, table: pd.DataFrame):
530
- if not selection:
531
- return table, "Pick a row from Matches first"
532
- symbol = selection.split("|")[0].strip().upper()
533
- current = [] if table is None or len(table) == 0 else [str(x).upper() for x in table["ticker"].tolist() if str(x) != "nan"]
534
- tickers = current if symbol in current else current + [symbol]
535
- val = validate_tickers(tickers, years=DEFAULT_LOOKBACK_YEARS)
536
- tickers = [t for t in tickers if t in val]
537
- amt_map = {}
538
- if table is not None and len(table) > 0:
539
- for _, r in table.iterrows():
540
- t = str(r.get("ticker", "")).upper()
541
- if t in tickers:
542
- amt_map[t] = float(pd.to_numeric(r.get("amount_usd", 0.0), errors="coerce") or 0.0)
543
- new_table = pd.DataFrame({"ticker": tickers, "amount_usd": [amt_map.get(t, 0.0) for t in tickers]})
544
- msg = f"Added {symbol}" if symbol in tickers else f"{symbol} not valid"
545
- if len(new_table) > MAX_TICKERS:
546
- new_table = new_table.iloc[:MAX_TICKERS]
547
- msg = f"Reached max of {MAX_TICKERS}"
548
- return new_table, msg
549
-
550
- def lock_ticker_column(tb: pd.DataFrame):
551
- if tb is None or len(tb) == 0:
552
- return pd.DataFrame(columns=["ticker", "amount_usd"])
553
- tickers = [str(x).upper() for x in tb["ticker"].tolist()]
554
- amounts = pd.to_numeric(tb["amount_usd"], errors="coerce").fillna(0.0).tolist()
555
- val = validate_tickers(tickers, years=DEFAULT_LOOKBACK_YEARS)
556
- tickers = [t for t in tickers if t in val]
557
- amounts = amounts[:len(tickers)] + [0.0] * max(0, len(tickers) - len(amounts))
558
- return pd.DataFrame({"ticker": tickers, "amount_usd": amounts})
559
-
560
- def set_horizon(years: float):
561
- y = max(1.0, min(100.0, float(years)))
562
- code = fred_series_for_horizon(y)
563
- rf = fetch_fred_yield_annual(code)
564
- global HORIZON_YEARS, RF_CODE, RF_ANN
565
- HORIZON_YEARS = y
566
- RF_CODE = code
567
- RF_ANN = rf
568
- return f"Risk free series {code}. Latest annual rate {rf:.2%}. Dataset will use this rate on compute."
569
-
570
- def compute(years_lookback: int, table: pd.DataFrame,
571
- target_mu: Optional[float], target_sigma: Optional[float],
572
- use_synth: bool):
573
- df = table.dropna()
574
- df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
575
- df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
576
-
577
- symbols = [t for t in df["ticker"].tolist() if t]
578
- if len(symbols) == 0:
579
- return None, "Add at least one ticker", "Universe empty", empty_positions_df(), empty_suggest_df(), None
580
-
581
- symbols = validate_tickers(symbols, years_lookback)
582
- if len(symbols) == 0:
583
- return None, "Could not validate any tickers", "Universe invalid", empty_positions_df(), empty_suggest_df(), None
584
-
585
- global UNIVERSE
586
- UNIVERSE = list(sorted(set([s for s in symbols if s != MARKET_TICKER] + [MARKET_TICKER])))[:MAX_TICKERS]
587
-
588
- df = df[df["ticker"].isin(symbols)].copy()
589
- amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
590
- rf_ann = RF_ANN
591
-
592
- moms = estimate_all_moments_aligned(symbols, years_lookback, rf_ann)
593
- betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
594
-
595
- gross = sum(abs(v) for v in amounts.values())
596
- if gross == 0:
597
- return None, "All amounts are zero", "Universe ok", empty_positions_df(), empty_suggest_df(), None
598
- weights = {k: v / gross for k, v in amounts.items()}
599
-
600
  beta_p, er_p, sigma_p = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
601
-
602
- a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
603
- a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_p, rf_ann, erp_ann, sigma_mkt)
604
-
605
- # ensure dataset exists once
606
- if not os.path.exists(DATASET_PATH):
607
- synth_df = build_synthetic_dataset(
608
- universe=list(sorted(set(symbols + [MARKET_TICKER]))),
609
- years=DEFAULT_LOOKBACK_YEARS,
610
- rf_ann=rf_ann,
611
- erp_ann=erp_ann
612
- )
613
- save_synth_csv(synth_df)
614
- csv_path = DATASET_PATH if os.path.exists(DATASET_PATH) else None
615
-
616
- scaler, knn, nrows = None, None, 0
617
- synth_tuple = None
618
- if use_synth and csv_path:
619
- scaler, knn, nrows = fit_surrogate_from_csv(csv_path, UNIVERSE)
620
- if scaler is not None and knn is not None:
621
- pred = predict_from_surrogate(amounts, UNIVERSE, scaler, knn)
622
- if pred is not None:
623
- er_hat, sigma_hat, beta_hat = pred
624
- synth_tuple = (
625
- er_hat, sigma_hat, beta_hat,
626
- er_hat - er_p, sigma_hat - sigma_p, beta_hat - beta_p
627
- )
628
-
629
- # target driven suggestion from synthetic dataset
630
- targ = None
631
- targ_table = empty_suggest_df()
632
- targ_sigma_plot = None
633
- targ_mu_plot = None
634
- if csv_path and (target_mu is not None or target_sigma is not None):
635
- cand = target_best_from_synth(csv_path, UNIVERSE, target_mu, target_sigma)
636
- if cand is not None:
637
- targ = cand
638
- targ_sigma_plot = cand["sigma"]
639
- targ_mu_plot = cand["er"]
640
- rows = [{"ticker": k, "suggested_weight_exposure": v} for k, v in cand["weights"].items()]
641
- targ_table = pd.DataFrame(rows, columns=SUG_COLS)
642
-
643
- img = plot_cml(
644
- rf_ann, erp_ann, sigma_mkt,
645
- sigma_p, er_p,
646
- sigma_p, mu_eff_sigma,
647
- sigma_eff_mu, er_p,
648
- targ_sigma=targ_sigma_plot, targ_mu=targ_mu_plot
649
- )
650
-
651
- info = build_summary_md(
652
- years_lookback, HORIZON_YEARS, rf_ann, RF_CODE, erp_ann, sigma_mkt,
653
- beta_p, er_p, sigma_p,
654
- a_sigma, b_sigma, mu_eff_sigma,
655
- a_mu, b_mu, sigma_eff_mu,
656
- synth=synth_tuple, synth_nrows=nrows,
657
- targ=targ
658
- )
659
-
660
- rows = []
661
- for t in symbols:
662
- beta_val = 1.0 if t == MARKET_TICKER else betas.get(t, np.nan)
663
- rows.append({
664
- "ticker": t,
665
- "amount_usd": amounts.get(t, 0.0),
666
- "weight_exposure": weights.get(t, 0.0),
667
- "beta": beta_val,
668
- })
669
- pos_table = pd.DataFrame(rows, columns=POS_COLS)
670
-
671
- uni_msg = f"Universe set to {', '.join(UNIVERSE)}"
672
- return img, info, uni_msg, pos_table, targ_table, csv_path
673
-
674
- # -------------- UI --------------
675
- ensure_data_dir()
676
-
677
- with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
678
- gr.Markdown(
679
- "## Efficient Portfolio Advisor\n"
680
- "Search symbols, enter dollar amounts, set your horizon. "
681
- "Prices come from Yahoo Finance. Risk free comes from FRED."
682
- )
683
-
684
- with gr.Row():
685
- with gr.Column(scale=1):
686
- q = gr.Textbox(label="Search symbol")
687
- search_note = gr.Markdown()
688
- matches = gr.Dropdown(choices=[], label="Matches")
689
- search_btn = gr.Button("Search")
690
- add_btn = gr.Button("Add selected to portfolio")
691
-
692
- gr.Markdown("### Portfolio positions. type dollar amounts. negatives allowed for shorts")
693
- table = gr.Dataframe(
694
- headers=["ticker", "amount_usd"],
695
- datatype=["str", "number"],
696
- row_count=0,
697
- col_count=(2, "fixed")
698
- )
699
-
700
- horizon = gr.Number(label="Horizon in years from 1 to 100", value=5, precision=0)
701
- lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for beta and sigma")
702
-
703
- gr.Markdown("### Optional targets on the CML")
704
- target_mu = gr.Number(label="Target expected return, annual, example 0.12 means 12 percent", value=None, precision=6)
705
- target_sigma = gr.Number(label="Target sigma, annual, example 0.18 means 18 percent", value=None, precision=6)
706
- use_synth = gr.Checkbox(label="Use synthetic predictor", value=True)
707
-
708
- run_btn = gr.Button("Compute and suggest")
709
- with gr.Column(scale=1):
710
- plot = gr.Image(label="Capital Market Line", type="pil")
711
- summary = gr.Markdown(label="Summary")
712
- universe_msg = gr.Textbox(label="Universe status", interactive=False)
713
- positions = gr.Dataframe(
714
- label="Computed positions",
715
- headers=POS_COLS,
716
- datatype=["str", "number", "number", "number"],
717
- col_count=(len(POS_COLS), "fixed"),
718
- value=empty_positions_df(),
719
- interactive=False
720
- )
721
- suggestions = gr.Dataframe(
722
- label="Suggested portfolio from targets",
723
- headers=SUG_COLS,
724
- datatype=["str", "number"],
725
- col_count=(len(SUG_COLS), "fixed"),
726
- value=empty_suggest_df(),
727
- interactive=False
728
- )
729
- dl = gr.File(label="Session CSV path", value=None, visible=True)
730
-
731
- def do_search(query):
732
- note, options = search_tickers_cb(query)
733
- return note, gr.update(choices=options)
734
-
735
- search_btn.click(fn=do_search, inputs=q, outputs=[search_note, matches])
736
- add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
737
- table.change(fn=lock_ticker_column, inputs=table, outputs=table)
738
- horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)
739
-
740
- run_btn.click(
741
- fn=compute,
742
- inputs=[lookback, table, target_mu, target_sigma, use_synth],
743
- outputs=[plot, summary, universe_msg, positions, suggestions, dl]
744
- )
745
 
746
  if __name__ == "__main__":
747
- demo.launch()
 
 
1
  import os, io, math, warnings
2
  warnings.filterwarnings("ignore")
3
 
4
+ from typing import List, Tuple, Dict
 
5
  import numpy as np
6
  import pandas as pd
7
  import matplotlib.pyplot as plt
 
10
  import requests
11
  import yfinance as yf
12
 
 
 
 
13
  # ---------------- config ----------------
14
  DATA_DIR = "data"
 
 
15
  MAX_TICKERS = 30
16
  DEFAULT_LOOKBACK_YEARS = 5
17
  MARKET_TICKER = "VOO"
18
+ POS_COLS = ["ticker", "weight_exposure", "beta", "er_p", "sigma_p"]
 
 
19
 
20
  FRED_MAP = [
21
+ (1, "DGS1"), (2, "DGS2"), (3, "DGS3"),
22
+ (5, "DGS5"), (7, "DGS7"), (10, "DGS10"),
23
+ (20, "DGS20"), (30, "DGS30"), (100, "DGS30")
 
 
 
 
 
 
24
  ]
25
 
26
  # ---------------- helpers ----------------
27
def ensure_data_dir():
    """Make sure the on-disk data directory exists (idempotent)."""
    if not os.path.isdir(DATA_DIR):
        os.makedirs(DATA_DIR, exist_ok=True)
29
 
 
 
 
 
 
 
30
  def fred_series_for_horizon(years: float) -> str:
31
  y = max(1.0, min(100.0, float(years)))
32
  for cutoff, code in FRED_MAP:
 
35
  return "DGS30"
36
 
37
  def fetch_fred_yield_annual(code: str) -> float:
 
38
  url = f"https://fred.stlouisfed.org/graph/fredgraph.csv?id={code}"
39
  try:
40
  r = requests.get(url, timeout=10)
 
45
  except Exception:
46
  return 0.03
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
    """Download monthly adjusted close prices for *tickers* over *years* years.

    Each ticker is fetched individually so one failing symbol does not sink
    the whole request; tickers that error out or return no data are skipped.

    Returns a DataFrame with one column per successfully fetched ticker,
    restricted to dates where every column has data. Empty DataFrame when
    nothing could be fetched.
    """
    start = pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=years, days=7)
    end = pd.Timestamp.today(tz="UTC")
    frames = []
    for t in tickers:
        try:
            s = yf.download(
                t, start=start.date(), end=end.date(),
                interval="1mo", auto_adjust=True, progress=False
            )["Close"]
            if isinstance(s, pd.Series) and s.dropna().size > 0:
                frames.append(s.rename(t))
        except Exception:
            # Best-effort download: skip tickers Yahoo cannot serve.
            pass
    if frames:
        # Sort by date and forward-fill gaps BEFORE dropping incomplete rows;
        # the previous order (dropna then fillna) made the fill a dead no-op.
        # `.ffill()` replaces the deprecated `fillna(method="ffill")`.
        return pd.concat(frames, axis=1).sort_index().ffill().dropna(how="any")
    return pd.DataFrame()
 
 
 
 
 
65
 
66
def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
    """Period-over-period simple returns, with the leading NaN row removed."""
    rets = prices.pct_change()
    return rets.dropna()
 
72
def annualize_sigma(s):
    """Scale a monthly volatility (scalar or array-like) to annual terms."""
    monthly = np.asarray(s, dtype=float)
    return monthly * (12.0 ** 0.5)
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
    """Estimate CAPM betas, the annualized covariance matrix, the equity
    risk premium and market sigma from aligned monthly returns.

    Parameters
    ----------
    symbols : tickers to estimate; MARKET_TICKER must be among the fetched data.
    years   : lookback window in years.
    rf_ann  : annual risk-free rate (decimal).

    Returns
    -------
    (betas, cov_ann, erp_ann, sigma_mkt_ann) — betas is a dict keyed by ticker,
    cov_ann a DataFrame labeled by the tickers actually fetched.

    Raises
    ------
    ValueError when the market series is missing or fewer than 3 aligned
    monthly observations are available.
    """
    px = fetch_prices_monthly(symbols, years)
    rets = monthly_returns(px)
    # Some downloads can fail; restrict to what actually came back so the
    # covariance labels line up with the data (np.cov follows column order,
    # not the order of `symbols`).
    cols = [s for s in symbols if s in rets.columns]
    rets = rets[cols]
    if MARKET_TICKER not in cols or rets.shape[0] < 3:
        raise ValueError("Not enough aligned data")
    rf_m = rf_ann / 12.0
    mu = rets.mean()
    sigma = rets.std(ddof=1)
    mkt_ex = rets[MARKET_TICKER] - rf_m
    # Guard against a degenerate (near-constant) market series.
    var_m = max(float(np.var(mkt_ex, ddof=1)), 1e-6)
    betas = {}
    for s in cols:
        if s == MARKET_TICKER:
            betas[s] = 1.0  # beta of the market with itself, by definition
        else:
            ex_s = rets[s] - rf_m
            betas[s] = float(np.cov(ex_s, mkt_ex, ddof=1)[0, 1] / var_m)
    erp = float(annualize_mean(mu[MARKET_TICKER]) - rf_ann)
    sigma_mkt = float(annualize_sigma(sigma[MARKET_TICKER]))
    covA = pd.DataFrame(np.cov(rets.T, ddof=1) * 12.0, index=cols, columns=cols)
    return betas, covA, erp, sigma_mkt
 
 
 
 
 
 
 
 
 
 
94
 
95
def capm_er(beta: float, rf_ann: float, erp_ann: float) -> float:
    """CAPM expected return: risk-free rate plus beta times the equity risk premium."""
    premium = beta * erp_ann
    return float(rf_ann + premium)
97
 
98
def portfolio_stats(weights: Dict[str, float], cov_ann: pd.DataFrame,
                    betas: Dict[str, float], rf_ann: float, erp_ann: float):
    """Compute portfolio beta, CAPM expected return and annual volatility.

    `weights` may hold dollar amounts or raw weights; they are normalized by
    gross exposure (sum of absolute values) so long/short books are handled.

    Raises
    ------
    ValueError when gross exposure is zero — previously 0/0 normalization
    silently propagated NaN through every statistic.
    """
    tickers = list(weights.keys())
    w = np.asarray(list(weights.values()), dtype=float)
    gross = np.abs(w).sum()
    if gross == 0.0:
        raise ValueError("portfolio has zero gross exposure")
    w_expo = w / gross
    beta_p = float(np.dot([betas[t] for t in tickers], w_expo))
    er_p = capm_er(beta_p, rf_ann, erp_ann)
    # .loc reorders the covariance to the portfolio's own ticker order.
    cov = cov_ann.loc[tickers, tickers].to_numpy()
    # max(..., 0.0) guards against tiny negative variance from float error.
    sigma_p = math.sqrt(max(float(w_expo @ cov @ w_expo), 0.0))
    return beta_p, er_p, sigma_p
108
 
109
def efficient_same_sigma(sigma_target, rf_ann, erp_ann, sigma_mkt):
    """Two-fund (market + risk-free) mix matching a target volatility.

    Returns (market_weight, riskfree_weight, expected_return) along the
    capital market line.
    """
    alpha = sigma_target / sigma_mkt
    expected = rf_ann + alpha * erp_ann
    return alpha, 1 - alpha, expected
112
 
113
def efficient_same_return(mu_target, rf_ann, erp_ann, sigma_mkt):
    """Two-fund (market + risk-free) mix matching a target expected return.

    Returns (market_weight, riskfree_weight, volatility); the market weight
    exceeds 1 (leverage) or goes negative depending on the target.
    """
    alpha = (mu_target - rf_ann) / erp_ann
    vol = abs(alpha) * sigma_mkt
    return alpha, 1 - alpha, vol
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
def build_synthetic_dataset(symbols: List[str], years: int, rf_ann: float, erp_ann: float):
    """Generate 1000 random leveraged portfolios over *symbols* and record
    their CAPM statistics (beta, expected return, volatility).

    The RNG is seeded (42) so the synthetic dataset is reproducible.
    """
    betas, covA, _, _ = estimate_all_moments_aligned(symbols, years, rf_ann)
    rng = np.random.default_rng(42)
    rows = []
    for _ in range(1000):
        # Pick a random subset of at least two symbols.
        n_picks = rng.integers(2, len(symbols) + 1)
        picks = list(rng.choice(symbols, size=n_picks, replace=False))
        # Dirichlet base weights scaled by a random gross leverage > 1.
        base = rng.dirichlet(np.ones(n_picks))
        leverage = 1.0 + rng.gamma(2.0, 0.5)
        w = leverage * base
        weight_map = dict(zip(picks, w))
        beta_p, er_p, sigma_p = portfolio_stats(weight_map, covA, betas, rf_ann, erp_ann)
        rows.append({
            "tickers": ",".join(picks),
            "weights": ",".join(f"{x:.4f}" for x in w),
            "beta_p": beta_p, "er_p": er_p, "sigma_p": sigma_p,
        })
    return pd.DataFrame(rows)
134
 
135
def select_risk_profiles(df):
    """Pick three representative rows from a synthetic-portfolio DataFrame:
    high risk (max expected return), low risk (min volatility), and medium
    (closest to the median of both axes). Each is returned as a 1-row frame.
    """
    high = df.nlargest(1, "er_p")
    low = df.nsmallest(1, "sigma_p")
    dist2 = (df["er_p"] - df["er_p"].median()) ** 2 \
        + (df["sigma_p"] - df["sigma_p"].median()) ** 2
    medium = df.loc[[dist2.idxmin()]]
    return high, medium, low
141
+
142
+ # ---------------- main compute ----------------
143
def compute(years_lookback, tickers_df):
    """End-to-end pipeline: estimate market moments, score the user's
    portfolio, and build efficient and synthetic alternatives.

    Parameters
    ----------
    years_lookback : lookback window (years) for price history.
    tickers_df     : DataFrame with columns ``ticker`` and ``amount_usd``.

    Returns
    -------
    dict with keys "user" (beta, er, sigma, weights), "eff_sigma",
    "eff_return" (two-fund mixes), and "high"/"medium"/"low" synthetic
    risk profiles.

    Raises
    ------
    ValueError for an empty/blank table or more than MAX_TICKERS rows.
    """
    # Work on a copy: the original mutated the caller's (Gradio's) DataFrame
    # in place, an unexpected side effect.
    df = tickers_df.copy()
    df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
    # UI tables commonly contain blank rows; drop them and rows w/o amounts.
    df = df[(df["ticker"] != "") & df["amount_usd"].notna()]
    if df.empty:
        raise ValueError("no valid ticker rows supplied")
    if len(df) > MAX_TICKERS:
        raise ValueError(f"at most {MAX_TICKERS} tickers are supported")
    tickers = df["ticker"].tolist()
    amounts = [float(a) for a in df["amount_usd"].tolist()]
    rf_ann = fetch_fred_yield_annual(fred_series_for_horizon(5))
    # Avoid duplicating the market symbol: a duplicate column would make
    # rets[MARKET_TICKER] a DataFrame downstream and crash beta estimation.
    symbols = tickers if MARKET_TICKER in tickers else tickers + [MARKET_TICKER]
    betas, covA, erp_ann, sigma_mkt = estimate_all_moments_aligned(
        symbols, years_lookback, rf_ann
    )
    weights = {t: a for t, a in zip(tickers, amounts)}
    beta_p, er_p, sigma_p = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
    eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
    eff_return = efficient_same_return(er_p, rf_ann, erp_ann, sigma_mkt)
    synth_df = build_synthetic_dataset(symbols, years_lookback, rf_ann, erp_ann)
    high, medium, low = select_risk_profiles(synth_df)
    return {
        "user": (beta_p, er_p, sigma_p, weights),
        "eff_sigma": eff_sigma,
        "eff_return": eff_return,
        "high": high,
        "medium": medium,
        "low": low,
    }
163
+
164
# ---------------- UI ----------------
# Gradio front-end: a positions table, a lookback slider, and one button that
# runs the full pipeline and renders the result dict as plain text.
with gr.Blocks() as demo:
    gr.Markdown("## Efficient Portfolio Advisor with Synthetic Risk Profiles")
    # Editable (ticker, amount) table; datatypes keep amounts numeric.
    table = gr.Dataframe(headers=["ticker", "amount_usd"], datatype=["str", "number"], row_count=3)
    lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years")
    run_btn = gr.Button("Compute")
    output = gr.Textbox(label="Results")
    def run_app(lookback, table):
        # Thin adapter: compute() returns a dict; stringify it for the textbox.
        res = compute(lookback, table)
        return str(res)
    run_btn.click(fn=run_app, inputs=[lookback, table], outputs=[output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
if __name__ == "__main__":
    # Script entry point: ensure the data directory exists, then serve the UI.
    ensure_data_dir()
    demo.launch()