Tulitula committed on
Commit
addb902
·
verified ·
1 Parent(s): af7bed5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +216 -170
app.py CHANGED
@@ -1,11 +1,22 @@
 
1
  import os, io, math, json, warnings
2
  warnings.filterwarnings("ignore")
3
 
4
- # --- make caches writable BEFORE importing matplotlib / transformers ---
5
- os.environ.setdefault("MPLCONFIGDIR", "/home/user/.config/matplotlib")
6
- os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")
7
- os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", "/home/user/.cache/sentencetransformers")
8
- os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "false")
 
 
 
 
 
 
 
 
 
 
9
 
10
  from typing import List, Tuple, Dict, Optional
11
 
@@ -24,9 +35,6 @@ from sklearn.neighbors import KNeighborsRegressor
24
  # =========================
25
  # Config
26
  # =========================
27
- DATA_DIR = "data"
28
- os.makedirs(DATA_DIR, exist_ok=True)
29
-
30
  DEFAULT_LOOKBACK_YEARS = 5
31
  MAX_TICKERS = 30
32
  MARKET_TICKER = "VOO" # proxy for market portfolio
@@ -125,6 +133,7 @@ def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
125
  return prices.pct_change().dropna()
126
 
127
  def validate_tickers(symbols: List[str], years: int) -> List[str]:
 
128
  symbols = [s.strip().upper() for s in symbols if s and isinstance(s, str)]
129
  base = [s for s in symbols if s != MARKET_TICKER]
130
  px = fetch_prices_monthly(base + [MARKET_TICKER], years)
@@ -206,12 +215,14 @@ def portfolio_stats(weights: Dict[str, float],
206
  # Efficient (CML) alternatives
207
  # =========================
208
  def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
 
209
  if sigma_mkt <= 1e-12:
210
  return 0.0, 1.0, rf_ann
211
  a = sigma_target / sigma_mkt
212
  return a, 1.0 - a, rf_ann + a * erp_ann
213
 
214
  def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
 
215
  if abs(erp_ann) <= 1e-12:
216
  return 0.0, 1.0, 0.0
217
  a = (mu_target - rf_ann) / erp_ann
@@ -248,11 +259,14 @@ def plot_cml(
248
  plt.scatter([0.0], [_pct_arr(rf_ann)], label="Risk-free", zorder=5)
249
  plt.scatter([_pct_arr(sigma_mkt)], [_pct_arr(rf_ann + erp_ann)], label="Market (VOO)", zorder=5)
250
 
 
251
  plt.scatter([_pct_arr(pt_sigma_hist)], [_pct_arr(pt_mu_capm)], label="Your portfolio (CAPM)", zorder=6)
252
 
 
253
  plt.scatter([_pct_arr(same_sigma_sigma)], [_pct_arr(same_sigma_mu)], label="Efficient: same σ", zorder=5)
254
  plt.scatter([_pct_arr(same_mu_sigma)], [_pct_arr(same_mu_mu)], label="Efficient: same μ", zorder=5)
255
 
 
256
  plt.plot([_pct_arr(pt_sigma_hist), _pct_arr(same_sigma_sigma)],
257
  [_pct_arr(pt_mu_capm), _pct_arr(same_sigma_mu)],
258
  ls="--", lw=1.1, alpha=0.7, color="gray")
@@ -336,6 +350,9 @@ def mmr_select(query_emb: np.ndarray,
336
  cand_embs: np.ndarray,
337
  k: int = 3,
338
  lambda_param: float = MMR_LAMBDA) -> List[int]:
 
 
 
339
  if cand_embs.shape[0] <= k:
340
  return list(range(cand_embs.shape[0]))
341
  sim_to_query = st_util.cos_sim(query_emb, cand_embs).cpu().numpy().reshape(-1)
@@ -483,157 +500,183 @@ def _weights_dict_from_row(r: pd.Series) -> Dict[str, float]:
483
  return {k: v / gross for k, v in wmap.items()}
484
 
485
  def compute(lookback_years: int,
486
- table: Optional[pd.DataFrame],
487
  risk_bucket: str,
488
  horizon_years: float):
489
 
490
- if table is None or len(table) == 0:
491
- return (None, "Add at least one ticker", "", pd.DataFrame(columns=POS_COLS),
492
- pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=SUG_COLS),
493
- pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=EFF_COLS),
494
- pd.DataFrame(columns=EFF_COLS), json.dumps([]), 1, "No suggestions yet.")
495
-
496
- df = table.copy().dropna()
497
- df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
498
- df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
499
-
500
- symbols = [t for t in df["ticker"].tolist() if t]
501
- symbols = validate_tickers(symbols, lookback_years)
502
- if len(symbols) == 0:
503
- return (None, "Could not validate any tickers", "Universe invalid",
504
- pd.DataFrame(columns=POS_COLS),
505
- pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=SUG_COLS),
506
- pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=EFF_COLS),
507
- pd.DataFrame(columns=EFF_COLS), json.dumps([]), 1, "No suggestions.")
508
-
509
- universe = sorted(set([s for s in symbols if s != MARKET_TICKER] + [MARKET_TICKER]))
510
- df = df[df["ticker"].isin(symbols)].copy()
511
- amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
512
- gross_amt = sum(abs(v) for v in amounts.values())
513
- if gross_amt <= 1e-9:
514
- return (None, "All amounts are zero", "Universe ok", pd.DataFrame(columns=POS_COLS),
515
- pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=SUG_COLS),
516
- pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=EFF_COLS),
517
- pd.DataFrame(columns=EFF_COLS), json.dumps([]), 1, "No suggestions.")
518
-
519
- weights = {k: v / gross_amt for k, v in amounts.items()}
520
-
521
- rf_code = fred_series_for_horizon(horizon_years)
522
- rf_ann = fetch_fred_yield_annual(rf_code)
523
- moms = estimate_all_moments_aligned(universe, lookback_years, rf_ann)
524
- betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
525
-
526
- beta_p, er_capm_p, sigma_p = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
527
-
528
- a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
529
- a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_capm_p, rf_ann, erp_ann, sigma_mkt)
530
-
531
- eff_same_sigma_tbl = _table_from_weights({MARKET_TICKER: a_sigma, BILLS_TICKER: b_sigma}, gross_amt)
532
- eff_same_mu_tbl = _table_from_weights({MARKET_TICKER: a_mu, BILLS_TICKER: b_mu}, gross_amt)
533
-
534
- synth = build_synth_dataset(universe, covA, betas, rf_ann, erp_ann, n_rows=N_SYNTH, seed=777)
535
-
536
- median_sigma = float(synth["sigma"].median()) if len(synth) else sigma_p
537
- low_max = max(float(synth["sigma"].min()), median_sigma - 0.05)
538
- high_min = median_sigma + 0.05
539
-
540
- if risk_bucket == "Low":
541
- cand_df = synth[synth["sigma"] <= low_max].copy()
542
- elif risk_bucket == "High":
543
- cand_df = synth[synth["sigma"] >= high_min].copy()
544
- else:
545
- cand_df = synth[(synth["sigma"] > low_max) & (synth["sigma"] < high_min)].copy()
546
-
547
- if len(cand_df) == 0:
548
- cand_df = synth.copy()
549
-
550
- embed = get_embedder()
551
- cand_sentences = cand_df.apply(row_to_sentence, axis=1).tolist()
552
-
553
- cur_pairs = ", ".join([f"{k}:{v:+.2f}" for k, v in sorted(weights.items())])
554
- q_sentence = f"user portfolio ({risk_bucket} risk); capm_target {er_capm_p:.4f}; sigma_hist {sigma_p:.4f}; exposures {cur_pairs}"
555
-
556
- cand_embs = embed.encode(cand_sentences, convert_to_tensor=True, normalize_embeddings=True, batch_size=64, show_progress_bar=False)
557
- q_emb = embed.encode([q_sentence], convert_to_tensor=True, normalize_embeddings=True)[0]
558
-
559
- sims = st_util.cos_sim(q_emb, cand_embs)[0]
560
- top_idx = sims.topk(k=min(MMR_K, len(cand_df))).indices.cpu().numpy().tolist()
561
- shortlist_embs = cand_embs[top_idx]
562
- mmr_local = mmr_select(q_emb, shortlist_embs, k=3, lambda_param=MMR_LAMBDA)
563
- chosen = [top_idx[i] for i in mmr_local]
564
- recs = cand_df.iloc[chosen].reset_index(drop=True)
565
-
566
- suggs = []
567
- for _, r in recs.iterrows():
568
- wmap = _weights_dict_from_row(r)
569
- suggs.append({
570
- "weights": wmap,
571
- "er_capm": float(r["er_capm"]),
572
- "sigma": float(r["sigma"]),
573
- "beta": float(r["beta"]),
574
- "table": _table_from_weights(wmap, gross_amt)
575
- })
576
-
577
- img = plot_cml(
578
- rf_ann, erp_ann, sigma_mkt,
579
- sigma_p, er_capm_p,
580
- same_sigma_sigma=sigma_p, same_sigma_mu=mu_eff_sigma,
581
- same_mu_sigma=sigma_eff_mu, same_mu_mu=er_capm_p
582
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
583
 
584
- rows = []
585
- for t in universe:
586
- if t == MARKET_TICKER:
587
- continue
588
- rows.append({
589
- "ticker": t,
590
- "amount_usd": round(amounts.get(t, 0.0), 2),
591
- "weight_exposure": round(weights.get(t, 0.0), 6),
592
- "beta": round(betas.get(t, np.nan), 4) if t != MARKET_TICKER else 1.0
593
- })
594
- pos_table = pd.DataFrame(rows, columns=POS_COLS)
595
-
596
- info_lines = []
597
- info_lines.append("### Inputs")
598
- info_lines.append(f"- Lookback years **{int(lookback_years)}**")
599
- info_lines.append(f"- Horizon years **{int(round(horizon_years))}**")
600
- info_lines.append(f"- Risk-free **{fmt_pct(rf_ann)}** from **{rf_code}**")
601
- info_lines.append(f"- Market ERP **{fmt_pct(erp_ann)}**")
602
- info_lines.append(f"- Market σ **{fmt_pct(sigma_mkt)}**")
603
- info_lines.append("")
604
- info_lines.append("### Your portfolio (plotted as CAPM return, historical σ)")
605
- info_lines.append(f"- Beta **{beta_p:.2f}**")
606
- info_lines.append(f"- σ (historical) **{fmt_pct(sigma_p)}**")
607
- info_lines.append(f"- E[return] (CAPM / SML) **{fmt_pct(er_capm_p)}**")
608
- info_lines.append("")
609
- info_lines.append("### Efficient alternatives on CML")
610
- info_lines.append(f"- Same σ → Market **{a_sigma:.2f}**, Bills **{b_sigma:.2f}**, Return **{fmt_pct(mu_eff_sigma)}**")
611
- info_lines.append(f"- Same μ Market **{a_mu:.2f}**, Bills **{b_mu:.2f}**, σ **{fmt_pct(sigma_eff_mu)}**")
612
- info_lines.append("")
613
- info_lines.append(f"### Dataset-based suggestions (risk: **{risk_bucket}**)")
614
- info_lines.append("Use the selector to flip between **Pick #1 / #2 / #3**. Table shows % exposure and $ amounts.")
615
-
616
- current_idx = 1
617
- current = suggs[current_idx - 1] if suggs else None
618
- current_tbl = current["table"] if current else pd.DataFrame(columns=SUG_COLS)
619
- current_msg = ("Pick #1 "
620
- f"E[μ] {fmt_pct(current['er_capm'])}, σ {fmt_pct(current['sigma'])}, β {current['beta']:.2f}"
621
- ) if current else "No suggestion."
622
-
623
- return (img,
624
- "\n".join(info_lines),
625
- f"Universe set to {', '.join(universe)}",
626
- pos_table,
627
- suggs[0]["table"] if len(suggs) >= 1 else pd.DataFrame(columns=SUG_COLS),
628
- suggs[1]["table"] if len(suggs) >= 2 else pd.DataFrame(columns=SUG_COLS),
629
- suggs[2]["table"] if len(suggs) >= 3 else pd.DataFrame(columns=SUG_COLS),
630
- eff_same_sigma_tbl,
631
- eff_same_mu_tbl,
632
- json.dumps([{
633
- "er_capm": s["er_capm"], "sigma": s["sigma"], "beta": s["beta"],
634
- } for s in suggs]),
635
- current_idx,
636
- current_msg)
 
 
 
 
 
 
 
637
 
638
  def on_pick_change(idx: int, meta_json: str):
639
  try:
@@ -674,9 +717,13 @@ with gr.Blocks(title="Efficient Portfolio Advisor", css="""
674
  datatype=["str", "number"],
675
  row_count=0,
676
  col_count=(2, "fixed"),
677
- wrap=True
 
678
  )
679
 
 
 
 
680
  with gr.Column(scale=1):
681
  horizon = gr.Slider(1, 30, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Investment horizon (years)")
682
  lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback (years) for β and σ")
@@ -687,13 +734,15 @@ with gr.Blocks(title="Efficient Portfolio Advisor", css="""
687
  search_btn.click(fn=do_search, inputs=q, outputs=[search_note, matches])
688
  add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
689
  table.change(fn=lock_ticker_column, inputs=table, outputs=table)
690
- horizon.change(fn=set_horizon, inputs=horizon, outputs=[rf_msg, gr.State()])
 
 
691
 
692
  with gr.Tab("Results"):
693
  with gr.Row():
694
  with gr.Column(scale=1):
695
  plot = gr.Image(label="Capital Market Line", type="pil")
696
- summary = gr.Markdown(label="Summary")
697
  universe_msg = gr.Textbox(label="Universe status", interactive=False)
698
 
699
  with gr.Column(scale=1):
@@ -702,14 +751,15 @@ with gr.Blocks(title="Efficient Portfolio Advisor", css="""
702
  headers=POS_COLS,
703
  datatype=["str", "number", "number", "number"],
704
  col_count=(len(POS_COLS), "fixed"),
705
- interactive=False
 
706
  )
707
 
708
  gr.Markdown("### Recommendations (always from embeddings)")
709
  with gr.Row():
710
- sugg1 = gr.Dataframe(label="Pick #1", interactive=False)
711
- sugg2 = gr.Dataframe(label="Pick #2", interactive=False)
712
- sugg3 = gr.Dataframe(label="Pick #3", interactive=False)
713
 
714
  with gr.Row():
715
  pick_idx = gr.Slider(1, 3, value=1, step=1, label="Carousel: show Pick #")
@@ -717,8 +767,8 @@ with gr.Blocks(title="Efficient Portfolio Advisor", css="""
717
  pick_msg = gr.Markdown("")
718
 
719
  gr.Markdown("### Efficient alternatives on the CML")
720
- eff_same_sigma_tbl = gr.Dataframe(label="Efficient: Same σ", interactive=False)
721
- eff_same_mu_tbl = gr.Dataframe(label="Efficient: Same μ", interactive=False)
722
 
723
  run_btn.click(
724
  fn=compute,
@@ -747,9 +797,5 @@ with gr.Blocks(title="Efficient Portfolio Advisor", css="""
747
  )
748
 
749
  if __name__ == "__main__":
750
- # Important for HF Spaces proxy
751
- demo.launch(
752
- server_name="0.0.0.0",
753
- server_port=int(os.environ.get("PORT", 7860)),
754
- show_error=True,
755
- )
 
1
+ # app.py
2
  import os, io, math, json, warnings
3
  warnings.filterwarnings("ignore")
4
 
5
+ # --- make common caches writable even on locked-down containers ---
6
+ APP_ROOT = os.path.abspath(os.path.dirname(__file__))
7
+ DATA_DIR = os.path.join(APP_ROOT, "data")
8
+ os.makedirs(DATA_DIR, exist_ok=True)
9
+
10
+ # Matplotlib cache
11
+ os.environ.setdefault("MPLCONFIGDIR", os.path.join(DATA_DIR, ".mplconfig"))
12
+ os.makedirs(os.environ["MPLCONFIGDIR"], exist_ok=True)
13
+
14
+ # Hugging Face / Sentence Transformers caches
15
+ os.environ.setdefault("HF_HOME", os.path.join(DATA_DIR, ".huggingface"))
16
+ os.environ.setdefault("HUGGINGFACE_HUB_CACHE", os.path.join(DATA_DIR, ".huggingface", "hub"))
17
+ os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", os.path.join(DATA_DIR, ".sentencetransformers"))
18
+ for d in [os.environ["HF_HOME"], os.environ["HUGGINGFACE_HUB_CACHE"], os.environ["SENTENCE_TRANSFORMERS_HOME"]]:
19
+ os.makedirs(d, exist_ok=True)
20
 
21
  from typing import List, Tuple, Dict, Optional
22
 
 
35
  # =========================
36
  # Config
37
  # =========================
 
 
 
38
  DEFAULT_LOOKBACK_YEARS = 5
39
  MAX_TICKERS = 30
40
  MARKET_TICKER = "VOO" # proxy for market portfolio
 
133
  return prices.pct_change().dropna()
134
 
135
  def validate_tickers(symbols: List[str], years: int) -> List[str]:
136
+ """Return subset of symbols that have enough data over lookback."""
137
  symbols = [s.strip().upper() for s in symbols if s and isinstance(s, str)]
138
  base = [s for s in symbols if s != MARKET_TICKER]
139
  px = fetch_prices_monthly(base + [MARKET_TICKER], years)
 
215
  # Efficient (CML) alternatives
216
  # =========================
217
  def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
218
+ """Weights (a on Market, b on Bills) and expected return on CML with same sigma."""
219
  if sigma_mkt <= 1e-12:
220
  return 0.0, 1.0, rf_ann
221
  a = sigma_target / sigma_mkt
222
  return a, 1.0 - a, rf_ann + a * erp_ann
223
 
224
  def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
225
+ """Weights (a on Market, b on Bills) and sigma on CML with same expected return."""
226
  if abs(erp_ann) <= 1e-12:
227
  return 0.0, 1.0, 0.0
228
  a = (mu_target - rf_ann) / erp_ann
 
259
  plt.scatter([0.0], [_pct_arr(rf_ann)], label="Risk-free", zorder=5)
260
  plt.scatter([_pct_arr(sigma_mkt)], [_pct_arr(rf_ann + erp_ann)], label="Market (VOO)", zorder=5)
261
 
262
+ # Your portfolio point uses CAPM expected return + historical sigma
263
  plt.scatter([_pct_arr(pt_sigma_hist)], [_pct_arr(pt_mu_capm)], label="Your portfolio (CAPM)", zorder=6)
264
 
265
+ # Efficient matches
266
  plt.scatter([_pct_arr(same_sigma_sigma)], [_pct_arr(same_sigma_mu)], label="Efficient: same σ", zorder=5)
267
  plt.scatter([_pct_arr(same_mu_sigma)], [_pct_arr(same_mu_mu)], label="Efficient: same μ", zorder=5)
268
 
269
+ # helper guides
270
  plt.plot([_pct_arr(pt_sigma_hist), _pct_arr(same_sigma_sigma)],
271
  [_pct_arr(pt_mu_capm), _pct_arr(same_sigma_mu)],
272
  ls="--", lw=1.1, alpha=0.7, color="gray")
 
350
  cand_embs: np.ndarray,
351
  k: int = 3,
352
  lambda_param: float = MMR_LAMBDA) -> List[int]:
353
+ """
354
+ Maximal Marginal Relevance: pick k diverse-yet-relevant indices.
355
+ """
356
  if cand_embs.shape[0] <= k:
357
  return list(range(cand_embs.shape[0]))
358
  sim_to_query = st_util.cos_sim(query_emb, cand_embs).cpu().numpy().reshape(-1)
 
500
  return {k: v / gross for k, v in wmap.items()}
501
 
502
  def compute(lookback_years: int,
503
+ table_input,
504
  risk_bucket: str,
505
  horizon_years: float):
506
 
507
+ try:
508
+ # --- coerce incoming table to DataFrame (Gradio 5 may pass list-like) ---
509
+ if table_input is None:
510
+ df = pd.DataFrame(columns=["ticker", "amount_usd"])
511
+ elif isinstance(table_input, pd.DataFrame):
512
+ df = table_input.copy()
513
+ else:
514
+ df = pd.DataFrame(table_input, columns=["ticker", "amount_usd"])
515
+
516
+ df = df.dropna(how="all")
517
+ if df.empty:
518
+ return (None, "Add at least one ticker", "", pd.DataFrame(columns=POS_COLS),
519
+ pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=SUG_COLS),
520
+ pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=EFF_COLS),
521
+ pd.DataFrame(columns=EFF_COLS), json.dumps([]), 1, "No suggestions yet.")
522
+
523
+ # --- sanitize
524
+ df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
525
+ df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
526
+
527
+ symbols = [t for t in df["ticker"].tolist() if t]
528
+ symbols = validate_tickers(symbols, int(lookback_years))
529
+ if len(symbols) == 0:
530
+ return (None, "Could not validate any tickers", "Universe invalid",
531
+ pd.DataFrame(columns=POS_COLS),
532
+ pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=SUG_COLS),
533
+ pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=EFF_COLS),
534
+ pd.DataFrame(columns=EFF_COLS), json.dumps([]), 1, "No suggestions.")
535
+
536
+ # --- universe & amounts
537
+ universe = sorted(set([s for s in symbols if s != MARKET_TICKER] + [MARKET_TICKER]))
538
+ df = df[df["ticker"].isin(symbols)].copy()
539
+ amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
540
+ gross_amt = sum(abs(v) for v in amounts.values())
541
+ if gross_amt <= 1e-9:
542
+ return (None, "All amounts are zero", "Universe ok", pd.DataFrame(columns=POS_COLS),
543
+ pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=SUG_COLS),
544
+ pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=EFF_COLS),
545
+ pd.DataFrame(columns=EFF_COLS), json.dumps([]), 1, "No suggestions.")
546
+
547
+ weights = {k: v / gross_amt for k, v in amounts.items()}
548
+
549
+ # --- risk free & moments
550
+ rf_code = fred_series_for_horizon(horizon_years)
551
+ rf_ann = fetch_fred_yield_annual(rf_code)
552
+ moms = estimate_all_moments_aligned(universe, int(lookback_years), rf_ann)
553
+ betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
554
+
555
+ # --- portfolio stats (CAPM return + historical sigma)
556
+ beta_p, er_capm_p, sigma_p = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
557
+
558
+ # --- efficient alternatives on CML
559
+ a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
560
+ a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_capm_p, rf_ann, erp_ann, sigma_mkt)
561
+
562
+ eff_same_sigma_tbl = _table_from_weights({MARKET_TICKER: a_sigma, BILLS_TICKER: b_sigma}, gross_amt)
563
+ eff_same_mu_tbl = _table_from_weights({MARKET_TICKER: a_mu, BILLS_TICKER: b_mu}, gross_amt)
564
+
565
+ # --- build synthetic dataset (based ONLY on this universe)
566
+ synth = build_synth_dataset(universe, covA, betas, rf_ann, erp_ann, n_rows=N_SYNTH, seed=777)
567
+
568
+ # --- risk buckets by sigma (absolute percentage points around median)
569
+ median_sigma = float(synth["sigma"].median()) if len(synth) else sigma_p
570
+ low_max = max(float(synth["sigma"].min()), median_sigma - 0.05) # 5% below median
571
+ high_min = median_sigma + 0.05
572
+
573
+ if risk_bucket == "Low":
574
+ cand_df = synth[synth["sigma"] <= low_max].copy()
575
+ elif risk_bucket == "High":
576
+ cand_df = synth[synth["sigma"] >= high_min].copy()
577
+ else: # Medium
578
+ cand_df = synth[(synth["sigma"] > low_max) & (synth["sigma"] < high_min)].copy()
579
+
580
+ if len(cand_df) == 0:
581
+ cand_df = synth.copy()
582
+
583
+ # --- embed all candidates + query, and pick 3 via MMR for diversity
584
+ embed = get_embedder()
585
+ cand_sentences = cand_df.apply(row_to_sentence, axis=1).tolist()
586
+
587
+ cur_pairs = ", ".join([f"{k}:{v:+.2f}" for k, v in sorted(weights.items())])
588
+ q_sentence = f"user portfolio ({risk_bucket} risk); capm_target {er_capm_p:.4f}; sigma_hist {sigma_p:.4f}; exposures {cur_pairs}"
589
+
590
+ cand_embs = embed.encode(cand_sentences, convert_to_tensor=True, normalize_embeddings=True, batch_size=64, show_progress_bar=False)
591
+ q_emb = embed.encode([q_sentence], convert_to_tensor=True, normalize_embeddings=True)[0]
592
+
593
+ sims = st_util.cos_sim(q_emb, cand_embs)[0]
594
+ top_idx = sims.topk(k=min(MMR_K, len(cand_df))).indices.cpu().numpy().tolist()
595
+ shortlist_embs = cand_embs[top_idx]
596
+ mmr_local = mmr_select(q_emb, shortlist_embs, k=3, lambda_param=MMR_LAMBDA)
597
+ chosen = [top_idx[i] for i in mmr_local]
598
+ recs = cand_df.iloc[chosen].reset_index(drop=True)
599
+
600
+ # --- suggestion tables for 3 picks
601
+ suggs = []
602
+ for _, r in recs.iterrows():
603
+ wmap = _weights_dict_from_row(r)
604
+ suggs.append({
605
+ "weights": wmap,
606
+ "er_capm": float(r["er_capm"]),
607
+ "sigma": float(r["sigma"]),
608
+ "beta": float(r["beta"]),
609
+ "table": _table_from_weights(wmap, gross_amt)
610
+ })
611
+
612
+ # --- plot
613
+ img = plot_cml(
614
+ rf_ann, erp_ann, sigma_mkt,
615
+ sigma_p, er_capm_p,
616
+ same_sigma_sigma=sigma_p, same_sigma_mu=mu_eff_sigma,
617
+ same_mu_sigma=sigma_eff_mu, same_mu_mu=er_capm_p
618
+ )
619
 
620
+ # --- positions table (computed)
621
+ rows = []
622
+ for t in universe:
623
+ if t == MARKET_TICKER:
624
+ continue
625
+ rows.append({
626
+ "ticker": t,
627
+ "amount_usd": round(amounts.get(t, 0.0), 2),
628
+ "weight_exposure": round(weights.get(t, 0.0), 6),
629
+ "beta": round(betas.get(t, np.nan), 4) if t != MARKET_TICKER else 1.0
630
+ })
631
+ pos_table = pd.DataFrame(rows, columns=POS_COLS)
632
+
633
+ # --- info summary
634
+ info_lines = []
635
+ info_lines.append("### Inputs")
636
+ info_lines.append(f"- Lookback years **{int(lookback_years)}**")
637
+ info_lines.append(f"- Horizon years **{int(round(horizon_years))}**")
638
+ info_lines.append(f"- Risk-free **{fmt_pct(rf_ann)}** from **{rf_code}**")
639
+ info_lines.append(f"- Market ERP **{fmt_pct(erp_ann)}**")
640
+ info_lines.append(f"- Market σ **{fmt_pct(sigma_mkt)}**")
641
+ info_lines.append("")
642
+ info_lines.append("### Your portfolio (plotted as CAPM return, historical σ)")
643
+ info_lines.append(f"- Beta **{beta_p:.2f}**")
644
+ info_lines.append(f"- σ (historical) **{fmt_pct(sigma_p)}**")
645
+ info_lines.append(f"- E[return] (CAPM / SML) **{fmt_pct(er_capm_p)}**")
646
+ info_lines.append("")
647
+ info_lines.append("### Efficient alternatives on CML")
648
+ info_lines.append(f"- Same σ → Market **{a_sigma:.2f}**, Bills **{b_sigma:.2f}**, Return **{fmt_pct(mu_eff_sigma)}**")
649
+ info_lines.append(f"- Same μ → Market **{a_mu:.2f}**, Bills **{b_mu:.2f}**, σ **{fmt_pct(sigma_eff_mu)}**")
650
+ info_lines.append("")
651
+ info_lines.append(f"### Dataset-based suggestions (risk: **{risk_bucket}**)")
652
+ info_lines.append("Use the selector to flip between **Pick #1 / #2 / #3**. Table shows % exposure and $ amounts.")
653
+
654
+ current_idx = 1
655
+ current = suggs[current_idx - 1] if suggs else None
656
+ current_tbl = current["table"] if current else pd.DataFrame(columns=SUG_COLS)
657
+ current_msg = ("Pick #1 "
658
+ f"E[μ] {fmt_pct(current['er_capm'])}, σ {fmt_pct(current['sigma'])}, β {current['beta']:.2f}"
659
+ ) if current else "No suggestion."
660
+
661
+ return (img,
662
+ "\n".join(info_lines),
663
+ f"Universe set to {', '.join(universe)}",
664
+ pos_table,
665
+ suggs[0]["table"] if len(suggs) >= 1 else pd.DataFrame(columns=SUG_COLS),
666
+ suggs[1]["table"] if len(suggs) >= 2 else pd.DataFrame(columns=SUG_COLS),
667
+ suggs[2]["table"] if len(suggs) >= 3 else pd.DataFrame(columns=SUG_COLS),
668
+ eff_same_sigma_tbl,
669
+ eff_same_mu_tbl,
670
+ json.dumps([{
671
+ "er_capm": s["er_capm"], "sigma": s["sigma"], "beta": s["beta"],
672
+ } for s in suggs]),
673
+ current_idx,
674
+ current_msg)
675
+
676
+ except Exception as e:
677
+ msg = f"⚠️ Compute failed: {type(e).__name__}: {e}"
678
+ empty = pd.DataFrame()
679
+ return (None, msg, "Error", empty, empty, empty, empty, empty, empty, "[]", 1, msg)
680
 
681
  def on_pick_change(idx: int, meta_json: str):
682
  try:
 
717
  datatype=["str", "number"],
718
  row_count=0,
719
  col_count=(2, "fixed"),
720
+ wrap=True,
721
+ type="pandas" # important for Gradio 5
722
  )
723
 
724
+ # Handy sample
725
+ sample_btn = gr.Button("Load sample portfolio")
726
+
727
  with gr.Column(scale=1):
728
  horizon = gr.Slider(1, 30, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Investment horizon (years)")
729
  lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback (years) for β and σ")
 
734
  search_btn.click(fn=do_search, inputs=q, outputs=[search_note, matches])
735
  add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
736
  table.change(fn=lock_ticker_column, inputs=table, outputs=table)
737
+ horizon.change(fn=set_horizon, inputs=horizon, outputs=[rf_msg, gr.State()]) # rf_msg + silent
738
+ sample_btn.click(lambda: pd.DataFrame({"ticker": ["AAPL","MSFT","VOO"], "amount_usd": [3000, 2000, 5000]}),
739
+ inputs=None, outputs=table)
740
 
741
  with gr.Tab("Results"):
742
  with gr.Row():
743
  with gr.Column(scale=1):
744
  plot = gr.Image(label="Capital Market Line", type="pil")
745
+ summary = gr.Markdown()
746
  universe_msg = gr.Textbox(label="Universe status", interactive=False)
747
 
748
  with gr.Column(scale=1):
 
751
  headers=POS_COLS,
752
  datatype=["str", "number", "number", "number"],
753
  col_count=(len(POS_COLS), "fixed"),
754
+ interactive=False,
755
+ type="pandas"
756
  )
757
 
758
  gr.Markdown("### Recommendations (always from embeddings)")
759
  with gr.Row():
760
+ sugg1 = gr.Dataframe(label="Pick #1", interactive=False, type="pandas")
761
+ sugg2 = gr.Dataframe(label="Pick #2", interactive=False, type="pandas")
762
+ sugg3 = gr.Dataframe(label="Pick #3", interactive=False, type="pandas")
763
 
764
  with gr.Row():
765
  pick_idx = gr.Slider(1, 3, value=1, step=1, label="Carousel: show Pick #")
 
767
  pick_msg = gr.Markdown("")
768
 
769
  gr.Markdown("### Efficient alternatives on the CML")
770
+ eff_same_sigma_tbl = gr.Dataframe(label="Efficient: Same σ", interactive=False, type="pandas")
771
+ eff_same_mu_tbl = gr.Dataframe(label="Efficient: Same μ", interactive=False, type="pandas")
772
 
773
  run_btn.click(
774
  fn=compute,
 
797
  )
798
 
799
  if __name__ == "__main__":
800
+ # On HF Spaces you don't need share=True; binding to 0.0.0.0 is enough.
801
+ demo.launch(server_name="0.0.0.0", server_port=7860)