Tulitula committed on
Commit
f8757bd
·
verified ·
1 Parent(s): d148815

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +38 -608
Dockerfile CHANGED
@@ -1,608 +1,38 @@
1
- # app.py
2
- import os, io, math, time, warnings
3
- warnings.filterwarnings("ignore")
4
-
5
- # --- make matplotlib headless & writable ---
6
- import matplotlib
7
- matplotlib.use("Agg")
8
- os.environ.setdefault("MPLCONFIGDIR", "/home/user/.config/matplotlib")
9
-
10
- from typing import List, Tuple, Dict, Optional
11
-
12
- import numpy as np
13
- import pandas as pd
14
- import matplotlib.pyplot as plt
15
- from PIL import Image
16
- import requests
17
- import yfinance as yf
18
- import gradio as gr
19
-
20
- # ---------------- config ----------------
21
- DATA_DIR = "data"
22
- os.makedirs(DATA_DIR, exist_ok=True)
23
-
24
- MAX_TICKERS = 30
25
- DEFAULT_LOOKBACK_YEARS = 10
26
- MARKET_TICKER = "VOO"
27
-
28
- SYNTH_ROWS = 1000 # size of generated dataset for suggestions
29
-
30
- # Globals that update with horizon changes
31
- HORIZON_YEARS = 10
32
- RF_CODE = "DGS10"
33
- RF_ANN = 0.0375 # updated at launch
34
-
35
- # ---------------- helpers ----------------
36
def fred_series_for_horizon(years: float) -> str:
    """Pick the FRED constant-maturity Treasury series matching a horizon.

    The horizon is clamped to [1, 100] years and mapped to the shortest
    listed maturity that is at least as long as the horizon. Horizons of one
    year (or less, after clamping) use ``DGS1`` instead of being rounded up
    to the 2-year series.

    Args:
        years: Investment horizon in years (anything ``float()`` accepts).

    Returns:
        A FRED series id such as ``"DGS10"``.
    """
    y = max(1.0, min(100.0, float(years)))
    # (upper bound in years, series id), ascending; anything longer uses DGS30.
    ladder = (
        (1, "DGS1"),
        (2, "DGS2"),
        (3, "DGS3"),
        (5, "DGS5"),
        (7, "DGS7"),
        (10, "DGS10"),
        (20, "DGS20"),
    )
    for upper, code in ladder:
        if y <= upper:
            return code
    return "DGS30"
45
-
46
- def fetch_fred_yield_annual(code: str) -> float:
47
- url = f"https://fred.stlouisfed.org/graph/fredgraph.csv?id={code}"
48
- try:
49
- r = requests.get(url, timeout=10)
50
- r.raise_for_status()
51
- df = pd.read_csv(io.StringIO(r.text))
52
- s = pd.to_numeric(df.iloc[:, 1], errors="coerce").dropna()
53
- return float(s.iloc[-1] / 100.0) if len(s) else 0.03
54
- except Exception:
55
- return 0.03
56
-
57
def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
    """Download monthly adjusted prices and return one column per ticker.

    Args:
        tickers: Symbols to download; they are upper-cased, stripped,
            de-duplicated (order preserved) and empty entries are dropped.
        years: Lookback window in years (a 7-day margin is added).

    Returns:
        A wide DataFrame with one price column per ticker that came back from
        the download, forward-filled, with all-empty rows removed. Tickers
        that are missing from the download are simply absent from the columns.
        An empty frame is returned when no usable ticker was supplied.
    """
    tickers = [t for t in dict.fromkeys(t.upper().strip() for t in tickers) if t]
    if not tickers:
        return pd.DataFrame()

    now = pd.Timestamp.today(tz="UTC")
    start = (now - pd.DateOffset(years=years, days=7)).date()
    end = now.date()

    df = yf.download(
        tickers,
        start=start,
        end=end,
        interval="1mo",
        auto_adjust=True,
        actions=False,
        progress=False,
        group_by="column",
        threads=False,
    )

    # Normalize to wide frame of prices (one column per ticker)
    if isinstance(df, pd.Series):
        df = df.to_frame()
    if isinstance(df.columns, pd.MultiIndex):
        lvl0 = [str(x) for x in df.columns.get_level_values(0).unique()]
        if "Close" in lvl0:
            df = df["Close"]
        elif "Adj Close" in lvl0:
            df = df["Adj Close"]
        else:
            df = df.xs(df.columns.levels[0][-1], axis=1, level=0, drop_level=True)
    elif len(tickers) == 1 and tickers[0] not in df.columns:
        # A single-symbol download may come back with flat field columns
        # (Open/High/Low/Close/...); expose the price field under the ticker
        # name so callers that look the ticker up in `columns` still find it.
        for field in ("Close", "Adj Close"):
            if field in df.columns:
                df = df[[field]].rename(columns={field: tickers[0]})
                break

    cols = [c for c in tickers if c in df.columns]
    # DataFrame.ffill() replaces the deprecated fillna(method="ffill").
    return df[cols].dropna(how="all").ffill()
89
-
90
- def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
91
- return prices.pct_change().dropna()
92
-
93
- def yahoo_search(query: str):
94
- if not query or not str(query).strip():
95
- return []
96
- url = "https://query1.finance.yahoo.com/v1/finance/search"
97
- params = {"q": query.strip(), "quotesCount": 10, "newsCount": 0}
98
- headers = {"User-Agent": "Mozilla/5.0"}
99
- try:
100
- r = requests.get(url, params=params, headers=headers, timeout=10)
101
- r.raise_for_status()
102
- data = r.json()
103
- out = []
104
- for q in data.get("quotes", []):
105
- sym = q.get("symbol")
106
- name = q.get("shortname") or q.get("longname") or ""
107
- exch = q.get("exchDisp") or ""
108
- if sym and sym.isascii():
109
- out.append(f"{sym} | {name} | {exch}")
110
- if not out:
111
- out = [f"{query.strip().upper()} | typed symbol | n/a"]
112
- return out[:10]
113
- except Exception:
114
- return [f"{query.strip().upper()} | typed symbol | n/a"]
115
-
116
- def validate_tickers(symbols: List[str], years: int) -> List[str]:
117
- base = [s for s in dict.fromkeys([t.upper().strip() for t in symbols]) if s]
118
- px = fetch_prices_monthly(base + [MARKET_TICKER], years)
119
- ok = [s for s in base if s in px.columns]
120
- if MARKET_TICKER not in px.columns:
121
- return []
122
- return ok
123
-
124
- # -------------- aligned moments --------------
125
- def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
126
- uniq = [c for c in dict.fromkeys(symbols) if c != MARKET_TICKER]
127
- tickers = uniq + [MARKET_TICKER]
128
- px = fetch_prices_monthly(tickers, years)
129
- rets = monthly_returns(px)
130
- cols = [c for c in uniq if c in rets.columns] + ([MARKET_TICKER] if MARKET_TICKER in rets.columns else [])
131
- R = rets[cols].dropna(how="any")
132
- return R.loc[:, ~R.columns.duplicated()]
133
-
134
def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
    """Estimate betas, annualized covariances and market moments.

    All statistics are computed on the date-aligned monthly return frame
    produced by ``get_aligned_monthly_returns``.

    Args:
        symbols: Tickers to analyse (the market proxy is added by the helper).
        years: Lookback window in years.
        rf_ann: Annual risk-free rate as a decimal.

    Returns:
        dict with keys
        ``"betas"`` (ticker -> beta versus the market proxy, proxy itself 1.0),
        ``"cov_ann"`` (annualized covariance DataFrame covering every asset
        AND the market proxy),
        ``"erp_ann"`` (annualized market mean return minus ``rf_ann``) and
        ``"sigma_m_ann"`` (annualized market standard deviation).

    Raises:
        ValueError: if the market proxy is missing or fewer than 3 aligned
            monthly observations are available.
    """
    R = get_aligned_monthly_returns(symbols, years)
    if MARKET_TICKER not in R.columns or len(R) < 3:
        raise ValueError("Not enough aligned data with market proxy.")
    rf_m = rf_ann / 12.0

    m = R[MARKET_TICKER]
    if isinstance(m, pd.DataFrame):
        # Defensive: a duplicated column label would select a frame.
        m = m.iloc[:, 0].squeeze()

    mu_m_ann = float(m.mean() * 12.0)
    sigma_m_ann = float(m.std(ddof=1) * math.sqrt(12.0))
    erp_ann = float(mu_m_ann - rf_ann)

    ex_m = m - rf_m
    # Floor the variance so a flat market series cannot cause division by zero.
    var_m = max(float(np.var(ex_m.values, ddof=1)), 1e-9)

    asset_cols = [c for c in R.columns if c != MARKET_TICKER]
    betas: Dict[str, float] = {}
    for s in asset_cols:
        ex_s = R[s] - rf_m
        cov_sm = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1])
        betas[s] = cov_sm / var_m
    betas[MARKET_TICKER] = 1.0

    # Include the market proxy in the covariance matrix. Callers reindex this
    # frame by the tickers they hold and fill gaps with 0.0, so leaving the
    # proxy out made any position in it look risk-free.
    cov_cols = asset_cols + [MARKET_TICKER]
    covA = R[cov_cols].cov() * 12.0  # DataFrame.cov uses ddof=1, like np.cov above

    return {"betas": betas, "cov_ann": covA, "erp_ann": erp_ann, "sigma_m_ann": sigma_m_ann}
165
-
166
- def capm_er(beta: float, rf_ann: float, erp_ann: float) -> float:
167
- return float(rf_ann + beta * erp_ann)
168
-
169
- def portfolio_stats(weights: Dict[str, float],
170
- cov_ann: pd.DataFrame,
171
- betas: Dict[str, float],
172
- rf_ann: float,
173
- erp_ann: float) -> Tuple[float, float, float]:
174
- tickers = list(weights.keys())
175
- w = np.array([weights[t] for t in tickers], dtype=float)
176
- gross = float(np.sum(np.abs(w)))
177
- if gross <= 1e-12:
178
- return 0.0, rf_ann, 0.0
179
- w_expo = w / gross
180
- beta_p = float(np.dot([betas.get(t, 0.0) for t in tickers], w_expo))
181
- mu_capm = capm_er(beta_p, rf_ann, erp_ann)
182
- cov = cov_ann.reindex(index=tickers, columns=tickers).fillna(0.0).to_numpy()
183
- sigma_hist = float(max(w_expo.T @ cov @ w_expo, 0.0)) ** 0.5
184
- return beta_p, mu_capm, sigma_hist # <-- X uses HIST sigma
185
-
186
- def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
187
- if sigma_mkt <= 1e-12:
188
- return 0.0, 1.0, rf_ann
189
- a = sigma_target / sigma_mkt
190
- return a, 1.0 - a, rf_ann + a * erp_ann
191
-
192
- def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
193
- if abs(erp_ann) <= 1e-12:
194
- return 0.0, 1.0, rf_ann
195
- a = (mu_target - rf_ann) / erp_ann
196
- return a, 1.0 - a, abs(a) * sigma_mkt
197
-
198
- # -------------- plotting (CAPM on CML) --------------
199
- def _pct(x):
200
- return np.asarray(x, dtype=float) * 100.0
201
-
202
def plot_cml(
    rf_ann, erp_ann, sigma_mkt,
    sigma_hist, mu_capm,
    mu_same_sigma, sigma_same_mu,
    sugg_mu=None, sugg_sigma=None
) -> Image.Image:
    """Render the Capital Market Line chart and return it as a PIL image.

    Plots the CML through the risk-free and market points, the user's
    portfolio (historical sigma on X, CAPM expected return on Y), the two
    efficient reference points, and optionally the selected suggestion.
    All inputs are decimals; axes are shown in percent.

    Drawing goes through an explicit Figure/Axes pair rather than pyplot's
    implicit "current figure", because the app serves requests concurrently
    and the implicit state is shared process-wide. The figure is always
    closed, even if drawing fails.
    """
    fig, ax = plt.subplots(figsize=(6, 4), dpi=120)
    try:
        xmax = max(0.3, sigma_mkt * 2.2, (sigma_hist or 0.0) * 1.6, (sugg_sigma or 0.0) * 1.6)
        xs = np.linspace(0, xmax, 200)
        # Slope is the market Sharpe ratio; sigma is floored to avoid /0.
        cml = rf_ann + (erp_ann / max(sigma_mkt, 1e-9)) * xs

        ax.plot(_pct(xs), _pct(cml), label="CML via Market", linewidth=1.8)
        ax.scatter([_pct(0)], [_pct(rf_ann)], label="Risk-free")
        ax.scatter([_pct(sigma_mkt)], [_pct(rf_ann + erp_ann)], label="Market")

        # YOUR point: X = historical sigma, Y = CAPM expected return
        ax.scatter([_pct(sigma_hist)], [_pct(mu_capm)], label="Your CAPM point", marker="o")

        # Efficient references on CML
        ax.scatter([_pct(sigma_hist)], [_pct(mu_same_sigma)], label="Efficient: same σ", marker="^")
        ax.scatter([_pct(sigma_same_mu)], [_pct(mu_capm)], label="Efficient: same E[r]", marker="v")

        if sugg_mu is not None and sugg_sigma is not None:
            ax.scatter([_pct(sugg_sigma)], [_pct(sugg_mu)], label="Selected Suggestion", marker="X", s=60)

        ax.set_xlabel("σ (annualized, %)")
        ax.set_ylabel("Expected return (annual, %)")
        ax.legend(loc="best")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)
238
-
239
- # -------------- synthetic dataset --------------
240
def build_synthetic_dataset(universe: List[str],
                            covA: pd.DataFrame,
                            betas: Dict[str, float],
                            rf_ann: float,
                            erp_ann: float,
                            sigma_mkt: float,
                            n_rows: int = SYNTH_ROWS) -> pd.DataFrame:
    """Generate random long-only portfolios drawn from ``universe``.

    Each row picks between 2 and 8 tickers (fewer when the universe is
    smaller) without replacement, assigns Dirichlet(1, ..., 1) weights and
    records the resulting beta, CAPM expected return, historical sigma
    (from ``covA``; tickers missing from it count as zero covariance) and
    CAPM sigma (``|beta| * sigma_mkt``). The generator is seeded with a
    fixed value, so the same inputs give the same dataset.

    Returns:
        DataFrame with columns ``tickers``, ``weights`` (both comma-joined
        strings), ``beta``, ``mu_capm``, ``sigma_hist``, ``sigma_capm``.
        Empty (but with those columns) when the universe is empty or
        ``n_rows`` is not positive.
    """
    columns = ["tickers", "weights", "beta", "mu_capm", "sigma_hist", "sigma_capm"]
    pool = list(universe)
    if not pool or n_rows <= 0:
        return pd.DataFrame(columns=columns)

    rng = np.random.default_rng(12345)
    k_max = min(8, len(pool))
    # A one-ticker universe cannot supply two distinct picks; the original
    # lower bound of 2 made rng.integers/rng.choice raise in that case.
    k_min = min(2, k_max)

    rows = []
    for _ in range(n_rows):
        k = int(rng.integers(low=k_min, high=k_max + 1))
        picks = list(rng.choice(pool, size=k, replace=False))
        w = rng.dirichlet(np.ones(k))
        beta_p = float(np.dot([betas.get(t, 0.0) for t in picks], w))
        mu_capm = capm_er(beta_p, rf_ann, erp_ann)
        sub = covA.reindex(index=picks, columns=picks).fillna(0.0).to_numpy()
        # Clamp at 0 before the square root to absorb tiny negative round-off.
        sigma_hist = float(max(w.T @ sub @ w, 0.0)) ** 0.5
        sigma_capm = abs(beta_p) * sigma_mkt

        rows.append({
            "tickers": ",".join(picks),
            "weights": ",".join(f"{x:.6f}" for x in w),
            "beta": beta_p,
            "mu_capm": mu_capm,
            "sigma_hist": sigma_hist,
            "sigma_capm": sigma_capm
        })
    return pd.DataFrame(rows, columns=columns)
272
-
273
- def _band_bounds(sigma_mkt: float, band: str) -> Tuple[float, float]:
274
- band = (band or "Medium").strip().lower()
275
- if band.startswith("low"):
276
- return 0.0, 0.8 * sigma_mkt
277
- if band.startswith("high"):
278
- return 1.2 * sigma_mkt, 3.0 * sigma_mkt
279
- return 0.8 * sigma_mkt, 1.2 * sigma_mkt
280
-
281
- def top3_by_return_in_band(df: pd.DataFrame, band: str, sigma_mkt: float) -> pd.DataFrame:
282
- lo, hi = _band_bounds(sigma_mkt, band)
283
- pick = df[(df["sigma_capm"] >= lo) & (df["sigma_capm"] <= hi)].copy()
284
- if pick.empty:
285
- pick = df.copy()
286
- pick = pick.sort_values("mu_capm", ascending=False).head(3).reset_index(drop=True)
287
- pick.insert(0, "pick", [1, 2, 3][: len(pick)])
288
- return pick
289
-
290
- # -------------- optional: embeddings rerank --------------
291
- def rerank_with_embeddings(top3: pd.DataFrame, band: str) -> pd.DataFrame:
292
- try:
293
- from sentence_transformers import SentenceTransformer
294
- model = SentenceTransformer("FinLang/finance-embeddings-investopedia")
295
- prompt = {
296
- "low": "low risk conservative portfolio stable diversified market exposure",
297
- "medium": "balanced medium risk diversified portfolio",
298
- "high": "high risk growth aggressive portfolio higher expected return"
299
- }[(band or "medium").lower() if (band or "medium").lower() in {"low","medium","high"} else "medium"]
300
-
301
- cand_texts = []
302
- for _, r in top3.iterrows():
303
- cand_texts.append(
304
- f"portfolio with tickers {r['tickers']} having beta {float(r['beta']):.2f}, "
305
- f"expected return {float(r['mu_capm']):.3f}, sigma {float(r['sigma_capm']):.3f}"
306
- )
307
-
308
- q = model.encode([prompt])
309
- c = model.encode(cand_texts)
310
- sims = (q @ c.T) / (np.linalg.norm(q) * np.linalg.norm(c, axis=1, keepdims=False))
311
- order = np.argsort(-sims.ravel())
312
- return top3.iloc[order].reset_index(drop=True)
313
- except Exception:
314
- return top3
315
-
316
- # -------------- UI helpers --------------
317
- def empty_positions_df():
318
- return pd.DataFrame(columns=["ticker", "amount_usd", "weight_exposure", "beta"])
319
-
320
- def empty_suggestion_df():
321
- return pd.DataFrame(columns=["ticker", "weight_%", "amount_$"])
322
-
323
def set_horizon(years: float):
    """Update the global horizon and risk-free settings from the UI value.

    The horizon is clamped to [1, 100] years, mapped to a FRED series and the
    latest yield for that series is fetched. A missing or non-numeric value
    (for example a cleared number box, which arrives as ``None``) keeps the
    current horizon instead of raising.

    Returns:
        A status string naming the series and the fetched annual rate.
    """
    global HORIZON_YEARS, RF_CODE, RF_ANN
    try:
        y = float(years)
    except (TypeError, ValueError):
        y = float("nan")
    if math.isnan(y):
        y = float(HORIZON_YEARS)
    y = max(1.0, min(100.0, y))

    code = fred_series_for_horizon(y)
    rf = fetch_fred_yield_annual(code)
    HORIZON_YEARS = y
    RF_CODE = code
    RF_ANN = rf
    return f"Risk-free series {code}. Latest annual rate {rf:.2%}."
332
-
333
- def search_tickers_cb(q: str):
334
- opts = yahoo_search(q)
335
- note = "Select a symbol and click 'Add selected to portfolio'." if opts else "No matches."
336
- return note, gr.update(choices=opts, value=None)
337
-
338
def add_symbol(selection: str, table: Optional[pd.DataFrame]):
    """Add the symbol chosen in the Matches dropdown to the positions table.

    Args:
        selection: Dropdown value of the form ``"SYM | name | exchange"``.
        table: Current positions table (``ticker`` / ``amount_usd``), or None.

    Returns:
        ``(new_table, message)``. Existing amounts are carried over; a missing
        or non-numeric amount becomes 0.0. Tickers that fail validation are
        dropped, and the message says so when the requested symbol itself was
        rejected. The table is capped at ``MAX_TICKERS`` rows.
    """
    if not selection:
        return table if isinstance(table, pd.DataFrame) else pd.DataFrame(columns=["ticker","amount_usd"]), "Pick a row in Matches first."
    symbol = selection.split("|")[0].strip().upper()

    current: List[str] = []
    amt_map: Dict[str, float] = {}
    if isinstance(table, pd.DataFrame) and not table.empty:
        for _, r in table.iterrows():
            raw = r.get("ticker", "")
            if pd.isna(raw):
                continue
            t = str(raw).strip().upper()
            if not t:
                continue
            if t not in current:
                current.append(t)
            amt = pd.to_numeric(r.get("amount_usd", 0.0), errors="coerce")
            # NaN is truthy, so `amt or 0.0` would keep NaN; test it explicitly.
            amt_map[t] = 0.0 if pd.isna(amt) else float(amt)

    tickers = current if symbol in current else current + [symbol]

    valid = set(validate_tickers(tickers, years=DEFAULT_LOOKBACK_YEARS))
    tickers = [t for t in tickers if t in valid]

    over_limit = len(tickers) > MAX_TICKERS
    tickers = tickers[:MAX_TICKERS]

    new_table = pd.DataFrame({"ticker": tickers, "amount_usd": [amt_map.get(t, 0.0) for t in tickers]})
    if over_limit:
        return new_table, f"Reached max of {MAX_TICKERS}."
    if symbol not in tickers:
        return new_table, f"Could not validate {symbol}."
    return new_table, f"Added {symbol}."
363
-
364
def lock_ticker_column(tb: Optional[pd.DataFrame]):
    """Re-validate the positions table after an edit.

    Rows whose ticker fails validation are removed. Each surviving ticker
    keeps the amount from its own row (non-numeric amounts become 0.0);
    filtering tickers and amounts together avoids shifting amounts onto the
    wrong ticker when a row in the middle is dropped.

    Returns:
        A fresh two-column DataFrame (``ticker``, ``amount_usd``).
    """
    if not isinstance(tb, pd.DataFrame) or tb.empty:
        return pd.DataFrame(columns=["ticker", "amount_usd"])
    # Strip as well as upper-case so the comparison matches validate_tickers,
    # which normalizes the same way.
    tickers = [str(x).upper().strip() for x in tb["ticker"].tolist()]
    amounts = pd.to_numeric(tb["amount_usd"], errors="coerce").fillna(0.0).tolist()
    valid = set(validate_tickers(tickers, years=DEFAULT_LOOKBACK_YEARS))
    kept = [(t, a) for t, a in zip(tickers, amounts) if t in valid]
    return pd.DataFrame({
        "ticker": [t for t, _ in kept],
        "amount_usd": [a for _, a in kept],
    })
373
-
374
- # -------------- main compute --------------
375
- UNIVERSE: List[str] = [MARKET_TICKER, "QQQ", "VTI", "SOXX", "IBIT"]
376
-
377
def compute(
    years_lookback: int,
    table: Optional[pd.DataFrame],
    risk_band: str,
    use_embeddings: bool,
    pick_idx: int
):
    """Run the full analysis for the current positions table.

    Steps: sanitize the table, validate tickers, estimate betas/covariances,
    compute the portfolio's CAPM point and the two efficient CML references,
    build the synthetic dataset (also written to a CSV under ``DATA_DIR``),
    choose up to three suggestions for the risk band and render the chart.

    Args:
        years_lookback: Lookback window in years for the estimates.
        table: Positions table with ``ticker`` and ``amount_usd`` columns.
        risk_band: "Low", "Medium" or "High".
        use_embeddings: Whether to rerank the suggestions with embeddings.
        pick_idx: 1-based suggestion to display; clamped to what is available.

    Returns:
        A 7-tuple matching the UI outputs: (plot image, summary markdown,
        universe message, positions table, suggestion table, CSV path,
        update for the pick control). On failure the image and CSV path are
        None, the tables are empty and the summary carries the reason.

    Side effects:
        Rebinds the module-level ``UNIVERSE`` and writes a CSV file.
    """
    def _bail(message: str, universe_note: str):
        # Uniform failure payload with the same arity as the success path.
        return None, message, universe_note, empty_positions_df(), empty_suggestion_df(), None, gr.update()

    print("Compute: start")
    # sanitize table
    if isinstance(table, pd.DataFrame):
        df = table.copy()
    else:
        df = pd.DataFrame(columns=["ticker", "amount_usd"])
    df = df.dropna(how="all")
    # Scalar defaults broadcast to any row count; assigning [] raised on a
    # non-empty frame that lacked the column.
    if "ticker" not in df.columns:
        df["ticker"] = ""
    if "amount_usd" not in df.columns:
        df["amount_usd"] = 0.0
    df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
    df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)

    symbols = [t for t in df["ticker"].tolist() if t]
    if len(symbols) == 0:
        return _bail("Add at least one ticker.", "Universe empty.")

    symbols = validate_tickers(symbols, years_lookback)
    print("Compute: validated", symbols)
    if len(symbols) == 0:
        return _bail("Could not validate any tickers.", "Universe invalid.")

    global UNIVERSE
    UNIVERSE = list(sorted(set([s for s in symbols if s != MARKET_TICKER] + [MARKET_TICKER])))[:MAX_TICKERS]

    df = df[df["ticker"].isin(symbols)].copy()
    # Sum repeated rows for the same ticker instead of keeping only the last.
    amounts: Dict[str, float] = {}
    for _, r in df.iterrows():
        amounts[r["ticker"]] = amounts.get(r["ticker"], 0.0) + float(r["amount_usd"])
    rf_ann = RF_ANN

    # Moments
    try:
        moms = estimate_all_moments_aligned(symbols, years_lookback, rf_ann)
    except ValueError as exc:
        # Raised when too little aligned history exists; report it in the UI.
        return _bail(f"Could not estimate moments: {exc}", "Universe ok.")
    betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
    print("Compute: moments ok; sigma_mkt=", sigma_mkt, "erp=", erp_ann)

    # Weights
    gross = sum(abs(v) for v in amounts.values())
    if gross <= 1e-12:
        return _bail("All amounts are zero.", "Universe ok.")
    weights = {k: v / gross for k, v in amounts.items()}

    # Portfolio stats (X uses historical sigma; Y uses CAPM E[r])
    beta_p, mu_capm, sigma_hist = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
    sigma_capm = abs(beta_p) * sigma_mkt

    # Efficient alternatives (on CML)
    a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_hist, rf_ann, erp_ann, sigma_mkt)
    a_mu, b_mu, sigma_eff_mu = efficient_same_return(mu_capm, rf_ann, erp_ann, sigma_mkt)

    # Synthetic dataset & suggestions
    synth = build_synthetic_dataset(UNIVERSE, covA, betas, rf_ann, erp_ann, sigma_mkt, n_rows=SYNTH_ROWS)
    csv_path = os.path.join(DATA_DIR, f"investor_profiles_{int(time.time())}.csv")
    synth.to_csv(csv_path, index=False)

    top3 = top3_by_return_in_band(synth, risk_band, sigma_mkt)
    if use_embeddings:
        top3 = rerank_with_embeddings(top3, risk_band)
    if top3.empty:
        top3 = synth.sort_values("mu_capm", ascending=False).head(3).reset_index(drop=True)
        top3.insert(0, "pick", [1, 2, 3][: len(top3)])
    if top3.empty:
        return _bail("No suggestions could be generated.", "Universe ok.")

    # Clamp the requested pick to the rows that actually exist; a cleared
    # number box arrives as None.
    try:
        requested = int(pick_idx)
    except (TypeError, ValueError):
        requested = 1
    idx = max(1, min(len(top3), requested)) - 1
    row = top3.iloc[idx]

    sugg_mu = float(row["mu_capm"])
    sugg_sigma = float(row["sigma_capm"])

    # suggestion holdings (% and $)
    ts = [t.strip() for t in str(row["tickers"]).split(",")]
    ws = [float(x) for x in str(row["weights"]).split(",")]
    s = sum(ws) if ws else 1.0
    if s <= 0:
        s = 1.0  # avoid dividing by zero on degenerate weights
    ws = [max(0.0, w) / s for w in ws]
    budget = gross if gross > 0 else 1.0
    sugg_table = pd.DataFrame(
        [{"ticker": t, "weight_%": round(w*100.0, 2), "amount_$": round(w*budget, 0)} for t, w in zip(ts, ws)],
        columns=["ticker", "weight_%", "amount_$"]
    )

    # positions table
    pos_table = pd.DataFrame(
        [{
            "ticker": t,
            "amount_usd": amounts.get(t, 0.0),
            "weight_exposure": weights.get(t, 0.0),
            "beta": 1.0 if t == MARKET_TICKER else betas.get(t, np.nan)
        } for t in symbols],
        columns=["ticker", "amount_usd", "weight_exposure", "beta"]
    )

    # plot (CAPM on CML; your point uses sigma_hist on X)
    img = plot_cml(
        rf_ann, erp_ann, sigma_mkt,
        sigma_hist, mu_capm,
        mu_same_sigma=mu_eff_sigma, sigma_same_mu=sigma_eff_mu,
        sugg_mu=sugg_mu, sugg_sigma=sugg_sigma
    )

    info = "\n".join([
        "### Inputs",
        f"- Lookback years {years_lookback}",
        f"- Horizon years {int(round(HORIZON_YEARS))}",
        f"- Risk-free {rf_ann:.2%} from {RF_CODE}",
        f"- Market ERP {erp_ann:.2%}",
        f"- Market σ {sigma_mkt:.2%}",
        "",
        "### Your portfolio (CAPM on CML axes)",
        f"- Beta {beta_p:.2f}",
        f"- Expected return (CAPM / SML) {mu_capm:.2%}",
        f"- σ (historical) {sigma_hist:.2%}",
        f"- σ on CML for same β (|β|×σ_mkt) {sigma_capm:.2%}",
        "",
        "### Efficient alternatives on CML",
        f"- Same σ as your portfolio (historical): Market weight {a_sigma:.2f}, Bills weight {b_sigma:.2f}, return {mu_eff_sigma:.2%}",
        f"- Same return (CAPM): Market weight {a_mu:.2f}, Bills weight {b_mu:.2f}, σ {sigma_eff_mu:.2%}",
        "",
        f"### Dataset-based suggestions (risk: {risk_band})",
        f"- Showing Pick **#{idx+1}** → CAPM return {sugg_mu:.2%}, CAPM σ {sugg_sigma:.2%}",
        "",
        "_Plot shows CAPM E[r] vs σ; your point uses historical σ; efficient references are market/bills on the CML._"
    ])

    uni_msg = f"Universe set to: {', '.join(UNIVERSE)}"
    print("Compute: done")
    return img, info, uni_msg, pos_table, sugg_table, csv_path, gr.update(label=f"Pick #{idx+1} of 3")
506
-
507
- # -------------- UI --------------
508
- def inc_pick(i: int): return min(3, max(1, int(i or 1) + 1))
509
- def dec_pick(i: int): return max(1, min(3, int(i or 1) - 1))
510
-
511
- with gr.Blocks(title="Efficient Portfolio Advisor", analytics_enabled=False) as demo:
512
- gr.Markdown(
513
- "## Efficient Portfolio Advisor\n"
514
- "Search symbols, enter **dollar amounts**, set horizon. Returns use Yahoo Finance monthly data; risk-free from FRED. "
515
- "Plot shows **CAPM point (E[r]) vs historical σ** plus efficient CML points."
516
- )
517
-
518
- with gr.Row():
519
- with gr.Column(scale=1):
520
- q = gr.Textbox(label="Search symbol")
521
- search_note = gr.Markdown()
522
- matches = gr.Dropdown(choices=[], label="Matches")
523
- search_btn = gr.Button("Search")
524
- add_btn = gr.Button("Add selected to portfolio")
525
-
526
- gr.Markdown("### Portfolio positions (enter $ amounts; negatives allowed for shorts)")
527
- table = gr.Dataframe(
528
- headers=["ticker", "amount_usd"],
529
- datatype=["str", "number"],
530
- type="pandas",
531
- row_count=0,
532
- col_count=(2, "fixed")
533
- )
534
-
535
- horizon = gr.Number(label="Horizon in years (1–100)", value=HORIZON_YEARS, precision=0)
536
- lookback = gr.Slider(1, 15, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for betas & covariances")
537
-
538
- gr.Markdown("### Suggestions")
539
- risk_band = gr.Radio(["Low", "Medium", "High"], value="Medium", label="Risk tolerance")
540
- use_emb = gr.Checkbox(value=True, label="Use finance embeddings to refine picks")
541
-
542
- with gr.Row():
543
- prev_btn = gr.Button("◀ Prev")
544
- pick_idx = gr.Number(value=1, precision=0, label="Carousel")
545
- next_btn = gr.Button("Next ▶")
546
-
547
- run_btn = gr.Button("Compute (build dataset & suggest)")
548
- with gr.Column(scale=1):
549
- plot = gr.Image(label="Capital Market Line (CAPM)", type="pil")
550
- summary = gr.Markdown(label="Inputs & Results")
551
- universe_msg = gr.Textbox(label="Universe status", interactive=False)
552
- positions = gr.Dataframe(
553
- label="Computed positions",
554
- headers=["ticker", "amount_usd", "weight_exposure", "beta"],
555
- datatype=["str", "number", "number", "number"],
556
- type="pandas",
557
- col_count=(4, "fixed"),
558
- value=empty_positions_df(),
559
- interactive=False
560
- )
561
- sugg_table = gr.Dataframe(
562
- label="Selected suggestion (carousel) — holdings shown in % and $",
563
- headers=["ticker", "weight_%", "amount_$"],
564
- datatype=["str", "number", "number"],
565
- type="pandas",
566
- col_count=(3, "fixed"),
567
- value=empty_suggestion_df(),
568
- interactive=False
569
- )
570
- dl = gr.File(label="Generated dataset CSV", value=None, visible=True)
571
-
572
- # wire search / add / locking / horizon
573
- search_btn.click(fn=search_tickers_cb, inputs=q, outputs=[search_note, matches])
574
- add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
575
- table.change(fn=lock_ticker_column, inputs=table, outputs=table)
576
- horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)
577
-
578
- # carousel buttons update pick index and then recompute
579
- prev_btn.click(fn=dec_pick, inputs=pick_idx, outputs=pick_idx).then(
580
- fn=compute,
581
- inputs=[lookback, table, risk_band, use_emb, pick_idx],
582
- outputs=[plot, summary, universe_msg, positions, sugg_table, dl, pick_idx]
583
- )
584
- next_btn.click(fn=inc_pick, inputs=pick_idx, outputs=pick_idx).then(
585
- fn=compute,
586
- inputs=[lookback, table, risk_band, use_emb, pick_idx],
587
- outputs=[plot, summary, universe_msg, positions, sugg_table, dl, pick_idx]
588
- )
589
-
590
- # main compute
591
- run_btn.click(
592
- fn=compute,
593
- inputs=[lookback, table, risk_band, use_emb, pick_idx],
594
- outputs=[plot, summary, universe_msg, positions, sugg_table, dl, pick_idx]
595
- )
596
-
597
- # initialize risk-free at launch
598
- RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
599
- RF_ANN = fetch_fred_yield_annual(RF_CODE)
600
-
601
if __name__ == "__main__":
    # Gradio 4+ replaced queue(concurrency_count=...) with
    # queue(default_concurrency_limit=...); older releases reject the new
    # keyword with TypeError, so try the new name first and fall back.
    try:
        demo.queue(default_concurrency_limit=8)
    except TypeError:
        demo.queue(concurrency_count=8)

    # IMPORTANT for Spaces/Docker: bind to 0.0.0.0 and the correct PORT
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", "7860")),
        show_error=True,
        share=False
    )
 
1
+ FROM python:3.10-slim-bullseye
2
+
3
+ ENV PYTHONDONTWRITEBYTECODE=1 \
4
+ PYTHONUNBUFFERED=1 \
5
+ PIP_NO_CACHE_DIR=1 \
6
+ PATH="/home/user/.local/bin:${PATH}" \
7
+ MPLCONFIGDIR="/home/user/.config/matplotlib" \
8
+ HF_HOME="/home/user/.cache/huggingface" \
9
+ SENTENCE_TRANSFORMERS_HOME="/home/user/.cache/sentencetransformers" \
10
+ GRADIO_SERVER_NAME="0.0.0.0" \
11
+ GRADIO_SERVER_PORT="7860"
12
+
13
+ # System deps for plotting, ffmpeg, etc.
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
+ git git-lfs ffmpeg \
16
+ libglib2.0-0 libsm6 libxext6 libxrender1 libgl1 \
17
+ fonts-dejavu-core \
18
+ && rm -rf /var/lib/apt/lists/* \
19
+ && git lfs install
20
+
21
+ # Non-root user so pip installs land in /home/user/.local
22
+ RUN useradd -m -u 1000 user
23
+ USER user
24
+ WORKDIR /home/user/app
25
+
26
+ # Pre-create writable caches
27
+ RUN mkdir -p /home/user/.config/matplotlib \
28
+ /home/user/.cache/huggingface/hub \
29
+ /home/user/.cache/sentencetransformers \
30
+ /home/user/.cache/pip
31
+
32
+ COPY --chown=user:user requirements.txt ./requirements.txt
33
+ RUN python -m pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt
34
+
35
+ COPY --chown=user:user . .
36
+
37
+ EXPOSE 7860
38
+ CMD ["python", "app.py"]