Corin1998 committed on
Commit
04bec24
·
verified ·
1 Parent(s): 6997332

Create save/features.py

Browse files
Files changed (1) hide show
  1. save/features.py +38 -0
save/features.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd, numpy as np
2
+ from typing import Tuple
3
+
4
def clip_by_quantile(s: pd.Series, lo=0.01, hi=0.99):
    """Winsorize a series by clamping it to its own quantile range.

    Args:
        s: Series to clip. It is not modified; a new series is returned.
        lo: Quantile (0..1) whose value becomes the lower clipping bound.
        hi: Quantile (0..1) whose value becomes the upper clipping bound.

    Returns:
        A series in which values below the ``lo`` quantile are raised to it
        and values above the ``hi`` quantile are lowered to it.
    """
    # One quantile call yields both bounds, in the order requested.
    bounds = s.quantile([lo, hi])
    lower_bound = bounds.iloc[0]
    upper_bound = bounds.iloc[1]
    return s.clip(lower=lower_bound, upper=upper_bound)
7
+
8
def _ratio_where_positive(num: pd.Series, den: pd.Series, default: float) -> np.ndarray:
    """Return ``num / den`` where ``den > 0`` and ``default`` elsewhere.

    Rows whose denominator is zero, negative or NaN get ``default``.
    """
    return np.where(den > 0, num / den, default)


def build_features(df: pd.DataFrame) -> pd.DataFrame:
    """Build ratio features from raw financial columns and winsorize them.

    The input frame is copied, so the caller's data is left untouched.

    Args:
        df: Frame holding the raw inputs ``sales``, ``operating_income``,
            ``net_income``, ``current_assets``, ``current_liabilities``,
            ``total_assets``, ``total_liabilities``, ``total_equity``,
            ``operating_cash_flow``, ``market_cagr_pct``,
            ``key_product_count`` and ``key_product_growing_count``.
            Any of these that is absent is created as an all-NaN column
            instead of raising ``KeyError``.

    Returns:
        A copy of ``df`` with the raw inputs coerced to numeric and the
        added columns ``opm``, ``npm``, ``cur_ratio``, ``eq_ratio``, ``de``,
        ``ocf_sales`` and ``prod_grow_ratio``. Those seven columns and
        ``market_cagr_pct`` have NaN replaced by 0.0 and are clipped to
        their 1st-99th percentile range.
    """
    df = df.copy()

    # Coerce raw inputs to numeric; values that cannot be parsed become NaN.
    raw_columns = [
        "sales", "operating_income", "net_income",
        "current_assets", "current_liabilities",
        "total_assets", "total_liabilities", "total_equity",
        "operating_cash_flow",
        "market_cagr_pct", "key_product_count", "key_product_growing_count",
    ]
    for c in raw_columns:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")
        else:
            # Every column is read unconditionally below, so a missing one
            # is materialised as NaN and handled like any other gap.
            df[c] = np.nan

    # Ratio features. A non-positive or missing denominator yields the
    # stated fallback (0.0 or NaN) rather than inf.
    df["opm"] = _ratio_where_positive(df["operating_income"], df["sales"], 0.0)
    df["npm"] = _ratio_where_positive(df["net_income"], df["sales"], 0.0)
    df["cur_ratio"] = _ratio_where_positive(
        df["current_assets"], df["current_liabilities"], np.nan
    )
    df["eq_ratio"] = _ratio_where_positive(df["total_equity"], df["total_assets"], np.nan)
    df["de"] = _ratio_where_positive(df["total_liabilities"], df["total_equity"], np.nan)
    df["ocf_sales"] = _ratio_where_positive(df["operating_cash_flow"], df["sales"], 0.0)

    # Share of key products that are growing.
    # NOTE(review): the original comment calls this "text-derived"; the counts
    # are presumably extracted from text upstream - confirm against the caller.
    df["prod_grow_ratio"] = _ratio_where_positive(
        df["key_product_growing_count"], df["key_product_count"], 0.0
    )

    # Outlier clipping (winsorize). NaN is replaced by 0.0 *before* the
    # quantiles are computed, so the filled zeros take part in the bounds.
    for c in [
        "opm", "npm", "cur_ratio", "eq_ratio", "de",
        "ocf_sales", "market_cagr_pct", "prod_grow_ratio",
    ]:
        df[c] = clip_by_quantile(df[c].astype(float).fillna(0.0), 0.01, 0.99)

    return df