Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,115 +1,240 @@
|
|
| 1 |
-
import
|
| 2 |
-
import pandas as pd
|
| 3 |
-
import matplotlib.pyplot as plt
|
| 4 |
-
import seaborn as sns
|
| 5 |
import io
|
| 6 |
-
import
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
#
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
if
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
else:
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
#
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
|
|
|
|
|
|
|
|
|
| 2 |
import io
|
| 3 |
+
import unicodedata
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import streamlit as st
|
| 6 |
+
import plotly.express as px
|
| 7 |
+
|
| 8 |
+
# -----------------------------
# Page configuration
# -----------------------------
# Browser-tab title/icon and a wide layout for the whole app.
st.set_page_config(layout="wide", page_icon="📊", page_title="Employee Count Dashboard")
|
| 16 |
+
|
| 17 |
+
# -----------------------------
|
| 18 |
+
# Utilidades
|
| 19 |
+
# -----------------------------
|
| 20 |
+
def normalize(s: str) -> str:
    """Return a canonical form of a column name.

    The text is NFKD-decomposed, every non-ASCII code point (including the
    combining accents produced by the decomposition) is dropped, the result
    is lower-cased, and runs of whitespace collapse to a single space with
    no leading or trailing whitespace.
    """
    decomposed = unicodedata.normalize("NFKD", s)
    ascii_only = decomposed.encode("ascii", "ignore").decode("utf-8", "ignore")
    words = ascii_only.lower().split()
    return " ".join(words)
|
| 24 |
+
|
| 25 |
+
def find_target_column(df: pd.DataFrame, target="extskhis_emp full name") -> str | None:
    """Find the column of *df* that corresponds to *target*.

    Headers are compared after ``normalize`` is applied to both sides, so the
    match ignores case, accents and repeated whitespace.

    Args:
        df: Frame whose column labels are searched.
        target: Header to look for.

    Returns:
        The original label of the first column whose normalized form equals
        the normalized target. If none matches exactly, the first column
        whose normalized header contains both "full" and "name". ``None``
        when neither search finds a column.
    """
    # Loop-invariant: normalize the target once rather than per column.
    wanted = normalize(target)
    # str() keeps non-string labels (e.g. integer headers) from raising
    # inside normalize; the original label is still what gets returned.
    norm_map = {col: normalize(str(col)) for col in df.columns}
    for col, norm in norm_map.items():
        if norm == wanted:
            return col
    # Fallback: accept a header that merely looks like a "full name" column.
    candidates = [c for c, n in norm_map.items() if "full" in n and "name" in n]
    return candidates[0] if candidates else None
|
| 34 |
+
|
| 35 |
+
@st.cache_data(show_spinner=False)
def load_csv(file_obj) -> pd.DataFrame:
    """Parse *file_obj* as CSV with pandas' default settings.

    The result is cached through ``st.cache_data`` with the spinner disabled.
    """
    frame = pd.read_csv(file_obj)
    return frame
|
| 38 |
+
|
| 39 |
+
@st.cache_data(show_spinner=False)
def load_sample(sample_path: str) -> pd.DataFrame | None:
    """Read the sample CSV at *sample_path*, or return ``None`` if the path does not exist.

    The result is cached through ``st.cache_data`` with the spinner disabled.
    """
    if not os.path.exists(sample_path):
        return None
    return pd.read_csv(sample_path)
|
| 44 |
+
|
| 45 |
+
def pretty_number(n: int) -> str:
    """Format *n* with a space, instead of a comma, between groups of thousands."""
    grouped = format(n, ",")
    return " ".join(grouped.split(","))
|
| 47 |
+
|
| 48 |
+
# -----------------------------
# Styles (UI) – clean look
# -----------------------------
# Stylesheet injected once into the page. It defines the app background, the
# `.kpi-card` / `.kpi-label` / `.kpi-value` classes used by the KPI markup,
# a `.section-card` container, and a gradient text effect for
# `h1 span.accent`. The text below is emitted at runtime and must be kept
# as-is.
CUSTOM_CSS = """
<style>
/* Fondo suave y tarjetas con glass effect */
.stApp { background: linear-gradient(180deg, #f8fafc 0%, #eef2f7 100%); }
.block-container { padding-top: 1.5rem; }

/* Tarjetas KPI */
.kpi-card {
border-radius: 14px;
padding: 18px 20px;
background: rgba(255, 255, 255, 0.75);
backdrop-filter: blur(6px);
border: 1px solid rgba(0,0,0,0.06);
box-shadow: 0 10px 20px -12px rgba(0,0,0,0.12);
}
.kpi-label { font-size: 0.85rem; color: #5b6573; margin-bottom: 6px; }
.kpi-value { font-size: 1.6rem; font-weight: 700; color: #111827; }

/* Contenedor principal */
.section-card {
border-radius: 16px;
padding: 20px;
background: #ffffff;
border: 1px solid #e5e7eb;
box-shadow: 0 12px 24px -16px rgba(0,0,0,0.18);
}

/* Título con acento */
h1 span.accent {
background: linear-gradient(90deg, #2563eb, #06b6d4);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
</style>
"""
# unsafe_allow_html=True is needed so the <style> element is passed through
# as HTML.
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
|
| 87 |
+
|
| 88 |
+
# -----------------------------
# Sidebar (data loading)
# -----------------------------
st.sidebar.title("⚙️ Configuración")

uploaded = st.sidebar.file_uploader("Sube tu archivo CSV", type=["csv"])
# Placeholder that later holds the info/warning about the sample file.
sample_note = st.sidebar.empty()

# Data source: the uploaded CSV when there is one, otherwise data/sample.csv.
df = None
source_label = ""
if uploaded is None:
    # Nothing uploaded: try the bundled sample.
    df_sample = load_sample("data/sample.csv")
    if df_sample is None:
        sample_note.warning("No subiste archivo y no existe `data/sample.csv`. Sube un CSV para continuar.")
    else:
        df = df_sample
        source_label = "Fuente: `data/sample.csv` (muestra)"
        sample_note.info("No subiste archivo. Mostrando un ejemplo desde `data/sample.csv`.")
else:
    try:
        # Hand the loader the raw bytes of the upload, wrapped in a fresh buffer.
        df = load_csv(io.BytesIO(uploaded.getvalue()))
        source_label = f"Fuente: Archivo subido — **{uploaded.name}**"
    except Exception as exc:
        # Any read failure is reported in the sidebar; df stays None.
        st.sidebar.error(f"Error al leer el CSV: {exc}")
|
| 116 |
+
|
| 117 |
+
# -----------------------------
# Main UI
# -----------------------------
# st.title() renders Markdown only and has no unsafe_allow_html option, so
# the <span class='accent'> markup would be shown as literal text. Emitting
# the heading through st.markdown with unsafe_allow_html=True lets the
# `h1 span.accent` rule of the page stylesheet apply to the column name.
st.markdown(
    "<h1>📊 Employee Count Dashboard · "
    "<span class='accent'>EXTSKHIS_EMP FULL NAME</span></h1>",
    unsafe_allow_html=True,
)

# Without data there is nothing to draw: show a hint and end this run.
if df is None or df.empty:
    st.info("Sube un CSV con la columna **EXTSKHIS_EMP FULL NAME** para ver el dashboard.")
    st.stop()

# Resolve the name column, tolerating header variations.
target_col = find_target_column(df, "EXTSKHIS_EMP FULL NAME")
if target_col is None:
    st.error("No se encontró la columna **EXTSKHIS_EMP FULL NAME** (o equivalente). Verifica los encabezados.")
    st.write("Columnas detectadas:", list(df.columns))
    st.stop()

# Basic cleanup of the field: cast to text and trim surrounding whitespace.
# NOTE(review): astype(str) turns missing values into the literal text "nan",
# so rows without a name are counted under that label — confirm this is wanted.
df[target_col] = df[target_col].astype(str).str.strip()
|
| 134 |
+
|
| 135 |
+
# -----------------------------
# Controls
# -----------------------------
# Filter widgets, added to the sidebar below the upload section.
st.sidebar.divider()
st.sidebar.subheader("Filtros")
search = st.sidebar.text_input(
    "Filtrar por nombre (contiene)",
    placeholder="Ej: Maria, Juan...",
)
min_count = st.sidebar.number_input(
    "Mínimo de ocurrencias",
    min_value=1,
    value=1,
    step=1,
)
top_n = st.sidebar.slider(
    "Mostrar Top N",
    min_value=5,
    max_value=100,
    value=20,
    step=5,
)
sort_mode = st.sidebar.radio(
    "Orden",
    ["Por conteo (desc)", "Alfabético (A→Z)"],
    index=0,
)
|
| 145 |
+
|
| 146 |
+
# Apply the free-text filter (case-insensitive "contains").
df_filtered = df
if search:
    needle = search.lower()
    # regex=False: the box holds plain text typed by the user. With the
    # default regex matching, input such as "(" raises re.error and "."
    # matches any character.
    name_matches = df[target_col].str.lower().str.contains(needle, na=False, regex=False)
    df_filtered = df[name_matches]

# Count rows per name; dropna=False keeps a group for missing keys, if any.
counts = (
    df_filtered.groupby(target_col, dropna=False)
    .size()
    .reset_index(name="Count")
)

# Keep only names that occur at least `min_count` times.
counts = counts[counts["Count"] >= min_count]

# Sort either by frequency (highest first) or alphabetically by name.
if sort_mode == "Por conteo (desc)":
    counts = counts.sort_values("Count", ascending=False)
else:
    counts = counts.sort_values(target_col, ascending=True)

# Top N rows in the chosen order; this is what the chart displays.
counts_top = counts.head(top_n)
|
| 170 |
+
|
| 171 |
+
# -----------------------------
# KPIs
# -----------------------------
c1, c2, c3 = st.columns(3)
# One card per column: (label, value), rendered with the kpi-* CSS classes.
kpi_cards = (
    ("Registros totales", len(df)),
    ("Nombres únicos", counts[target_col].nunique()),
    ("Mostrando en gráfico", len(counts_top)),
)
for kpi_column, (kpi_label, kpi_value) in zip((c1, c2, c3), kpi_cards):
    kpi_column.markdown(
        "<div class='kpi-card'>"
        f"<div class='kpi-label'>{kpi_label}</div>"
        f"<div class='kpi-value'>{pretty_number(kpi_value)}</div>"
        "</div>",
        unsafe_allow_html=True,
    )

# Data source shown as a caption under the cards.
st.caption(source_label)
|
| 193 |
+
|
| 194 |
+
# -----------------------------
# Chart
# -----------------------------
st.markdown("### 🔎 Conteo por **EXTSKHIS_EMP FULL NAME**")

if not counts_top.empty:
    # The bars always follow descending count, whatever table order was chosen.
    ranked = counts_top.sort_values("Count", ascending=False)
    category_order = ranked[target_col].tolist()
    # Taller figure once more than 25 bars are shown.
    chart_height = 800 if len(counts_top) > 25 else 600

    fig = px.bar(
        counts_top,
        x="Count",
        y=target_col,
        orientation="h",
        text="Count",
        category_orders={target_col: category_order},
        height=chart_height,
    )
    fig.update_traces(textposition="outside", cliponaxis=False)
    fig.update_layout(
        xaxis_title="Conteo",
        yaxis_title="Nombre",
        margin={"l": 10, "r": 10, "t": 30, "b": 10},
        bargap=0.25,
    )
    st.plotly_chart(fig, use_container_width=True, theme="streamlit")
else:
    st.warning("No hay filas que cumplan los filtros actuales.")
|
| 224 |
+
|
| 225 |
+
# -----------------------------
# Detail table
# -----------------------------
detail_panel = st.expander("📄 Ver tabla de conteos")
detail_panel.dataframe(counts.reset_index(drop=True), use_container_width=True)

# -----------------------------
# Download results
# -----------------------------
# The full counts table (not only the Top N) is exported as UTF-8 CSV.
csv_text = counts.to_csv(index=False)
csv_bytes = csv_text.encode("utf-8")
st.download_button(
    label="⬇️ Descargar conteos (CSV)",
    data=csv_bytes,
    file_name="employee_counts.csv",
    mime="text/csv",
)
|