Spaces:
Sleeping
Sleeping
Commit
·
7affc5d
1
Parent(s):
003c3e3
Try Visualizing Llava Embeddings
Browse files
app.py
CHANGED
|
@@ -191,6 +191,45 @@ def load_embeddings(model, version, embedding_prefix, weight_factor):
|
|
| 191 |
"synthetic": pd.concat([df_seq, df_line, df_par, df_rot, df_zoom, df_render], ignore_index=True),
|
| 192 |
"pretrained": df_pretratrained}
|
| 193 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
else:
|
| 196 |
st.error("Modelo no reconocido")
|
|
@@ -1667,7 +1706,7 @@ def run_model(model_name):
|
|
| 1667 |
|
| 1668 |
def main():
|
| 1669 |
config_style()
|
| 1670 |
-
tabs = st.tabs(["Donut", "Idefics2" , "Paligemma"])
|
| 1671 |
with tabs[0]:
|
| 1672 |
st.markdown('<h2 class="sub-title">Donut 🤗</h2>', unsafe_allow_html=True)
|
| 1673 |
run_model("Donut")
|
|
@@ -1677,6 +1716,9 @@ def main():
|
|
| 1677 |
with tabs[2]:
|
| 1678 |
st.markdown('<h2 class="sub-title">Paligemma 🤗</h2>', unsafe_allow_html=True)
|
| 1679 |
run_model("Paligemma")
|
|
|
|
|
|
|
|
|
|
| 1680 |
|
| 1681 |
if __name__ == "__main__":
|
| 1682 |
model_options = [
|
|
|
|
| 191 |
"synthetic": pd.concat([df_seq, df_line, df_par, df_rot, df_zoom, df_render], ignore_index=True),
|
| 192 |
"pretrained": df_pretratrained}
|
| 193 |
|
| 194 |
+
elif model == "Llava":
|
| 195 |
+
df_real = pd.read_csv(f"data/llava/{version}/{embedding_prefix}/de_Rodrigo_merit_secret_all_{weight_factor}embeddings.csv")
|
| 196 |
+
# TODO: the Paligemma embeddings are kept here (loaded from data/paligemma/) to avoid an error
|
| 197 |
+
df_par = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-paragraph-degradation-seq_{weight_factor}embeddings.csv")
|
| 198 |
+
df_line = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-line-degradation-seq_{weight_factor}embeddings.csv")
|
| 199 |
+
df_seq = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-seq_{weight_factor}embeddings.csv")
|
| 200 |
+
df_rot = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-rotation-degradation-seq_{weight_factor}embeddings.csv")
|
| 201 |
+
df_zoom = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-zoom-degradation-seq_{weight_factor}embeddings.csv")
|
| 202 |
+
df_render = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-render-seq_{weight_factor}embeddings.csv")
|
| 203 |
+
|
| 204 |
+
# Load both pretrained subsets and combine them
|
| 205 |
+
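# TODO: the idefics2 pretrained embeddings are kept here to avoid an error, but they should be replaced (the original note says "with the Paligemma ones"; NOTE(review): this is the Llava branch — confirm whether Llava embeddings are meant)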
|
| 206 |
+
df_pretratrained_PDFA = pd.read_csv(f"data/idefics2/{version}/{embedding_prefix}/de_Rodrigo_merit_aux_PDFA_{weight_factor}embeddings.csv")
|
| 207 |
+
df_pretratrained_IDL = pd.read_csv(f"data/idefics2/{version}/{embedding_prefix}/de_Rodrigo_merit_aux_IDL_{weight_factor}embeddings.csv")
|
| 208 |
+
df_pretratrained = pd.concat([df_pretratrained_PDFA, df_pretratrained_IDL], ignore_index=True)
|
| 209 |
+
|
| 210 |
+
# Assign version labels
|
| 211 |
+
df_real["version"] = "real"
|
| 212 |
+
df_par["version"] = "synthetic"
|
| 213 |
+
df_line["version"] = "synthetic"
|
| 214 |
+
df_seq["version"] = "synthetic"
|
| 215 |
+
df_rot["version"] = "synthetic"
|
| 216 |
+
df_zoom["version"] = "synthetic"
|
| 217 |
+
df_render["version"] = "synthetic"
|
| 218 |
+
df_pretratrained["version"] = "pretrained"
|
| 219 |
+
|
| 220 |
+
# Assign the source label
|
| 221 |
+
df_par["source"] = "es-digital-paragraph-degradation-seq"
|
| 222 |
+
df_line["source"] = "es-digital-line-degradation-seq"
|
| 223 |
+
df_seq["source"] = "es-digital-seq"
|
| 224 |
+
df_rot["source"] = "es-digital-rotation-degradation-seq"
|
| 225 |
+
df_zoom["source"] = "es-digital-zoom-degradation-seq"
|
| 226 |
+
df_render["source"] = "es-render-seq"
|
| 227 |
+
df_pretratrained["source"] = "pretrained"
|
| 228 |
+
|
| 229 |
+
return {"real": df_real,
|
| 230 |
+
"synthetic": pd.concat([df_seq, df_line, df_par, df_rot, df_zoom, df_render], ignore_index=True),
|
| 231 |
+
"pretrained": df_pretratrained}
|
| 232 |
+
|
| 233 |
|
| 234 |
else:
|
| 235 |
st.error("Modelo no reconocido")
|
|
|
|
| 1706 |
|
| 1707 |
def main():
|
| 1708 |
config_style()
|
| 1709 |
+
tabs = st.tabs(["Donut", "Idefics2" , "Paligemma", "Llava"])
|
| 1710 |
with tabs[0]:
|
| 1711 |
st.markdown('<h2 class="sub-title">Donut 🤗</h2>', unsafe_allow_html=True)
|
| 1712 |
run_model("Donut")
|
|
|
|
| 1716 |
with tabs[2]:
|
| 1717 |
st.markdown('<h2 class="sub-title">Paligemma 🤗</h2>', unsafe_allow_html=True)
|
| 1718 |
run_model("Paligemma")
|
| 1719 |
+
with tabs[3]:
|
| 1720 |
+
st.markdown('<h2 class="sub-title">Llava 🤗</h2>', unsafe_allow_html=True)
|
| 1721 |
+
run_model("Llava")
|
| 1722 |
|
| 1723 |
if __name__ == "__main__":
|
| 1724 |
model_options = [
|
data/{llava_de_Rodrigo_merit_secret_all_embeddings.csv → llava/vanilla/averaged/de_Rodrigo_merit_secret_all_embeddings.csv}
RENAMED
|
File without changes
|