de-Rodrigo committed on
Commit
7affc5d
·
1 Parent(s): 003c3e3

Try Visualizing Llava Embeddings

Browse files
app.py CHANGED
@@ -191,6 +191,45 @@ def load_embeddings(model, version, embedding_prefix, weight_factor):
191
  "synthetic": pd.concat([df_seq, df_line, df_par, df_rot, df_zoom, df_render], ignore_index=True),
192
  "pretrained": df_pretratrained}
193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
  else:
196
  st.error("Modelo no reconocido")
@@ -1667,7 +1706,7 @@ def run_model(model_name):
1667
 
1668
  def main():
1669
  config_style()
1670
- tabs = st.tabs(["Donut", "Idefics2" , "Paligemma"])
1671
  with tabs[0]:
1672
  st.markdown('<h2 class="sub-title">Donut 🤗</h2>', unsafe_allow_html=True)
1673
  run_model("Donut")
@@ -1677,6 +1716,9 @@ def main():
1677
  with tabs[2]:
1678
  st.markdown('<h2 class="sub-title">Paligemma 🤗</h2>', unsafe_allow_html=True)
1679
  run_model("Paligemma")
 
 
 
1680
 
1681
  if __name__ == "__main__":
1682
  model_options = [
 
191
  "synthetic": pd.concat([df_seq, df_line, df_par, df_rot, df_zoom, df_render], ignore_index=True),
192
  "pretrained": df_pretratrained}
193
 
194
+ elif model == "Llava":
195
+ df_real = pd.read_csv(f"data/llava/{version}/{embedding_prefix}/de_Rodrigo_merit_secret_all_{weight_factor}embeddings.csv")
196
+ #TODO Embeddings de Paligemma se mantienen para evitar error
197
+ df_par = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-paragraph-degradation-seq_{weight_factor}embeddings.csv")
198
+ df_line = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-line-degradation-seq_{weight_factor}embeddings.csv")
199
+ df_seq = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-seq_{weight_factor}embeddings.csv")
200
+ df_rot = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-rotation-degradation-seq_{weight_factor}embeddings.csv")
201
+ df_zoom = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-zoom-degradation-seq_{weight_factor}embeddings.csv")
202
+ df_render = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-render-seq_{weight_factor}embeddings.csv")
203
+
204
+ # Cargar ambos subconjuntos pretrained y combinarlos
205
+ # TODO Pretrained de idefics2, se mantienen para evitar error, pero se debe meter los de paligemma
206
+ df_pretratrained_PDFA = pd.read_csv(f"data/idefics2/{version}/{embedding_prefix}/de_Rodrigo_merit_aux_PDFA_{weight_factor}embeddings.csv")
207
+ df_pretratrained_IDL = pd.read_csv(f"data/idefics2/{version}/{embedding_prefix}/de_Rodrigo_merit_aux_IDL_{weight_factor}embeddings.csv")
208
+ df_pretratrained = pd.concat([df_pretratrained_PDFA, df_pretratrained_IDL], ignore_index=True)
209
+
210
+ # Asignar etiquetas de versión
211
+ df_real["version"] = "real"
212
+ df_par["version"] = "synthetic"
213
+ df_line["version"] = "synthetic"
214
+ df_seq["version"] = "synthetic"
215
+ df_rot["version"] = "synthetic"
216
+ df_zoom["version"] = "synthetic"
217
+ df_render["version"] = "synthetic"
218
+ df_pretratrained["version"] = "pretrained"
219
+
220
+ # Asignar fuente (source)
221
+ df_par["source"] = "es-digital-paragraph-degradation-seq"
222
+ df_line["source"] = "es-digital-line-degradation-seq"
223
+ df_seq["source"] = "es-digital-seq"
224
+ df_rot["source"] = "es-digital-rotation-degradation-seq"
225
+ df_zoom["source"] = "es-digital-zoom-degradation-seq"
226
+ df_render["source"] = "es-render-seq"
227
+ df_pretratrained["source"] = "pretrained"
228
+
229
+ return {"real": df_real,
230
+ "synthetic": pd.concat([df_seq, df_line, df_par, df_rot, df_zoom, df_render], ignore_index=True),
231
+ "pretrained": df_pretratrained}
232
+
233
 
234
  else:
235
  st.error("Modelo no reconocido")
 
1706
 
1707
  def main():
1708
  config_style()
1709
+ tabs = st.tabs(["Donut", "Idefics2" , "Paligemma", "Llava"])
1710
  with tabs[0]:
1711
  st.markdown('<h2 class="sub-title">Donut 🤗</h2>', unsafe_allow_html=True)
1712
  run_model("Donut")
 
1716
  with tabs[2]:
1717
  st.markdown('<h2 class="sub-title">Paligemma 🤗</h2>', unsafe_allow_html=True)
1718
  run_model("Paligemma")
1719
+ with tabs[3]:
1720
+ st.markdown('<h2 class="sub-title">Llava 🤗</h2>', unsafe_allow_html=True)
1721
+ run_model("Llava")
1722
 
1723
  if __name__ == "__main__":
1724
  model_options = [
data/{llava_de_Rodrigo_merit_secret_all_embeddings.csv → llava/vanilla/averaged/de_Rodrigo_merit_secret_all_embeddings.csv} RENAMED
File without changes