Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import spacy
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from docx import Document
|
| 5 |
+
from io import BytesIO
|
| 6 |
+
|
| 7 |
+
# Cargar el modelo de SpaCy en español
|
| 8 |
+
# Spanish spaCy pipeline, loaded once at module import and reused by every call
# to extract_names_from_docx.
nlp = spacy.load('es_core_news_sm')
|
| 9 |
+
|
| 10 |
+
# Función para extraer nombres de personas
|
| 11 |
+
def extract_names_from_docx(docx_file):
    """Extract person names from a .docx file and export them to Excel.

    The text of every paragraph in the document is joined into a single
    string and processed with the module-level spaCy pipeline ``nlp``.
    Every entity labelled ``PER`` is collected; duplicates are removed
    while keeping the order in which names first appear, so the resulting
    sheet is deterministic for a given document.

    Args:
        docx_file: The uploaded document, passed straight to
            ``docx.Document`` (a path or a file-like object).

    Returns:
        A ``(path, filename)`` tuple: the filesystem path of the generated
        workbook and its display name, ``"nombres_personas.xlsx"``.
    """
    # Local imports keep this block self-contained.
    import os
    import tempfile

    filename = "nombres_personas.xlsx"

    # Read the document and flatten its paragraphs into one string.
    document = Document(docx_file)
    text = ' '.join(para.text for para in document.paragraphs)

    # Run the NLP pipeline and keep only person entities.
    doc = nlp(text)
    persons = [ent.text for ent in doc.ents if ent.label_ == 'PER']

    # Drop duplicates but preserve first-seen order (a plain set() would
    # shuffle the rows between runs).
    persons = list(dict.fromkeys(persons))

    df = pd.DataFrame(persons, columns=['Nombres'])

    # The Gradio File output component expects a path on disk, not an
    # in-memory buffer, so write the workbook into a fresh temporary
    # directory under the user-facing filename.
    # NOTE(review): the temporary directory is not removed here; confirm
    # whether cleanup is needed for long-running deployments.
    out_dir = tempfile.mkdtemp()
    out_path = os.path.join(out_dir, filename)
    df.to_excel(out_path, index=False, engine='openpyxl')

    return out_path, filename
|
| 40 |
+
|
| 41 |
+
# Interfaz de Gradio
|
| 42 |
+
# Build the web UI: one .docx upload as input; the two values returned by
# extract_names_from_docx are shown as a file component and a text box.
iface = gr.Interface(
    extract_names_from_docx,
    gr.File(file_types=[".docx"]),
    [gr.File(), "text"],
    title="Extractor de Nombres",
    description=(
        "Sube un archivo .docx y extrae los nombres de las personas "
        "usando NLP con SpaCy. Descarga el resultado en un archivo Excel."
    ),
)

# Start the application.
iface.launch()
|