Shunfeng Zheng committed
Update 1_SpatialParse.py

1_SpatialParse.py CHANGED (+59 -30)
@@ -11,33 +11,36 @@ from spacy.tokens import Span, Doc, Token
 from utils import geoutil
 import urllib.parse
 import os
-
+import requests
+from spacy.tokens import Doc
+from spacy.lang.en import English
+
 
 import pydantic
+print("Pydantic version:", pydantic.__version__)
+
 
-
+API_TOKEN = os.getenv('API_TOKEN1')
 
+BACKEND_URL = "https://dsbb0707-dockerb2.hf.space/api/predict/"
+def call_backend(input_text):
+    try:
+        headers = {
+            "Authorization": f"Bearer {API_TOKEN}"
+        }
+        response = requests.post(
+            BACKEND_URL,
+            headers=headers,
+            json={"data": [input_text]},
+            timeout=10
+        )
+        if response.status_code == 200:
+            return response.json()  # ✅ keep the raw JSON object (dict)
+        return {"error": f"❌ Backend Error (HTTP {response.status_code})"}
+    except Exception as e:
+        return {"error": f"⚠️ Connection Error: {str(e)}"}
 
 
-# API_TOKEN = 'hf_'
-# BACKEND_URL = "https://dsbb0707-dockerb2.hf.space/api/predict/"
-# def call_backend(input_text):
-#     try:
-#         headers = {
-#             "Authorization": f"Bearer {API_TOKEN}"
-#         }
-#         response = requests.post(
-#             BACKEND_URL,
-#             headers=headers,
-#             json={"data": [input_text]},
-#             timeout=10
-#         )
-#         if response.status_code == 200:
-#             result = response.json()["data"][0]
-#             return f"✅ {result['result']}\n⏰ {result['timestamp']}"
-#         return f"❌ Backend Error (HTTP {response.status_code})"
-#     except Exception as e:
-#         return f"⚠️ Connection Error: {str(e)}"
 
 
 colors = {'GPE': "#43c6fc", "LOC": "#fd9720", "RSE":"#a6e22d"}
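Annotation: the old, commented-out call_backend returned a pre-formatted string, while the new version returns the parsed JSON dict (or a dict carrying an "error" key), so callers unpack it themselves. A minimal consumer sketch under that assumption — the {"data": [...]} response shape is the one extract_spatial_entities unpacks in the second hunk; the sample sentence is hypothetical:

# Sketch: consuming call_backend as defined above (names from this commit)
result = call_backend("Between Burwood and Glebe.")

if "error" in result:
    # HTTP failures and connection errors are wrapped under an "error" key
    print(result["error"])
else:
    # Gradio-style /api/predict/ responses carry predictions under "data";
    # the app takes the first element as the serialized spaCy Doc (a plain dict)
    doc_element = result["data"][0]
    print(doc_element["text"])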
@@ -230,18 +233,44 @@ def extract_spatial_entities(text):
     # # doc.to_disk("saved_doc.spacy")
     # doc.to_disk("/tmp/saved_doc.spacy")
 
-
-
-
+    Span.set_extension("rse_id", default="", force=True)
+    api_result = call_backend(text)
+    print(api_result, 'dadada')
+    doc_element = api_result["data"][0]
+    st.markdown(type(api_result))
+    st.markdown(doc_element)
+
+    # doc_element = {'text': 'Between Burwood and Glebe.', 'ents': [{'start': 8, 'end': 15, 'label': 'GPE'}, {'start': 20, 'end': 25, 'label': 'GPE'}], 'tokens': [{'id': 0, 'start': 0, 'end': 7}, {'id': 1, 'start': 8, 'end': 15}, {'id': 2, 'start': 16, 'end': 19}, {'id': 3, 'start': 20, 'end': 25}, {'id': 4, 'start': 25, 'end': 26}], 'ents_ext': [{'start': 8, 'end': 15, 'label': 'GPE', 'rse_id': 'Burwood'}, {'start': 20, 'end': 25, 'label': 'GPE', 'rse_id': 'Glebe'}]}
+    # doc_element = {'text': 'I would like to know where is the area between Burwood and Glebe. Pyrmont.', 'ents': [{'start': 47, 'end': 54, 'label': 'GPE'}, {'start': 59, 'end': 64, 'label': 'GPE'}, {'start': 66, 'end': 73, 'label': 'GPE'}], 'sents': [{'start': 0, 'end': 65}, {'start': 66, 'end': 74}], 'tokens': [{'id': 0, 'start': 0, 'end': 1}, {'id': 1, 'start': 2, 'end': 7}, {'id': 2, 'start': 8, 'end': 12}, {'id': 3, 'start': 13, 'end': 15}, {'id': 4, 'start': 16, 'end': 20}, {'id': 5, 'start': 21, 'end': 26}, {'id': 6, 'start': 27, 'end': 29}, {'id': 7, 'start': 30, 'end': 33}, {'id': 8, 'start': 34, 'end': 38}, {'id': 9, 'start': 39, 'end': 46}, {'id': 10, 'start': 47, 'end': 54}, {'id': 11, 'start': 55, 'end': 58}, {'id': 12, 'start': 59, 'end': 64}, {'id': 13, 'start': 64, 'end': 65}, {'id': 14, 'start': 66, 'end': 73}, {'id': 15, 'start': 73, 'end': 74}]}
+    # doc_element =
+
+
+
+    nlp = English()
+    nlp.add_pipe("sentencizer")
+    doc = Doc(nlp.vocab).from_json(doc_element)
+    doc = nlp.get_pipe("sentencizer")(doc)
+    st.markdown(type(doc))
+
+    for ent_ext in doc_element["ents_ext"]:
+        for ent in doc.ents:
+            if ent.start_char == ent_ext["start"] and ent.end_char == ent_ext["end"]:
+                ent._.rse_id = ent_ext["rse_id"]
+
+
+    doc = set_selected_entities(doc)
+    doc.to_disk("saved_doc.spacy")
+    doc.to_disk("/tmp/saved_doc.spacy")
+
 
-
-
-
-
+    html = displacy.render(doc,style="ent", options = options)
+    html = html.replace("\n","")
+    st.write(HTML_WRAPPER.format(html),unsafe_allow_html=True)
+    show_spatial_ent_table(doc, text)
 
-
+    st.markdown("123123")
 
-
+    show_sentence_selector_table(doc)
     pass
 def show_sentence_selector_table(doc_copy):
     st.markdown("**______________________________________________________________________________________**")
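Annotation: the rebuild above leans on spaCy's Doc.from_json (available since spaCy 3.3), which restores a Doc from the dict that Doc.to_json produces, given only a shared vocab; the sentencizer is then re-applied, as in the commit, so sentence boundaries exist even when the payload lacks a 'sents' key. A self-contained sketch of that round trip, with the sample sentence borrowed from the commented-out payloads:

# Sketch: Doc -> JSON -> Doc round trip (requires spaCy >= 3.3)
from spacy.lang.en import English
from spacy.tokens import Doc

nlp = English()
nlp.add_pipe("sentencizer")

# Producer side: tokenize and serialize to a plain dict
src = nlp("Between Burwood and Glebe.")
doc_json = src.to_json()  # keys: "text", "tokens", "ents", "sents"

# Consumer side: rebuild from JSON onto an empty Doc, then re-run
# the sentencizer so doc.sents is populated regardless of the payload
doc = Doc(nlp.vocab).from_json(doc_json)
doc = nlp.get_pipe("sentencizer")(doc)

print([t.text for t in doc])   # tokens survive the round trip
print(list(doc.sents))         # sentence boundaries re-derived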