Spaces:

Linhz
/

Demo_final

Paused

App Files Files Community

Linhz committed on May 26, 2024

Commit

3a940c5

verified ·

1 Parent(s): d9102e7

Rename App3.py to app.py

Browse files

Files changed (1) hide show

App3.py → app.py +60 -61

App3.py → app.py RENAMED Viewed

@@ -1,61 +1,60 @@
-from openai import OpenAI
-import streamlit as st
-import faiss
-from sentence_transformers import SentenceTransformer
-import pickle
-import re
-from transformers import pipeline
-st.title("Vietnamese Legal Question Answering System")
-with open('articles.pkl', 'rb') as file:
-    articles = pickle.load(file)
-index_loaded = faiss.read_index("sentence_embeddings_index_no_citation.faiss")
-if 'model_embedding' not in st.session_state:
-    print("ERROR")
-    st.session_state.model_embedding = SentenceTransformer('bkai-foundation-models/vietnamese-bi-encoder')
-# Replace this with your own checkpoint
-model_checkpoint = "model"
-question_answerer = pipeline("question-answering", model=model_checkpoint)
-def question_answering(question):
-    print(question)
-    query_sentence = [question]
-    query_embedding = st.session_state.model_embedding.encode(query_sentence)
-    k = 5
-    D, I = index_loaded.search(query_embedding.astype('float32'), k)  # D is distances, I is indices
-    answer = [question_answerer(question=query_sentence[0], context=articles[I[0][i]], max_answer_len = 512) for i in range(k)]
-    best_answer = max(answer, key=lambda x: x['score'])
-    print(best_answer['answer'])
-    if best_answer['score'] > 0.5:
-        return best_answer['answer']
-    return f"Tôi không chắc lắm nhưng có lẽ câu trả lời là: {best_answer['answer']}"
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.markdown(message["content"])
-def clean_answer(s):
-    # Sử dụng regex để loại bỏ tất cả các ký tự đặc biệt ở cuối chuỗi
-    return re.sub(r'[^a-zA-Z0-9]+$', '', s)
-if prompt := st.chat_input("What is up?"):
-        st.session_state.messages.append({"role": "user", "content": prompt})
-        with st.chat_message("user"):
-            st.markdown(prompt)
-        response = clean_answer(question_answering(prompt))
-        with st.chat_message("assistant"):
-            st.markdown(response)
-        st.session_state.messages.append({"role": "assistant", "content": response})

+import streamlit as st
+import faiss
+from sentence_transformers import SentenceTransformer
+import pickle
+import re
+from transformers import pipeline
+# Page title shown at the top of the Streamlit app.
+st.title("Vietnamese Legal Question Answering System")
+# Load the pickled article collection; question_answering() indexes into it
+# to obtain the context passages for the QA model.
+# NOTE(review): pickle.load can execute arbitrary code from the file, so
+# articles.pkl must come from a trusted source.
+with open('articles.pkl', 'rb') as file:
+    articles = pickle.load(file)
+# FAISS index that question_answering() searches with the question embedding.
+index_loaded = faiss.read_index("sentence_embeddings_index_no_citation.faiss")
+# Create the sentence-embedding model only when the session does not hold one
+# yet, and keep it in the session state for later use.
+# NOTE(review): "ERROR" is printed whenever the model is not yet in the
+# session state, i.e. on a normal first load rather than on a failure;
+# presumably leftover debug output -- confirm before relying on it.
+if 'model_embedding' not in st.session_state:
+    print("ERROR")
+    st.session_state.model_embedding = SentenceTransformer('bkai-foundation-models/vietnamese-bi-encoder')
+# Replace this with your own checkpoint
+model_checkpoint = "model"
+# Question-answering pipeline built from the checkpoint named above.
+question_answerer = pipeline("question-answering", model=model_checkpoint)
+def question_answering(question, k=5, min_score=0.5):
+    """Answer *question* from the articles retrieved for it.
+
+    The question is embedded with ``st.session_state.model_embedding``, the
+    ``k`` nearest entries are looked up in ``index_loaded``, and the
+    ``question_answerer`` pipeline is run once per retrieved article. The
+    candidate with the highest score is selected.
+
+    Args:
+        question: The user's question text.
+        k: Number of nearest neighbours requested from the index.
+        min_score: Score the best candidate must exceed to be returned
+            without a hedging prefix.
+
+    Returns:
+        The best answer span. It is prefixed with a Vietnamese
+        "I am not sure, but perhaps the answer is" phrase when its score is
+        not above ``min_score``. A Vietnamese apology is returned when the
+        index yields no usable neighbour.
+    """
+    print(question)
+    query_embedding = st.session_state.model_embedding.encode([question])
+    # search() returns (distances, indices); only the indices are needed.
+    _, neighbours = index_loaded.search(query_embedding.astype('float32'), k)
+    # FAISS pads the result with -1 when it finds fewer than k neighbours.
+    # Indexing a list with -1 would silently select its last element, so
+    # those placeholders are dropped before looking up the articles.
+    hits = [idx for idx in neighbours[0] if idx >= 0]
+    if not hits:
+        return "Xin lỗi, tôi không tìm thấy điều luật phù hợp với câu hỏi"
+    candidates = [
+        question_answerer(question=question, context=articles[idx], max_answer_len=512)
+        for idx in hits
+    ]
+    best_answer = max(candidates, key=lambda x: x['score'])
+    print(best_answer['answer'])
+    if best_answer['score'] > min_score:
+        return best_answer['answer']
+    return f"Tôi không chắc lắm nhưng có lẽ câu trả lời là: {best_answer['answer']}"
+# Make sure the session has a chat history, then render every stored message
+# in a chat bubble for its role.
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+for entry in st.session_state.messages:
+    bubble = st.chat_message(entry["role"])
+    with bubble:
+        st.markdown(entry["content"])
+def clean_answer(s):
+    """Strip trailing punctuation, symbols and whitespace from *s*.
+
+    Characters are removed from the end until a letter or digit is reached.
+    Letters and digits of any script count, so an answer that ends in an
+    accented Vietnamese letter (e.g. "tù") keeps its last character; an
+    ASCII-only class such as ``[^a-zA-Z0-9]`` would cut such letters off.
+
+    Args:
+        s: The answer text.
+
+    Returns:
+        ``s`` without its trailing non-alphanumeric characters, or an empty
+        string when ``s`` contains no letter or digit at all.
+    """
+    import unicodedata
+
+    end = len(s)
+    while end > 0:
+        ch = s[end - 1]
+        # Combining marks (category M*) belong to the preceding letter when
+        # the text is in decomposed form, so they are kept as well.
+        if ch.isalnum() or unicodedata.category(ch).startswith('M'):
+            break
+        end -= 1
+    return s[:end]
+# Handle a newly submitted chat message: store and show the user's text,
+# compute the cleaned answer, then show and store the assistant's reply.
+if prompt := st.chat_input("What is up?"):
+    user_entry = {"role": "user", "content": prompt}
+    st.session_state.messages.append(user_entry)
+    with st.chat_message("user"):
+        st.markdown(prompt)
+    raw_answer = question_answering(prompt)
+    response = clean_answer(raw_answer)
+    with st.chat_message("assistant"):
+        st.markdown(response)
+    assistant_entry = {"role": "assistant", "content": response}
+    st.session_state.messages.append(assistant_entry)