# Streamlit app: PDF Q&A with OpenAI via LangChain (RetrievalQA over FAISS).
import os
import tempfile

import streamlit as st
import openai
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader

st.title("📄 PDF Q&A mit OpenAI (LangChain)")

# -------------------------------
# Sidebar: OpenAI API key input
# -------------------------------
with st.sidebar:
    openai_api_key = st.text_input("OpenAI API Key", type="password")

# -------------------------------
# PDF upload
# -------------------------------
uploaded_file = st.file_uploader("Lade eine PDF-Datei hoch", type=["pdf"])

# -------------------------------
# Question input (disabled until a PDF has been uploaded)
# -------------------------------
question = st.text_input(
    label="Frage zum Dokument",
    placeholder="Worum geht es in diesem Dokument?",
    disabled=not uploaded_file,
)

# -------------------------------
# Bail out early if a question was asked but no API key was supplied.
# -------------------------------
if uploaded_file and question and not openai_api_key:
    st.info("Bitte zuerst deinen OpenAI API Key eingeben, um fortzufahren.")
    st.stop()

# -------------------------------
# Process the PDF and answer the question
# -------------------------------
if uploaded_file and question and openai_api_key:
    tmp_path = None
    try:
        # BUG FIX: PyPDFLoader expects a filesystem path, not Streamlit's
        # in-memory UploadedFile object. Persist the upload to a temporary
        # file and hand the loader that path instead.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(uploaded_file.getvalue())
            tmp_path = tmp.name

        # 1) Load the PDF from the temp path.
        loader = PyPDFLoader(tmp_path)

        # 2) Split the text into overlapping chunks.
        #    Adjust chunk_size / chunk_overlap here as needed.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=1000,
            chunk_overlap=100,
            length_function=len,
        )
        # load_and_split() loads the pages and splits them in one step.
        documents = loader.load_and_split(text_splitter=text_splitter)

        # 3) Embed the chunks and index them in an in-memory FAISS store.
        embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
        vectorstore = FAISS.from_documents(documents, embeddings)
        retriever = vectorstore.as_retriever()

        # 4) Build the retrieval chain. "stuff" concatenates all retrieved
        #    chunks into a single prompt — simplest chain type.
        llm = ChatOpenAI(
            temperature=0,
            model_name="gpt-4o-mini",
            openai_api_key=openai_api_key,
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
        )

        # 5) Ask the question and collect the answer.
        with st.spinner("Suche relevante Textstellen und generiere Antwort..."):
            answer = qa_chain.run(question)

        # 6) Display the result.
        st.write("### Antwort:")
        st.write(answer)
    except Exception as e:
        st.error(f"Fehler beim Verarbeiten der PDF: {e}")
    finally:
        # Always remove the temp file so reruns don't leak disk space.
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)