Spaces:

son9john
/

YENA

Build error

App Files Files Community

son9john committed on Mar 9, 2023

Commit

6e8f0e8

0 Parent(s):

Duplicate from son9john/USMLE

Browse files

Files changed (6) hide show

install_espeak_ng.sh +11 -0
.gitattributes +34 -0
README.md +14 -0
app.py +146 -0
packages.txt +2 -0
requirements.txt +14 -0

install_espeak_ng.sh ADDED Viewed

	@@ -0,0 +1,11 @@

+(
+  mkdir -p espeak
+  cd espeak
+  wget https://github.com/espeak-ng/espeak-ng/archive/refs/tags/1.51.zip
+  unzip -qq 1.51.zip
+  cd espeak-ng-1.51
+  ./autogen.sh
+  ./configure --prefix=`pwd`/../usr
+  make
+  make install
+)

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+title: USMLE
+emoji: 📈
+colorFrom: blue
+colorTo: gray
+sdk: gradio
+sdk_version: 3.19.1
+app_file: app.py
+pinned: false
+license: bigscience-openrail-m
+duplicated_from: son9john/USMLE
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,146 @@

+import openai
+import gradio as gr
+from gradio.components import Audio, Textbox
+import os
+import re
+import tiktoken
+from transformers import GPT2Tokenizer
+import whisper
+import pandas as pd
+from datetime import datetime, timezone, timedelta
+import notion_df
+import concurrent.futures
+# Load the GPT-2 tokenizer (used below to truncate typed input and to count conversation tokens) and set the OpenAI API key; NOTE(review): `model` ends up bound to the API key string, not to a model
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
+model = openai.api_key = os.environ["OPENAI_API_KEY"]
+# System prompt that seeds every conversation; `messages` is the module-level chat history that transcribe() appends to and resets
+initial_message = {"role": "system", "content": 'You are a USMLE Tutor. Respond with ALWAYS layered "bullet points" (listing rather than sentences) to all input with a fun mneumonics to memorize that list. But you can answer up to 1200 words if the user requests longer response.'}
+messages = [initial_message]
+# Counter incremented by transcribe() on each call and reset to 0 with the history; it is printed in the "[ANSWER n]" transcript prefix
+answer_count = 0
+# Notion API key, read from the API_KEY environment variable (raises KeyError at import time if it is unset)
+API_KEY = os.environ["API_KEY"]
+def transcribe(audio, text):
+    global messages
+    global answer_count
+    transcript = {'text': ''}
+    input_text = []
+    # Transcribe the audio if provided
+    if audio is not None:
+        audio_file = open(audio, "rb")
+        transcript = openai.Audio.transcribe("whisper-1", audio_file, language="en")
+    # Tokenize the text input
+    if text is not None:
+        # Split the input text into sentences
+        sentences = re.split("(?<=[.!?]) +", text)
+        # Initialize a list to store the tokens
+        input_tokens = []
+        # Add each sentence to the input_tokens list
+        for sentence in sentences:
+            # Tokenize the sentence using the GPT-2 tokenizer
+            sentence_tokens = tokenizer.encode(sentence)
+            # Check if adding the sentence would exceed the token limit
+            if len(input_tokens) + len(sentence_tokens) < 1440:
+                # Add the sentence tokens to the input_tokens list
+                input_tokens.extend(sentence_tokens)
+            else:
+                # If adding the sentence would exceed the token limit, truncate it
+                sentence_tokens = sentence_tokens[:1440-len(input_tokens)]
+                input_tokens.extend(sentence_tokens)
+                break
+        # Decode the input tokens into text
+        input_text = tokenizer.decode(input_tokens)
+        # Add the input text to the messages list
+    messages.append({"role": "user", "content": transcript["text"]+input_text})
+    # Check if the accumulated tokens have exceeded 2096
+    num_tokens = sum(len(tokenizer.encode(message["content"])) for message in messages)
+    if num_tokens > 2096:
+        # Concatenate the chat history
+        chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages if message['role'] != 'system'])
+        # Append the number of tokens used to the end of the chat transcript
+        chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"
+        # Get the current time in Eastern Time (ET)
+        now_et = datetime.now(timezone(timedelta(hours=-5)))
+        # Format the time as string (YY-MM-DD HH:MM)
+        published_date = now_et.strftime('%m-%d-%y %H:%M')
+        # Upload the chat transcript to Notion
+        df = pd.DataFrame([chat_transcript])
+        notion_df.upload(df, 'https://www.notion.so/page-827360c361f347f7bfefcc6dfbd10e51', title=str(published_date+'back_up'), api_key=API_KEY)
+        # Reset the messages list and answer counter
+        messages = [initial_message]
+        answer_count = 0
+        input_text = 'Can you click the Submit button one more time? (say Yes)'
+        # Add the input text to the messages list
+        messages.append({"role": "user", "content": input_text})
+    else:
+        # Increment the answer counter
+        answer_count += 1
+    # Generate the system message using the OpenAI API
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        prompt = [{"text": f"{message['role']}: {message['content']}\n\n"} for message in messages]
+        system_message = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=messages,
+            max_tokens=2000
+        )["choices"][0]["message"]
+    # Wait for the completion of the OpenAI API call
+    # Add the system message to the messages list
+    messages.append(system_message)
+    # Concatenate the chat history
+    chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages if message['role'] != 'system'])
+    chat_transcript_copy = chat_transcript
+    # Append the number of tokens used to the end of the chat transcript
+    chat_transcript_copy += f"\n\nNumber of tokens used: {num_tokens}\n\n"
+    # Save the chat transcript to a file
+    with open("conversation_history.txt", "a") as f:
+        f.write(chat_transcript_copy)
+    # Upload the chat transcript to Notion
+    now_et = datetime.now(timezone(timedelta(hours=-5)))
+    published_date = now_et.strftime('%m-%d-%y %H:%M')
+    df = pd.DataFrame([chat_transcript_copy])
+    notion_df.upload(df, 'https://www.notion.so/page-827360c361f347f7bfefcc6dfbd10e51', title=str(published_date), api_key=API_KEY)
+    # Return the chat transcript
+    return chat_transcript
+# Gradio input/output components: microphone audio delivered as a file path, a free-text box for typed input, and a text box for the reply
+audio_input = Audio(source="microphone", type="filepath", label="Record your message")
+text_input = Textbox(label="Type your message", max_length=4096)
+output_text = gr.outputs.Textbox(label="Response")
+output_audio = Audio()
+# Build the Gradio interface around transcribe(); NOTE(review): output_audio above is created but is not listed in `outputs`
+iface = gr.Interface(
+    fn=transcribe,
+    inputs=[audio_input, text_input],
+    outputs=[output_text],
+    title="Your Excellence Never Abates (YENA)",
+    description="Talk to the AI Tutor YENA",
+    theme="compact",
+    layout="vertical",
+    allow_flagging=False
+    )
+# Start the Gradio app
+iface.launch()

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ espeak
2	+ ffmpeg

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+torch
+openai
+transformers
+sentencepiece
+pipeline
+py-espeak-ng
+pyttsx3
+nltk
+tiktoken
+openai-whisper
+dropbox
+notion-client
+notion-df
+pandas