Update app.py
Browse files
app.py
CHANGED
|
@@ -16,7 +16,7 @@ tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
|
|
| 16 |
model = openai.api_key = os.environ["OAPI_KEY"]
|
| 17 |
|
| 18 |
# Define the initial message and messages list
|
| 19 |
-
initialt = 'If user asked COLORIZE,
|
| 20 |
to all input with a fun mneumonics to memorize that list. But you can answer up to 1200 words if the user requests longer response. \
|
| 21 |
You are going to keep answer and also challenge the student to learn USMLE anatomy, phsysiology, and pathology.'
|
| 22 |
initial_message = {"role": "system", "content": initialt}
|
|
@@ -29,6 +29,48 @@ answer_count = 0
|
|
| 29 |
# Define the Notion API key
|
| 30 |
API_KEY = os.environ["NAPI_KEY"]
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
def transcribe(audio, text):
|
| 33 |
global messages
|
| 34 |
global answer_count
|
|
@@ -104,42 +146,53 @@ def transcribe(audio, text):
|
|
| 104 |
max_tokens=2000
|
| 105 |
)["choices"][0]["message"]
|
| 106 |
# Wait for the completion of the OpenAI API call
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
| 108 |
# Add the system message to the messages list
|
| 109 |
messages.append(system_message)
|
| 110 |
-
|
| 111 |
# Add the system message to the beginning of the messages list
|
| 112 |
messages_rev.insert(0, system_message)
|
| 113 |
# Add the input text to the messages list
|
| 114 |
messages_rev.insert(0, {"role": "user", "content": input_text + transcript["text"]})
|
| 115 |
|
| 116 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'system'])
|
| 118 |
|
| 119 |
# chat_transcript_copy = chat_transcript
|
| 120 |
# Append the number of tokens used to the end of the chat transcript
|
| 121 |
chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"
|
| 122 |
-
|
| 123 |
-
# Upload the chat transcript to Notion
|
| 124 |
now_et = datetime.now(timezone(timedelta(hours=-4)))
|
| 125 |
published_date = now_et.strftime('%m-%d-%y %H:%M')
|
| 126 |
df = pd.DataFrame([chat_transcript])
|
| 127 |
notion_df.upload(df, 'https://www.notion.so/YENA-be569d0a40c940e7b6e0679318215790?pvs=4', title=str(published_date), api_key=API_KEY)
|
| 128 |
|
| 129 |
-
# Return the chat transcript
|
| 130 |
-
return system_message['content']
|
| 131 |
-
|
|
|
|
| 132 |
# Define the input and output components for Gradio
|
| 133 |
audio_input = Audio(source="microphone", type="filepath", label="Record your message")
|
| 134 |
text_input = Textbox(label="Type your message", max_length=4096)
|
| 135 |
-
output_text =
|
| 136 |
-
|
| 137 |
|
| 138 |
# Define the Gradio interface
|
| 139 |
iface = gr.Interface(
|
| 140 |
fn=transcribe,
|
| 141 |
inputs=[audio_input, text_input],
|
| 142 |
-
outputs=[output_text],
|
| 143 |
title="Hold On, Pain Ends (HOPE)",
|
| 144 |
description="Talk to Your USMLE Tutor HOPE",
|
| 145 |
theme="compact",
|
|
|
|
| 16 |
model = openai.api_key = os.environ["OAPI_KEY"]
|
| 17 |
|
| 18 |
# Define the initial message and messages list
|
| 19 |
+
initialt = 'If user asked COLORIZE, dont need to do anything but present the input as it is with organized tabs (layers). You are a USMLE Tutor. Respond with ALWAYS layered "bullet points" (listing rather than sentences) \
|
| 20 |
to all input with a fun mneumonics to memorize that list. But you can answer up to 1200 words if the user requests longer response. \
|
| 21 |
You are going to keep answer and also challenge the student to learn USMLE anatomy, phsysiology, and pathology.'
|
| 22 |
initial_message = {"role": "system", "content": initialt}
|
|
|
|
| 29 |
# Define the Notion API key
|
| 30 |
API_KEY = os.environ["NAPI_KEY"]
|
| 31 |
|
| 32 |
+
nlp = spacy.load("en_core_web_sm")
|
| 33 |
+
def process_nlp(system_message):
    """Return the colorized rendering of ``system_message['content']``.

    Args:
        system_message: Mapping with a ``'content'`` entry holding the text.

    Returns:
        Whatever ``colorize_text`` produces for that text.
    """
    return colorize_text(system_message['content'])
|
| 37 |
+
|
| 38 |
+
def colorize_text(text, *, pipeline=None):
    """Render *text* as Markdown/HTML with tokens highlighted by category.

    Each input line is tokenised on its own and followed by ``<br>``.
    For every token the first matching rule wins:

    * tokens with an entity type are wrapped in ``**bold**``;
    * NOUN / VERB / ADJ / ADV tokens get a coloured ``<span>``;
    * digit, quote and punctuation tokens each get their own colour;
    * anything else is emitted unstyled.

    Args:
        text: The string to annotate; it is split on newline characters.
        pipeline: Optional callable mapping one line to an iterable of
            spaCy-style tokens. Defaults to the module-level ``nlp``.

    Returns:
        The annotated markup as a single string.
    """
    # Function-scope import so the block does not rely on a file-level import.
    from html import escape

    pos_colors = {
        'NOUN': '#FF3300',
        'VERB': '#FFFF00',
        'ADJ': '#00CC00',
        'ADV': '#FF6600',
    }
    analyze = nlp if pipeline is None else pipeline

    # Collect fragments and join once instead of growing a string with +=.
    parts = []
    for line in text.split("\n"):
        for token in analyze(line):
            # Token text ends up inside HTML, so neutralise any markup in it.
            safe = escape(token.text_with_ws, quote=False)

            if token.ent_type_:
                # Trailing whitespace must sit outside the markers:
                # "**word **" is not parsed as bold by Markdown.
                word = escape(token.text, quote=False)
                parts.append(f'**{word}**{token.whitespace_}')
                continue

            if token.pos_ in pos_colors:
                color = pos_colors[token.pos_]
            elif token.is_digit:
                color = '#9900CC'
            elif token.is_quote:
                # Checked before is_punct: quotation marks are punctuation
                # too, so the opposite order hides the quote colour.
                color = '#008080'
            elif token.is_punct:
                color = '#8B4513'
            else:
                color = None

            if color is None:
                parts.append(safe)
            else:
                parts.append(
                    f'<span style="color: {color}; background-color: transparent;">{safe}</span>'
                )
        parts.append("<br>")

    return "".join(parts)
|
| 66 |
+
|
| 67 |
+
def colorize_and_update(system_message, submit_update):
    """Colorize the message content and hand it to *submit_update*.

    Args:
        system_message: Mapping with a ``'content'`` entry holding the text.
        submit_update: Callable invoked as ``submit_update(None, colorized)``;
            the colorized markup goes in the second position.
    """
    rendered = colorize_text(system_message['content'])
    # First slot stays None; only the second output is supplied here.
    submit_update(None, rendered)
|
| 70 |
+
|
| 71 |
+
def update_text_output(system_message, submit_update):
    """Hand the raw message content to *submit_update*.

    Args:
        system_message: Mapping with a ``'content'`` entry holding the text.
        submit_update: Callable invoked as ``submit_update(content, None)``;
            the plain text goes in the first position.
    """
    reply_text = system_message['content']
    # Second slot stays None; only the first output is supplied here.
    submit_update(reply_text, None)
|
| 73 |
+
|
| 74 |
def transcribe(audio, text):
|
| 75 |
global messages
|
| 76 |
global answer_count
|
|
|
|
| 146 |
max_tokens=2000
|
| 147 |
)["choices"][0]["message"]
|
| 148 |
# Wait for the completion of the OpenAI API call
|
| 149 |
+
|
| 150 |
+
if submit_update: # Check if submit_update is not None
|
| 151 |
+
update_text_output(system_message, submit_update)
|
| 152 |
+
|
| 153 |
# Add the system message to the messages list
|
| 154 |
messages.append(system_message)
|
| 155 |
+
|
| 156 |
# Add the system message to the beginning of the messages list
|
| 157 |
messages_rev.insert(0, system_message)
|
| 158 |
# Add the input text to the messages list
|
| 159 |
messages_rev.insert(0, {"role": "user", "content": input_text + transcript["text"]})
|
| 160 |
|
| 161 |
+
# Start a separate thread to process the colorization and update the Gradio interface
|
| 162 |
+
if submit_update: # Check if submit_update is not None
|
| 163 |
+
colorize_thread = threading.Thread(target=colorize_and_update, args=(system_message, submit_update))
|
| 164 |
+
colorize_thread.start()
|
| 165 |
+
|
| 166 |
+
# Return the system message immediately
|
| 167 |
+
chat_transcript = system_message['content']
|
| 168 |
+
|
| 169 |
+
# Concatenate the chat
|
| 170 |
chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'system'])
|
| 171 |
|
| 172 |
# chat_transcript_copy = chat_transcript
|
| 173 |
# Append the number of tokens used to the end of the chat transcript
|
| 174 |
chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"
|
| 175 |
+
|
|
|
|
| 176 |
now_et = datetime.now(timezone(timedelta(hours=-4)))
|
| 177 |
published_date = now_et.strftime('%m-%d-%y %H:%M')
|
| 178 |
df = pd.DataFrame([chat_transcript])
|
| 179 |
notion_df.upload(df, 'https://www.notion.so/YENA-be569d0a40c940e7b6e0679318215790?pvs=4', title=str(published_date), api_key=API_KEY)
|
| 180 |
|
| 181 |
+
# Return the chat transcript
|
| 182 |
+
return system_message['content'], colorize_text(system_message['content'])
|
| 183 |
+
|
| 184 |
+
|
| 185 |
# Define the input and output components for Gradio
|
| 186 |
audio_input = Audio(source="microphone", type="filepath", label="Record your message")
|
| 187 |
text_input = Textbox(label="Type your message", max_length=4096)
|
| 188 |
+
output_text = Textbox(label="Text Output")
|
| 189 |
+
output_html = Markdown()
|
| 190 |
|
| 191 |
# Define the Gradio interface
|
| 192 |
iface = gr.Interface(
|
| 193 |
fn=transcribe,
|
| 194 |
inputs=[audio_input, text_input],
|
| 195 |
+
outputs=[output_text, output_html],
|
| 196 |
title="Hold On, Pain Ends (HOPE)",
|
| 197 |
description="Talk to Your USMLE Tutor HOPE",
|
| 198 |
theme="compact",
|