Spaces:
Runtime error
Runtime error
| import time | |
| import streamlit as st | |
# Price in USD charged per 1000 tokens; used to turn an estimated token count
# into the "cost incurred" figure shown in the UI.
# NOTE(review): the origin of 0.139 and 80 is not documented in this file --
# confirm against the pricing source before changing either number.
COST_PER_1000_TOKENS_USD = 0.139 / 80
def stream_handler(session_state, chat_stream, prompt: str, placeholder) -> str:
    """Stream a chat response into ``placeholder`` and report usage statistics.

    Each chunk's ``chunk.token.text`` is appended to the running response and
    re-rendered on ``placeholder`` until the stream ends or an end-of-sequence
    marker (``"</s>"`` or ``"<|im_end|>"``) is seen. Afterwards three Streamlit
    columns show throughput, an estimated token count and an estimated cost,
    and the figures are stored in ``session_state``.

    Token counts are estimates: whitespace-separated words are counted, and
    the prompt-plus-response word total is scaled by 1.25.

    Args:
        session_state: Mapping that receives ``"tps"`` and the accumulated
            ``"tokens_used"`` total. A missing ``"tokens_used"`` entry is
            treated as 0.
        chat_stream: Iterable of chunks exposing ``.token.text``.
        prompt: The prompt text; its words count towards the token estimate.
        placeholder: Object with a ``markdown(text)`` method used for the
            incremental rendering.

    Returns:
        The full response text, without the end-of-sequence marker.
    """
    stop_tokens = ("</s>", "<|im_end|>")

    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump if the system clock is adjusted while streaming.
    start_time = time.perf_counter()
    full_response = ""
    for chunk in chat_stream:
        text = chunk.token.text
        if text in stop_tokens:
            break
        full_response += text
        # Trailing marker is shown only while the response is still streaming.
        placeholder.markdown(full_response + "β")
    # Final render without the in-progress marker.
    placeholder.markdown(full_response)
    elapsed_time = time.perf_counter() - start_time

    response_words = len(full_response.split())
    # An empty or instantaneous stream can yield a zero duration; report 0
    # instead of raising ZeroDivisionError.
    if elapsed_time > 0:
        tokens_per_second = response_words // elapsed_time
    else:
        tokens_per_second = 0.0
    # Estimated tokens for prompt + response (words * 1.25).
    len_response = (len(prompt.split()) + response_words) * 1.25

    col1, col2, col3 = st.columns(3)
    with col1:
        st.write(f"**{tokens_per_second} tokens/second**")
    with col2:
        st.write(f"**{int(len_response)} tokens generated**")
    with col3:
        st.write(
            f"**$ {round(len_response * COST_PER_1000_TOKENS_USD / 1000, 5)} cost incurred**"
        )

    session_state["tps"] = tokens_per_second
    # Start the running total at 0 if the caller has not initialised it yet.
    try:
        previous_tokens_used = session_state["tokens_used"]
    except KeyError:
        previous_tokens_used = 0
    session_state["tokens_used"] = len_response + previous_tokens_used
    return full_response