meg-huggingface committed
Commit: 99df58a
Parent(s): 86102e5

Please run after fully loading

Files changed:
- app.py +4 -10
- main_backend_toxicity.py +1 -2
- src/backend/inference_endpoint.py +16 -11
app.py
CHANGED
@@ -8,20 +8,15 @@ configure_root_logger()
 from functools import partial
 
 import gradio as gr
-#from main_backend_lighteval import run_auto_eval
-#from main_backend_harness import run_auto_eval
 from main_backend_toxicity import run_auto_eval
 from src.display.log_visualizer import log_file_to_html_string
 from src.display.css_html_js import dark_mode_gradio_js
 from src.envs import REFRESH_RATE, REPO_ID, QUEUE_REPO, RESULTS_REPO
 from src.logging import setup_logger, log_file
-from lm_eval import tasks
 
 logging.basicConfig(level=logging.INFO)
 logger = setup_logger(__name__)
 
-print(tasks.__dict__)
-
 intro_md = f"""
 # Intro
 This is a visual for the auto evaluator.
@@ -42,7 +37,7 @@ def button_auto_eval():
     run_auto_eval()
 
 
-reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=
+reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=False)
 
 with gr.Blocks(js=dark_mode_gradio_js) as demo:
     gr.Markdown(intro_md)
@@ -55,11 +50,10 @@ with gr.Blocks(js=dark_mode_gradio_js) as demo:
     # Add a button that when pressed, triggers run_auto_eval
     button = gr.Button("Manually Run Evaluation")
     gr.Markdown(links_md)
-
-    dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
-
     button.click(fn=button_auto_eval, inputs=[], outputs=[])
 
+    dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
 
 if __name__ == '__main__':
-    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0",
+    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0",
+                                                    show_error=True, server_port=7860)
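Note: the change above relies on Gradio re-running a component's callable value on a timer. A minimal sketch of that pattern follows; it is not this repo's code, assumes Gradio 4.x, and run_job, button_job, and the REFRESH_RATE value shown are illustrative stand-ins for run_auto_eval, button_auto_eval, and src.envs.REFRESH_RATE. Declaring the hidden timed component after the rest of the UI mirrors the intent of this commit ("run after fully loading").

import gradio as gr

REFRESH_RATE = 600  # seconds between automatic runs (placeholder value)

def run_job():
    # Stand-in for run_auto_eval(); returns a string so it can feed gr.Markdown.
    return "evaluation finished"

def button_job():
    # Stand-in for button_auto_eval(): manual trigger with no outputs.
    run_job()

with gr.Blocks() as demo:
    gr.Markdown("# Auto evaluator")
    button = gr.Button("Manually Run Evaluation")
    # A component with a callable value and every= re-runs that callable on a
    # timer once a client has loaded the page; it is declared last here, after
    # the button wiring, so the visible UI is fully defined first.
    dummy = gr.Markdown(run_job, every=REFRESH_RATE, visible=False)
    button.click(fn=button_job, inputs=[], outputs=[])

if __name__ == "__main__":
    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0",
                                                    show_error=True,
                                                    server_port=7860)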
main_backend_toxicity.py
CHANGED
@@ -3,8 +3,6 @@ import pprint
 import re
 from huggingface_hub import snapshot_download
 
-logging.getLogger("openai").setLevel(logging.DEBUG)
-
 from src.backend.inference_endpoint import create_endpoint
 from src.backend.run_toxicity_eval import main
 from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
@@ -15,6 +13,7 @@ from src.envs import (QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO,
 #, LIMIT, ACCELERATOR, VENDOR, REGION
 from src.logging import setup_logger
 
+logging.getLogger("openai").setLevel(logging.DEBUG)
 logger = setup_logger(__name__)
 
 # logging.basicConfig(level=logging.ERROR)
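The relocated line only raises the verbosity of the third-party "openai" logger once the module's own logging setup is in place. A small sketch of the underlying stdlib mechanics, independent of this repo's src.logging helpers; the logger names and messages below are illustrative.

import logging

# Root configuration first: attaches a handler that actually emits records.
logging.basicConfig(level=logging.INFO)

# Then lower the named third-party logger so its DEBUG records pass its own
# level check and propagate up to the root handler.
logging.getLogger("openai").setLevel(logging.DEBUG)

logger = logging.getLogger(__name__)
logger.info("backend starting")                        # emitted (INFO >= INFO)
logging.getLogger("openai").debug("request payload")   # emitted after setLevel
logging.getLogger("urllib3").debug("dropped")          # not emitted: inherits INFO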
src/backend/inference_endpoint.py
CHANGED
@@ -1,23 +1,28 @@
+import sys
 import huggingface_hub.utils._errors
 from time import sleep
+import logging
 from huggingface_hub import create_inference_endpoint, get_inference_endpoint
 from src.backend.run_toxicity_eval import get_generation
-import
+from src.logging import setup_logger
+logging.basicConfig(level=logging.DEBUG)
+logger = setup_logger(__name__)
 TIMEOUT=20
 
 def create_endpoint(endpoint_name, repository, framework="pytorch", task="text-generation", accelerator="gpu", vendor="aws", region="us-east-1", type="protected", instance_size="x1", instance_type="nvidia-a100"):
-
+    logger.info("Creating endpoint %s..." % endpoint_name)
+    # TODO(mm): Handle situation where it's paused
     try:
         endpoint = create_inference_endpoint(endpoint_name, repository=repository, framework=framework, task=task, accelerator=accelerator, vendor=vendor, region=region, type=type, instance_size=instance_size, instance_type=instance_type
         )
     except huggingface_hub.utils._errors.HfHubHTTPError as e:
-
-
-
+        logger.debug("Hit the following exception:")
+        logger.debug(e)
+        logger.debug("Attempting to continue.")
         endpoint = get_inference_endpoint(endpoint_name)
         endpoint.update(repository=repository, framework=framework, task=task, accelerator=accelerator, instance_size=instance_size, instance_type=instance_type)
         endpoint.fetch()
-
+    logger.info("Endpoint status: %s." % (endpoint.status))
     if endpoint.status == "scaledToZero":
         # Send a request to wake it up.
         get_generation(endpoint.url, "Wake up")
@@ -25,16 +30,16 @@ def create_endpoint(endpoint_name, repository, framework="pytorch", task="text-g
     i = 0
     while endpoint.status in ["pending", "initializing"]:# aka, not in ["failed", "running"]
         if i >= 20:
-
+            logger.info("Model failed to respond. Exiting.")
            sys.exit()
-
+        logger.debug("Waiting %d seconds to check again if the endpoint is running." % TIMEOUT)
         sleep(TIMEOUT)
         endpoint.fetch()
-
+        logger.debug("Endpoint status: %s." % (endpoint.status))
         i += 1
-
+    logger.info("Endpoint created:")
+    logger.info(endpoint)
     generation_url = endpoint.url
-    print(generation_url)
     return generation_url
 
 
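For reference, a self-contained sketch of the create-or-reuse-and-poll pattern this file implements, using only documented huggingface_hub calls (create_inference_endpoint, get_inference_endpoint, endpoint.update/fetch). The helper name create_or_reuse_endpoint, the TimeoutError, and the wake-up comment are illustrative assumptions; the repo's own code wakes a scaled-to-zero endpoint through its get_generation helper and exits via sys.exit when the endpoint never becomes ready.

import logging
from time import sleep

from huggingface_hub import create_inference_endpoint, get_inference_endpoint
from huggingface_hub.utils import HfHubHTTPError  # public path for the error class

logger = logging.getLogger(__name__)
TIMEOUT = 20    # seconds between status polls
MAX_POLLS = 20  # give up after roughly MAX_POLLS * TIMEOUT seconds

def create_or_reuse_endpoint(name: str, repository: str) -> str:
    try:
        endpoint = create_inference_endpoint(
            name, repository=repository, framework="pytorch",
            task="text-generation", accelerator="gpu", vendor="aws",
            region="us-east-1", type="protected",
            instance_size="x1", instance_type="nvidia-a100",
        )
    except HfHubHTTPError as exc:
        # Creation fails if an endpoint with this name already exists:
        # fetch it, point it at the new repository, and refresh its state.
        logger.debug("create_inference_endpoint failed (%s); reusing existing endpoint", exc)
        endpoint = get_inference_endpoint(name)
        endpoint.update(repository=repository, framework="pytorch",
                        task="text-generation", accelerator="gpu",
                        instance_size="x1", instance_type="nvidia-a100")
        endpoint.fetch()

    if endpoint.status == "scaledToZero":
        # Any request to endpoint.url wakes a scaled-to-zero endpoint;
        # the repo sends "Wake up" through its get_generation() helper here.
        logger.info("Endpoint is scaled to zero; sending a request wakes it.")

    # Poll until the endpoint leaves its startup states or we run out of tries.
    for _ in range(MAX_POLLS):
        if endpoint.status not in ("pending", "initializing"):
            break
        sleep(TIMEOUT)
        endpoint.fetch()
    else:
        raise TimeoutError("Endpoint never left pending/initializing")

    logger.info("Endpoint status: %s", endpoint.status)  # "running" or "failed"
    return endpoint.url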