meg-huggingface committed

Commit 99df58a · 1 Parent(s): 86102e5

Please run after fully loading

Files changed:
- app.py (+4 -10)
- main_backend_toxicity.py (+1 -2)
- src/backend/inference_endpoint.py (+16 -11)
app.py CHANGED

@@ -8,20 +8,15 @@ configure_root_logger()
 from functools import partial
 
 import gradio as gr
-#from main_backend_lighteval import run_auto_eval
-#from main_backend_harness import run_auto_eval
 from main_backend_toxicity import run_auto_eval
 from src.display.log_visualizer import log_file_to_html_string
 from src.display.css_html_js import dark_mode_gradio_js
 from src.envs import REFRESH_RATE, REPO_ID, QUEUE_REPO, RESULTS_REPO
 from src.logging import setup_logger, log_file
-from lm_eval import tasks
 
 logging.basicConfig(level=logging.INFO)
 logger = setup_logger(__name__)
 
-print(tasks.__dict__)
-
 intro_md = f"""
 # Intro
 This is a visual for the auto evaluator.
@@ -42,7 +37,7 @@ def button_auto_eval():
     run_auto_eval()
 
 
-reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=
+reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=False)
 
 with gr.Blocks(js=dark_mode_gradio_js) as demo:
     gr.Markdown(intro_md)
@@ -55,11 +50,10 @@ with gr.Blocks(js=dark_mode_gradio_js) as demo:
     # Add a button that when pressed, triggers run_auto_eval
     button = gr.Button("Manually Run Evaluation")
     gr.Markdown(links_md)
-
-    dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
-
     button.click(fn=button_auto_eval, inputs=[], outputs=[])
 
+    dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
 
 if __name__ == '__main__':
-    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0",
+    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0",
+                                                    show_error=True, server_port=7860)
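Note on the app.py change: the load-time debug lines (from lm_eval import tasks, print(tasks.__dict__)) are removed, and the hidden polling component now sits after the button wiring. The mechanism behind the commit title is Gradio's polling pattern: a visible=False gr.Markdown whose value is a callable re-run every REFRESH_RATE seconds, which only starts once the queued app has fully loaded in the browser. A minimal self-contained sketch of that pattern follows; the refresh interval and job body are stand-ins, not the repo's values.

# Minimal sketch of the hidden-polling pattern used in app.py.
# REFRESH_RATE and run_job stand in for src.envs.REFRESH_RATE and
# main_backend_toxicity.run_auto_eval.
import gradio as gr

REFRESH_RATE = 60  # seconds (assumed value for the sketch)

def run_job() -> str:
    # Placeholder job; the real app calls run_auto_eval() here.
    return "evaluation tick"

with gr.Blocks() as demo:
    # Passing a callable as the value plus every= makes Gradio re-run
    # run_job on the app's queue after the page has fully loaded,
    # rather than at import time.
    gr.Markdown(run_job, every=REFRESH_RATE, visible=False)

if __name__ == "__main__":
    # queue() is required for every= polling to work.
    demo.queue(default_concurrency_limit=40).launch(
        server_name="0.0.0.0", server_port=7860, show_error=True)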
main_backend_toxicity.py CHANGED

@@ -3,8 +3,6 @@ import pprint
 import re
 from huggingface_hub import snapshot_download
 
-logging.getLogger("openai").setLevel(logging.DEBUG)
-
 from src.backend.inference_endpoint import create_endpoint
 from src.backend.run_toxicity_eval import main
 from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
@@ -15,6 +13,7 @@ from src.envs import (QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO,
 #, LIMIT, ACCELERATOR, VENDOR, REGION
 from src.logging import setup_logger
 
+logging.getLogger("openai").setLevel(logging.DEBUG)
 logger = setup_logger(__name__)
 
 # logging.basicConfig(level=logging.ERROR)
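The moved line only changes where the openai logger is configured, not what it does: setting a level on a named logger applies to that logger's whole package hierarchy, while everything else still falls back to the root level. A small sketch of that behavior, independent of this repo:

# Sketch: per-library log levels via the logger name hierarchy.
import logging

logging.basicConfig(level=logging.INFO)  # root logger at INFO

# Children such as "openai.api" inherit this DEBUG level.
logging.getLogger("openai").setLevel(logging.DEBUG)

logging.getLogger("openai.api").debug("shown: inherits DEBUG from 'openai'")
logging.getLogger("urllib3").debug("hidden: falls back to the root INFO level")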
src/backend/inference_endpoint.py CHANGED

@@ -1,23 +1,28 @@
+import sys
 import huggingface_hub.utils._errors
 from time import sleep
+import logging
 from huggingface_hub import create_inference_endpoint, get_inference_endpoint
 from src.backend.run_toxicity_eval import get_generation
-import sys
+from src.logging import setup_logger
+logging.basicConfig(level=logging.DEBUG)
+logger = setup_logger(__name__)
 TIMEOUT=20
 
 def create_endpoint(endpoint_name, repository, framework="pytorch", task="text-generation", accelerator="gpu", vendor="aws", region="us-east-1", type="protected", instance_size="x1", instance_type="nvidia-a100"):
-
+    logger.info("Creating endpoint %s..." % endpoint_name)
+    # TODO(mm): Handle situation where it's paused
     try:
         endpoint = create_inference_endpoint(endpoint_name, repository=repository, framework=framework, task=task, accelerator=accelerator, vendor=vendor, region=region, type=type, instance_size=instance_size, instance_type=instance_type
         )
     except huggingface_hub.utils._errors.HfHubHTTPError as e:
-
-
-
+        logger.debug("Hit the following exception:")
+        logger.debug(e)
+        logger.debug("Attempting to continue.")
         endpoint = get_inference_endpoint(endpoint_name)
         endpoint.update(repository=repository, framework=framework, task=task, accelerator=accelerator, instance_size=instance_size, instance_type=instance_type)
         endpoint.fetch()
-
+    logger.info("Endpoint status: %s." % (endpoint.status))
     if endpoint.status == "scaledToZero":
         # Send a request to wake it up.
         get_generation(endpoint.url, "Wake up")
@@ -25,16 +30,16 @@ def create_endpoint(endpoint_name, repository, framework="pytorch", task="text-g
     i = 0
     while endpoint.status in ["pending", "initializing"]:# aka, not in ["failed", "running"]
         if i >= 20:
-
+            logger.info("Model failed to respond. Exiting.")
             sys.exit()
-
+        logger.debug("Waiting %d seconds to check again if the endpoint is running." % TIMEOUT)
         sleep(TIMEOUT)
        endpoint.fetch()
-
+        logger.debug("Endpoint status: %s." % (endpoint.status))
         i += 1
-
+    logger.info("Endpoint created:")
+    logger.info(endpoint)
     generation_url = endpoint.url
-    print(generation_url)
     return generation_url
 
 
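With TIMEOUT=20 and the i >= 20 cap, create_endpoint waits at most about 400 seconds for the endpoint to leave the pending/initializing states before calling sys.exit(). A hypothetical caller, just to show the expected flow (the endpoint and repository names below are illustrative, not from this commit):

# Hypothetical usage of create_endpoint; names are placeholders.
from src.backend.inference_endpoint import create_endpoint

generation_url = create_endpoint(
    "toxicity-eval-endpoint",   # endpoint name (illustrative)
    "EleutherAI/pythia-70m",    # model repository (illustrative)
)
# The returned URL is what run_toxicity_eval's get_generation() sends
# text-generation requests to.
print(generation_url)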