meg-huggingface committed
Commit 99df58a · Parent(s): 86102e5

Please run after fully loading

app.py CHANGED
@@ -8,20 +8,15 @@ configure_root_logger()
 from functools import partial
 
 import gradio as gr
-#from main_backend_lighteval import run_auto_eval
-#from main_backend_harness import run_auto_eval
 from main_backend_toxicity import run_auto_eval
 from src.display.log_visualizer import log_file_to_html_string
 from src.display.css_html_js import dark_mode_gradio_js
 from src.envs import REFRESH_RATE, REPO_ID, QUEUE_REPO, RESULTS_REPO
 from src.logging import setup_logger, log_file
-from lm_eval import tasks
 
 logging.basicConfig(level=logging.INFO)
 logger = setup_logger(__name__)
 
-print(tasks.__dict__)
-
 intro_md = f"""
 # Intro
 This is a visual for the auto evaluator.
@@ -42,7 +37,7 @@ def button_auto_eval():
     run_auto_eval()
 
 
-reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=True)
+reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=False)
 
 with gr.Blocks(js=dark_mode_gradio_js) as demo:
     gr.Markdown(intro_md)
@@ -55,11 +50,10 @@ with gr.Blocks(js=dark_mode_gradio_js) as demo:
     # Add a button that when pressed, triggers run_auto_eval
     button = gr.Button("Manually Run Evaluation")
     gr.Markdown(links_md)
-
-    dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
-
     button.click(fn=button_auto_eval, inputs=[], outputs=[])
 
+    dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
 
 if __name__ == '__main__':
-    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0", show_error=True, server_port=7860)
+    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0",
+                                                    show_error=True, server_port=7860)
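
The functional change in app.py is the hidden `dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)` component, now placed after `button.click`: because its value is a callable, Gradio re-runs `run_auto_eval` every `REFRESH_RATE` seconds once the Blocks app has loaded, which appears to be what the commit title refers to. A minimal, self-contained sketch of that polling pattern, with a hypothetical `heartbeat` function standing in for `run_auto_eval` and an assumed 10-second interval:

import time
import gradio as gr

def heartbeat():
    # Stand-in for run_auto_eval(); returning a string lets the component render it.
    return f"last tick: {time.strftime('%H:%M:%S')}"

with gr.Blocks() as demo:
    gr.Markdown("Polling demo")
    # Value is a callable, so Gradio calls it on load and then every 10 seconds.
    gr.Markdown(heartbeat, every=10, visible=False)

if __name__ == "__main__":
    # `every=` requires the queue to be enabled.
    demo.queue().launch()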
main_backend_toxicity.py CHANGED
@@ -3,8 +3,6 @@ import pprint
 import re
 from huggingface_hub import snapshot_download
 
-logging.getLogger("openai").setLevel(logging.DEBUG)
-
 from src.backend.inference_endpoint import create_endpoint
 from src.backend.run_toxicity_eval import main
 from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
@@ -15,6 +13,7 @@ from src.envs import (QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO,
 #, LIMIT, ACCELERATOR, VENDOR, REGION
 from src.logging import setup_logger
 
+logging.getLogger("openai").setLevel(logging.DEBUG)
 logger = setup_logger(__name__)
 
 # logging.basicConfig(level=logging.ERROR)
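
The only change here moves `logging.getLogger("openai").setLevel(logging.DEBUG)` down next to the logger setup. Setting the level on a named stdlib logger only affects that library's records; everything else still goes through the root configuration. A small sketch of that interaction (illustrative messages only):

import logging

logging.basicConfig(level=logging.INFO)                # root logger at INFO
logging.getLogger("openai").setLevel(logging.DEBUG)    # one library made verbose

logging.getLogger(__name__).debug("dropped: below the root INFO threshold")
logging.getLogger("openai").debug("emitted: the per-library override allows DEBUG")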
src/backend/inference_endpoint.py CHANGED
@@ -1,23 +1,28 @@
+import sys
 import huggingface_hub.utils._errors
 from time import sleep
+import logging
 from huggingface_hub import create_inference_endpoint, get_inference_endpoint
 from src.backend.run_toxicity_eval import get_generation
-import sys
+from src.logging import setup_logger
+logging.basicConfig(level=logging.DEBUG)
+logger = setup_logger(__name__)
 TIMEOUT=20
 
 def create_endpoint(endpoint_name, repository, framework="pytorch", task="text-generation", accelerator="gpu", vendor="aws", region="us-east-1", type="protected", instance_size="x1", instance_type="nvidia-a100"):
-    print("Creating endpoint %s..." % endpoint_name)
+    logger.info("Creating endpoint %s..." % endpoint_name)
+    # TODO(mm): Handle situation where it's paused
     try:
         endpoint = create_inference_endpoint(endpoint_name, repository=repository, framework=framework, task=task, accelerator=accelerator, vendor=vendor, region=region, type=type, instance_size=instance_size, instance_type=instance_type
         )
     except huggingface_hub.utils._errors.HfHubHTTPError as e:
-        print("Hit the following exception:")
-        print(e)
-        print("Attempting to continue.")
+        logger.debug("Hit the following exception:")
+        logger.debug(e)
+        logger.debug("Attempting to continue.")
         endpoint = get_inference_endpoint(endpoint_name)
         endpoint.update(repository=repository, framework=framework, task=task, accelerator=accelerator, instance_size=instance_size, instance_type=instance_type)
     endpoint.fetch()
-    print("Endpoint status: %s." % (endpoint.status))
+    logger.info("Endpoint status: %s." % (endpoint.status))
     if endpoint.status == "scaledToZero":
         # Send a request to wake it up.
         get_generation(endpoint.url, "Wake up")
@@ -25,16 +30,16 @@ def create_endpoint(endpoint_name, repository, framework="pytorch", task="text-g
     i = 0
     while endpoint.status in ["pending", "initializing"]:# aka, not in ["failed", "running"]
         if i >= 20:
-            print("Model failed to respond. Exiting.")
+            logger.info("Model failed to respond. Exiting.")
             sys.exit()
-        print("Waiting %d seconds to check again if the endpoint is running." % TIMEOUT)
+        logger.debug("Waiting %d seconds to check again if the endpoint is running." % TIMEOUT)
         sleep(TIMEOUT)
         endpoint.fetch()
-        print("Endpoint status: %s." % (endpoint.status))
+        logger.debug("Endpoint status: %s." % (endpoint.status))
         i += 1
-    print(endpoint)
+    logger.info("Endpoint created:")
+    logger.info(endpoint)
     generation_url = endpoint.url
-    print(generation_url)
     return generation_url
 
 
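
This file replaces its `print` calls with module-level logging and leaves a TODO about paused endpoints. For reference, a rough sketch of the same get-or-create-then-wait flow using `InferenceEndpoint.wait()` from `huggingface_hub` (a version that provides that helper is assumed) instead of the manual `sleep`/`fetch` loop; it omits the wake-up request the real code sends when the endpoint is scaled to zero, and the function name is illustrative:

from huggingface_hub import create_inference_endpoint, get_inference_endpoint
from huggingface_hub.utils import HfHubHTTPError

def get_or_create_endpoint_url(name: str, repository: str) -> str:
    try:
        endpoint = create_inference_endpoint(
            name, repository=repository, framework="pytorch", task="text-generation",
            accelerator="gpu", vendor="aws", region="us-east-1", type="protected",
            instance_size="x1", instance_type="nvidia-a100",
        )
    except HfHubHTTPError:
        # Creation failed (e.g. the endpoint already exists): reuse the existing one.
        endpoint = get_inference_endpoint(name)
    # Block until the endpoint reports "running"; raises if it fails or times out.
    # NOTE: a scaledToZero endpoint still needs a wake-up request first, as above.
    endpoint.wait(timeout=400)
    return endpoint.url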