meg-huggingface committed

Commit 99df58a · 1 Parent(s): 86102e5

Please run after fully loading

Files changed:
- app.py (+4 -10)
- main_backend_toxicity.py (+1 -2)
- src/backend/inference_endpoint.py (+16 -11)
app.py CHANGED

@@ -8,20 +8,15 @@ configure_root_logger()
 from functools import partial
 
 import gradio as gr
-#from main_backend_lighteval import run_auto_eval
-#from main_backend_harness import run_auto_eval
 from main_backend_toxicity import run_auto_eval
 from src.display.log_visualizer import log_file_to_html_string
 from src.display.css_html_js import dark_mode_gradio_js
 from src.envs import REFRESH_RATE, REPO_ID, QUEUE_REPO, RESULTS_REPO
 from src.logging import setup_logger, log_file
-from lm_eval import tasks
 
 logging.basicConfig(level=logging.INFO)
 logger = setup_logger(__name__)
 
-print(tasks.__dict__)
-
 intro_md = f"""
 # Intro
 This is a visual for the auto evaluator.
@@ -42,7 +37,7 @@ def button_auto_eval():
     run_auto_eval()
 
 
-reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=
+reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=False)
 
 with gr.Blocks(js=dark_mode_gradio_js) as demo:
     gr.Markdown(intro_md)
@@ -55,11 +50,10 @@ with gr.Blocks(js=dark_mode_gradio_js) as demo:
     # Add a button that when pressed, triggers run_auto_eval
     button = gr.Button("Manually Run Evaluation")
     gr.Markdown(links_md)
-
-    dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
-
     button.click(fn=button_auto_eval, inputs=[], outputs=[])
 
+    dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
 
 if __name__ == '__main__':
-    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0",
+    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0",
+                                                    show_error=True, server_port=7860)
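Note on the app.py change: the load-time debug lines (from lm_eval import tasks, print(tasks.__dict__)) are removed, and the hidden polling component now sits after the button wiring. The mechanism behind the commit title is Gradio's polling pattern: a visible=False gr.Markdown whose value is a callable re-run every REFRESH_RATE seconds, which only starts once the queued app has fully loaded in the browser. A minimal self-contained sketch of that pattern follows; the refresh interval and job body are stand-ins, not the repo's values.

# Minimal sketch of the hidden-polling pattern used in app.py.
# REFRESH_RATE and run_job stand in for src.envs.REFRESH_RATE and
# main_backend_toxicity.run_auto_eval.
import gradio as gr

REFRESH_RATE = 60  # seconds (assumed value for the sketch)

def run_job() -> str:
    # Placeholder job; the real app calls run_auto_eval() here.
    return "evaluation tick"

with gr.Blocks() as demo:
    # Passing a callable as the value plus every= makes Gradio re-run
    # run_job on the app's queue after the page has fully loaded,
    # rather than at import time.
    gr.Markdown(run_job, every=REFRESH_RATE, visible=False)

if __name__ == "__main__":
    # queue() is required for every= polling to work.
    demo.queue(default_concurrency_limit=40).launch(
        server_name="0.0.0.0", server_port=7860, show_error=True)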
main_backend_toxicity.py CHANGED

@@ -3,8 +3,6 @@ import pprint
 import re
 from huggingface_hub import snapshot_download
 
-logging.getLogger("openai").setLevel(logging.DEBUG)
-
 from src.backend.inference_endpoint import create_endpoint
 from src.backend.run_toxicity_eval import main
 from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
@@ -15,6 +13,7 @@ from src.envs import (QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO,
 #, LIMIT, ACCELERATOR, VENDOR, REGION
 from src.logging import setup_logger
 
+logging.getLogger("openai").setLevel(logging.DEBUG)
 logger = setup_logger(__name__)
 
 # logging.basicConfig(level=logging.ERROR)
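The moved line only changes where the openai logger is configured, not what it does: setting a level on a named logger applies to that logger's whole package hierarchy, while everything else still falls back to the root level. A small sketch of that behavior, independent of this repo:

# Sketch: per-library log levels via the logger name hierarchy.
import logging

logging.basicConfig(level=logging.INFO)  # root logger at INFO

# Children such as "openai.api" inherit this DEBUG level.
logging.getLogger("openai").setLevel(logging.DEBUG)

logging.getLogger("openai.api").debug("shown: inherits DEBUG from 'openai'")
logging.getLogger("urllib3").debug("hidden: falls back to the root INFO level")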
src/backend/inference_endpoint.py CHANGED

@@ -1,23 +1,28 @@
+import sys
 import huggingface_hub.utils._errors
 from time import sleep
+import logging
 from huggingface_hub import create_inference_endpoint, get_inference_endpoint
 from src.backend.run_toxicity_eval import get_generation
-import sys
+from src.logging import setup_logger
+logging.basicConfig(level=logging.DEBUG)
+logger = setup_logger(__name__)
 TIMEOUT=20
 
 def create_endpoint(endpoint_name, repository, framework="pytorch", task="text-generation", accelerator="gpu", vendor="aws", region="us-east-1", type="protected", instance_size="x1", instance_type="nvidia-a100"):
-
+    logger.info("Creating endpoint %s..." % endpoint_name)
+    # TODO(mm): Handle situation where it's paused
     try:
         endpoint = create_inference_endpoint(endpoint_name, repository=repository, framework=framework, task=task, accelerator=accelerator, vendor=vendor, region=region, type=type, instance_size=instance_size, instance_type=instance_type
         )
     except huggingface_hub.utils._errors.HfHubHTTPError as e:
-
-
-
+        logger.debug("Hit the following exception:")
+        logger.debug(e)
+        logger.debug("Attempting to continue.")
         endpoint = get_inference_endpoint(endpoint_name)
         endpoint.update(repository=repository, framework=framework, task=task, accelerator=accelerator, instance_size=instance_size, instance_type=instance_type)
         endpoint.fetch()
-
+    logger.info("Endpoint status: %s." % (endpoint.status))
     if endpoint.status == "scaledToZero":
         # Send a request to wake it up.
         get_generation(endpoint.url, "Wake up")
@@ -25,16 +30,16 @@ def create_endpoint(endpoint_name, repository, framework="pytorch", task="text-g
     i = 0
     while endpoint.status in ["pending", "initializing"]:# aka, not in ["failed", "running"]
         if i >= 20:
-
+            logger.info("Model failed to respond. Exiting.")
             sys.exit()
-
+        logger.debug("Waiting %d seconds to check again if the endpoint is running." % TIMEOUT)
         sleep(TIMEOUT)
        endpoint.fetch()
-
+        logger.debug("Endpoint status: %s." % (endpoint.status))
         i += 1
-
+    logger.info("Endpoint created:")
+    logger.info(endpoint)
     generation_url = endpoint.url
-    print(generation_url)
     return generation_url
 
 
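With TIMEOUT=20 and the i >= 20 cap, create_endpoint waits at most about 400 seconds for the endpoint to leave the pending/initializing states before calling sys.exit(). A hypothetical caller, just to show the expected flow (the endpoint and repository names below are illustrative, not from this commit):

# Hypothetical usage of create_endpoint; names are placeholders.
from src.backend.inference_endpoint import create_endpoint

generation_url = create_endpoint(
    "toxicity-eval-endpoint",   # endpoint name (illustrative)
    "EleutherAI/pythia-70m",    # model repository (illustrative)
)
# The returned URL is what run_toxicity_eval's get_generation() sends
# text-generation requests to.
print(generation_url)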