Update app.py
app.py
CHANGED
@@ -1,49 +1,177 @@
-import gradio as gr
-
-
 import logging
 import gradio as gr
 from queue import Queue
 import time
-from prometheus_client import start_http_server, Counter, Histogram
+from prometheus_client import start_http_server, Counter, Histogram, Gauge
+import threading
+import psutil
+import random
+import requests  # needed by the stress test below
+from transformers import pipeline
+from time import gmtime, strftime
+# Load the model
+ner_pipeline = pipeline("ner", model="Sevixdd/roberta-base-finetuned-ner")
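+# The first run downloads the model weights from the Hugging Face Hub and caches them locally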
 
 # --- Prometheus Metrics Setup ---
 REQUEST_COUNT = Counter('gradio_request_count', 'Total number of requests')
 REQUEST_LATENCY = Histogram('gradio_request_latency_seconds', 'Request latency in seconds')
+ERROR_COUNT = Counter('gradio_error_count', 'Total number of errors')
+RESPONSE_SIZE = Histogram('gradio_response_size_bytes', 'Size of responses in bytes')
+CPU_USAGE = Gauge('system_cpu_usage_percent', 'System CPU usage in percent')
+MEM_USAGE = Gauge('system_memory_usage_percent', 'System memory usage in percent')
+QUEUE_LENGTH = Gauge('chat_queue_length', 'Length of the chat queue')
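+# Counters only ever increase, Gauges move both ways, and Histograms expose
+# per-bucket counts plus _sum/_count series on the metrics endpoint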
 
 # --- Logging Setup ---
-logging.basicConfig(filename="chat_log.txt", level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(filename="chat_log.txt", level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S')
 
 # --- Queue and Metrics ---
-chat_queue = Queue()
+chat_queue = Queue()  # Define chat_queue globally
 
 # --- Chat Function with Monitoring ---
21 |
def chat_function(message, history):
|
|
|
22 |
with REQUEST_LATENCY.time():
|
23 |
REQUEST_COUNT.inc()
|
24 |
-
|
25 |
try:
|
26 |
start_time = time.time()
|
27 |
chat_queue.put(message)
|
28 |
-
logging.info(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
response = chat_queue.get()
|
33 |
-
logging.info(f"Bot: {response}")
|
34 |
|
|
|
|
|
|
|
|
|
35 |
return response
|
36 |
except Exception as e:
|
|
|
37 |
logging.error(f"Error in chat processing: {e}")
|
38 |
return "An error occurred. Please try again."
|
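+# chat_queue.get() above removes the message that was enqueued at the start of the
+# handler, so QUEUE_LENGTH effectively tracks requests that are still in flight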
 
-#
-
-
-
 
-
-
 
-
+# Function to simulate a stress test
+def stress_test(num_requests, message, delay):
+    def send_chat_message():
+        response = requests.post("http://127.0.0.1:7860/api/predict/", json={
+            "data": [message],
+            "fn_index": 0  # This might need to be updated based on your Gradio app's function index
+        })
+        logging.debug(response.json())
+
+    threads = []
+    for _ in range(num_requests):
+        t = threading.Thread(target=send_chat_message)
+        t.start()
+        threads.append(t)
+        time.sleep(delay)  # Delay between requests
+
+    for t in threads:
+        t.join()
+
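+# The stress test assumes the app is already running on 127.0.0.1:7860; the
+# /api/predict/ route with fn_index matches older Gradio REST APIs and may
+# need adjusting for newer releases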
+# --- Gradio Interface with Background Image and Tabs ---
+with gr.Blocks(css="""
+body {
+    background-image: url("stag.jpeg");
+    background-size: cover;
+    background-repeat: no-repeat;
+}
+""", title="PLOD Filtered with Monitoring") as demo:  # CSS loads the background image
+    with gr.Tab("Chat"):
+        gr.Markdown("## Chat with the Bot")
+        chatbot = gr.ChatInterface(fn=chat_function)  # chat_function receives (message, history)
+
+    with gr.Tab("Model Parameters"):
+        model_params_display = gr.Textbox(label="Model Parameters", lines=20, interactive=False)  # Display model parameters
+
+    with gr.Tab("Performance Metrics"):
+        request_count_display = gr.Number(label="Request Count", value=0)
+        avg_latency_display = gr.Number(label="Avg. Response Time (s)", value=0)
+
+    with gr.Tab("Infrastructure"):
+        cpu_usage_display = gr.Number(label="CPU Usage (%)", value=0)
+        mem_usage_display = gr.Number(label="Memory Usage (%)", value=0)
+
+    with gr.Tab("Logs"):
+        logs_display = gr.Textbox(label="Logs", lines=10)  # Increased lines for better visibility
+
+    with gr.Tab("Stress Testing"):
+        num_requests_input = gr.Number(label="Number of Requests", value=10)
+        message_input = gr.Textbox(label="Message", value="Hello bot!")
+        delay_input = gr.Number(label="Delay Between Requests (seconds)", value=0.1)
+        stress_test_button = gr.Button("Start Stress Test")
+        stress_test_status = gr.Textbox(label="Stress Test Status", lines=5, interactive=False)
+
+        def run_stress_test(num_requests, message, delay):
+            stress_test_status.value = "Stress test started..."
+            try:
+                stress_test(num_requests, message, delay)
+                stress_test_status.value = "Stress test completed."
+            except Exception as e:
+                stress_test_status.value = f"Stress test failed: {e}"
+
+        stress_test_button.click(run_stress_test, [num_requests_input, message_input, delay_input], stress_test_status)
+
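+# run_stress_test blocks its event handler until every request thread has joined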
+# --- Update Functions ---
+def update_metrics(request_count_display, avg_latency_display):
+    while True:
+        request_count = REQUEST_COUNT._value.get()  # _value is internal prometheus_client state, not public API
+        histogram = REQUEST_LATENCY.collect()[0]
+        total = sum(s.value for s in histogram.samples if s.name.endswith('_sum'))
+        count = sum(s.value for s in histogram.samples if s.name.endswith('_count'))
+        avg_latency = total / count if count else 0
+
+        request_count_display.value = request_count
+        avg_latency_display.value = round(avg_latency, 2)
+
+        time.sleep(5)  # Update every 5 seconds
+
+def update_usage(cpu_usage_display, mem_usage_display):
+    while True:
+        cpu_usage_display.value = psutil.cpu_percent()
+        mem_usage_display.value = psutil.virtual_memory().percent
+        CPU_USAGE.set(psutil.cpu_percent())
+        MEM_USAGE.set(psutil.virtual_memory().percent)
+        time.sleep(5)
+
+def update_logs(logs_display):
+    while True:
+        with open("chat_log.txt", "r") as log_file:
+            logs = log_file.readlines()
+        logs_display.value = "".join(logs[-10:])  # Display the last 10 lines
+        time.sleep(1)  # Update every 1 second
+
+def display_model_params(model_params_display):
+    while True:
+        model_params = ner_pipeline.model.config.to_dict()
+        model_params_str = "\n".join(f"{key}: {value}" for key, value in model_params.items())
+        model_params_display.value = model_params_str
+        time.sleep(10)  # Update every 10 seconds
 
+def update_queue_length():
+    while True:
+        QUEUE_LENGTH.set(chat_queue.qsize())
+        time.sleep(1)  # Update every second
 
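+# Note: assigning to a component's .value from a background thread only changes
+# server-side state; depending on the Gradio version, a timer/every-based refresh
+# is typically needed to push live updates into open browser sessions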
+# --- Start Threads ---
+threading.Thread(target=start_http_server, args=(8000,), daemon=True).start()
+threading.Thread(target=update_metrics, args=(request_count_display, avg_latency_display), daemon=True).start()
+threading.Thread(target=update_usage, args=(cpu_usage_display, mem_usage_display), daemon=True).start()
+threading.Thread(target=update_logs, args=(logs_display,), daemon=True).start()
+threading.Thread(target=display_model_params, args=(model_params_display,), daemon=True).start()
+threading.Thread(target=update_queue_length, daemon=True).start()
+
+# Launch the app
+demo.launch(share=True)
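+# Prometheus scrapes the metrics exposed by start_http_server on port 8000, e.g.:
+#   curl -s http://localhost:8000/metrics | grep gradio_request_count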