Sevixdd committed
Commit bd825b4 · verified · 1 Parent(s): 1c1dad8

Update app.py

Files changed (1)
  1. app.py +147 -19
app.py CHANGED
@@ -1,49 +1,177 @@
- import gradio as gr
-
-
  import logging
  import gradio as gr
  from queue import Queue
  import time
- from prometheus_client import start_http_server, Counter, Histogram

  # --- Prometheus Metrics Setup ---
  REQUEST_COUNT = Counter('gradio_request_count', 'Total number of requests')
  REQUEST_LATENCY = Histogram('gradio_request_latency_seconds', 'Request latency in seconds')

  # --- Logging Setup ---
- logging.basicConfig(filename="chat_log.txt", level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

  # --- Queue and Metrics ---
- chat_queue = Queue()

  # --- Chat Function with Monitoring ---
  def chat_function(message, history):
      with REQUEST_LATENCY.time():
          REQUEST_COUNT.inc()
-
          try:
              start_time = time.time()
              chat_queue.put(message)
-             logging.info(f"User: {message}")
-
-             # ... (Your chatbot processing logic here) ...
-             time.sleep(2) # Simulate processing delay
-             response = chat_queue.get()
-             logging.info(f"Bot: {response}")

              return response
          except Exception as e:
              logging.error(f"Error in chat processing: {e}")
              return "An error occurred. Please try again."

- # --- Gradio Interface ---
- with gr.Blocks() as demo:
-     gr.Markdown("## Chat with the Bot")
-     chatbot = gr.ChatInterface(fn=chat_function)

- # --- Start Prometheus Metrics Server ---
- start_http_server(8000) # Expose metrics on port 8000

- gr.load("models/Sevixdd/roberta-base-finetuned-ner").launch()
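
# --- Aside: the prometheus_client pattern used in this file ---
# A minimal, self-contained sketch (not part of the commit) of how the
# Counter / Histogram / start_http_server calls above fit together.
# The metric names and port 8000 mirror this app; handle_request() is a
# hypothetical stand-in for the real chat handler.
import time
from prometheus_client import Counter, Histogram, start_http_server

REQUEST_COUNT = Counter('gradio_request_count', 'Total number of requests')
REQUEST_LATENCY = Histogram('gradio_request_latency_seconds', 'Request latency in seconds')

def handle_request(message):
    REQUEST_COUNT.inc()              # one increment per incoming request
    with REQUEST_LATENCY.time():     # records elapsed seconds into the histogram
        time.sleep(0.1)              # stand-in for real work
        return message.upper()

if __name__ == "__main__":
    start_http_server(8000)          # serves text-format metrics at http://localhost:8000/metrics
    handle_request("ping")
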
 
 
 
 
 
 
 
  import logging
  import gradio as gr
  from queue import Queue
  import time
+ from prometheus_client import start_http_server, Counter, Histogram, Gauge
+ import threading
+ import psutil
+ import random
+ import requests  # needed by the stress_test helper below
+ from transformers import pipeline
+ from time import gmtime, strftime
+ # Load the model
+ ner_pipeline = pipeline("ner", model="Sevixdd/roberta-base-finetuned-ner")

  # --- Prometheus Metrics Setup ---
  REQUEST_COUNT = Counter('gradio_request_count', 'Total number of requests')
  REQUEST_LATENCY = Histogram('gradio_request_latency_seconds', 'Request latency in seconds')
+ ERROR_COUNT = Counter('gradio_error_count', 'Total number of errors')
+ RESPONSE_SIZE = Histogram('gradio_response_size_bytes', 'Size of responses in bytes')
+ CPU_USAGE = Gauge('system_cpu_usage_percent', 'System CPU usage in percent')
+ MEM_USAGE = Gauge('system_memory_usage_percent', 'System memory usage in percent')
+ QUEUE_LENGTH = Gauge('chat_queue_length', 'Length of the chat queue')

  # --- Logging Setup ---
+ logging.basicConfig(filename="chat_log.txt", level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S')

  # --- Queue and Metrics ---
+ chat_queue = Queue()  # Define chat_queue globally

  # --- Chat Function with Monitoring ---
  def chat_function(message, history):
+     logging.debug("Starting chat_function")
      with REQUEST_LATENCY.time():
          REQUEST_COUNT.inc()

          try:
              start_time = time.time()
              chat_queue.put(message)
+             logging.info(f"Received message from user: {message}")
+ time = "\nGMT: " + time.strftime("%a, %d %b %Y %I:%M:%S %p %Z", time.gmtime())
39
+             ner_results = ner_pipeline(message)
+             logging.debug(f"NER results: {ner_results}")
+
+             detailed_response = []
+             for result in ner_results:
+                 token = result['word']
+                 score = result['score']
+                 entity = result['entity']
+                 start = result['start']
+                 end = result['end']
+                 detailed_response.append(f"Token: {token}, Entity: {entity}, Score: {score:.4f}, Start: {start}, End: {end}")
+
+             response = "\n".join(detailed_response)
+             logging.info(f"Generated response: {response}")

+             response_size = len(response.encode('utf-8'))
+             RESPONSE_SIZE.observe(response_size)

+             time.sleep(random.uniform(0.5, 2.5))  # Simulate processing time
+
+             chat_queue.get()
+             logging.debug("Finished processing message")
              return response
          except Exception as e:
+             ERROR_COUNT.inc()
              logging.error(f"Error in chat processing: {e}")
              return "An error occurred. Please try again."

+ # Function to simulate stress test
+ def stress_test(num_requests, message, delay):
+     def send_chat_message():
+         response = requests.post("http://127.0.0.1:7860/api/predict/", json={
+             "data": [message],
+             "fn_index": 0  # This might need to be updated based on your Gradio app's function index
+         })
+         logging.debug(response.json())
+
+     threads = []
+     for _ in range(num_requests):
+         t = threading.Thread(target=send_chat_message)
+         t.start()
+         threads.append(t)
+         time.sleep(delay)  # Delay between requests
+
+     for t in threads:
+         t.join()
+
+ # --- Gradio Interface with Background Image and Three Windows ---
+ with gr.Blocks(css="""
+ body {
+     background-image: url("stag.jpeg");
+     background-size: cover;
+     background-repeat: no-repeat;
+ }
+ """, title="PLOD Filtered with Monitoring") as demo:  # Load CSS for background image
+     with gr.Tab("Chat"):
+         gr.Markdown("## Chat with the Bot")
+         chatbot = gr.ChatInterface(fn=chat_function)
+
+     with gr.Tab("Model Parameters"):
+         model_params_display = gr.Textbox(label="Model Parameters", lines=20, interactive=False)  # Display model parameters
+
+     with gr.Tab("Performance Metrics"):
+         request_count_display = gr.Number(label="Request Count", value=0)
+         avg_latency_display = gr.Number(label="Avg. Response Time (s)", value=0)
+
+     with gr.Tab("Infrastructure"):
+         cpu_usage_display = gr.Number(label="CPU Usage (%)", value=0)
+         mem_usage_display = gr.Number(label="Memory Usage (%)", value=0)
+
+     with gr.Tab("Logs"):
+         logs_display = gr.Textbox(label="Logs", lines=10)  # Increased lines for better visibility
+
+     with gr.Tab("Stress Testing"):
+         num_requests_input = gr.Number(label="Number of Requests", value=10)
+         message_input = gr.Textbox(label="Message", value="Hello bot!")
+         delay_input = gr.Number(label="Delay Between Requests (seconds)", value=0.1)
+         stress_test_button = gr.Button("Start Stress Test")
+         stress_test_status = gr.Textbox(label="Stress Test Status", lines=5, interactive=False)
+
+         def run_stress_test(num_requests, message, delay):
+             stress_test_status.value = "Stress test started..."
+             try:
+                 stress_test(num_requests, message, delay)
+                 stress_test_status.value = "Stress test completed."
+             except Exception as e:
+                 stress_test_status.value = f"Stress test failed: {e}"
+
+         stress_test_button.click(run_stress_test, [num_requests_input, message_input, delay_input], stress_test_status)
+
+     # --- Update Functions ---
+     def update_metrics(request_count_display, avg_latency_display):
+         while True:
+             request_count = REQUEST_COUNT._value.get()
+             latency_samples = REQUEST_LATENCY.collect()[0].samples
+             avg_latency = sum(s.value for s in latency_samples) / len(latency_samples) if latency_samples else 0
+
+             request_count_display.value = request_count
+             avg_latency_display.value = round(avg_latency, 2)
+
+             time.sleep(5)  # Update every 5 seconds
+
+     def update_usage(cpu_usage_display, mem_usage_display):
+         while True:
+             cpu_usage_display.value = psutil.cpu_percent()
+             mem_usage_display.value = psutil.virtual_memory().percent
+             CPU_USAGE.set(psutil.cpu_percent())
+             MEM_USAGE.set(psutil.virtual_memory().percent)
+             time.sleep(5)
+
+     def update_logs(logs_display):
+         while True:
+             with open("chat_log.txt", "r") as log_file:
+                 logs = log_file.readlines()
+             logs_display.value = "".join(logs[-10:])  # Display last 10 lines
+             time.sleep(1)  # Update every 1 second
+
+     def display_model_params(model_params_display):
+         while True:
+             model_params = ner_pipeline.model.config.to_dict()
+             model_params_str = "\n".join(f"{key}: {value}" for key, value in model_params.items())
+             model_params_display.value = model_params_str
+             time.sleep(10)  # Update every 10 seconds

+     def update_queue_length():
+         while True:
+             QUEUE_LENGTH.set(chat_queue.qsize())
+             time.sleep(1)  # Update every second

+     # --- Start Threads ---
+     threading.Thread(target=start_http_server, args=(8000,), daemon=True).start()
+     threading.Thread(target=update_metrics, args=(request_count_display, avg_latency_display), daemon=True).start()
+     threading.Thread(target=update_usage, args=(cpu_usage_display, mem_usage_display), daemon=True).start()
+     threading.Thread(target=update_logs, args=(logs_display,), daemon=True).start()  # args must be a tuple
+     threading.Thread(target=display_model_params, args=(model_params_display,), daemon=True).start()
+     threading.Thread(target=update_queue_length, daemon=True).start()

+ # Launch the app
+ demo.launch(share=True)
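
For reference, the loop in chat_function assumes the transformers token-classification pipeline returns one dict per token with 'word', 'entity', 'score', 'start' and 'end' keys. Below is a hedged sketch of that output shape; the actual entity labels depend on the Sevixdd/roberta-base-finetuned-ner config, so the values shown are illustrative only.

from transformers import pipeline

ner_pipeline = pipeline("ner", model="Sevixdd/roberta-base-finetuned-ner")
results = ner_pipeline("Hello bot!")
# Each element looks roughly like:
# {'entity': <label string from the model config>, 'score': <float>, 'index': 1, 'word': 'Hello', 'start': 0, 'end': 5}
for r in results:
    print(f"Token: {r['word']}, Entity: {r['entity']}, Score: {r['score']:.4f}, Start: {r['start']}, End: {r['end']}")

Once the app is running, the Prometheus endpoint started by start_http_server(8000) can be spot-checked with a plain GET against http://localhost:8000/metrics (for example, curl http://localhost:8000/metrics) to confirm that gradio_request_count and the other metrics defined above are being exported.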