Hjgugugjhuhjggg committed on
Commit 5b90b0d · verified · 1 Parent(s): ef9123a

Update app.py

Files changed (1):
  1. app.py (+18 -15)
app.py CHANGED
@@ -1,21 +1,24 @@
+import cachetools
 from pydantic import BaseModel
-from llama_cpp import Llama
+from llama_cpp_agent import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import re
 import httpx
 import asyncio
 import gradio as gr
 import os
-import gptcache
 from dotenv import load_dotenv
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
 import uvicorn
 from threading import Thread
+import gptcache
 
 load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
+cache = cachetools.TTLCache(maxsize=100, ttl=60)
+
 global_data = {
     'models': {},
     'tokens': {
@@ -124,10 +127,10 @@ def remove_duplicates(text):
 def cache_response(func):
     def wrapper(*args, **kwargs):
         cache_key = f"{args}-{kwargs}"
-        if gptcache.get(cache_key):
-            return gptcache.get(cache_key)
+        if cache_key in cache:
+            return cache[cache_key]
         response = func(*args, **kwargs)
-        gptcache.set(cache_key, response)
+        cache[cache_key] = response
         return response
     return wrapper
 
@@ -155,13 +158,13 @@ async def process_message(message):
     ]
     responses = [
         {'model': model_name, 'response': future.result()}
-        for model_name, future in zip(global_data['models'].keys(), as_completed(futures))
-    ]
-    unique_responses = remove_repetitive_responses(responses)
-    formatted_response = ""
-    for model, response in unique_responses.items():
-        formatted_response += f"**{model}:**\n{response}\n\n"
-    return formatted_response
+        for model_name, future in zip(global_data['models'].keys(), as_completed(futures))
+    ]
+    unique_responses = remove_repetitive_responses(responses)
+    formatted_response = ""
+    for model, response in unique_responses.items():
+        formatted_response += f"**{model}:**\n{response}\n\n"
+    return formatted_response
 
 app = FastAPI()
 
@@ -175,7 +178,7 @@ async def generate(request: ChatRequest):
 
 def run_uvicorn():
     try:
-        uvicorn.run(app, host="0.0.0.0", port=7860)
+        uvicorn.run(app, host="0.0.0.0", port=7861)
     except Exception as e:
         print(f"Error al ejecutar uvicorn: {e}")
 
@@ -184,7 +187,7 @@ iface = gr.Interface(
     inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
     outputs=gr.Markdown(),
     title="Multi-Model LLM API (CPU Optimized)",
-    description="Enter a message and get responses from multiple LLMs using CPU."
+    description=""
 )
 
 def run_gradio():
@@ -193,4 +196,4 @@ def run_gradio():
 if __name__ == "__main__":
     Thread(target=run_uvicorn).start()
     Thread(target=run_gradio).start()
-    asyncio.get_event_loop().run_forever()
+    asyncio.get_event_loop().run_forever()
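The core of this commit swaps the gptcache calls for an in-memory cachetools.TTLCache keyed on the stringified call arguments. A minimal standalone sketch of that pattern; the decorated ask_model function is hypothetical, since the functions actually decorated are not visible in this hunk:

import cachetools

# at most 100 entries, each expiring 60 seconds after insertion
cache = cachetools.TTLCache(maxsize=100, ttl=60)

def cache_response(func):
    def wrapper(*args, **kwargs):
        cache_key = f"{args}-{kwargs}"      # stringified call arguments as the key
        if cache_key in cache:              # cache hit: return the stored response
            return cache[cache_key]
        response = func(*args, **kwargs)    # cache miss: compute, store, return
        cache[cache_key] = response
        return response
    return wrapper

@cache_response
def ask_model(prompt):                      # hypothetical function, for illustration only
    return f"(expensive model call for {prompt!r})"

print(ask_model("hello"))  # computed
print(ask_model("hello"))  # served from the cache until the 60 s TTL expires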
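The process_message hunk pairs model names with futures via zip(global_data['models'].keys(), as_completed(futures)). Because as_completed yields futures in completion order rather than submission order, that pairing can attach a response to the wrong model name. A small sketch of the same fan-out with an explicit future-to-name mapping; query_model is a hypothetical stand-in for the real per-model call:

from concurrent.futures import ThreadPoolExecutor, as_completed

def query_model(model_name, message):        # hypothetical stand-in for the real model call
    return f"{model_name}: echo {message}"

def fan_out(model_names, message):
    with ThreadPoolExecutor() as executor:
        # keep an explicit future -> model-name mapping so completion order
        # cannot mismatch names and responses
        futures = {executor.submit(query_model, name, message): name
                   for name in model_names}
        return [
            {'model': futures[future], 'response': future.result()}
            for future in as_completed(futures)
        ]

print(fan_out(['model_a', 'model_b'], "hello"))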
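The entry point starts the FastAPI app under uvicorn (now on port 7861) and the Gradio interface on separate threads, then parks the main thread on an event loop so the process stays alive. A rough standalone sketch of that startup shape, assuming a placeholder Gradio handler and port, neither of which is shown in this diff:

import asyncio
from threading import Thread

import gradio as gr
import uvicorn
from fastapi import FastAPI

app = FastAPI()

def run_uvicorn():
    # port changed from 7860 to 7861 in this commit
    uvicorn.run(app, host="0.0.0.0", port=7861)

def run_gradio():
    iface = gr.Interface(fn=lambda msg: msg, inputs="text", outputs="text")
    iface.launch(server_port=7860)           # assumed port; not visible in the diff

if __name__ == "__main__":
    Thread(target=run_uvicorn).start()
    Thread(target=run_gradio).start()
    # keeps the main thread alive; on newer Python versions asyncio.new_event_loop()
    # avoids the DeprecationWarning that get_event_loop() emits here
    asyncio.get_event_loop().run_forever()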