Hjgugugjhuhjggg committed
Commit af41c5b · verified · 1 Parent(s): 9acf06d

Update app.py

Files changed (1)
  1. app.py +12 -18
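
In outline, the commit swaps the import of Llama from llama_cpp to llama_cpp_agent, replaces the empty 'max_tokens': {} entry with a concrete 'max_tokens': 256, drops a duplicated generate_model_response definition along with some stray blank lines, and wraps both the /generate endpoint and the uvicorn launcher in try/except blocks (see the diff below). The following is a minimal, hypothetical sketch of the new endpoint pattern in isolation; process_message is only a stand-in here, since its real body lies outside the hunks shown.

from fastapi import FastAPI
from fastapi.responses import JSONResponse
from pydantic import BaseModel

app = FastAPI()

class ChatRequest(BaseModel):
    message: str

async def process_message(message: str) -> str:
    # Stand-in for the real multi-model fan-out in app.py; not part of this diff.
    return f"echo: {message}"

@app.post("/generate")
async def generate(request: ChatRequest):
    # As in the committed change, errors are caught and returned in the JSON body
    # instead of surfacing as an unhandled 500 from FastAPI.
    try:
        response = await process_message(request.message)
        return JSONResponse(content={"response": response})
    except Exception as e:
        return JSONResponse(content={"error": str(e)})

Note that the except branch returns the error payload with the default 200 status code, so API clients have to inspect the body rather than the HTTP status to detect failures.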
app.py CHANGED
@@ -1,5 +1,5 @@
 from pydantic import BaseModel
-from llama_cpp import Llama
+from llama_cpp_agent import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import re
 import httpx
@@ -14,7 +14,6 @@ import uvicorn
 from threading import Thread
 
 load_dotenv()
-
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
 global_data = {
@@ -30,7 +29,7 @@ global_data = {
         'mask': 'mask_token'
     },
     'model_metadata': {},
-    'max_tokens': {},
+    'max_tokens': 256,
     'tokenizers': {},
     'model_params': {},
     'model_size': {},
@@ -103,10 +102,7 @@ class ModelManager:
             executor.submit(self.load_model, config)
         return self.models
 
-
 model_manager = ModelManager()
-
-
 global_data['models'] = model_manager.load_all_models()
 
 class ChatRequest(BaseModel):
@@ -135,14 +131,6 @@ def cache_response(func):
         return response
     return wrapper
 
-
-@cache_response
-def generate_model_response(model, inputs):
-    try:
-        response = model(inputs)
-    except Exception as e:
-        return ""
-
 @cache_response
 def generate_model_response(model, inputs):
     try:
@@ -179,11 +167,17 @@ app = FastAPI()
 
 @app.post("/generate")
 async def generate(request: ChatRequest):
-    response = await process_message(request.message)
-    return JSONResponse(content={"response": response})
+    try:
+        response = await process_message(request.message)
+        return JSONResponse(content={"response": response})
+    except Exception as e:
+        return JSONResponse(content={"error": str(e)})
 
 def run_uvicorn():
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    try:
+        uvicorn.run(app, host="0.0.0.0", port=7860)
+    except Exception as e:
+        print(f"Error running uvicorn: {e}")
 
 iface = gr.Interface(
     fn=process_message,
@@ -193,10 +187,10 @@ iface = gr.Interface(
     description="Enter a message and get responses from multiple LLMs using CPU."
 )
 
-
 def run_gradio():
     iface.launch(server_port=7862, prevent_thread_lock=True)
 
 if __name__ == "__main__":
     Thread(target=run_uvicorn).start()
     Thread(target=run_gradio).start()
+    asyncio.get_event_loop().run_forever()
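
The tail of the new version starts the FastAPI server and the Gradio interface on separate threads and then parks the main thread with asyncio.get_event_loop().run_forever(). Below is a rough, self-contained sketch of that startup pattern; it presumes an import asyncio somewhere earlier in app.py (the import is not visible in the hunks above), and process_message is again only a placeholder.

import asyncio
from threading import Thread

import gradio as gr
import uvicorn
from fastapi import FastAPI

app = FastAPI()

def process_message(message: str) -> str:
    # Placeholder for the real handler that app.py wires into gr.Interface.
    return f"echo: {message}"

def run_uvicorn():
    try:
        # Serve the FastAPI app on port 7860, as in the committed run_uvicorn().
        uvicorn.run(app, host="0.0.0.0", port=7860)
    except Exception as e:
        print(f"Error running uvicorn: {e}")

iface = gr.Interface(fn=process_message, inputs="text", outputs="text")

def run_gradio():
    # prevent_thread_lock=True makes launch() return instead of blocking this thread.
    iface.launch(server_port=7862, prevent_thread_lock=True)

if __name__ == "__main__":
    Thread(target=run_uvicorn).start()
    Thread(target=run_gradio).start()
    # Keep the main thread alive, mirroring the committed code. get_event_loop()
    # outside a running loop is deprecated in recent Python releases, so joining
    # the server threads would be a more future-proof way to block here.
    asyncio.get_event_loop().run_forever()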