Update app.py
app.py CHANGED

@@ -1,5 +1,5 @@
 from pydantic import BaseModel
-from
+from llama_cpp_agent import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import re
 import httpx
@@ -14,7 +14,6 @@ import uvicorn
 from threading import Thread
 
 load_dotenv()
-
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
 global_data = {
@@ -30,7 +29,7 @@ global_data = {
         'mask': 'mask_token'
     },
     'model_metadata': {},
-    'max_tokens':
+    'max_tokens': 256,
     'tokenizers': {},
     'model_params': {},
     'model_size': {},
@@ -103,10 +102,7 @@ class ModelManager:
                 executor.submit(self.load_model, config)
         return self.models
 
-
 model_manager = ModelManager()
-
-
 global_data['models'] = model_manager.load_all_models()
 
 class ChatRequest(BaseModel):
@@ -135,14 +131,6 @@ def cache_response(func):
         return response
     return wrapper
 
-
-@cache_response
-def generate_model_response(model, inputs):
-    try:
-        response = model(inputs)
-    except Exception as e:
-        return ""
-
 @cache_response
 def generate_model_response(model, inputs):
     try:
@@ -179,11 +167,17 @@ app = FastAPI()
 
 @app.post("/generate")
 async def generate(request: ChatRequest):
-
-
+    try:
+        response = await process_message(request.message)
+        return JSONResponse(content={"response": response})
+    except Exception as e:
+        return JSONResponse(content={"error": str(e)})
 
 def run_uvicorn():
-
+    try:
+        uvicorn.run(app, host="0.0.0.0", port=7860)
+    except Exception as e:
+        print(f"Error al ejecutar uvicorn: {e}")
 
 iface = gr.Interface(
     fn=process_message,
@@ -193,10 +187,10 @@ iface = gr.Interface(
     description="Enter a message and get responses from multiple LLMs using CPU."
 )
 
-
 def run_gradio():
     iface.launch(server_port=7862, prevent_thread_lock=True)
 
 if __name__ == "__main__":
     Thread(target=run_uvicorn).start()
     Thread(target=run_gradio).start()
+    asyncio.get_event_loop().run_forever()