Build error
Update app.py
app.py CHANGED
@@ -4,7 +4,6 @@ from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import uvicorn
 import huggingface_hub
-import spaces
 import re
 from dotenv import load_dotenv
 
@@ -75,6 +74,7 @@ class ModelManager:
         return models
 
 model_manager = ModelManager()
+model_manager.load_all_models()  # Load models once at startup
 
 class ChatRequest(BaseModel):
     message: str
@@ -119,10 +119,8 @@ def remove_repetitive_responses(responses):
     return unique_responses
 
 @app.post("/generate")
-@spaces.GPU(duration=0)
 async def generate(request: ChatRequest):
     try:
-        global_data['models'] = model_manager.load_all_models()
         responses = []
         with ThreadPoolExecutor() as executor:
             futures = [executor.submit(generate_chat_response, request, model_data) for model_data in global_data['models']]
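For reference, below is a minimal sketch of the pattern this diff moves toward: models are loaded once at import time rather than inside the request handler, and /generate only reads the preloaded entries from global_data. The ModelManager and generate_chat_response bodies here are placeholders, not the Space's real llama.cpp logic, and the sketch additionally stores the loaded models in global_data['models'] (an assumption; the diff's module-level call discards the return value).

```python
# Minimal sketch of load-once-at-startup serving; structure follows the diff above,
# but ModelManager and generate_chat_response are stand-ins, not the Space's real code.
from concurrent.futures import ThreadPoolExecutor, as_completed

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()
global_data = {"models": []}


class ChatRequest(BaseModel):
    message: str


class ModelManager:
    def load_all_models(self):
        # Placeholder: the real app loads llama.cpp models from the Hugging Face Hub.
        return [{"name": "model-a"}, {"name": "model-b"}]


def generate_chat_response(request: ChatRequest, model_data: dict) -> str:
    # Placeholder: the real app runs the prompt through the loaded model.
    return f"{model_data['name']}: echo {request.message}"


# Load models once at import time instead of on every request.
# Assumption: the result is stored where the handler expects to find it.
model_manager = ModelManager()
global_data["models"] = model_manager.load_all_models()


@app.post("/generate")
async def generate(request: ChatRequest):
    responses = []
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(generate_chat_response, request, model_data)
            for model_data in global_data["models"]
        ]
        for future in as_completed(futures):
            responses.append(future.result())
    return {"responses": responses}
```

Loading at import time keeps per-request latency down, at the cost of requiring all models to fit in memory when the Space starts.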