Spaces:
Build error
Update app.py
app.py
CHANGED
@@ -5,20 +5,11 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 import uvicorn
 import re
 from dotenv import load_dotenv
-import gradio as gr
-from spaces import ZeroGPU
 
 load_dotenv()
 
 app = FastAPI()
 
-# Initialize ZeroGPU
-try:
-    ZeroGPU.initialize()
-except Exception as e:
-    print(f"ZeroGPU initialization failed: {e}")
-
-# Global dictionary to store models and tokens
 global_data = {
     'models': {},
     'tokens': {
@@ -69,22 +60,18 @@ class ModelManager:
     def load_all_models(self):
         if self.loaded:
             return global_data['models']
-        … [content of removed lines 72-83 was not captured in the page extraction]
-            return global_data['models']
-        except Exception as e:
-            print(f"Error loading models: {e}")
-            return {}
+
+        with ThreadPoolExecutor() as executor:
+            futures = [executor.submit(self.load_model, config) for config in model_configs]
+            models = []
+            for future in as_completed(futures):
+                model = future.result()
+                if model:
+                    models.append(model)
+
+        global_data['models'] = {model['name']: model['model'] for model in models}
+        self.loaded = True
+        return global_data['models']
 
 model_manager = ModelManager()
 model_manager.load_all_models()
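The new load_all_models fans model loading out across a thread pool. A self-contained sketch of the same pattern follows; model_configs and load_model are stand-ins here, since neither definition appears in this diff. Note one behavior change: the removed version caught loading errors, while in the new one future.result() re-raises any exception thrown by a loader.

from concurrent.futures import ThreadPoolExecutor, as_completed

model_configs = [{'name': 'model-a'}, {'name': 'model-b'}]  # hypothetical configs

def load_model(config):
    # Stand-in loader; the real one is defined elsewhere in app.py.
    return {'name': config['name'], 'model': f"stub-{config['name']}"}

def load_all(configs):
    models = []
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(load_model, c) for c in configs]
        for future in as_completed(futures):
            model = future.result()  # re-raises any loader exception
            if model:
                models.append(model)
    # Map name -> model object, mirroring global_data['models'] above.
    return {m['name']: m['model'] for m in models}

print(load_all(model_configs))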
@@ -111,7 +98,6 @@ def remove_duplicates(text):
         seen_lines.add(line)
     return '\n'.join(unique_lines)
 
-@spaces.GPU(duration=0)
 def generate_model_response(model, inputs, top_k, top_p, temperature):
     try:
         response = model.generate(inputs, top_k=top_k, top_p=top_p, temperature=temperature)
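This hunk removes the @spaces.GPU(duration=0) decorator, the last ZeroGPU reference in the file. For context, the documented pattern on ZeroGPU Spaces is to decorate the GPU-bound function with spaces.GPU rather than to import and initialize a ZeroGPU class, so the import removed in the first hunk is a plausible cause of the build error. A minimal sketch of that pattern, assuming the app runs on a ZeroGPU Space where the spaces package is available:

import spaces  # provided on Hugging Face ZeroGPU Spaces

@spaces.GPU  # a GPU is attached only while this function runs
def generate_on_gpu(model, inputs):
    # Hypothetical call; the real generation signature is app-specific.
    return model.generate(inputs)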
@@ -124,11 +110,12 @@ def generate_model_response(model, inputs, top_k, top_p, temperature):
 async def generate(request: ChatRequest):
     try:
         inputs = normalize_input(request.message)
-        … [content of removed lines 127-131 was not captured in the page extraction]
+        with ThreadPoolExecutor() as executor:
+            futures = [
+                executor.submit(generate_model_response, model, inputs, request.top_k, request.top_p, request.temperature)
+                for model in global_data['models'].values()
+            ]
+            responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(global_data['models'].keys(), as_completed(futures))]
         unique_responses = remove_repetitive_responses(responses)
         return unique_responses
     except Exception as e:
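One caveat in the added hunk: zip(global_data['models'].keys(), as_completed(futures)) pairs model names with futures in completion order, not submission order, so a response can be attributed to the wrong model whenever generations finish out of order. A sketch of a stable pairing using the standard future-to-key mapping; gather_responses is a hypothetical helper taking the same arguments the endpoint passes:

from concurrent.futures import ThreadPoolExecutor, as_completed

def gather_responses(models, generate_fn, inputs, top_k, top_p, temperature):
    # models is a name -> model mapping, like global_data['models'].
    with ThreadPoolExecutor() as executor:
        # Record which future belongs to which model name at submission time.
        future_to_name = {
            executor.submit(generate_fn, model, inputs, top_k, top_p, temperature): name
            for name, model in models.items()
        }
        return [
            {'model': future_to_name[future], 'response': future.result()}
            for future in as_completed(future_to_name)
        ]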
@@ -152,4 +139,4 @@ def remove_repetitive_responses(responses):
     return unique_responses
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=
+    uvicorn.run(app, host="0.0.0.0", port=7860)
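With the truncated port argument completed as 7860 (the default application port on Hugging Face Spaces), the server should start cleanly. A hedged client example, assuming the endpoint is mounted at /generate and that ChatRequest carries the message, top_k, top_p, and temperature fields used above; the actual route decorator is not visible in this diff:

import requests

payload = {
    'message': 'Hello, world',
    'top_k': 50,
    'top_p': 0.95,
    'temperature': 0.7,
}
# '/generate' is an assumption; the @app.post(...) line is not shown here.
resp = requests.post('http://localhost:7860/generate', json=payload)
print(resp.json())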