Uhhy committed on
Commit
95ffb37
·
verified ·
1 Parent(s): 822516c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -32
app.py CHANGED
@@ -5,20 +5,11 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
5
  import uvicorn
6
  import re
7
  from dotenv import load_dotenv
8
- import gradio as gr
9
- from spaces import ZeroGPU
10
 
11
  load_dotenv()
12
 
13
  app = FastAPI()
14
 
15
- # Inicializar ZeroGPU
16
- try:
17
- ZeroGPU.initialize()
18
- except Exception as e:
19
- print(f"ZeroGPU initialization failed: {e}")
20
-
21
- # Diccionario global para almacenar modelos y tokens
22
  global_data = {
23
  'models': {},
24
  'tokens': {
@@ -69,22 +60,18 @@ class ModelManager:
69
  def load_all_models(self):
70
  if self.loaded:
71
  return global_data['models']
72
-
73
- try:
74
- with ThreadPoolExecutor() as executor:
75
- futures = [executor.submit(self.load_model, config) for config in model_configs]
76
- models = []
77
- for future in as_completed(futures):
78
- model = future.result()
79
- if model:
80
- models.append(model)
81
-
82
- global_data['models'] = {model['name']: model['model'] for model in models}
83
- self.loaded = True
84
- return global_data['models']
85
- except Exception as e:
86
- print(f"Error loading models: {e}")
87
- return {}
88
 
89
  model_manager = ModelManager()
90
  model_manager.load_all_models()
@@ -111,7 +98,6 @@ def remove_duplicates(text):
111
  seen_lines.add(line)
112
  return '\n'.join(unique_lines)
113
 
114
- @spaces.GPU(duration=0)
115
  def generate_model_response(model, inputs, top_k, top_p, temperature):
116
  try:
117
  response = model.generate(inputs, top_k=top_k, top_p=top_p, temperature=temperature)
@@ -124,11 +110,12 @@ def generate_model_response(model, inputs, top_k, top_p, temperature):
124
  async def generate(request: ChatRequest):
125
  try:
126
  inputs = normalize_input(request.message)
127
- futures = [
128
- executor.submit(generate_model_response, model, inputs, request.top_k, request.top_p, request.temperature)
129
- for model in global_data['models'].values()
130
- ]
131
- responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(global_data['models'].keys(), as_completed(futures))]
 
132
  unique_responses = remove_repetitive_responses(responses)
133
  return unique_responses
134
  except Exception as e:
@@ -152,4 +139,4 @@ def remove_repetitive_responses(responses):
152
  return unique_responses
153
 
154
  if __name__ == "__main__":
155
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
5
  import uvicorn
6
  import re
7
  from dotenv import load_dotenv
 
 
8
 
9
  load_dotenv()
10
 
11
  app = FastAPI()
12
 
 
 
 
 
 
 
 
13
  global_data = {
14
  'models': {},
15
  'tokens': {
 
60
  def load_all_models(self):
61
  if self.loaded:
62
  return global_data['models']
63
+
64
+ with ThreadPoolExecutor() as executor:
65
+ futures = [executor.submit(self.load_model, config) for config in model_configs]
66
+ models = []
67
+ for future in as_completed(futures):
68
+ model = future.result()
69
+ if model:
70
+ models.append(model)
71
+
72
+ global_data['models'] = {model['name']: model['model'] for model in models}
73
+ self.loaded = True
74
+ return global_data['models']
 
 
 
 
75
 
76
  model_manager = ModelManager()
77
  model_manager.load_all_models()
 
98
  seen_lines.add(line)
99
  return '\n'.join(unique_lines)
100
 
 
101
  def generate_model_response(model, inputs, top_k, top_p, temperature):
102
  try:
103
  response = model.generate(inputs, top_k=top_k, top_p=top_p, temperature=temperature)
 
110
  async def generate(request: ChatRequest):
111
  try:
112
  inputs = normalize_input(request.message)
113
+ with ThreadPoolExecutor() as executor:
114
+ futures = [
115
+ executor.submit(generate_model_response, model, inputs, request.top_k, request.top_p, request.temperature)
116
+ for model in global_data['models'].values()
117
+ ]
118
+ responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(global_data['models'].keys(), as_completed(futures))]
119
  unique_responses = remove_repetitive_responses(responses)
120
  return unique_responses
121
  except Exception as e:
 
139
  return unique_responses
140
 
141
  if __name__ == "__main__":
142
+ uvicorn.run(app, host="0.0.0.0", port=7860)