Uhhy committed on
Commit
ec80f26
verified
1 Parent(s): 1b83353

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -12,8 +12,9 @@ load_dotenv()
12
 
13
  app = FastAPI()
14
 
 
15
  global_data = {
16
- 'models': [],
17
  'tokens': {
18
  'eos': 'eos_token',
19
  'pad': 'pad_token',
@@ -26,6 +27,7 @@ global_data = {
26
  }
27
  }
28
 
 
29
  model_configs = [
30
  {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "filename": "gpt2-xl-q2_k.gguf", "name": "GPT-2 XL"},
31
  {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Instruct-Q2_K-GGUF", "filename": "meta-llama-3.1-8b-instruct-q2_k.gguf", "name": "Meta Llama 3.1-8B Instruct"},
@@ -50,7 +52,6 @@ model_configs = [
50
 
51
  class ModelManager:
52
  def __init__(self):
53
- self.models = []
54
  self.loaded = False
55
 
56
  def load_model(self, model_config):
@@ -61,7 +62,7 @@ class ModelManager:
61
 
62
  def load_all_models(self):
63
  if self.loaded:
64
- return self.models
65
 
66
  with ThreadPoolExecutor() as executor:
67
  futures = [executor.submit(self.load_model, config) for config in model_configs]
@@ -71,9 +72,9 @@ class ModelManager:
71
  if model:
72
  models.append(model)
73
 
74
- self.models = models
75
  self.loaded = True
76
- return self.models
77
 
78
  model_manager = ModelManager()
79
 
@@ -126,6 +127,7 @@ def select_best_response(responses):
126
  return max(set(responses), key=lambda x: x['response'].count("user"))
127
 
128
  @app.post("/generate")
 
129
  def generate_chat(request: ChatRequest):
130
  try:
131
  global_data['models'] = model_manager.load_all_models()
 
12
 
13
  app = FastAPI()
14
 
15
+ # Diccionario global para almacenar modelos y tokens
16
  global_data = {
17
+ 'models': {},
18
  'tokens': {
19
  'eos': 'eos_token',
20
  'pad': 'pad_token',
 
27
  }
28
  }
29
 
30
+ # Configuración de modelos
31
  model_configs = [
32
  {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "filename": "gpt2-xl-q2_k.gguf", "name": "GPT-2 XL"},
33
  {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Instruct-Q2_K-GGUF", "filename": "meta-llama-3.1-8b-instruct-q2_k.gguf", "name": "Meta Llama 3.1-8B Instruct"},
 
52
 
53
  class ModelManager:
54
  def __init__(self):
 
55
  self.loaded = False
56
 
57
  def load_model(self, model_config):
 
62
 
63
  def load_all_models(self):
64
  if self.loaded:
65
+ return global_data['models']
66
 
67
  with ThreadPoolExecutor() as executor:
68
  futures = [executor.submit(self.load_model, config) for config in model_configs]
 
72
  if model:
73
  models.append(model)
74
 
75
+ global_data['models'] = models
76
  self.loaded = True
77
+ return models
78
 
79
  model_manager = ModelManager()
80
 
 
127
  return max(set(responses), key=lambda x: x['response'].count("user"))
128
 
129
  @app.post("/generate")
130
+ @spaces.GPU(duration=0)
131
  def generate_chat(request: ChatRequest):
132
  try:
133
  global_data['models'] = model_manager.load_all_models()