Hjgugugjhuhjggg committed on
Commit abccdc4 · verified · 1 Parent(s): d0e7d36

Update app.py

Files changed (1)
  1. app.py +45 -72
app.py CHANGED
@@ -2,9 +2,9 @@ from pydantic import BaseModel
 from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import re
-import httpx
-import asyncio
-import gradio as gr
+import uvicorn
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
 import os
 from dotenv import load_dotenv
 import json
@@ -13,50 +13,40 @@ load_dotenv()
 
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
-global_data = {
-    'models': {},
-    'tokens': {
-        'eos': 'eos_token',
-        'pad': 'pad_token',
-        'padding': 'padding_token',
-        'unk': 'unk_token',
-        'bos': 'bos_token',
-        'sep': 'sep_token',
-        'cls': 'cls_token',
-        'mask': 'mask_token'
-    }
-}
+app = FastAPI()
+
+origins = ["*"]  # Adjust as needed for production
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 
 model_configs = [
     {"repo_id": "Hjgugugjhuhjggg/mergekit-ties-tzamfyy-Q2_K-GGUF", "filename": "mergekit-ties-tzamfyy-q2_k.gguf", "name": "my_model"}
-    # Add more models here as needed. Ensure the repo_id and filename are correct.
+    # Add more models here
 ]
 
-class ModelManager:
-    def __init__(self):
-        self.models = {}
-
-    def load_model(self, model_config):
-        if model_config['name'] not in self.models:
-            try:
-                model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
-                self.models[model_config['name']] = model
-                print(f"Model '{model_config['name']}' loaded successfully.")
-            except Exception as e:
-                print(f"Error loading model {model_config['name']}: {e}")
-                self.models[model_config['name']] = None  # Indicate loading failure
-
-    def load_all_models(self):
-        with ThreadPoolExecutor() as executor:
-            futures = [executor.submit(self.load_model, config) for config in model_configs]
-            for future in as_completed(futures):
-                future.result()  # Propagate exceptions during loading
-        return self.models
-
-model_manager = ModelManager()
-global_data['models'] = model_manager.load_all_models()
-
+models = {}
+def load_model(model_config):
+    if model_config['name'] not in models:
+        try:
+            model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
+            models[model_config['name']] = model
+            print(f"Model '{model_config['name']}' loaded successfully.")
+            return model
+        except Exception as e:
+            print(f"Error loading model {model_config['name']}: {e}")
+            return None
+
+for config in model_configs:
+    load_model(config)  # Load models on startup
 
 class ChatRequest(BaseModel):
     message: str
@@ -69,7 +59,7 @@ def remove_duplicates(text):
     unique_lines = []
    seen_lines = set()
     for line in lines:
-        line = line.strip()  # Added to remove extra whitespace
+        line = line.strip()
         if line and line not in seen_lines:
             unique_lines.append(line)
             seen_lines.add(line)
@@ -77,54 +67,37 @@ def remove_duplicates(text):
 
 def generate_model_response(model, inputs):
     try:
-        if model is None:  # Handle failed model loading
+        if model is None:
             return ""
         response = model(inputs)
         return remove_duplicates(response['choices'][0]['text'])
     except Exception as e:
         print(f"Error generating model response: {e}")
-        return f"Error: {e}"  # Return informative error message
-
-def remove_repetitive_responses(responses):
-    unique_responses = {}
-    for response in responses:
-        if response['model'] not in unique_responses and response['response']:  # added check for empty responses
-            unique_responses[response['model']] = response['response']
-    return unique_responses
+        return f"Error: {e}"
 
 
-async def process_message(message, history):
-    inputs = normalize_input(message)
+@app.post("/generate")
+async def generate(request: ChatRequest):
+    inputs = normalize_input(request.message)
     with ThreadPoolExecutor() as executor:
         futures = [
             executor.submit(generate_model_response, model, inputs)
-            for model in global_data['models'].values()
+            for model in models.values()
         ]
-        responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(global_data['models'].keys(), as_completed(futures))]
-
-    unique_responses = remove_repetitive_responses(responses)
+        responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(models.keys(), as_completed(futures))]
+
+    unique_responses = {}
+    for response in responses:
+        if response['model'] not in unique_responses and response['response']:
+            unique_responses[response['model']] = response['response']
 
     formatted_response = ""
     for model, response in unique_responses.items():
         formatted_response += f"**{model}:**\n{response}\n\n"
 
-    history.append((message, formatted_response))
-    return history, ""
-
-
-iface = gr.Interface(
-    fn=process_message,
-    inputs=[
-        gr.Textbox(lines=2, placeholder="Enter your message here..."),
-        gr.State([])
-    ],
-    outputs=[
-        gr.Chatbot(),
-        gr.Textbox(label="cURL command", visible=False)  # Hidden cURL command
-    ],
-    title="Multi-Model LLM API",
-    description="Enter a message and get responses from multiple LLMs.",
-)
+    return {"response": formatted_response}
+
 
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
-    iface.launch(server_port=port)
+    uvicorn.run(app, host="0.0.0.0", port=port)
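
With this change the Space serves a plain HTTP API instead of a Gradio UI. As a usage sketch only (not part of the commit), assuming the app is running locally on the default PORT of 7860 and the client has the requests package available, the new /generate endpoint could be called like this; "message" is the field defined by ChatRequest:

import requests  # illustration only; not a dependency added by this commit

# POST a chat message to the endpoint added in this commit.
resp = requests.post(
    "http://localhost:7860/generate",
    json={"message": "Hello, which models are answering?"},
    timeout=300,  # GGUF inference on CPU can take a while
)
resp.raise_for_status()
# The endpoint returns {"response": "..."} with each model's output
# under a "**model_name:**" heading in a single string.
print(resp.json()["response"])

Because allow_origins is set to ["*"], the endpoint will also accept browser requests from any origin until that list is tightened.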