Hjgugugjhuhjggg committed
Commit 678a7bb · verified · 1 Parent(s): abccdc4

Update app.py

Files changed (1)
  1. app.py +58 -28
app.py CHANGED
@@ -1,4 +1,3 @@
-from pydantic import BaseModel
 from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import re
@@ -7,45 +6,49 @@ from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 import os
 from dotenv import load_dotenv
-import json
+import gradio as gr
+import requests
+import asyncio
+from pydantic import BaseModel
 
 load_dotenv()
-
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
-app = FastAPI()
-
-origins = ["*"] # Adjust as needed for production
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
+global_data = {
+    'models': {},
+    'tokens': {
+        'eos': 'eos_token',
+        'pad': 'pad_token',
+        'padding': 'padding_token',
+        'unk': 'unk_token',
+        'bos': 'bos_token',
+        'sep': 'sep_token',
+        'cls': 'cls_token',
+        'mask': 'mask_token'
+    }
+}
 
 model_configs = [
     {"repo_id": "Hjgugugjhuhjggg/mergekit-ties-tzamfyy-Q2_K-GGUF", "filename": "mergekit-ties-tzamfyy-q2_k.gguf", "name": "my_model"}
-    # Add more models here
 ]
 
 models = {}
+
 def load_model(model_config):
-    if model_config['name'] not in models:
+    model_name = model_config['name']
+    if model_name not in models:
         try:
             model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
-            models[model_config['name']] = model
-            print(f"Model '{model_config['name']}' loaded successfully.")
+            models[model_name] = model
+            global_data['models'] = models
             return model
         except Exception as e:
-            print(f"Error loading model {model_config['name']}: {e}")
+            print(f"Error loading model {model_name}: {e}")
+            models[model_name] = None
             return None
 
 for config in model_configs:
-    load_model(config) #Load models on startup
-
+    load_model(config)
 
 
 class ChatRequest(BaseModel):
@@ -72,18 +75,24 @@ def generate_model_response(model, inputs):
         response = model(inputs)
         return remove_duplicates(response['choices'][0]['text'])
     except Exception as e:
-        print(f"Error generating model response: {e}")
+        print(f"Error generating response: {e}")
         return f"Error: {e}"
 
+app = FastAPI()
+origins = ["*"]
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 
 @app.post("/generate")
 async def generate(request: ChatRequest):
     inputs = normalize_input(request.message)
     with ThreadPoolExecutor() as executor:
-        futures = [
-            executor.submit(generate_model_response, model, inputs)
-            for model in models.values()
-        ]
+        futures = [executor.submit(generate_model_response, model, inputs) for model in models.values()]
         responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(models.keys(), as_completed(futures))]
 
     unique_responses = {}
@@ -97,7 +106,28 @@ async def generate(request: ChatRequest):
 
     return {"response": formatted_response}
 
+async def process_message(message, history):
+    try:
+        port = int(os.environ.get("PORT", 7860))
+        response = requests.post(f"http://localhost:{port}/generate", json={"message": message}).json()
+        formatted_response = response["response"]
+        history.append((message, formatted_response))
+        return history, ""
+    except requests.exceptions.RequestException as e:
+        return history, f"Error communicating with the backend: {e}"
+
+iface = gr.Interface(
+    fn=process_message,
+    inputs=[
+        gr.Textbox(lines=2, placeholder="Enter your message here..."),
+        gr.State([])
+    ],
+    outputs=[gr.Chatbot(), gr.Textbox(visible=False)],
+    title="Multi-Model LLM API",
+    description="Enter a message and get responses from multiple LLMs.",
+)
 
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
-    uvicorn.run(app, host="0.0.0.0", port=port)
+    uvicorn.run(app, host="0.0.0.0", port=port)
+    iface.launch(server_port=7860)
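
For reference, a quick way to exercise the /generate endpoint added in this diff, assuming the server is running locally on the default port 7860 and that the example message text is a placeholder:

import requests

# POST a message to the backend and print the combined model responses.
resp = requests.post(
    "http://localhost:7860/generate",
    json={"message": "Hello, which models are answering?"},
    timeout=120,  # generation through llama.cpp can be slow on CPU
)
resp.raise_for_status()
print(resp.json()["response"])

The request and response shapes mirror the ChatRequest model and the {"response": ...} payload returned by the endpoint above.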
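The __main__ block calls uvicorn.run() and then iface.launch(); because uvicorn.run() blocks its thread, a common pattern is to start the API server in a background thread before launching the Gradio UI. A minimal sketch, assuming uvicorn is installed and imported; serve() is a hypothetical helper name, and the UI port (one above the API port) is an arbitrary placeholder:

import os
import threading

import uvicorn  # assumed to be installed alongside FastAPI

def serve(app, iface):
    # Run the FastAPI app (defined above) in a background thread so the call does not block.
    api_port = int(os.environ.get("PORT", 7860))
    threading.Thread(
        target=uvicorn.run,
        kwargs={"app": app, "host": "0.0.0.0", "port": api_port},
        daemon=True,
    ).start()
    # Launch the Gradio interface in the foreground on a separate, placeholder port.
    iface.launch(server_name="0.0.0.0", server_port=api_port + 1)

With this layout, the Gradio callback's requests.post(f"http://localhost:{port}/generate", ...) call reaches the API thread running in the same process.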