om4r932 committed on
Commit
343c758
·
1 Parent(s): 9ae9986

First upload

Browse files
Files changed (3) hide show
  1. Dockerfile +13 -0
  2. app.py +91 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the FastAPI app defined in app.py.
FROM python:3.11.3

# Run as an unprivileged user (UID 1000) instead of root.
RUN useradd -m -u 1000 user
USER user
# pip installs console scripts (e.g. uvicorn) here when running as non-root.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy and install the requirements before the rest of the sources so the
# dependency layer can be reused when only application code changes.
COPY --chown=user ./requirements.txt requirements.txt
# NOTE: the former --trusted-host flags were removed; they make pip accept
# hosts without valid HTTPS, which exposes the build to tampered packages.
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from fastapi import FastAPI, HTTPException
3
+ from dotenv import load_dotenv
4
+ import os
5
+ import re
6
+ import requests
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from pydantic import BaseModel
9
+ from typing import List, Dict, Any, Optional, Tuple
10
+
11
# Load variables from a local .env file (if any) into os.environ before the
# environment is read further down in this module.
load_dotenv()

# The ASGI application object; the Dockerfile starts it as "app:app".
app = FastAPI()
14
# Model identifiers in preference order: earlier entries are tried first when
# the caller names no model, and this order is also used to sort a
# caller-supplied list of several models.
ranked_models = [
    "llama-3.3-70b-versatile",
    "llama3-70b-8192",
    "meta-llama/llama-4-maverick-17b-128e-instruct",
    "meta-llama/llama-4-scout-17b-16e-instruct",
    "mistral-saba-24b",
    "gemma2-9b-it",
    "llama-3.1-8b-instant",
    "llama3-8b-8192",
]
24
+
25
# Every environment variable named GROQ_<digits> (GROQ_1, GROQ_2, ...) is
# treated as one API key. Keys keep the iteration order of os.environ; the
# position in this list is the "key ID" printed by the /chat handler.
api_keys = [
    value
    for name, value in os.environ.items()
    if re.match(r'^GROQ_\d+$', name)
]
30
+
31
# Allow browser clients on any origin to call the API.
# allow_credentials is False on purpose: the FastAPI CORS documentation states
# that allow_origins/allow_methods/allow_headers may not be ["*"] when
# credentials are enabled, and this service does not use cookies.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=False,
    allow_headers=["*"],
    allow_methods=["GET", "POST"],
    allow_origins=["*"],
)
38
+
39
class ChatRequest(BaseModel):
    """JSON body accepted by ``POST /chat``."""

    # Model identifiers the caller wants tried. An empty list makes the
    # handler fall back to the module-level ``ranked_models`` order.
    models: Optional[List[Any]] = []
    # The prompt, forwarded upstream as a single "user" message.
    query: str
42
+
43
class ChatResponse(BaseModel):
    """JSON body returned by ``POST /chat``."""

    # The model's reply text, or the literal "ERROR !" when no attempt
    # succeeded.
    output: str
45
+
46
@app.get("/")
def main_page():
    """Return a constant payload showing that the service is reachable."""
    return {"status": "ok"}
49
+
50
# Chat-completions endpoint that every request in this module is sent to.
_GROQ_CHAT_URL = "https://api.groq.com/openai/v1/chat/completions"
# Upper bound (seconds) on one upstream call so a stalled connection cannot
# block the worker indefinitely.
_GROQ_TIMEOUT_SECONDS = 60


def _model_rank(order, model):
    """Return the position of *model* in *order*; unknown entries sort last."""
    # Only strings can appear in ranked_models; skipping the dict lookup for
    # anything else also keeps unhashable values (lists, dicts) from raising.
    if isinstance(model, str):
        return order.get(model, float("inf"))
    return float("inf")


def _candidate_models(requested):
    """Return the models to try, in try-order, for the caller-supplied list."""
    if not requested:  # covers both [] and None
        return list(ranked_models)
    order = {name: rank for rank, name in enumerate(ranked_models)}
    # sorted() is stable, so models missing from ranked_models keep the
    # caller's relative order at the end.
    return sorted(requested, key=lambda model: _model_rank(order, model))


def _try_completion(model, key, query):
    """POST one chat completion; return the reply text, or None on any failure."""
    payload = {"model": model, "messages": [{"role": "user", "content": query}]}
    try:
        # TLS verification is left at its default (enabled): the request
        # carries the API key in the Authorization header.
        resp = requests.post(
            _GROQ_CHAT_URL,
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {key}",
            },
            data=json.dumps(payload),
            timeout=_GROQ_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        print("Request failed:", exc)
        return None
    if resp.status_code != 200:
        print(resp.status_code, resp.text)
        return None
    try:
        return resp.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        print("Unexpected response body:", exc)
        return None


@app.post("/chat", response_model=ChatResponse)
def ask_groq_llm(req: ChatRequest):
    """Answer ``req.query`` with the first model/key combination that succeeds.

    Models are tried in order: ``ranked_models`` when ``req.models`` is empty
    or missing, otherwise the requested models sorted by their position in
    ``ranked_models`` (unknown models last). For each model every configured
    API key is tried in turn.

    Returns:
        ChatResponse whose ``output`` is the reply text of the first
        successful call, or the literal ``"ERROR !"`` when every attempt
        failed or no API key is configured.
    """
    query = req.query
    for model in _candidate_models(req.models):
        # enumerate() gives the true position even when two keys are equal.
        for key_id, key in enumerate(api_keys):
            output = _try_completion(model, key, query)
            if output is not None:
                print("Asked to", model, "with the key ID", str(key_id), ":", query)
                return ChatResponse(output=output)
    return ChatResponse(output="ERROR !")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Runtime dependencies of app.py (installed by the Dockerfile).
fastapi
uvicorn[standard]
python-dotenv
requests