Ll2

Running

App Files Files Community

Makhinur committed on Feb 9

Commit

99c818b

verified ·

1 Parent(s): c250541

Create main.py

Browse files

Files changed (1) hide show

main.py +85 -0

main.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import os
+from typing import List, Tuple
+from fastapi import FastAPI, Form, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from text_generation import Client
# Hugging Face credentials: read once at import time; the module refuses to
# load when the HF_TOKEN environment variable is absent.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    raise ValueError("Please set the HF_TOKEN environment variable.")

# Marker strings that the /generate/ handler looks for in each streamed
# token's text to decide when to stop collecting output.
EOS_STRING = "</s>"
EOT_STRING = "<EOT>"

# Hosted Inference API endpoint for the selected model, plus a client that
# sends the token as a Bearer authorization header.
model_id = "codellama/CodeLlama-34b-Instruct-hf"
API_URL = f"https://api-inference.huggingface.co/models/{model_id}"
client = Client(API_URL, headers={"Authorization": f"Bearer {HF_TOKEN}"})

# Web application with a fully permissive CORS policy (any origin, method and
# header, credentials allowed).
# NOTE(review): FastAPI's CORS documentation advises against combining a
# wildcard origin with allow_credentials=True — confirm whether credentials are
# actually needed before deploying this configuration.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Change this to your frontend's URL in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Pydantic schema describing a chat request as a JSON body.
# NOTE(review): the /generate/ handler visible in this file reads form fields
# rather than this model — confirm whether it is still needed.
class ChatRequest(BaseModel):
    # The new user message.
    prompt: str
    # Earlier turns as pairs of strings; presumably (user_input, response) in
    # the same order get_prompt() unpacks them — verify against callers.
    history: List[Tuple[str, str]]
# System prompt placed inside the <<SYS>> section of every generated prompt.
# Two paragraphs separated by a blank line; no leading or trailing newline.
DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful, respectful and honest assistant with a deep knowledge "
    "of code and software design. Always answer as helpfully as possible, "
    "while being safe. Your answers should not include any harmful, "
    "unethical, racist, sexist, toxic, dangerous, or illegal content. Please "
    "ensure that your responses are socially unbiased and positive in nature."
    "\n\n"
    "If a question does not make any sense, or is not factually coherent, "
    "explain why instead of answering something not correct. If you don't "
    "know the answer to a question, please don't share false information."
)
def get_prompt(message: str, chat_history: List[Tuple[str, str]],
               system_prompt: str) -> str:
    """Assemble the full instruction-formatted prompt for one chat turn.

    The result starts with a ``<s>[INST] <<SYS>>`` header wrapping
    ``system_prompt``, followed by one ``{user} [/INST] {response} </s><s>[INST] ``
    segment per entry of ``chat_history``, and ends with ``{message} [/INST]``.

    Whitespace handling: every stored response is stripped. The very first
    piece of user text after the header (the first history input, or
    ``message`` when there is no history) is inserted verbatim; every later
    piece of user text is stripped.

    Args:
        message: The new user message to answer.
        chat_history: Previous ``(user_input, response)`` pairs, oldest first.
        system_prompt: Text placed between the ``<<SYS>>`` markers.

    Returns:
        The concatenated prompt string.
    """
    pieces = [f'<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n']
    turns_seen = 0
    for user_input, response in chat_history:
        # Only the first user text following the system header stays unstripped.
        if turns_seen:
            user_input = user_input.strip()
        pieces.append(f'{user_input} [/INST] {response.strip()} </s><s>[INST] ')
        turns_seen += 1
    final_message = message.strip() if turns_seen else message
    pieces.append(f'{final_message} [/INST]')
    return ''.join(pieces)
def _parse_history(raw_history: str) -> List[Tuple[str, str]]:
    """Safely decode the ``history`` form field into a list of string pairs.

    The field is expected to hold the textual form of a Python literal such
    as ``[("user text", "assistant text"), ...]``.

    SECURITY: this value comes straight from the HTTP client. It was
    previously passed to ``eval()``, which lets a caller execute arbitrary
    code on the server. ``ast.literal_eval`` accepts the same list/tuple/string
    literals but never evaluates names, calls or operators.

    Args:
        raw_history: The raw form value.

    Returns:
        A list of ``(user_input, response)`` tuples.

    Raises:
        ValueError: If the text is not a literal, or is not a sequence of
            two-string pairs.
    """
    import ast  # local import keeps this block self-contained

    try:
        parsed = ast.literal_eval(raw_history)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as exc:
        raise ValueError("history is not a valid Python literal") from exc

    if not isinstance(parsed, (list, tuple)):
        raise ValueError("history must be a list of (user, assistant) pairs")

    turns: List[Tuple[str, str]] = []
    for turn in parsed:
        is_pair = isinstance(turn, (list, tuple)) and len(turn) == 2
        if not is_pair or not all(isinstance(part, str) for part in turn):
            raise ValueError("each history entry must be a pair of strings")
        turns.append((turn[0], turn[1]))
    return turns


@app.post("/generate/")
async def generate_response(prompt: str = Form(...), history: str = Form(...)):
    """Generate a model reply for ``prompt`` given the prior conversation.

    Args:
        prompt: The new user message (form field).
        history: Text of a Python literal list of ``(user_input, response)``
            string pairs (form field), e.g. ``"[]"`` for a fresh conversation.

    Returns:
        ``{"response": <generated text>}``, where the text is every streamed
        token up to (not including) the first token containing an
        end-of-sequence marker.

    Raises:
        HTTPException: 400 when ``history`` cannot be parsed or has the wrong
            shape; 500 when prompt building or generation fails.
    """
    # Reject malformed client input up front so it is reported as a client
    # error rather than being folded into the generic 500 handler below.
    try:
        chat_history = _parse_history(history)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=f"Invalid history: {exc}") from exc

    try:
        prompt_text = get_prompt(prompt, chat_history, DEFAULT_SYSTEM_PROMPT)
        generate_kwargs = dict(
            max_new_tokens=1024,
            do_sample=True,
            top_p=0.9,
            top_k=50,
            temperature=0.1,
        )
        # NOTE(review): this is a plain (non-async) iteration inside an async
        # handler; if generate_stream performs blocking network I/O it will
        # stall the event loop while streaming — confirm, and consider an
        # async client or a threadpool.
        stream = client.generate_stream(prompt_text, **generate_kwargs)
        chunks = []
        for response in stream:
            token_text = response.token.text
            # Stop at the first token carrying an end-of-sequence marker.
            if EOS_STRING in token_text or EOT_STRING in token_text:
                break
            chunks.append(token_text)
        return {"response": "".join(chunks)}
    except Exception as e:
        # Top-level boundary: surface any upstream failure as a 500.
        raise HTTPException(status_code=500, detail=str(e)) from e