Spaces:

Company270
/

LLM

Paused

LLM

File size: 2,869 Bytes

35d085e
 
 
 
 
 
 
 
f441fbb
 
35d085e
f441fbb
 
 
35d085e
d98a703
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c95fdfc
 
f441fbb
c03dd90
 
f441fbb
 
c03dd90
 
35d085e
 
f441fbb
 
 
 
 
 
 
35d085e
c03dd90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35d085e
 
 
f441fbb
 
 
 
 
 
 
 
 
 
 
35d085e
 
 
 
f441fbb
 
35d085e
 
 
f441fbb
 
 
 
 
 
 
9ac7986
f441fbb
 
 
 
 
35d085e

import os

import requests
import torch
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# from langchain.llms.huggingface_pipeline import HuggingFacePipeline

# API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1"
# headers = {"Authorization": f"Bearer {key}"}

# def query(payload):
# 	response = requests.post(API_URL, headers=headers, json=payload)
# 	return response.json()

def LLM(llm_name, length):
    """Build a sampling text-generation pipeline for a Hugging Face checkpoint.

    Args:
        llm_name: Model identifier handed to both ``from_pretrained`` calls.
            It is also printed to stdout before loading starts.
        length: Forwarded to the pipeline as ``max_length``.

    Returns:
        The ``transformers`` pipeline wrapping the loaded model and tokenizer.
    """
    print(llm_name)

    # Options passed to the model loader, kept together so they are easy to audit.
    load_options = {
        "trust_remote_code": True,
        "device_map": "auto",
        "load_in_8bit": True,
    }
    text_tokenizer = AutoTokenizer.from_pretrained(llm_name)
    causal_model = AutoModelForCausalLM.from_pretrained(llm_name, **load_options)

    # Generation settings applied to every call made through the pipeline.
    generation_options = {
        "max_length": length,
        "do_sample": True,
        "top_p": 0.95,
        "repetition_penalty": 1.2,
    }
    return pipeline(
        "text-generation",
        model=causal_model,
        tokenizer=text_tokenizer,
        **generation_options,
    )


# One WizardCoder 7B pipeline (max_length 4000) is built at import time and
# shared by both model handles below.
pipe = LLM("WizardLM/WizardCoder-Python-7B-V1.0", 4000)
# tokenizer = AutoTokenizer.from_pretrained("WizardLM/WizardCoder-1B-V1.0")
# base_model = AutoModelForCausalLM.from_pretrained("WizardLM/WizardCoder-1B-V1.0")

# The dedicated loads are disabled; both names alias the shared pipeline.
# Mistral 7B
# mistral_llm = LLM("mistralai/Mistral-7B-v0.1",30000)
# WizardCoder 13B
# wizard_llm = LLM("WizardLM/WizardCoder-Python-13B-V1.0",8000)
mistral_llm = wizard_llm = pipe
# hf_llm = HuggingFacePipeline(pipeline=pipe)

def ask_model(model, prompt):
    """Run ``prompt`` through the pipeline selected by ``model``.

    Args:
        model: ``'mistral'`` selects ``mistral_llm``; ``'wizard'`` selects
            ``wizard_llm``.
        prompt: Passed unchanged to the selected pipeline.

    Returns:
        Whatever the selected pipeline returns, or ``None`` when ``model`` is
        neither of the two recognised selectors.
    """
    if model == 'mistral':
        generator = mistral_llm
    elif model == 'wizard':
        generator = wizard_llm
    else:
        # Unrecognised selector: no pipeline is invoked.
        return None
    return generator(prompt)






# Credentials come from the environment; each is None when its variable is unset.
key = os.getenv("huggingface_key")
openai_api_key = os.getenv("openai_key")

# The OpenAPI schema and the docs page are both served under /api/v1/LLM.
app = FastAPI(
    docs_url="/api/v1/LLM/docs",
    openapi_url="/api/v1/LLM/openapi.json",
)

# NOTE(review): every origin, method and header is allowed together with
# credentials -- confirm this fully open CORS policy is intended.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/")
def root():
    """Return the fixed banner message served at the API root."""
    banner = {"message": "R&D LLM API"}
    return banner
    
# @app.get("/get")
# def get():
#     result = pipe("name 5 programming languages",do_sample=False)
#     print(result)
#     return {"message": result}








@app.post("/ask_llm")
async def ask_llm_endpoint(model: str, prompt: str):
    """Run ``prompt`` through one of the locally loaded models.

    Args:
        model: Model selector forwarded to ``ask_model``.
        prompt: Text forwarded to ``ask_model``.

    Returns:
        ``{"result": <value returned by ask_model>}``.

    Raises:
        HTTPException: Status 400 when ``ask_model`` returns ``None``, which
            it does when ``model`` matches none of its selectors.
    """
    # NOTE(review): ask_model is a plain synchronous call inside a coroutine;
    # confirm that holding the event loop for the whole generation is acceptable.
    result = ask_model(model, prompt)
    if result is None:
        # An unrecognised selector used to produce HTTP 200 with a null result,
        # hiding the client's mistake; report it as a bad request instead.
        raise HTTPException(status_code=400, detail=f"Unsupported model: {model!r}")
    return {"result": result}









# APIs

# @app.post("/ask_HFAPI")
# def ask_HFAPI_endpoint(prompt: str):
#     result = query(prompt)
#     return {"result": result}
    
from langchain.llms import OpenAI

# Module-level OpenAI completion client, built once at import time.
# NOTE(review): confirm "text-davinci-003" is still offered by the provider.
llm = OpenAI(
    model_name="text-davinci-003",
    temperature=0.5,
    openai_api_key=openai_api_key,
)

@app.post("/ask_GPT")
def ask_GPT_endpoint(prompt: str):
    """Send ``prompt`` to the module-level ``llm`` client and wrap its reply.

    Args:
        prompt: Text passed unchanged to ``llm``.

    Returns:
        ``{"result": <value returned by llm>}``.
    """
    return {"result": llm(prompt)}