from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
# -- Model setup (small enough to CPU-serve in a Space) --
MODEL_ID = "bigcode/starcoder2-3b"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
# wrap the model in a HF pipeline and expose it to LangChain
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=64,
    do_sample=False,  # greedy decoding for reproducible output;
                      # temperature/top_p only apply when sampling
)
llm = HuggingFacePipeline(pipeline=pipe)
# define a simple prompt -> chain
prompt = PromptTemplate(
    input_variables=["description"],
    template=(
        "### Convert English description to an Emmet abbreviation\n"
        "Description: {description}\n"
        "Emmet:"
    ),
)
chain = LLMChain(llm=llm, prompt=prompt)
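# illustrative: what the chain actually sends to the model; the example
# description here is made up
#   prompt.format(description="a div with class card")
#   -> "### Convert English description to an Emmet abbreviation\n"
#      "Description: a div with class card\n"
#      "Emmet:"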
# FastAPI app
app = FastAPI()

class Req(BaseModel):
    description: str

class Res(BaseModel):
    emmet: str
@app.post("/generate_emmet", response_model=Res)
def generate_emmet(req: Req):
    # chain.run is synchronous, so use a plain def and let FastAPI
    # dispatch the handler to its threadpool instead of blocking the
    # event loop
    raw = chain.run(req.description)
    # HuggingFacePipeline strips the prompt; keep only the first line
    # of the completion
    emmet = raw.strip().splitlines()[0]
    return Res(emmet=emmet)
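To sanity-check the endpoint, a minimal client sketch follows. It assumes the file is saved as app.py and uses the /generate_emmet route defined above; 7860 is the default port a Space exposes.

# run the server first:
#   uvicorn app:app --host 0.0.0.0 --port 7860
# then, from another shell or script:
import requests

resp = requests.post(
    "http://localhost:7860/generate_emmet",
    json={"description": "a nav bar with five links"},
)
print(resp.json()["emmet"])  # prints the model's Emmet suggestion

On a deployed Space the same POST works against the Space's public URL instead of localhost.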