from fastapi import FastAPI
from pydantic import BaseModel
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForCausalLM,
)
from langchain_huggingface import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
# --- Model setup ---
MODEL_ID = "bigcode/starcoder2-3b"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
# Explicitly set pad_token_id to eos_token_id
tokenizer.pad_token_id = tokenizer.eos_token_id
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
# --- Pipeline setup (device set explicitly) ---
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # Explicitly use CPU; change to 0 or "cuda" if a GPU is available
    max_new_tokens=64,
    do_sample=False,
    return_full_text=False,  # Return only the completion, not the echoed prompt
)
llm = HuggingFacePipeline(pipeline=pipe)
# --- Prompt & chain (LCEL) ---
prompt = PromptTemplate(
    input_variables=["description"],
    template=(
        "### Convert English description to an Emmet abbreviation\n"
        "Description: {description}\n"
        "Emmet:"
    ),
)
chain = prompt | llm  # the | operator already composes a RunnableSequence
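# Quick sanity check of the chain alone (illustrative; the actual output
# depends entirely on the model):
#   chain.invoke({"description": "unordered list with five items"})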
# --- FastAPI app ---
app = FastAPI()

class Req(BaseModel):
    description: str

class Res(BaseModel):
    emmet: str
@app.get("/")
async def root():
    return {"message": "Welcome to the Emmet Generator API. Use POST /generate-emmet."}
@app.post("/generate-emmet", response_model=Res)
async def generate_emmet(req: Req):
    # PromptTemplate expects a dict keyed by its input variables
    raw = chain.invoke({"description": req.description})
    # Keep only the first non-empty line of the completion
    lines = raw.strip().splitlines()
    emmet = lines[0].strip() if lines else ""
    return {"emmet": emmet}