from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain

# --- Model setup (small enough to CPU-serve in a Space) ---
MODEL_ID = "bigcode/starcoder2-1b"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Wrap the model in a HF pipeline and a LangChain LLM.
# Greedy decoding (do_sample=False) keeps output deterministic;
# temperature/top_p only apply when sampling, so they are omitted here.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=64,
    do_sample=False,
)
llm = HuggingFacePipeline(pipeline=pipe)

# Define a simple prompt -> chain
prompt = PromptTemplate(
    input_variables=["description"],
    template=(
        "### Convert English description to an Emmet abbreviation\n"
        "Description: {description}\n"
        "Emmet:"
    ),
)
chain = LLMChain(llm=llm, prompt=prompt)

# FastAPI app
app = FastAPI()

class Req(BaseModel):
    description: str

class Res(BaseModel):
    emmet: str

# Plain `def` (not `async def`): chain.run is a blocking, CPU-bound call,
# so letting FastAPI dispatch it to its threadpool avoids stalling the
# event loop during generation.
@app.post("/generate-emmet", response_model=Res)
def generate_emmet(req: Req):
    raw = chain.run(req.description)
    # Take just the first non-empty line of the completion, guarding
    # against an empty generation so we never index an empty list.
    lines = [line for line in raw.strip().splitlines() if line.strip()]
    emmet = lines[0].strip() if lines else ""
    return {"emmet": emmet}
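
# A minimal sketch of exercising the endpoint locally, assuming uvicorn is
# installed and this file is saved as app.py (both assumptions; port 7860
# is the Hugging Face Spaces default):
#
#   uvicorn app:app --host 0.0.0.0 --port 7860
#
#   curl -X POST http://localhost:7860/generate-emmet \
#        -H "Content-Type: application/json" \
#        -d '{"description": "a nav with five links"}'
#
# This should return JSON of the form {"emmet": "..."}; the exact
# abbreviation depends on what the model generates.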