khurrameycon committed
Commit 1749217 · verified · 1 Parent(s): 5eb8313

Update app.py

Files changed (1): app.py +30 -18
app.py CHANGED
@@ -11,34 +11,46 @@ class ModelInput(BaseModel):
 app = FastAPI()
 
 # Load your model and tokenizer
-model_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs" # Update with your model directory
+model_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(model_path)
 
 # Initialize the pipeline
 generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-@app.post("/generate")
-
-def generate_response(model, tokenizer, instruction):
+# Helper function to generate a response
+def generate_response(model, tokenizer, instruction, max_new_tokens=128):
     """Generate a response from the model based on an instruction."""
-    messages = [{"role": "user", "content": instruction}]
-    input_text = tokenizer.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-    inputs = tokenizer.encode(input_text, return_tensors="pt")
-    outputs = model.generate(
-        inputs, max_new_tokens=128, temperature=0.2, top_p=0.9, do_sample=True
-    )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
-
-
+    try:
+        # Format the input as chat messages if necessary
+        messages = [{"role": "user", "content": instruction}]
+        input_text = tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        # Tokenize and generate the output
+        inputs = tokenizer.encode(input_text, return_tensors="pt")
+        outputs = model.generate(
+            inputs,
+            max_new_tokens=max_new_tokens,
+            temperature=0.2,
+            top_p=0.9,
+            do_sample=True,
+        )
+        # Decode the output
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response
+    except Exception as e:
+        raise ValueError(f"Error generating response: {e}")
 
+@app.post("/generate")
 def generate_text(input: ModelInput):
+    """API endpoint to generate text."""
     try:
-        response = generate_response(model, tokenizer, ModelInput)
-        return response}
+        # Call the helper function
+        response = generate_response(
+            model=model, tokenizer=tokenizer, instruction=input.prompt, max_new_tokens=input.max_new_tokens
+        )
+        return {"generated_text": response}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
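The hunk header shows that the request body is a Pydantic `ModelInput` defined earlier in app.py, and the new handler reads `input.prompt` and `input.max_new_tokens`. A minimal sketch of a schema consistent with those accesses (the actual definition sits outside this hunk; the default value here is an assumption):

from pydantic import BaseModel

class ModelInput(BaseModel):
    # Field names inferred from input.prompt / input.max_new_tokens in the diff;
    # the default of 128 is an assumption, mirroring generate_response's default.
    prompt: str
    max_new_tokens: int = 128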
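With the commit applied, the endpoint returns JSON under a `generated_text` key. A hypothetical smoke test, assuming the app is served locally with `uvicorn app:app` on the default port 8000:

import requests

# POST a prompt to the updated /generate endpoint (local dev server assumed)
resp = requests.post(
    "http://localhost:8000/generate",
    json={"prompt": "What is SmolLM?", "max_new_tokens": 64},
)
resp.raise_for_status()
print(resp.json()["generated_text"])

Note that `generate_response` decodes `outputs[0]` in full, so the returned text still includes the rendered chat prompt ahead of the model's completion.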