khurrameycon committed
Commit 1749217 · verified · 1 Parent(s): 5eb8313

Update app.py

Files changed (1): app.py +30 -18
app.py CHANGED
@@ -11,34 +11,46 @@ class ModelInput(BaseModel):
 app = FastAPI()
 
 # Load your model and tokenizer
-model_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs" # Update with your model directory
+model_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(model_path)
 
 # Initialize the pipeline
 generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-@app.post("/generate")
-
-def generate_response(model, tokenizer, instruction):
+# Helper function to generate a response
+def generate_response(model, tokenizer, instruction, max_new_tokens=128):
     """Generate a response from the model based on an instruction."""
-    messages = [{"role": "user", "content": instruction}]
-    input_text = tokenizer.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-    inputs = tokenizer.encode(input_text, return_tensors="pt")
-    outputs = model.generate(
-        inputs, max_new_tokens=128, temperature=0.2, top_p=0.9, do_sample=True
-    )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
-
-
+    try:
+        # Format the input as chat messages if necessary
+        messages = [{"role": "user", "content": instruction}]
+        input_text = tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        # Tokenize and generate the output
+        inputs = tokenizer.encode(input_text, return_tensors="pt")
+        outputs = model.generate(
+            inputs,
+            max_new_tokens=max_new_tokens,
+            temperature=0.2,
+            top_p=0.9,
+            do_sample=True,
+        )
+        # Decode the output
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response
+    except Exception as e:
+        raise ValueError(f"Error generating response: {e}")
 
+@app.post("/generate")
 def generate_text(input: ModelInput):
+    """API endpoint to generate text."""
     try:
-        response = generate_response(model, tokenizer, ModelInput)
-        return response}
+        # Call the helper function
+        response = generate_response(
+            model=model, tokenizer=tokenizer, instruction=input.prompt, max_new_tokens=input.max_new_tokens
+        )
+        return {"generated_text": response}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
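The hunk header shows that the request body is a Pydantic `ModelInput` defined earlier in app.py, and the new handler reads `input.prompt` and `input.max_new_tokens`. A minimal sketch of a schema consistent with those accesses (the actual definition sits outside this hunk; the default value here is an assumption):

from pydantic import BaseModel

class ModelInput(BaseModel):
    # Field names inferred from input.prompt / input.max_new_tokens in the diff;
    # the default of 128 is an assumption, mirroring generate_response's default.
    prompt: str
    max_new_tokens: int = 128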
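With the commit applied, the endpoint returns JSON under a `generated_text` key. A hypothetical smoke test, assuming the app is served locally with `uvicorn app:app` on the default port 8000:

import requests

# POST a prompt to the updated /generate endpoint (local dev server assumed)
resp = requests.post(
    "http://localhost:8000/generate",
    json={"prompt": "What is SmolLM?", "max_new_tokens": 64},
)
resp.raise_for_status()
print(resp.json()["generated_text"])

Note that `generate_response` decodes `outputs[0]` in full, so the returned text still includes the rendered chat prompt ahead of the model's completion.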