khurrameycon committed on
Commit
5eb8313
·
verified ·
1 Parent(s): 8fed3a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -6
app.py CHANGED
@@ -19,14 +19,26 @@ model = AutoModelForCausalLM.from_pretrained(model_path)
19
  generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
20
 
21
  @app.post("/generate")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def generate_text(input: ModelInput):
23
  try:
24
- result = generator(
25
- input.prompt,
26
- max_new_tokens=input.max_new_tokens,
27
- return_full_text=False,
28
- )
29
- return {"generated_text": result[0]["generated_text"]}
30
  except Exception as e:
31
  raise HTTPException(status_code=500, detail=str(e))
32
 
 
19
  generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
20
 
21
  @app.post("/generate")
22
+
23
+ def generate_response(model, tokenizer, instruction):
24
+ """Generate a response from the model based on an instruction."""
25
+ messages = [{"role": "user", "content": instruction}]
26
+ input_text = tokenizer.apply_chat_template(
27
+ messages, tokenize=False, add_generation_prompt=True
28
+ )
29
+ inputs = tokenizer.encode(input_text, return_tensors="pt")
30
+ outputs = model.generate(
31
+ inputs, max_new_tokens=128, temperature=0.2, top_p=0.9, do_sample=True
32
+ )
33
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
34
+ return response
35
+
36
+
37
+
38
  def generate_text(input: ModelInput):
39
  try:
40
+ response = generate_response(model, tokenizer, ModelInput)
41
+ return response}
 
 
 
 
42
  except Exception as e:
43
  raise HTTPException(status_code=500, detail=str(e))
44