khurrameycon committed
Commit 9196e30 · verified · 1 Parent(s): 0ef7e23

Update app.py

Files changed (1)
  1. app.py +27 -22
app.py CHANGED
@@ -1,7 +1,6 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel
 
 class ModelInput(BaseModel):
     prompt: str
@@ -9,33 +8,34 @@ class ModelInput(BaseModel):
 
 app = FastAPI()
 
-# Load base model and tokenizer
-base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
-adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
-
-# Initialize tokenizer from base model
-tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-
-# Load base model
-base_model = AutoModelForCausalLM.from_pretrained(
-    base_model_path,
-    device_map="auto",
-    trust_remote_code=True
-)
-
-# Load and merge adapter weights
-model = PeftModel.from_pretrained(base_model, adapter_path)
-model = model.merge_and_unload()
-
-# Initialize pipeline
-generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
+# Since we're getting config errors with PEFT, let's load the fine-tuned model directly
+model_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
+
+try:
+    # Load the model and tokenizer directly from your fine-tuned version
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        trust_remote_code=True,
+        device_map="auto"
+    )
+
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
+    print("Model loaded successfully!")
+
+except Exception as e:
+    print(f"Error loading model: {e}")
+    raise
 
 def generate_response(model, tokenizer, instruction, max_new_tokens=128):
+    """Generate a response from the model based on an instruction."""
     try:
+        # Format the input
         messages = [{"role": "user", "content": instruction}]
         input_text = tokenizer.apply_chat_template(
             messages, tokenize=False, add_generation_prompt=True
         )
+
+        # Generate
         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
         outputs = model.generate(
             inputs,
@@ -44,13 +44,17 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=128):
             top_p=0.9,
             do_sample=True,
         )
+
+        # Decode
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return response
+
     except Exception as e:
         raise ValueError(f"Error generating response: {e}")
 
 @app.post("/generate")
-def generate_text(input: ModelInput):
+async def generate_text(input: ModelInput):
+    """API endpoint to generate text."""
     try:
         response = generate_response(
             model=model,
@@ -59,9 +63,10 @@ def generate_text(input: ModelInput):
             max_new_tokens=input.max_new_tokens
         )
         return {"generated_text": response}
+
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
 @app.get("/")
-def root():
-    return {"message": "Welcome to the Hugging Face Model API!"}
+async def root():
+    return {"message": "Welcome to the Model API!"}