khurrameycon committed
Commit 8dc137a · verified · 1 Parent(s): be89a8a

Update app.py

Files changed (1):
  1. app.py +28 -35
app.py CHANGED
@@ -1,56 +1,48 @@
  from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel
- from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
- from safetensors.torch import load_file
- import torch
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, PeftModel

- # Define the input schema
  class ModelInput(BaseModel):
      prompt: str
-     max_new_tokens: int = 50  # Optional: Defaults to 50 tokens
+     max_new_tokens: int = 50

- # Initialize FastAPI app
  app = FastAPI()

- # Load the base model and tokenizer
- base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"  # Base model
- adapter_weights_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs/resolve/main/adapter_model.safetensors"
-
- # Path to the adapter weights
+ # Load base model and tokenizer
+ base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
+ adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"

+ # Initialize tokenizer from base model
  tokenizer = AutoTokenizer.from_pretrained(base_model_path)
- model = AutoModelForCausalLM.from_pretrained(base_model_path)
-
- # Load the adapter weights
- def load_adapter_weights(model, adapter_weights_path):
-     adapter_weights = load_file(adapter_weights_path)
-     model.load_state_dict(adapter_weights, strict=False)  # Apply the weights
-     return model

- # Apply adapter weights to the model
- model = load_adapter_weights(model, adapter_weights_path)
+ # Load base model
+ base_model = AutoModelForCausalLM.from_pretrained(
+     base_model_path,
+     device_map="auto",
+     trust_remote_code=True
+ )

- # Ensure the model is in evaluation mode
- model.eval()
+ # Load and merge adapter weights
+ model = PeftModel.from_pretrained(base_model, adapter_path)
+ model = model.merge_and_unload()

- # Initialize the pipeline
+ # Initialize pipeline
  generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

- # Helper function to generate a response
  def generate_response(model, tokenizer, instruction, max_new_tokens=128):
-     """Generate a response from the model based on an instruction."""
      try:
-         # Tokenize and generate the output
-         inputs = tokenizer(instruction, return_tensors="pt")
-         inputs = {key: value.to(model.device) for key, value in inputs.items()}  # Move tensors to the model's device
+         messages = [{"role": "user", "content": instruction}]
+         input_text = tokenizer.apply_chat_template(
+             messages, tokenize=False, add_generation_prompt=True
+         )
+         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
          outputs = model.generate(
-             **inputs,
+             inputs,
              max_new_tokens=max_new_tokens,
-             temperature=0.7,
+             temperature=0.2,
              top_p=0.9,
              do_sample=True,
          )
-         # Decode the output
          response = tokenizer.decode(outputs[0], skip_special_tokens=True)
          return response
      except Exception as e:

@@ -58,11 +50,12 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=128):

  @app.post("/generate")
  def generate_text(input: ModelInput):
-     """API endpoint to generate text."""
      try:
-         # Call the helper function
          response = generate_response(
-             model=model, tokenizer=tokenizer, instruction=input.prompt, max_new_tokens=input.max_new_tokens
+             model=model,
+             tokenizer=tokenizer,
+             instruction=input.prompt,
+             max_new_tokens=input.max_new_tokens
          )
          return {"generated_text": response}
      except Exception as e:

@@ -70,4 +63,4 @@ def generate_text(input: ModelInput):

  @app.get("/")
  def root():
-     return {"message": "Welcome to the Hugging Face Model API with Adapter Support!"}
+     return {"message": "Welcome to the Hugging Face Model API!"}