khurrameycon committed
Commit 9e09549 · verified · 1 parent: 481c951

Update app.py

Files changed (1)
  1. app.py (+25 −14)
app.py CHANGED
@@ -1,6 +1,8 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+from safetensors.torch import load_file
+import torch
 
 # Define the input schema
 class ModelInput(BaseModel):
@@ -10,12 +12,25 @@ class ModelInput(BaseModel):
 # Initialize FastAPI app
 app = FastAPI()
 
-# Load your model and tokenizer
-model_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
-# "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
+# Load the base model and tokenizer
+base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"  # Base model
+adapter_weights_path = "https://huggingface.co/khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs/resolve/main/adapter_model.safetensors"
+# Path to the adapter weights
 
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForCausalLM.from_pretrained(model_path)
+tokenizer = AutoTokenizer.from_pretrained(base_model_path)
+model = AutoModelForCausalLM.from_pretrained(base_model_path)
+
+# Load the adapter weights
+def load_adapter_weights(model, adapter_weights_path):
+    adapter_weights = load_file(adapter_weights_path)
+    model.load_state_dict(adapter_weights, strict=False)  # Apply the weights
+    return model
+
+# Apply adapter weights to the model
+model = load_adapter_weights(model, adapter_weights_path)
+
+# Ensure the model is in evaluation mode
+model.eval()
 
 # Initialize the pipeline
 generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
@@ -24,17 +39,13 @@ generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
 def generate_response(model, tokenizer, instruction, max_new_tokens=128):
     """Generate a response from the model based on an instruction."""
     try:
-        # Format the input as chat messages if necessary
-        messages = [{"role": "user", "content": instruction}]
-        input_text = tokenizer.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
         # Tokenize and generate the output
-        inputs = tokenizer.encode(input_text, return_tensors="pt")
+        inputs = tokenizer(instruction, return_tensors="pt")
+        inputs = {key: value.to(model.device) for key, value in inputs.items()}  # Move tensors to the model's device
        outputs = model.generate(
-            inputs,
+            **inputs,
             max_new_tokens=max_new_tokens,
-            temperature=0.2,
+            temperature=0.7,
             top_p=0.9,
             do_sample=True,
         )
@@ -58,4 +69,4 @@ def generate_text(input: ModelInput):
 
 @app.get("/")
 def root():
-    return {"message": "Welcome to the Hugging Face Model API!"}
+    return {"message": "Welcome to the Hugging Face Model API with Adapter Support!"}