BidhanAcharya committed (verified)
Commit 8644f7f · Parent(s): 6dd051a

Update app.py

Files changed (1)
  1. app.py +11 -5
app.py CHANGED
@@ -1,13 +1,17 @@
-import gradio as gr
+# Import necessary libraries
+import torch
 from unsloth import FastLanguageModel
+import gradio as gr
 from transformers import TextStreamer
-import torch
 
 # Load the model and tokenizer
 model_name = "BidhanAcharya/FineTunedQWENoncoding" # Replace with your actual model path
 max_seq_length = 512 # Example, adjust according to your model
-dtype = torch.float16 # Adjust if necessary (use torch.float32 for CPU)
-load_in_4bit = True # If needed, set to False if not using 4-bit precision
+
+# Check if a GPU is available, otherwise fall back to CPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+load_in_4bit = torch.cuda.is_available() # Use 4-bit precision if a GPU is present, otherwise use standard precision
 
 # Load the model and tokenizer with the FastLanguageModel method
 model, tokenizer = FastLanguageModel.from_pretrained(
@@ -20,6 +24,9 @@ model, tokenizer = FastLanguageModel.from_pretrained(
 # Set the model to inference mode
 FastLanguageModel.for_inference(model)
 
+# Move the model to the appropriate device (GPU/CPU)
+model = model.to(device)
+
 # Define the Alpaca prompt format
 alpaca_prompt = "### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n{}"
 
@@ -42,7 +49,6 @@ def generate_response(instruction, input_data):
     )
 
     # Move input tensors to the correct device (GPU/CPU)
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     inputs = inputs.to(device)
 
     # Generate tokens with the model
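
For context, here is a sketch of the top of app.py as it reads after this commit, reconstructed from the hunks above. The arguments to FastLanguageModel.from_pretrained fall outside the diff context, so they are shown as a typical unsloth-style call reusing the variables defined earlier (an assumption, not part of the diff); the body of generate_response is likewise omitted.

    # Import necessary libraries
    import torch
    from unsloth import FastLanguageModel
    import gradio as gr
    from transformers import TextStreamer

    # Load the model and tokenizer
    model_name = "BidhanAcharya/FineTunedQWENoncoding" # Replace with your actual model path
    max_seq_length = 512 # Example, adjust according to your model

    # Check if a GPU is available, otherwise fall back to CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    load_in_4bit = torch.cuda.is_available() # Use 4-bit precision if a GPU is present, otherwise use standard precision

    # Load the model and tokenizer with the FastLanguageModel method
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,          # assumed keyword arguments; the actual call sits outside the diff context
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
    )

    # Set the model to inference mode
    FastLanguageModel.for_inference(model)

    # Move the model to the appropriate device (GPU/CPU)
    model = model.to(device)

The net effect of the commit is that device selection happens once, up front: the same device variable is reused both when moving the model after loading and when moving the tokenized inputs inside generate_response, instead of being recomputed per request.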