BidhanAcharya committed (verified)
Commit 8644f7f · Parent(s): 6dd051a

Update app.py

Files changed (1)
  1. app.py +11 -5
app.py CHANGED
@@ -1,13 +1,17 @@
-import gradio as gr
+# Import necessary libraries
+import torch
 from unsloth import FastLanguageModel
+import gradio as gr
 from transformers import TextStreamer
-import torch
 
 # Load the model and tokenizer
 model_name = "BidhanAcharya/FineTunedQWENoncoding" # Replace with your actual model path
 max_seq_length = 512 # Example, adjust according to your model
-dtype = torch.float16 # Adjust if necessary (use torch.float32 for CPU)
-load_in_4bit = True # If needed, set to False if not using 4-bit precision
+
+# Check if a GPU is available, otherwise fall back to CPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+load_in_4bit = torch.cuda.is_available() # Use 4-bit precision if a GPU is present, otherwise use standard precision
 
 # Load the model and tokenizer with the FastLanguageModel method
 model, tokenizer = FastLanguageModel.from_pretrained(
@@ -20,6 +24,9 @@ model, tokenizer = FastLanguageModel.from_pretrained(
 # Set the model to inference mode
 FastLanguageModel.for_inference(model)
 
+# Move the model to the appropriate device (GPU/CPU)
+model = model.to(device)
+
 # Define the Alpaca prompt format
 alpaca_prompt = "### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n{}"
 
@@ -42,7 +49,6 @@ def generate_response(instruction, input_data):
     )
 
     # Move input tensors to the correct device (GPU/CPU)
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     inputs = inputs.to(device)
 
     # Generate tokens with the model
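
For context, here is a sketch of the top of app.py as it reads after this commit, reconstructed from the hunks above. The arguments to FastLanguageModel.from_pretrained fall outside the diff context, so they are shown as a typical unsloth-style call reusing the variables defined earlier (an assumption, not part of the diff); the body of generate_response is likewise omitted.

    # Import necessary libraries
    import torch
    from unsloth import FastLanguageModel
    import gradio as gr
    from transformers import TextStreamer

    # Load the model and tokenizer
    model_name = "BidhanAcharya/FineTunedQWENoncoding" # Replace with your actual model path
    max_seq_length = 512 # Example, adjust according to your model

    # Check if a GPU is available, otherwise fall back to CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    load_in_4bit = torch.cuda.is_available() # Use 4-bit precision if a GPU is present, otherwise use standard precision

    # Load the model and tokenizer with the FastLanguageModel method
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,          # assumed keyword arguments; the actual call sits outside the diff context
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
    )

    # Set the model to inference mode
    FastLanguageModel.for_inference(model)

    # Move the model to the appropriate device (GPU/CPU)
    model = model.to(device)

The net effect of the commit is that device selection happens once, up front: the same device variable is reused both when moving the model after loading and when moving the tokenized inputs inside generate_response, instead of being recomputed per request.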