# Import necessary libraries
import torch
from unsloth import FastLanguageModel
import gradio as gr
from transformers import TextStreamer  # optional, used for streaming generation (see below)
# Model and tokenizer configuration
model_name = "BidhanAcharya/FineTunedQWENoncoding"  # Replace with your actual model path
max_seq_length = 512  # Example value; adjust according to your model

# Check if a GPU is available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
load_in_4bit = torch.cuda.is_available()  # Use 4-bit quantization if a GPU is present, otherwise standard precision
# Load the model and tokenizer with the FastLanguageModel method
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Set the model to inference mode (enables Unsloth's faster inference path)
FastLanguageModel.for_inference(model)

# Move the model to the appropriate device. 4-bit (bitsandbytes) models are
# already placed on the GPU at load time and do not support .to(), so only
# move the model when standard precision is used.
if not load_in_4bit:
    model = model.to(device)
# Define the Alpaca prompt format
alpaca_prompt = "### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n{}"
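# For example, alpaca_prompt.format("Write a haiku", "No additional input provided.", "")
# renders as (illustrative only):
#
# ### Instruction:
# Write a haiku
#
# ### Input:
# No additional input provided.
#
# ### Response:
#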
# Gradio function for performing inference
def generate_response(instruction, input_data):
    # Handle the case where the input data is empty
    if input_data.strip() == "":
        input_data = "No additional input provided."

    # Format the prompt with the instruction and input data, then tokenize it
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                instruction,  # user-provided instruction
                input_data,   # optional user input data
                "",           # response (left blank for generation)
            )
        ],
        return_tensors="pt",
    )
    # Move input tensors to the same device as the model (GPU/CPU)
    inputs = inputs.to(device)

    # Generate tokens with the model
    generated_tokens = model.generate(**inputs, max_new_tokens=500)
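
    # Optional: stream tokens to stdout as they are generated; this is what
    # the TextStreamer import above is for. Uncomment to enable (a sketch
    # using the standard transformers TextStreamer API):
    # streamer = TextStreamer(tokenizer, skip_prompt=True)
    # generated_tokens = model.generate(**inputs, streamer=streamer, max_new_tokens=500)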
    # Decode the generated tokens into text
    generated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
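
    # The decoded string echoes the full Alpaca prompt; keep only the text
    # after the "### Response:" marker so the UI shows just the answer
    # (a small post-processing step, assuming the template above is used)
    if "### Response:" in generated_text:
        generated_text = generated_text.split("### Response:")[-1].strip()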
    return generated_text
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# FastLanguageModel Inference App")
    instruction_input = gr.Textbox(label="Instruction", placeholder="Enter your instruction here")
    input_data_input = gr.Textbox(label="Input Data (Optional)", placeholder="Enter your input data here (optional)")
    output_text = gr.Textbox(label="Generated Response")
    generate_button = gr.Button("Generate Response")

    # Connect the button click event to the response generation function
    generate_button.click(
        fn=generate_response,
        inputs=[instruction_input, input_data_input],
        outputs=[output_text],
    )
# Launch the Gradio app
demo.launch()
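
# When running outside a Hugging Face Space (e.g. locally), you can pass
# share=True to expose the app through a temporary public URL:
# demo.launch(share=True)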