# Import necessary libraries
import torch
from unsloth import FastLanguageModel
import gradio as gr
from transformers import TextStreamer  # used by the optional streaming sketch at the end of this file

# Model and tokenizer settings
model_name = "BidhanAcharya/FineTunedQWENoncoding"  # Replace with your actual model path
max_seq_length = 512  # Example value; adjust according to your model

# Check if a GPU is available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
load_in_4bit = torch.cuda.is_available()  # Use 4-bit quantization if a GPU is present, otherwise standard precision

# Load the model and tokenizer with the FastLanguageModel method
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Set the model to inference mode
FastLanguageModel.for_inference(model)

# Move the model to the appropriate device. Note: 4-bit bitsandbytes models are
# already placed on the GPU at load time and do not support .to(), so only move
# the model when it was loaded in full precision.
if not load_in_4bit:
    model = model.to(device)

# Define the Alpaca prompt format
alpaca_prompt = "### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n{}"

# Gradio function for performing inference
def generate_response(instruction, input_data):
    # Handle the case where the input data is empty
    if input_data.strip() == "":
        input_data = "No additional input provided."

    # Format the prompt using the instruction and input data
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                instruction,  # user-provided instruction
                input_data,   # optional user input data
                "",           # response field left blank for generation
            )
        ],
        return_tensors="pt",
    )

    # Move input tensors to the same device as the model (GPU/CPU)
    inputs = inputs.to(device)

    # Generate tokens with the model
    generated_tokens = model.generate(**inputs, max_new_tokens=500)

    # Decode only the newly generated tokens, skipping the echoed prompt
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = tokenizer.decode(
        generated_tokens[0][prompt_length:], skip_special_tokens=True
    )
    return generated_text

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# FastLanguageModel Inference App")
    instruction_input = gr.Textbox(label="Instruction", placeholder="Enter your instruction here")
    input_data_input = gr.Textbox(label="Input Data (Optional)", placeholder="Enter your input data here (optional)")
    output_text = gr.Textbox(label="Generated Response")
    generate_button = gr.Button("Generate Response")

    # Connect the Gradio button click event to the response generation function
    generate_button.click(
        fn=generate_response,
        inputs=[instruction_input, input_data_input],
        outputs=[output_text],
    )

# Launch the Gradio app
demo.launch()
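
# Optional: a minimal streaming sketch using the TextStreamer imported above.
# This is an assumption about how you might watch tokens arrive during local
# debugging, not part of the app itself: TextStreamer prints tokens to stdout
# as they are generated, rather than streaming into the Gradio UI (which would
# need a different mechanism, e.g. TextIteratorStreamer with a generator
# handler). The function name is hypothetical; uncomment to experiment.
#
# def generate_response_streaming(instruction, input_data):
#     if input_data.strip() == "":
#         input_data = "No additional input provided."
#     inputs = tokenizer(
#         [alpaca_prompt.format(instruction, input_data, "")],
#         return_tensors="pt",
#     ).to(device)
#     streamer = TextStreamer(tokenizer, skip_prompt=True)  # don't echo the prompt
#     generated_tokens = model.generate(**inputs, streamer=streamer, max_new_tokens=500)
#     prompt_length = inputs["input_ids"].shape[1]
#     return tokenizer.decode(generated_tokens[0][prompt_length:], skip_special_tokens=True)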