# Import necessary libraries
import torch
from unsloth import FastLanguageModel
import gradio as gr
# Model and tokenizer configuration
model_name = "BidhanAcharya/FineTunedQWENoncoding"  # Replace with your actual model path
max_seq_length = 512  # Example value; adjust to match your model's training sequence length

# Check if a GPU is available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
load_in_4bit = torch.cuda.is_available()  # Use 4-bit quantization on GPU, standard precision on CPU
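# Note (an assumption based on Unsloth's documented defaults, not part of the
# original app): dtype=None is also accepted and lets Unsloth auto-select the
# best precision for the detected GPU (float16 on older cards such as the T4,
# bfloat16 on Ampere and newer).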
# Load the model and tokenizer with the FastLanguageModel method
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Put the model into Unsloth's optimized inference mode
FastLanguageModel.for_inference(model)

# Move the model to the appropriate device (GPU/CPU)
model = model.to(device)
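# Caveat: when load_in_4bit is True, from_pretrained already places the
# quantized model on the GPU, and some transformers releases reject .to() on
# bitsandbytes-quantized models; guarding the call with `if not load_in_4bit:`
# may be necessary (worth verifying against your installed versions).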
# Define the Alpaca prompt format
alpaca_prompt = "### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n{}"
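# For illustration, a hypothetical call (not part of the original app) such as
#   alpaca_prompt.format("Summarize the text.", "Unsloth speeds up fine-tuning.", "")
# produces the prompt:
#   ### Instruction:
#   Summarize the text.
#
#   ### Input:
#   Unsloth speeds up fine-tuning.
#
#   ### Response:
# with the response section left empty so the model generates the answer there.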
# Gradio function for performing inference
def generate_response(instruction, input_data):
    # Handle the case where no input data was provided
    if input_data.strip() == "":
        input_data = "No additional input provided."

    # Format the prompt using the instruction and input data
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                instruction,  # user-provided instruction
                input_data,   # optional user input data
                "",           # output (left blank for generation)
            )
        ],
        return_tensors="pt",
    )

    # Move input tensors to the correct device (GPU/CPU)
    inputs = inputs.to(device)

    # Generate tokens with the model
    generated_tokens = model.generate(**inputs, max_new_tokens=500)

    # Decode the generated tokens into text
    generated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
    return generated_text
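# Because generate() returns the prompt tokens followed by the newly generated
# tokens, generated_text above echoes the full Alpaca prompt. A minimal
# post-processing sketch (a hypothetical helper, not part of the original app);
# generate_response could return extract_response(generated_text) instead:
def extract_response(full_text):
    marker = "### Response:"
    # Everything after the last response header is the model's answer
    return full_text.split(marker)[-1].strip() if marker in full_text else full_text.strip()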
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# FastLanguageModel Inference App")
    instruction_input = gr.Textbox(label="Instruction", placeholder="Enter your instruction here")
    input_data_input = gr.Textbox(label="Input Data (Optional)", placeholder="Enter your input data here (optional)")
    output_text = gr.Textbox(label="Generated Response")
    generate_button = gr.Button("Generate Response")

    # Connect the button click event to the response generation function
    generate_button.click(
        fn=generate_response,
        inputs=[instruction_input, input_data_input],
        outputs=[output_text],
    )
# Launch the Gradio app
demo.launch()
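# On a hosted Space the bare launch() above is sufficient. For local runs,
# standard Gradio options apply; a sketch (assumed typical local settings,
# not part of the original Space):
# demo.launch(share=True, server_port=7860)  # public share link, fixed port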