# Import necessary libraries
import torch
from unsloth import FastLanguageModel
import gradio as gr
from transformers import TextStreamer  # optional, used for streaming generation (see below)
# Model and tokenizer configuration
model_name = "BidhanAcharya/FineTunedQWENoncoding"  # Replace with your actual model path
max_seq_length = 512  # Example value; adjust according to your model

# Check if a GPU is available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
load_in_4bit = torch.cuda.is_available()  # Use 4-bit quantization if a GPU is present, otherwise standard precision
# Load the model and tokenizer with the FastLanguageModel method
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Set the model to inference mode (enables Unsloth's faster inference path)
FastLanguageModel.for_inference(model)

# Move the model to the appropriate device. 4-bit (bitsandbytes) models are
# already placed on the GPU at load time and do not support .to(), so only
# move the model when standard precision is used.
if not load_in_4bit:
    model = model.to(device)
# Define the Alpaca prompt format
alpaca_prompt = "### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n{}"
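# For example, alpaca_prompt.format("Write a haiku", "No additional input provided.", "")
# renders as (illustrative only):
#
# ### Instruction:
# Write a haiku
#
# ### Input:
# No additional input provided.
#
# ### Response:
#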
# Gradio function for performing inference
def generate_response(instruction, input_data):
    # Handle the case where the input data is empty
    if input_data.strip() == "":
        input_data = "No additional input provided."

    # Format the prompt with the instruction and input data, then tokenize it
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                instruction,  # user-provided instruction
                input_data,   # optional user input data
                "",           # response (left blank for generation)
            )
        ],
        return_tensors="pt",
    )
    # Move input tensors to the same device as the model (GPU/CPU)
    inputs = inputs.to(device)

    # Generate tokens with the model
    generated_tokens = model.generate(**inputs, max_new_tokens=500)
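
    # Optional: stream tokens to stdout as they are generated; this is what
    # the TextStreamer import above is for. Uncomment to enable (a sketch
    # using the standard transformers TextStreamer API):
    # streamer = TextStreamer(tokenizer, skip_prompt=True)
    # generated_tokens = model.generate(**inputs, streamer=streamer, max_new_tokens=500)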
    # Decode the generated tokens into text
    generated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
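
    # The decoded string echoes the full Alpaca prompt; keep only the text
    # after the "### Response:" marker so the UI shows just the answer
    # (a small post-processing step, assuming the template above is used)
    if "### Response:" in generated_text:
        generated_text = generated_text.split("### Response:")[-1].strip()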
    return generated_text
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# FastLanguageModel Inference App")
    instruction_input = gr.Textbox(label="Instruction", placeholder="Enter your instruction here")
    input_data_input = gr.Textbox(label="Input Data (Optional)", placeholder="Enter your input data here (optional)")
    output_text = gr.Textbox(label="Generated Response")
    generate_button = gr.Button("Generate Response")

    # Connect the button click event to the response generation function
    generate_button.click(
        fn=generate_response,
        inputs=[instruction_input, input_data_input],
        outputs=[output_text],
    )
# Launch the Gradio app
demo.launch()
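
# When running outside a Hugging Face Space (e.g. locally), you can pass
# share=True to expose the app through a temporary public URL:
# demo.launch(share=True)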