Spaces:

BidhanAcharya
/

fine_tuned_model

Runtime error

App Files Files Community

BidhanAcharya committed on Sep 26, 2024

Commit

9ff433d

verified ·

1 Parent(s): 1f77e35

Create app.py

Browse files

Files changed (1) hide show

app.py +74 -0

app.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import gradio as gr
+from unsloth import FastLanguageModel
+from transformers import TextStreamer
+import torch
+# Load the model and tokenizer
+model_name = "BidhanAcharya/FineTunedQWENoncoding"  # Replace with your actual model path
+max_seq_length = 512  # Example, adjust according to your model
+dtype = torch.float16  # Adjust if necessary (use torch.float32 for CPU)
+load_in_4bit = True  # If needed, set to False if not using 4-bit precision
+# Load the model and tokenizer with the FastLanguageModel method
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name=model_name,
+    max_seq_length=max_seq_length,
+    dtype=dtype,
+    load_in_4bit=load_in_4bit
+)
+# Set the model to inference mode
+FastLanguageModel.for_inference(model)
+# Define the Alpaca prompt format
+alpaca_prompt = "### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n{}"
+# Gradio function for performing inference
+def generate_response(instruction, input_data):
+    # Handle case where input data is empty
+    if input_data.strip() == "":
+        input_data = "No additional input provided."
+    # Format the prompt using the instruction and input data
+    inputs = tokenizer(
+        [
+            alpaca_prompt.format(
+                instruction,  # user-provided instruction
+                input_data,   # optional user input data
+                ""            # output (leave blank for generation)
+            )
+        ],
+        return_tensors="pt"
+    )
+    # Move input tensors to the correct device (GPU/CPU)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    inputs = inputs.to(device)
+    # Generate tokens with the model
+    generated_tokens = model.generate(**inputs, max_new_tokens=500)
+    # Decode the generated tokens into text
+    generated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
+    return generated_text
+# Gradio Interface
+with gr.Blocks() as demo:
+    gr.Markdown("# FastLanguageModel Inference App")
+    instruction_input = gr.Textbox(label="Instruction", placeholder="Enter your instruction here")
+    input_data_input = gr.Textbox(label="Input Data (Optional)", placeholder="Enter your input data here (optional)")
+    output_text = gr.Textbox(label="Generated Response")
+    generate_button = gr.Button("Generate Response")
+    # Connect the Gradio button click event to the response generation function
+    generate_button.click(
+        fn=generate_response,
+        inputs=[instruction_input, input_data_input],
+        outputs=[output_text]
+    )
+# Launch the Gradio app
+demo.launch()