|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
import torch |
|
|
|
|
|
|
|
# Hugging Face Hub repository to load.
# The "-GGUF" sibling of this repository distributes llama.cpp-style ".gguf"
# weight files, which a plain `from_pretrained(MODEL_NAME)` call (no
# `gguf_file=` argument) cannot load, so the standard checkpoint of the same
# model is used instead.
MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"

# Device on which the model weights and the input tensors are placed.
DEVICE = "cpu"
|
|
|
|
|
|
|
def load_model_and_tokenizer():
    """
    Fetch the tokenizer and the causal language model named by MODEL_NAME.

    The model is requested in bfloat16 and mapped onto DEVICE.

    Returns:
        tuple: ``(tokenizer, model)``, in that order.
    """
    model_options = {"torch_dtype": torch.bfloat16, "device_map": DEVICE}
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    loaded_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_options)
    return loaded_tokenizer, loaded_model
|
|
|
# Load once at import time; generate_text reads these module-level objects
# on every call instead of reloading the weights.
tokenizer, model = load_model_and_tokenizer()
|
|
|
|
|
|
|
def generate_text(prompt, max_length=100):
    """
    Generate text based on the given prompt.

    Args:
        prompt (str): The input prompt for text generation. An empty or
            missing prompt yields an empty string without calling the model.
        max_length (int | float): Upper bound, in tokens, forwarded to
            ``model.generate`` as ``max_length``; this limit counts the
            prompt tokens as well as the newly generated ones. UI widgets
            may deliver the value as a float, so it is coerced to ``int``.

    Returns:
        str: The decoded first returned sequence with special tokens removed.
    """
    # Nothing to condition on: skip tokenization/generation entirely.
    if not prompt:
        return ""

    encoded = tokenizer(prompt, return_tensors="pt").to(DEVICE)

    # Forward the whole encoding (not just input_ids) so that the attention
    # mask produced by the tokenizer reaches generate() as well.
    output_ids = model.generate(
        **encoded,
        max_length=int(max_length),
        num_return_sequences=1,
    )

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
|
|
|
|
|
|
def gradio_interface():
    """
    Create and launch the Gradio interface.

    Builds a two-input form (a prompt text box and a "Max Length" slider)
    wired to ``generate_text`` with plain-text output, then starts the app
    via ``launch()``.
    """
    # Components are taken from the top-level `gr` namespace and the initial
    # slider position is given as `value=`: the older `gr.inputs.*` namespace
    # and its `default=` keyword are deprecated and absent from current
    # Gradio releases.
    prompt_box = gr.Textbox(lines=2, placeholder="Enter your prompt here...")
    length_slider = gr.Slider(
        minimum=50,
        maximum=500,
        step=10,
        value=100,
        label="Max Length",
    )

    iface = gr.Interface(
        fn=generate_text,
        inputs=[prompt_box, length_slider],
        outputs="text",
        title="Qwen2.5-Coder-0.5B-Instruct-GGUF Text Generation",
        description="Generate text using the Qwen2.5-Coder-0.5B-Instruct-GGUF model.",
    )
    iface.launch()
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the web UI only when this file is executed as a script.
    gradio_interface()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|