# app.py
import gradio as gr
import torch
from functools import lru_cache
from transformers import AutoTokenizer, AutoModelForCausalLM

# Initialize model and tokenizer.
# lru_cache keeps the most recently loaded model in memory so repeated
# queries with the same size selection do not reload the weights each time.
@lru_cache(maxsize=1)
def load_model(model_size: str = "7B"):
    """
    Load the Qwen2.5-Coder model and tokenizer for the selected size.
    """
    model_map = {
        "0.5B": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
        "1.5B": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
        "3B": "Qwen/Qwen2.5-Coder-3B-Instruct",
        "7B": "Qwen/Qwen2.5-Coder-7B-Instruct",
        "14B": "Qwen/Qwen2.5-Coder-14B-Instruct",
        "32B": "Qwen/Qwen2.5-Coder-32B-Instruct",
    }
    # Fall back to 7B if an unknown size is passed
    model_id = model_map.get(model_size, "Qwen/Qwen2.5-Coder-7B-Instruct")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,  # ~2 bytes/parameter in fp16 (~64 GB for 32B)
        device_map="auto",
    )
    return model, tokenizer

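# Quick smoke test (a sketch, not run at import time; assumes the chosen
# checkpoint fits on the available hardware):
#
#   model, tokenizer = load_model("0.5B")
#   print(model.config.model_type)  # expect something like "qwen2"
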
def process_query(query: str, model_size: str = "7B") -> str:
    """
    Process a single query and return the generated response.
    """
    if not query:
        return ""
    try:
        model, tokenizer = load_model(model_size)
        # Prepare the input
        inputs = tokenizer(query, return_tensors="pt").to(model.device)
        # Generate the response; do_sample=True is required for
        # temperature/top_p to take effect
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                # Some tokenizers define no pad token; fall back to EOS
                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
            )
        # Decode only the newly generated tokens instead of string-replacing
        # the prompt, which could corrupt answers that quote the query
        new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    except Exception as e:
        return f"Error: {e}"

def main():
    with gr.Blocks() as demo:
        with gr.Row():
            model_size = gr.Radio(
                choices=["0.5B", "1.5B", "3B", "7B", "14B", "32B"],
                label="Qwen2.5-Coder Model Size:",
                value="32B",
            )
        with gr.Row():
            input_text = gr.Textbox(
                lines=5,
                label="Input",
                placeholder="Enter your query here...",
            )
        with gr.Row():
            output_text = gr.Textbox(
                lines=10,
                label="Output",
            )
        submit_btn = gr.Button("Generate")
        submit_btn.click(
            fn=process_query,
            inputs=[input_text, model_size],
            outputs=output_text,
        )
    demo.launch(max_threads=5)

if __name__ == "__main__":
    main()
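
# Programmatic access (a sketch, assuming the app is running locally on
# Gradio's default port; the endpoint name may differ in your Gradio version):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict(
#       "Write a FizzBuzz function in Python",  # input_text
#       "0.5B",                                 # model_size
#       api_name="/process_query",
#   )
#   print(result)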