import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load model (a GPTQ checkpoint; transformers places it on the GPU via
# device_map="auto" and relies on its GPTQ integration for dequantization)
model_name = "TheBloke/MythoMax-L2-13B-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", torch_dtype=torch.float16
)

def chat(message, history):
    # Convert Gradio's [user, assistant] history pairs into the role/content
    # dicts that apply_chat_template expects, then append the new user turn.
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=300)
    # Decode only the newly generated tokens, skipping the echoed prompt.
    response = tokenizer.decode(
        outputs[0, inputs.input_ids.shape[1]:], skip_special_tokens=True
    )
    return response

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## MythoMax AI Chatbot 💬")
    chatbox = gr.ChatInterface(chat)

demo.launch()
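
# Usage note (assumptions, not part of the original script): loading a GPTQ
# checkpoint this way assumes the optimum and auto-gptq packages are installed
# alongside a CUDA-enabled torch build, and that the tokenizer ships a chat
# template. To smoke-test generation without the browser UI, chat() can be
# called directly with an empty history, e.g.:
#
#   print(chat("Give me a one-line greeting.", []))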