"""Gradio demo that serves text generation from a Mongolian Llama 3 checkpoint."""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub checkpoint used for both the tokenizer and the model.
model_name = "Dorjzodovsuren/Mongolian_Llama3-v0.1"

# Generation settings.
MAX_NEW_TOKENS = 100  # cap on newly generated tokens; the prompt is not counted
TEMPERATURE = 0.7  # adjust for creativity
TOP_P = 0.9  # adjust for response diversity

# Loaded once at module level so every request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


def generate_response(input_text: str) -> str:
    """Generate a model continuation for *input_text*.

    Args:
        input_text: The user's prompt.

    Returns:
        The decoded output sequence with special tokens removed. The whole
        sequence returned by ``model.generate`` is decoded, so for a causal
        LM the text normally starts with the prompt itself. An empty string
        is returned when the prompt is empty or only whitespace.
    """
    # Nothing to condition on: skip the expensive generate() call.
    if not input_text or not input_text.strip():
        return ""

    # Tokenize and place the tensors on the same device as the model weights.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # Bound the reply length independently of the prompt length;
            # ``max_length`` would count the prompt tokens as well.
            max_new_tokens=MAX_NEW_TOKENS,
            # ``temperature`` and ``top_p`` only take effect when sampling
            # is enabled; without this flag decoding is greedy.
            do_sample=True,
            temperature=TEMPERATURE,
            top_p=TOP_P,
        )

    # Only one prompt is submitted, so the first sequence is the result.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Text-in / text-out web UI around generate_response.
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Mongolian Llama3 Chatbot",
    description="Ask anything in Mongolian!",
)

# Start the web server only when run as a script, so importing this module
# (e.g. to reuse generate_response) does not launch the app.
if __name__ == "__main__":
    iface.launch()