"""Gradio chat UI backed by a GGUF LLaMA model downloaded from Hugging Face.

Importing this module downloads the model file (cached under ``./models``)
and loads it with llama-cpp-python; running it as a script launches the UI.
"""

import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Download the model from Hugging Face
MODEL_REPO = "krishna195/new_model"  # Replace with your repo ID
MODEL_FILE = "unsloth.Q4_K_M.gguf"  # Replace with your file name

model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    cache_dir="./models",  # Cache directory for storing the model
)

# Load the LLaMA model
# NOTE(review): the model is created with llama-cpp-python's default context
# size; consider passing ``n_ctx`` here if long conversations get trimmed or
# replies are cut short.
llm = Llama(model_path=model_path)

_CHAT_ROLES = ("system", "user", "assistant")


def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content messages.

    Accepts both history layouts that ``gr.ChatInterface`` may pass: a list of
    ``[user_text, assistant_text]`` pairs, or a list of dicts carrying
    ``"role"`` and ``"content"`` keys. Entries whose content is not plain text
    (e.g. ``None`` or file payloads) are skipped.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            content = entry.get("content")
            if role in _CHAT_ROLES and isinstance(content, str):
                # Copy only the keys the model needs; drop UI metadata.
                messages.append({"role": role, "content": content})
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            user_text, assistant_text = entry
            if isinstance(user_text, str):
                messages.append({"role": "user", "content": user_text})
            if isinstance(assistant_text, str):
                messages.append({"role": "assistant", "content": assistant_text})
    return messages


def _drop_oldest_exchange(messages):
    """Return ``messages`` without its oldest exchange.

    Removes the first message and any replies that follow it, so the
    remaining list is either empty or starts with a user message.
    """
    remaining = messages[1:]
    while remaining and remaining[0]["role"] != "user":
        remaining = remaining[1:]
    return remaining


# Function for chatbot interaction
def chat_with_llama(user_input, history):
    """Generate the model's reply to ``user_input`` given the chat so far.

    Args:
        user_input: The latest message typed by the user.
        history: Previous turns as supplied by ``gr.ChatInterface`` (pairs of
            user/assistant text, or role/content dicts). May be empty or None.

    Returns:
        The assistant's reply text.

    Raises:
        ValueError: If the completion fails even when only ``user_input`` is
            sent (no history left to trim).
    """
    current_turn = {"role": "user", "content": user_input}
    prior_turns = _history_to_messages(history)

    while True:
        try:
            response = llm.create_chat_completion(
                messages=[*prior_turns, current_turn]
            )
            break
        except ValueError:
            # llama-cpp-python is expected to raise ValueError when the prompt
            # does not fit the context window (TODO confirm against the
            # installed version). Forget the oldest exchange and retry; with
            # no history left this is the original single-message call, so
            # the error is genuine and is re-raised.
            if not prior_turns:
                raise
            prior_turns = _drop_oldest_exchange(prior_turns)

    return response["choices"][0]["message"]["content"]  # Extract response text


# Gradio UI
chatbot_ui = gr.ChatInterface(
    fn=chat_with_llama,
    title="LLaMA Chatbot",
    description="Chat with a fine-tuned LLaMA model hosted on Hugging Face.",
    theme="compact",
)

# Launch Gradio app
if __name__ == "__main__":
    chatbot_ui.launch()