import gc
import os
import subprocess

import gradio as gr
import torch
from huggingface_hub import snapshot_download
from transformers import pipeline

model_id = "meta-llama/Llama-2-7b"

# Download the raw Llama 2 checkpoint from the Hugging Face Hub.
print("\n\nSaving model to local....\n\n")
snapshot_download(repo_id=model_id, local_dir="llama")

# Convert the raw weights into the Hugging Face format that transformers
# expects (converter.py is presumably a wrapper around the official
# convert_llama_weights_to_hf.py script, given the matching flags).
print("\n\nConverting to suitable type...\n\n")
subprocess.run(
    ["python", "converter.py", "--input_dir", "llama", "--model_size", "7B", "--output_dir", "model"],
    check=True,  # stop the script if conversion fails
)
print("\n\nModel converted successfully!!\n\n")
print(os.listdir("model"))

# Free memory held by the download/conversion step before loading the model.
gc.collect()

# Load the converted model into a text-generation pipeline on CPU.
print("\n\nInitializing model...\n\n")
model_interface = pipeline(
    "text-generation",
    model="./model",
    torch_dtype=torch.bfloat16,
    device="cpu",
)
print("\n\nModel initialized successfully!!\n\n")


def generate_text(text: str) -> str:
    # Greedy decoding (do_sample=False); the pipeline returns a list of
    # dicts holding the prompt plus completion under "generated_text".
    response = model_interface(text, do_sample=False)
    return response[0]["generated_text"]


# Create the Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=3, placeholder="Enter your prompt here"),
    outputs=gr.Textbox(lines=5),
    title="Llama 2 Text Generator",
    description="Generate text using the Llama 2 model.",
)

iface.launch()
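
# --- Optional: querying the running app programmatically ---
# A minimal sketch, assuming the app is served at Gradio's default local
# address (http://127.0.0.1:7860) and that the gradio_client package is
# installed. Left commented out because iface.launch() blocks, so this
# would be run from a separate process or notebook:
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict("Once upon a time", api_name="/predict")
#   print(result)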