import gradio as gr
from transformers import pipeline
from huggingface_hub import snapshot_download
import torch

import os
import subprocess
import gc

# Original Meta-format Llama 2 checkpoint (gated repo on the Hugging Face Hub).
model_id = "meta-llama/Llama-2-7b"

print("\n\nDownloading model locally...\n\n")

# Fetch the raw checkpoint files into ./llama.
snapshot_download(repo_id=model_id, local_dir="llama")

print("\n\nConverting to Hugging Face format...\n\n")
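# NOTE: converter.py is assumed to be a local copy of transformers'
# convert_llama_weights_to_hf.py, which rewrites the original Meta checkpoint
# in ./llama into a Hugging Face-format model directory at ./model.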
subprocess.run(
    "python converter.py --input_dir llama --model_size 7B --output_dir model".split(" "),
    check=True,  # fail loudly if the conversion script errors out
)
print("\n\nModel converted successfully!\n\n")
print(os.listdir("model"))

# Free memory held over from the download/conversion step before loading the model.
gc.collect()

print("\n\nInitializing model...\n\n")
model_interface = pipeline(
    "text-generation",
    model="./model",
    torch_dtype=torch.bfloat16,  # bfloat16 halves memory use vs. float32; CPU generation is still slow
    device="cpu",
)
print("\n\nModel initialized successfully!\n\n")


def generate_text(text: str) -> str:
    """Generate a completion for the given prompt with greedy decoding."""
    response = model_interface(text, do_sample=False)
    response_text = response[0]["generated_text"]
    return response_text
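
# A "text-generation" pipeline returns a list of dicts, e.g.
# [{"generated_text": "<prompt followed by the model's continuation>"}],
# so generate_text() returns the "generated_text" field of the first entry.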

iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=3, placeholder="Enter your prompt here"),
    outputs=gr.Textbox(lines=5),
    title="Llama 2 Text Generator",
    description="Generate text using the Llama 2 model.",
)

iface.launch()
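
# Run this script directly (e.g. `python app.py`); by default Gradio serves the
# interface locally at http://127.0.0.1:7860.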