Ketengan-Diffusion-Lab's picture
Update app.py
c33e052 verified
raw
history blame
2.66 kB
import gradio as gr
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import warnings
# Quiet the console: only transformers errors are logged, its progress bars
# are disabled, and every Python warning is ignored.
transformers.logging.set_verbosity_error()
transformers.logging.disable_progress_bar()
warnings.filterwarnings('ignore')

# Set device to GPU if available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Checkpoint identifier handed to from_pretrained; replace it with a local
# directory to load from disk instead.
model_name = 'failspy/kappa-3-phi-abliterated'

# Load the model. Half precision is only used on GPU: float16 kernels on CPU
# are slow and incompletely supported, so the CPU fallback loads in float32.
# SECURITY NOTE: trust_remote_code=True executes Python code shipped with the
# checkpoint repository; only use it with repositories you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
    device_map="auto",
    trust_remote_code=True,
)

# Tokenizer matching the checkpoint (same remote-code caveat as above).
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
def inference(prompt, image, temperature, beam_size):
    """Generate a text answer for ``prompt`` and ``image`` with the loaded model.

    Args:
        prompt: User text appended after the fixed "Can you describe this
            image?" question.
        image: Image supplied by the UI; ``None`` when nothing was uploaded.
        temperature: Sampling temperature forwarded to ``model.generate``.
        beam_size: Number of beams forwarded to ``model.generate``.

    Returns:
        The decoded text of the newly generated tokens only (the prompt
        tokens are sliced off), with special tokens skipped and surrounding
        whitespace stripped.

    Raises:
        gr.Error: If no image was provided.
    """
    # Fail with a readable UI message instead of crashing inside the model.
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    # Phi-3 uses a chat template
    messages = [
        {"role": "user", "content": f"Can you describe this image?\n{prompt}"}
    ]

    # return_dict=True is needed so the result exposes `.input_ids`; without
    # it apply_chat_template may return a bare tensor of token ids.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(device)
    input_ids = inputs.input_ids
    prompt_length = input_ids.shape[1]

    # Process the image.
    # NOTE(review): `prepare_image` is not a standard transformers method; it
    # must come from the checkpoint's remote code - confirm this model has it.
    pixel_values = model.prepare_image(image).to(device)

    # Add debug prints
    print(f"Device of model: {next(model.parameters()).device}")
    print(f"Device of inputs: {input_ids.device}")
    print(f"Device of pixel_values: {pixel_values.device}")

    # Mixed precision is only meaningful on CUDA; on CPU the context is a
    # disabled no-op (torch.cuda.amp.autocast is deprecated and warns there).
    with torch.autocast(device_type=device.type, enabled=device.type == "cuda"):
        output_ids = model.generate(
            input_ids,
            pixel_values=pixel_values,
            max_new_tokens=1024,
            # temperature is ignored unless sampling is enabled.
            do_sample=True,
            temperature=temperature,
            # UI sliders may deliver floats; num_beams must be an integer.
            num_beams=int(beam_size),
            use_cache=True,
        )[0]

    # Drop the echoed prompt tokens and decode only the continuation.
    return tokenizer.decode(
        output_ids[prompt_length:], skip_special_tokens=True
    ).strip()
# Two-column layout: inputs and the submit button on the left, the generated
# text on the right.
with gr.Blocks() as demo:
    with gr.Row():
        # Left column: everything the user provides.
        with gr.Column():
            prompt_input = gr.Textbox(
                placeholder="Describe this image in detail",
                label="Prompt",
            )
            image_input = gr.Image(type="pil", label="Image")
            temperature_input = gr.Slider(
                label="Temperature",
                minimum=0.1,
                maximum=2.0,
                step=0.1,
                value=0.7,
            )
            beam_size_input = gr.Slider(
                label="Beam Size",
                minimum=1,
                maximum=10,
                step=1,
                value=4,
            )
            submit_button = gr.Button("Submit")

        # Right column: the model's answer.
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    # Clicking the button calls `inference` with the four input components,
    # in the order of its parameters, and writes the result to the output box.
    submit_button.click(
        inference,
        inputs=[
            prompt_input,
            image_input,
            temperature_input,
            beam_size_input,
        ],
        outputs=output_text,
    )

# Start the app; share=True is passed through to Gradio's launch().
demo.launch(share=True)