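"""Gradio Space: describe an image with failspy/kappa-3-phi-abliterated.

Loads the model with trust_remote_code=True and exposes a prompt + image UI.
Note: prepare_image() and the pixel_values argument to generate() are assumed
to come from the model's bundled custom code, not from the core transformers
API.
"""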
import gradio as gr
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import warnings
# disable some warnings
transformers.logging.set_verbosity_error()
transformers.logging.disable_progress_bar()
warnings.filterwarnings('ignore')
# Set device to GPU if available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Model ID on the Hugging Face Hub (swap in a local path if you have the weights downloaded)
model_name = 'failspy/kappa-3-phi-abliterated'
# Create the model; device_map="auto" lets accelerate handle placement
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
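# device_map="auto" shards the model across available devices via accelerate;
# on a single GPU this is effectively the same as loading everything onto cuda:0.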
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)
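# The tokenizer ships with the model's chat template, which
# apply_chat_template() relies on inside inference() below.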
def inference(prompt, image, temperature, beam_size):
    # Phi-3 uses a chat template
    messages = [
        {"role": "user", "content": f"Can you describe this image?\n{prompt}"}
    ]
    # Apply the chat template and tokenize; return_dict=True is required so
    # that inputs exposes .input_ids and .attention_mask below
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt"
    ).to(device)
    # Preprocess the image with the model's own helper (supplied by its
    # custom code via trust_remote_code; not part of core transformers)
    pixel_values = model.prepare_image(image).to(device)
    # Debug prints to confirm everything landed on the same device
    print(f"Device of model: {next(model.parameters()).device}")
    print(f"Device of inputs: {inputs.input_ids.device}")
    print(f"Device of pixel_values: {pixel_values.device}")
    # Generate; torch.autocast replaces the deprecated torch.cuda.amp.autocast
    # and also works when running on CPU
    with torch.autocast(device_type=device.type):
        output_ids = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            pixel_values=pixel_values,
            max_new_tokens=1024,
            do_sample=True,  # temperature only has an effect when sampling
            temperature=temperature,
            num_beams=beam_size,
            use_cache=True
        )[0]
    # Drop the prompt tokens and decode only the newly generated text
    return tokenizer.decode(
        output_ids[inputs.input_ids.shape[1]:],
        skip_special_tokens=True
    ).strip()
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Prompt", placeholder="Describe this image in detail")
            image_input = gr.Image(label="Image", type="pil")
            temperature_input = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
            beam_size_input = gr.Slider(minimum=1, maximum=10, value=4, step=1, label="Beam Size")
            submit_button = gr.Button("Submit")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    submit_button.click(
        fn=inference,
        inputs=[prompt_input, image_input, temperature_input, beam_size_input],
        outputs=output_text
    )
# share=True also serves the app through a temporary public gradio.live link
demo.launch(share=True)