VQASynth / app.py
smellslikeml
update app
1cd40cd
raw
history blame
3.56 kB
"""SpaceLlama3.1 demo gradio app."""
import datetime
import logging
import os
import gradio as gr
import torch
import PIL.Image
from prismatic import load
from huggingface_hub import login
# Authenticate with the Hugging Face Hub
def authenticate_huggingface():
    """Log in to the Hugging Face Hub using the token in ``HF_TOKEN``.

    Raises:
        ValueError: If the ``HF_TOKEN`` environment variable is unset or empty.
    """
    token = os.environ.get("HF_TOKEN")
    # Guard clause: without a token there is nothing to log in with.
    if not token:
        raise ValueError("Hugging Face API token not found. Please set it as an environment variable named 'HF_TOKEN'.")
    login(token=token)
# Call the authentication function once at the start.
# This runs at import time, so loading this module without HF_TOKEN set
# raises ValueError before any UI is built.
authenticate_huggingface()
# Markdown rendered at the top of the demo page: a title, a row of project
# links, and a disclaimer. The literal "\n\n" sequences are escape sequences
# (this is not a raw string), so they become blank lines in the output.
INTRO_TEXT = """SpaceLlama3.1 demo\n\n
| [Model](https://huggingface.co/remyxai/SpaceLlama3.1)
| [GitHub](https://github.com/remyxai/VQASynth/tree/main)
| [Demo](https://huggingface.co/spaces/remyxai/SpaceLlama3.1)
| [Discord](https://discord.gg/DAy3P5wYJk)
\n\n
**This is an experimental research model.** Make sure to add appropriate guardrails when using the model for applications.
"""
# Models already loaded in this process, keyed by model location, so that
# repeated requests reuse the same weights instead of reloading them.
_VLM_CACHE = {}


def _get_vlm(model_location, device):
    """Return the model for *model_location*, loading and placing it on first use."""
    vlm = _VLM_CACHE.get(model_location)
    if vlm is None:
        vlm = load(model_location)  # No need to pass the token again
        vlm.to(device, dtype=torch.bfloat16)
        _VLM_CACHE[model_location] = vlm
    return vlm


def compute(image, prompt, model_location):
    """Runs model inference.

    Args:
        image: Either a filesystem path to an image (opened and converted to
            RGB here) or an already-loaded image object, passed through as-is.
        prompt: The user's question or instruction about the image.
        model_location: Identifier handed to ``prismatic.load``.

    Returns:
        The generated text, truncated at the first ``"</s>"`` marker.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    if image is None:
        raise gr.Error("Image required")
    logging.info('prompt="%s"', prompt)

    # Open the image file. ``convert`` returns a fully loaded copy, so the
    # underlying file handle can be closed as soon as the conversion is done.
    if isinstance(image, str):
        with PIL.Image.open(image) as image_file:
            image = image_file.convert("RGB")

    # Set device and fetch the model (loaded only on the first request).
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    vlm = _get_vlm(model_location, device)

    # Prepare prompt.
    # NOTE(review): assumes get_prompt_builder() hands back a fresh builder on
    # each call, so turns do not accumulate across requests -- confirm.
    prompt_builder = vlm.get_prompt_builder()
    prompt_builder.add_turn(role="human", message=prompt)
    prompt_text = prompt_builder.get_prompt()

    # Generate the text based on image and prompt
    generated_text = vlm.generate(
        image,
        prompt_text,
        do_sample=True,
        temperature=0.1,
        max_new_tokens=512,
        min_length=1,
    )
    # Keep only the text before the end-of-sequence marker.
    output = generated_text.split("</s>")[0]
    logging.info('output="%s"', output)
    return output
def reset():
    """Return cleared values for the prompt box and the image input, in that order."""
    cleared_prompt, cleared_image = "", None
    return cleared_prompt, cleared_image
def create_app():
    """Creates demo UI.

    Lays out an image input, a prompt box, Run/Clear buttons and an output
    panel, and wires the buttons to ``compute`` and ``reset``.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    # Model location
    model_location = "remyxai/SpaceLlama3.1"  # Update as needed

    def run_inference(image_path, prompt_text):
        """Run ``compute`` for the fixed model and format its answer for the output panel."""
        # Event ``inputs`` must be Gradio components, so the constant model
        # location is bound here rather than passed through ``inputs``.
        answer = compute(image_path, prompt_text, model_location)
        # HighlightedText takes a list of (text, label) pairs; a None label
        # shows the text without highlighting.
        return [(answer, None)]

    with gr.Blocks() as demo:
        # Main UI structure
        gr.Markdown(INTRO_TEXT)
        with gr.Row():
            image = gr.Image(value=None, label="Image", type="filepath", visible=True)  # input
            with gr.Column():
                prompt = gr.Textbox(value="", label="Prompt", visible=True)
                model_info = gr.Markdown(label="Model Info")
                run = gr.Button("Run", variant="primary")
                clear = gr.Button("Clear")
                highlighted_text = gr.HighlightedText(value="", label="Output", visible=True)

        # Button event handlers
        run.click(
            fn=run_inference,
            inputs=[image, prompt],
            outputs=highlighted_text,
        )
        clear.click(fn=reset, inputs=None, outputs=[prompt, image])

        # Status
        gr.Markdown(f"Startup: {datetime.datetime.now()}")
        gr.Markdown("GPU=?")

        # A single output component takes the value itself, not a list.
        demo.load(
            fn=lambda: f"Model `{model_location}` loaded.",
            inputs=None,
            outputs=model_info,
        )
    return demo
# Substrings that mark an environment variable as likely holding a credential.
_SENSITIVE_ENV_MARKERS = ("TOKEN", "SECRET", "PASSWORD", "KEY")


def _loggable_env_value(name, value):
    """Return *value*, or a placeholder when *name* looks like a credential."""
    upper_name = name.upper()
    if any(marker in upper_name for marker in _SENSITIVE_ENV_MARKERS):
        return "<redacted>"
    return value


if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
    )
    # Dump the environment for debugging, masking credential-like variables
    # (this app requires HF_TOKEN, which must never reach the logs).
    for k, v in os.environ.items():
        logging.info('environ["%s"] = %r', k, _loggable_env_value(k, v))
    create_app().queue().launch()