File size: 1,414 Bytes
b0f6ad7 63eb096 321df2e 63eb096 cb0665f b0f6ad7 63eb096 321df2e cb0665f d5986ec 63eb096 321df2e 63eb096 b7b26d6 63eb096 b7b26d6 5429a99 b7b26d6 5429a99 b7b26d6 321df2e 5429a99 321df2e 5429a99 b7b26d6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import streamlit as st
from transformers import pipeline
from huggingface_hub import InferenceClient
from PIL import Image
import os
# Streamlit app: caption an uploaded image with BLIP, then have a chat model
# restyle that caption in the voice of the selected character.

# Token for the Hugging Face Inference API, read from the environment.
# NOTE(review): os.getenv returns None when HUGGINGFACE_TOKEN is unset; the
# client is still constructed in that case — confirm that is acceptable.
api_key = os.getenv("HUGGINGFACE_TOKEN")
client = InferenceClient(api_key=api_key)


@st.cache_resource
def load_image_captioner():
    """Build the image-captioning pipeline once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the captioning model would be re-instantiated each time.
    """
    return pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-large",
    )


st.header("Character Captions (IN PROGRESS!)")
st.write("Have a character caption any image you upload!")

character = st.selectbox(
    "Choose a character",
    ["rapper", "shrek", "unintelligible"],
)
uploaded_img = st.file_uploader("Upload an image")

if uploaded_img is not None:
    try:
        image = Image.open(uploaded_img)
    except OSError:
        # PIL raises an OSError subclass when the upload is not a readable
        # image; report it instead of crashing with a traceback.
        st.error("Could not read the uploaded file as an image.")
        st.stop()
    st.image(image)

    # Step 1: plain caption of the image.
    image_captioner = load_image_captioner()
    caption_result = image_captioner(image)
    caption = caption_result[0]['generated_text']

    # Step 2: wrap the caption in a character-specific instruction.
    character_prompts = {
        "rapper": f"Describe this scene like you're a rapper: {caption}.",
        "shrek": f"Describe this scene like you're Shrek: {caption}.",
        "unintelligible": f"Describe this scene in a way that makes no sense: {caption}.",
    }
    prompt = character_prompts[character]
    messages = [
        {"role": "user", "content": prompt},
    ]

    # Step 3: stream the chat completion and collect the text pieces.
    stream = client.chat.completions.create(
        model="meta-llama/Llama-3.2-3B-Instruct",
        messages=messages,
        max_tokens=500,
        stream=True,
    )
    pieces = []
    for chunk in stream:
        # Some streamed chunks carry no text (empty choices or a delta whose
        # content is None); concatenating those would raise, so skip them.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text:
            pieces.append(text)

    st.write("".join(pieces))
|