import streamlit as st
from transformers import pipeline
from huggingface_hub import InferenceClient
from PIL import Image
import os


# Read the Hugging Face token from the environment rather than hard-coding it.
api_key = os.getenv("HUGGINGFACE_TOKEN")
client = InferenceClient(api_key=api_key)
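

# Cache the captioning pipeline (helper added here) so the heavyweight BLIP
# model is downloaded and loaded once per process instead of on every
# Streamlit rerun.
@st.cache_resource
def load_captioner():
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")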

st.header("Character Captions (IN PROGRESS!)")
st.write("Have a character caption any image you upload!")
character = st.selectbox("Choose a character", ["rapper", "shrek", "unintelligible"])

uploaded_img = st.file_uploader("Upload an image")

if uploaded_img is not None:

    # Display the uploaded image back to the user.
    image = Image.open(uploaded_img)
    st.image(image)

    image_captioner = load_captioner()

    # The pipeline returns a list of dicts, e.g. [{'generated_text': '...'}].
    response = image_captioner(image)
    caption = response[0]['generated_text']

    # Map each persona to a prompt that rewrites the literal caption in character.
    character_prompts = {
        "rapper": f"Describe this scene like you're a rapper: {caption}.",
        "shrek": f"Describe this scene like you're Shrek: {caption}.",
        "unintelligible": f"Describe this scene in a way that makes no sense: {caption}."
    }

    prompt = character_prompts[character]

    messages = [
        { "role": "user", "content": prompt }
    ]

    # Stream the chat completion so tokens arrive incrementally.
    stream = client.chat.completions.create(
        model="meta-llama/Llama-3.2-3B-Instruct",
        messages=messages,
        max_tokens=500,
        stream=True
    )

    # Accumulate the streamed tokens; delta.content can be None on some
    # chunks (e.g. the final one), so fall back to an empty string.
    response = ''
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''

    st.write(response)
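
# To try it locally: export HUGGINGFACE_TOKEN, then run
# `streamlit run app.py` (app.py is the assumed filename for this file).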