"""Gradio demo: caption an uploaded image and classify the emotion it shows.

Two Hugging Face pipelines are created at module level and reused by the
request handler ``generate_caption_and_emotion``, which is exposed through a
``gr.Interface`` with one image input and one text output.
"""

from typing import Optional

import gradio as gr
from PIL import Image
from transformers import pipeline

# The pipelines are built once at module level so each request reuses the
# same model objects instead of constructing them per call.
caption_pipeline = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-large",
)
emotion_pipeline = pipeline(
    "image-classification",
    model="RickyIG/emotion_face_image_classification_v3",
)


def generate_caption_and_emotion(image: Optional[Image.Image]) -> str:
    """Return a caption and emotion scores for *image* as one text block.

    Args:
        image: The uploaded picture (the interface requests ``type="pil"``),
            or ``None`` when the form was submitted without an image.

    Returns:
        A two-line string of the form
        ``"Caption: <text>\\nEmotions: <label>: <score>, ..."`` with each score
        formatted to two decimals, or a short prompt asking for an image when
        *image* is ``None``.
    """
    # Guard clause: the pipelines cannot process a missing image, so answer
    # with a readable message instead of surfacing a traceback in the UI.
    if image is None:
        return "No image provided. Please upload an image."

    # The captioning result is indexed as a list of dicts; only the first
    # entry's generated text is shown.
    caption_result = caption_pipeline(image)
    caption = caption_result[0]["generated_text"]

    # Every returned classification entry is listed, in the order the
    # pipeline yields them.
    emotion_result = emotion_pipeline(image)
    emotions = ", ".join(
        f"{res['label']}: {res['score']:.2f}" for res in emotion_result
    )

    return f"Caption: {caption}\nEmotions: {emotions}"


# Web UI wiring: one image input, one text output.
interface = gr.Interface(
    fn=generate_caption_and_emotion,
    inputs=gr.components.Image(type="pil", label="Upload an Image"),
    outputs=gr.components.Textbox(label="Generated Caption and Emotions"),
)

# Launch only when executed as a script, so importing this module (e.g. to
# reuse the handler) does not start a blocking web server.
if __name__ == "__main__":
    interface.launch()