Spaces:
Build error
Build error
File size: 3,941 Bytes
8e34f80 893a3e5 8e34f80 893a3e5 8e34f80 0bb8ce2 893a3e5 8e34f80 893a3e5 8e34f80 893a3e5 15b96ac 893a3e5 15b96ac 893a3e5 f51ad44 74fc255 1e2e099 893a3e5 74fc255 1e2e099 74fc255 1e2e099 74fc255 e981e7f bbf9e08 15b96ac 893a3e5 74fc255 080099f 8e34f80 ee4f4a6 bbf9e08 ee4f4a6 e17785f 893a3e5 ee4f4a6 4c0fb4c 141404c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import tempfile

import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from PIL import Image
from transformers import (
    GitConfig,
    GitForCausalLM,
    GitModel,
    GitProcessor,
    ImageFeatureProcessor,
)
# Load models and processors
git_config = GitConfig.from_pretrained("microsoft/git-large-r")
git_processor_large_textcaps = GitProcessor.from_pretrained("microsoft/git-large-r")
# Captioning needs the language-model head: GitForCausalLM provides logits and
# generate(), whereas the bare GitModel only returns hidden states.
git_model_large_textcaps = GitForCausalLM.from_pretrained("microsoft/git-large-r")
# NOTE(review): confirm this TF Hub handle resolves and what input the model
# expects; it is called directly with a PIL image in compute_itm_score.
itm_model = hub.load("https://tfhub.dev/google/LaViT/1")
use_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")
# Read statements from the external file 'statements.txt'
# An explicit encoding keeps the result independent of the host locale, and
# blank lines are skipped so they are not scored as empty statements.
with open('statements.txt', 'r', encoding='utf-8') as file:
    statements = [line for line in file.read().splitlines() if line.strip()]
# Function to generate image caption
def generate_caption(processor, model, image, max_length=50):
    """Generate a caption for ``image`` by autoregressive decoding.

    Args:
        processor: Callable as ``processor(images=..., return_tensors="pt")``
            returning a mapping with a ``"pixel_values"`` entry, and exposing
            ``batch_decode(ids, skip_special_tokens=True)``.
        model: Object exposing ``generate(pixel_values=..., max_length=...)``,
            i.e. a captioning model with a language-model head.
        image: The image to describe, in whatever form ``processor`` accepts.
        max_length: Upper bound on the generated sequence length.

    Returns:
        The decoded caption for the first (only) image in the batch.
    """
    inputs = processor(images=image, return_tensors="pt")
    # A single forward pass with only pixel values is not caption generation
    # (there are no text tokens to predict from); generate() runs the decoding
    # loop that actually produces the caption token ids.
    generated_ids = model.generate(
        pixel_values=inputs["pixel_values"],
        max_length=max_length,
    )
    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]
# Function to compute textual similarity
def compute_textual_similarity(caption, statement):
    """Return the inner product of the sentence embeddings of two texts.

    Both strings are embedded with the module-level ``use_model``.

    Args:
        caption: Generated image caption.
        statement: Statement to compare the caption against.

    Returns:
        The similarity as a Python ``float``.
    """
    # One batched call embeds both texts: row 0 is the caption, row 1 the
    # statement.
    embeddings = use_model([caption, statement]).numpy()
    # np.inner of two 1-D vectors is a 0-d scalar; indexing it with [0] raises
    # IndexError, so convert it to a float directly.
    return float(np.inner(embeddings[0], embeddings[1]))
# Function to compute ITM score
def compute_itm_score(image, statement):
    """Return an image-text matching score for ``image`` and ``statement``.

    The image is passed to the module-level ``itm_model`` and the statement is
    embedded with ``use_model``; the score is element ``[0][0]`` of the inner
    product of the two feature arrays.

    Args:
        image: Image handed to ``itm_model`` unchanged.
        statement: Text to match against the image.

    Returns:
        The score as a Python ``float``, matching what
        ``compute_textual_similarity`` returns.
    """
    # NOTE(review): itm_model receives the image as-is; confirm the hub model
    # accepts that input type and that its features live in the same embedding
    # space (and dimensionality) as use_model's sentence vectors.
    image_features = itm_model(image)
    statement_features = use_model([statement])[0].numpy()
    similarity = np.inner(image_features, statement_features)
    # NOTE(review): the [0][0] indexing assumes the inner product is at least
    # 2-D -- confirm against the actual output shape of itm_model.
    return float(similarity[0][0])
# Function to save DataFrame to CSV
def save_dataframe_to_csv(df):
    """Serialize ``df`` to CSV text, omitting the index column.

    Despite the name, nothing is written to disk: the CSV content is returned
    as a string and the caller decides where to store it.

    Args:
        df: The pandas DataFrame to serialize.

    Returns:
        The CSV representation of ``df`` as a ``str``.
    """
    return df.to_csv(index=False)
# Main function to perform image captioning and image-text matching for multiple images
def process_images_and_statements(files):
    """Caption every uploaded image and score it against every statement.

    For each image a caption is generated once; then, for each entry of the
    module-level ``statements`` list, the caption/statement textual similarity
    and the image/statement ITM score are computed (both scaled to percent)
    and averaged with equal weights into a final score.

    Args:
        files: Uploaded images. Either a mapping of display name to file, an
            iterable of file objects or paths (the ``name`` attribute is used
            as the display name when present), or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(results_df, csv_path)`` tuple: the results as a pandas DataFrame
        (one row per image/statement pair) and the path of a temporary CSV
        file holding the same data, suitable for a file-download output.
    """
    columns = [
        'Image File Name',
        'Statement',
        'Generated Caption',
        'Textual Similarity Score',
        'ITM Score',
        'Final Combined Score',
    ]

    # Normalize the accepted input shapes to (display name, file) pairs.
    if files is None:
        named_files = []
    elif isinstance(files, dict):
        named_files = list(files.items())
    else:
        named_files = [(getattr(f, "name", f), f) for f in files]

    rows = []
    for file_name, image_file in named_files:
        # The context manager releases the image's resources once all of its
        # statements have been scored.
        with Image.open(image_file) as image:
            caption = generate_caption(git_processor_large_textcaps, git_model_large_textcaps, image)
            for statement in statements:
                textual_similarity_score = compute_textual_similarity(caption, statement) * 100
                itm_score_statement = compute_itm_score(image, statement) * 100
                final_score = 0.5 * textual_similarity_score + 0.5 * itm_score_statement
                rows.append({
                    'Image File Name': file_name,  # Include the image file name
                    'Statement': statement,
                    'Generated Caption': caption,
                    'Textual Similarity Score': f"{textual_similarity_score:.2f}%",
                    'ITM Score': f"{itm_score_statement:.2f}%",
                    'Final Combined Score': f"{final_score:.2f}%",
                })

    # Building the frame in one step also works for zero rows, where
    # pd.concat([]) would raise "No objects to concatenate".
    results_df = pd.DataFrame(rows, columns=columns)

    # A file-download output needs a path on disk, not the CSV text itself, so
    # persist the text to a temporary file that outlives this call.
    csv_text = save_dataframe_to_csv(results_df)
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, newline="", encoding="utf-8"
    ) as csv_file:
        csv_file.write(csv_text)
    return results_df, csv_file.name
# Gradio interface: a multi-file upload as input; a results table and a CSV
# download as outputs.
# NOTE(review): gr.inputs / gr.outputs and capture_session look like legacy
# Gradio API -- confirm they are supported by the Gradio version this app pins.
image_input = gr.inputs.File(
    label="Upload Images",
    type="file",
    file_count="multiple",
)
output_df = gr.outputs.Dataframe(label="Results", type="pandas")
output_csv = gr.outputs.File(label="Download CSV")

iface = gr.Interface(
    title="Image Captioning and Image-Text Matching",
    fn=process_images_and_statements,
    inputs=image_input,
    outputs=[output_df, output_csv],
    theme="sudeepshouche/minimalist",
    # Custom CSS: stack the output panels vertically at full width.
    css=(
        ".output { flex-direction: column; } "
        ".output .outputs { width: 100%; }"
    ),
    # Capture errors and exceptions in Gradio interface
    capture_session=True,
)

# Start the app with debug output enabled.
iface.launch(debug=True)
|