"""Streamlit demo: upload an image and display the output of an image-to-text pipeline."""

import contextlib
import os
import tempfile

import streamlit as st
from transformers import pipeline

# NOTE(review): Streamlit re-executes the script on each interaction, so this
# pipeline is presumably rebuilt on every rerun; consider caching it (e.g. with
# st.cache_resource) if the installed Streamlit version provides it.
# NOTE(review): the model id has no organisation prefix, so it is assumed to
# refer to a local directory -- confirm.
classifier = pipeline("image-to-text", model="vit-gpt2-image-captioning")


def _caption_upload(uploaded_file):
    """Run the captioning pipeline on an uploaded file and return its raw output.

    The pipeline's image loader accepts a URL, a local path or a PIL image,
    but not an arbitrary file-like object, so the upload's bytes are written
    to a temporary file and the pipeline is given that path instead.

    Args:
        uploaded_file: The object returned by ``st.file_uploader`` (must not
            be ``None``); its ``name`` and ``getvalue()`` are used.

    Returns:
        Whatever ``classifier`` returns for a one-element list of images.
    """
    # Keep the original extension so the temp file is recognisable on disk.
    suffix = os.path.splitext(uploaded_file.name)[1]
    # delete=False + explicit close: the file must be closed before another
    # reader opens it by name (required on Windows).
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    try:
        with tmp:
            tmp.write(uploaded_file.getvalue())
        return classifier([tmp.name])
    finally:
        # Best-effort cleanup; a failure to delete must not mask the result
        # or the real exception.
        with contextlib.suppress(OSError):
            os.remove(tmp.name)


def main():
    """Render the upload form and, once submitted, show the pipeline output as JSON."""
    st.title("image-to-text")
    with st.form("image_field"):
        image = st.file_uploader('Choose a file')
        # clicked is True only on the rerun triggered by pressing the button.
        clicked = st.form_submit_button("Submit")

    if not clicked:
        return
    if image is None:
        # The form can be submitted without selecting a file; the pipeline
        # cannot handle None, so tell the user instead of crashing.
        st.warning("Please choose an image file before submitting.")
        return

    st.json(_caption_upload(image))


if __name__ == "__main__":
    main()

# Reference list of pipeline task names:
# 'audio-classification', 'automatic-speech-recognition', 'conversational',
# 'document-question-answering', 'feature-extraction', 'fill-mask',
# 'image-classification', 'image-segmentation', 'image-to-text', 'ner',
# 'object-detection', 'question-answering', 'sentiment-analysis',
# 'summarization', 'table-question-answering', 'text-classification',
# 'text-generation', 'text2text-generation', 'token-classification',
# 'translation', 'visual-question-answering', 'vqa',
# 'zero-shot-classification', 'zero-shot-image-classification',
# 'translation_XX_to_YY'