File size: 2,326 Bytes
583f59a 30bc075 583f59a 68d960f 11e1c78 3b927c8 11e1c78 583f59a 3ee0221 11e1c78 3ee0221 11e1c78 3ee0221 11e1c78 3ee0221 11e1c78 b4a5839 f60fbd6 8dcc082 bbea512 b4a5839 3ee0221 908f3e3 b497264 3ee0221 11e1c78 3ee0221 e60d3bf 3ee0221 eee62a6 11e1c78 583f59a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import streamlit as st
import os
import pathlib
import textwrap
from PIL import Image
import google.generativeai as genai
# Read the API key from the process environment. A literal such as
# 'process.env.GEMINI_API_KEY' is JavaScript syntax inside a Python string and
# would be sent verbatim as the key, so every request would be rejected.
# NOTE(review): assumes the deployment exposes the key as the GEMINI_API_KEY
# environment variable/secret; os.getenv returns None when it is unset.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
## Function to load the Gemini vision model and get a response
def get_gemini_response(input, image, prompt):
    """Send text and image parts to the Gemini vision model and return its text.

    Args:
        input: Leading text part of the request. The name shadows the builtin
            but is kept so existing keyword callers continue to work.
        image: Sequence whose first element is the image part; only
            ``image[0]`` is sent.
        prompt: Trailing text part. May be empty or ``None``, in which case it
            is left out of the request.

    Returns:
        The ``text`` attribute of the response returned by
        ``generate_content``.
    """
    model = genai.GenerativeModel('gemini-pro-vision')
    # Leave out missing/blank text parts: they carry no information and an
    # empty text part may be rejected by the API. Non-string parts (the image)
    # are always kept, and the original ordering is preserved.
    parts = [
        part
        for part in (input, image[0], prompt)
        if part is not None and not (isinstance(part, str) and not part.strip())
    ]
    response = model.generate_content(parts)
    return response.text
def input_image_setup(uploaded_file):
    """Package an uploaded file as a one-element list of image parts.

    Args:
        uploaded_file: Object exposing ``getvalue()`` (the raw bytes) and a
            ``type`` attribute (the MIME type), or ``None`` when nothing was
            uploaded.

    Returns:
        A list holding a single dict with the keys ``"mime_type"`` and
        ``"data"``.

    Raises:
        FileNotFoundError: If ``uploaded_file`` is ``None``.
    """
    # Guard clause: there is nothing to package without an upload.
    if uploaded_file is None:
        raise FileNotFoundError("No file uploaded")

    raw_bytes = uploaded_file.getvalue()
    part = {"mime_type": uploaded_file.type, "data": raw_bytes}
    return [part]
# --- Streamlit page: upload a business card image and extract its fields ---
st.set_page_config(page_title="Gemini Image Demo")
st.header("Generative AI : Business Card Reader")
st.caption("""This space is based on Google generative ai API and it uses Gemini pro vision model
to extract text from business card images. You can use your own images for input
or find sample images in example folder of files section in this space.
You can add input prompt below if you want to get specific imnformation from image.
You can modify this space for other input like invoice.""")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
if uploaded_file is not None:
    # Show a preview so the user can confirm the right file was picked.
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image.", use_column_width=True)

# Optional extra instruction typed by the user. It is stored under a name that
# does not shadow the builtin ``input``; the widget key is unchanged.
user_prompt = st.text_input("Input Prompt (Optinal) : ", key="input")
submit = st.button("Submit")

# Fixed instruction passed as the first argument to get_gemini_response,
# ahead of the image data and the user's optional prompt.
input_prompt = """
You are an expert in understanding business cards.
Input: Image of a business card.
Task: Extract and label the following information in JSON format:
Labels : person_name, company_name, occupation, contact_number, email addresse, website, address, other_details (services, features, etc.)
Constraints: Do not include missing information.
"""

if submit:
    if uploaded_file is None:
        # input_image_setup raises FileNotFoundError when nothing was uploaded;
        # tell the user what to do instead of letting the app show a traceback.
        st.warning("Please upload an image before submitting.")
    else:
        image_data = input_image_setup(uploaded_file)
        response = get_gemini_response(input_prompt, image_data, user_prompt)
        st.subheader("Output :")
        st.write(response)