AI-ANK committed
Commit a91a7bc · 1 Parent(s): 34026b8

Update app.py

Files changed (1)
  1. app.py +96 -93
app.py CHANGED
@@ -4,60 +4,52 @@ import requests
 from PIL import Image
 from transformers import AutoProcessor, AutoModelForVision2Seq
 from io import BytesIO
-#import replicate
+import replicate
 from llama_index.llms.palm import PaLM
 from llama_index import ServiceContext, VectorStoreIndex, Document
 from llama_index.memory import ChatMemoryBuffer
 import os
 import datetime
-from PIL import Image
-import io
 
 # Set up the title of the application
-st.title("Image Captioning and Chat")
+#st.title("PaLM-Kosmos-Vision")
+st.set_page_config(layout="wide")
+st.write("My version of ChatGPT vision. You can upload an image and start chatting with the LLM about the image")
+
+# Sidebar
+st.sidebar.markdown('## Created By')
+st.sidebar.markdown("""
+[Harshad Suryawanshi](https://www.linkedin.com/in/harshadsuryawanshi/)
+""")
+
+st.sidebar.markdown('## Other Projects')
+st.sidebar.markdown("""
+- [AI Equity Research Analyst](https://ai-eqty-rsrch-anlyst.streamlit.app/)
+- [Recasting "The Office" Scene](https://blackmirroroffice.streamlit.app/)
+- [Story Generator](https://appstorycombined-agaf9j4ceit.streamlit.app/)
+""")
+
+st.sidebar.markdown('## Disclaimer')
+st.sidebar.markdown("""
+This application is a conceptual prototype created to demonstrate the potential of Large Language Models (LLMs) in generating equity research reports. The contents generated by this application are purely illustrative and should not be construed as financial advice, endorsements, or recommendations. The author and the application do not provide any guarantee regarding the accuracy, completeness, or timeliness of the information provided.
+""")
 
 # Initialize the cookie manager
 cookie_manager = stx.CookieManager()
 
-@st.cache_resource
-def get_vision_model():
-    model = AutoModelForVision2Seq.from_pretrained("microsoft/kosmos-2-patch14-224")
-    processor = AutoProcessor.from_pretrained("microsoft/kosmos-2-patch14-224")
-    return model, processor
-
-model, processor = get_vision_model()
-
 # Function to get image caption via Kosmos2.
 @st.cache_data
 def get_image_caption(image_data):
-    # Ensure image_data is a bytes stream ready to be read by Image.open
-    if isinstance(image_data, io.BytesIO):
-        # If it's already a BytesIO, we need to seek to the beginning of the file
-        image_data.seek(0)
-        image = Image.open(image_data)
-    else:
-        # If image_data is not a BytesIO object, create one
-        image = Image.open(io.BytesIO(image_data.read()))
-
-    model, processor = get_vision_model()
-
-    prompt = "<grounding>An image of"
-    # Pass the PIL image to the processor
-    inputs = processor(text=prompt, images=image, return_tensors="pt")
-
-    generated_ids = model.generate(
-        pixel_values=inputs["pixel_values"],
-        input_ids=inputs["input_ids"][:, :-1],
-        attention_mask=inputs["attention_mask"][:, :-1],
-        img_features=None,
-        img_attn_mask=inputs["img_attn_mask"][:, :-1],
-        use_cache=True,
-        max_new_tokens=64,
+    input_data = {
+        "image": image_data,
+        "description_type": "Brief"
+    }
+    output = replicate.run(
+        "lucataco/kosmos-2:3e7b211c29c092f4bcc8853922cc986baa52efe255876b80cac2c2fbb4aff805",
+        input=input_data
     )
-    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-
-    text_description, entities = processor.post_process_generation(generated_text)
-
+    # Split the output string on the newline character and take the first item
+    text_description = output.split('\n\n')[0]
     return text_description
 
 # Function to create the chat engine.
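Note: this hunk swaps the local Kosmos-2 pipeline (AutoModelForVision2Seq / AutoProcessor) for the hosted Replicate model. Below is a minimal sketch of the same call outside Streamlit, assuming REPLICATE_API_TOKEN is available in the environment and a local file photo.jpg exists; the model slug and input keys are taken from the diff above.

import os
import replicate

# The replicate client reads its API token from the environment.
os.environ.setdefault("REPLICATE_API_TOKEN", "<your-replicate-token>")

with open("photo.jpg", "rb") as image_file:
    output = replicate.run(
        "lucataco/kosmos-2:3e7b211c29c092f4bcc8853922cc986baa52efe255876b80cac2c2fbb4aff805",
        input={"image": image_file, "description_type": "Brief"},
    )

# app.py treats the output as one string and keeps only the caption text
# before the grounding details, i.e. everything up to the first blank line.
caption = output.split("\n\n")[0]
print(caption)

Moving inference to Replicate removes the heavy transformers/torch model download from the app at the cost of one network call per caption.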
@@ -92,46 +84,49 @@ def clear_chat():
 def on_image_upload():
     clear_chat()
 
-# Add a clear chat button
-if st.button("Clear Chat"):
-    clear_chat()
-
-# Image upload section.
-image_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"], key="uploaded_image", on_change=on_image_upload)
-if image_file:
-    # Display the uploaded image at a standard width.
-    st.image(image_file, caption='Uploaded Image.', width=200)
-    # Process the uploaded image to get a caption.
-    image_data = BytesIO(image_file.getvalue())
-    img_desc = get_image_caption(image_data)
-    st.write(f"Image description: {img_desc}")
-
-    # Initialize the chat engine with the image description.
-    chat_engine = create_chat_engine(img_desc, os.environ["GOOGLE_API_KEY"])
-
-# Initialize session state for messages if it doesn't exist
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-
-# Display previous messages
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.markdown(message["content"])
-
-# Handle new user input
-user_input = st.chat_input("Ask me about the image:", key="chat_input")
-if user_input:
-    # Retrieve the message count from cookies
-    message_count = cookie_manager.get(cookie='message_count')
-    if message_count is None:
-        message_count = 0
-    else:
-        message_count = int(message_count)
-
-    # Check if the message limit has been reached
-    if message_count >= 20:
-        st.error("Notice: The maximum message limit for this demo version has been reached.")
-    else:
+# Retrieve the message count from cookies
+message_count = cookie_manager.get(cookie='message_count')
+if message_count is None:
+    message_count = 0
+else:
+    message_count = int(message_count)
+
+# If the message limit has been reached, disable the inputs
+if message_count >= 20:
+    st.error("Notice: The maximum message limit for this demo version has been reached.")
+    # Disabling the uploader and input by not displaying them
+    image_uploader_placeholder = st.empty()  # Placeholder for the uploader
+    chat_input_placeholder = st.empty()  # Placeholder for the chat input
+else:
+    # Add a clear chat button
+    if st.button("Clear Chat"):
+        clear_chat()
+
+    # Image upload section.
+    image_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"], key="uploaded_image", on_change=on_image_upload)
+    if image_file:
+        # Display the uploaded image at a standard width.
+        st.image(image_file, caption='Uploaded Image.', width=200)
+        # Process the uploaded image to get a caption.
+        image_data = BytesIO(image_file.getvalue())
+        img_desc = get_image_caption(image_data)
+        st.write("Image Uploaded Successfully. Ask me anything about it.")
+
+        # Initialize the chat engine with the image description.
+        chat_engine = create_chat_engine(img_desc, os.environ["GOOGLE_API_KEY"])
+
+    # Initialize session state for messages if it doesn't exist
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+
+    # Display previous messages
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
+    # Handle new user input
+    user_input = st.chat_input("Ask me about the image:", key="chat_input")
+    if user_input:
         # Append user message to the session state
         st.session_state.messages.append({"role": "user", "content": user_input})
 
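Note: the reworked gating above reads a message_count cookie once per run and only renders the Clear Chat button, the uploader, and the chat input while the demo limit of 20 messages has not been reached. A minimal sketch of that pattern in isolation, assuming stx is extra_streamlit_components (as the stx.CookieManager() call implies); the widget label is illustrative.

import datetime
import streamlit as st
import extra_streamlit_components as stx

cookie_manager = stx.CookieManager()
MESSAGE_LIMIT = 20  # demo cap used in app.py

# The cookie stores a string; it is None on a first visit.
raw_count = cookie_manager.get(cookie="message_count")
message_count = int(raw_count) if raw_count is not None else 0

if message_count >= MESSAGE_LIMIT:
    st.error("Notice: The maximum message limit for this demo version has been reached.")
else:
    if st.chat_input("Ask me something:"):
        message_count += 1
        # Persist the new count for 30 days, matching the expiry in app.py.
        cookie_manager.set(
            "message_count",
            str(message_count),
            expires_at=datetime.datetime.now() + datetime.timedelta(days=30),
        )

Because the count lives in a browser cookie, it survives reruns and page reloads for up to 30 days, but it is per browser and can be cleared by the user.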
@@ -140,23 +135,31 @@ if user_input:
             st.markdown(user_input)
 
         # Call the chat engine to get the response if an image has been uploaded
-        if image_file:
-            # Get the response from your chat engine
-            response = chat_engine.chat(user_input)
-
-            # Append assistant message to the session state
-            st.session_state.messages.append({"role": "assistant", "content": response})
-
-            # Display the assistant message
-            with st.chat_message("assistant"):
-                st.markdown(response)
+        if image_file and user_input:
+            try:
+                with st.spinner('Waiting for the chat engine to respond...'):
+                    # Get the response from your chat engine
+                    response = chat_engine.chat(user_input)
+
+                    # Append assistant message to the session state
+                    st.session_state.messages.append({"role": "assistant", "content": response})
 
-        # Increment the message count and update the cookie
-        message_count += 1
-        cookie_manager.set('message_count', str(message_count), expires_at=datetime.datetime.now() + datetime.timedelta(days=30))
+                    # Display the assistant message
+                    with st.chat_message("assistant"):
+                        st.markdown(response)
+
+            except Exception as e:
+                st.error(f'An error occurred: {e}')
+                # Optionally, you can choose to break the flow here if a critical error happens
+                # return
+
+        # Increment the message count and update the cookie
+        message_count += 1
+        cookie_manager.set('message_count', str(message_count), expires_at=datetime.datetime.now() + datetime.timedelta(days=30))
+
 
 
 
 # Set Replicate and Google API keys
-#os.environ['REPLICATE_API_TOKEN'] = st.secrets['REPLICATE_API_TOKEN']
+os.environ['REPLICATE_API_TOKEN'] = st.secrets['REPLICATE_API_TOKEN']
 os.environ["GOOGLE_API_KEY"] = st.secrets['GOOGLE_API_KEY']
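Note: create_chat_engine(img_desc, ...) is called above but defined outside the changed hunks, so its body is not part of this diff. Going only by the imports at the top of app.py (PaLM, ServiceContext, VectorStoreIndex, Document, ChatMemoryBuffer), here is a hypothetical sketch of what such a helper could look like with the legacy llama_index API; the chat mode, memory size, and system prompt are illustrative assumptions, not the author's actual code.

from llama_index import Document, ServiceContext, VectorStoreIndex
from llama_index.llms.palm import PaLM
from llama_index.memory import ChatMemoryBuffer


def create_chat_engine(img_desc, api_key):
    # Hypothetical reconstruction: wrap the image description in a Document,
    # index it, and expose a context chat engine backed by Google's PaLM model.
    llm = PaLM(api_key=api_key)
    service_context = ServiceContext.from_defaults(llm=llm)
    index = VectorStoreIndex.from_documents(
        [Document(text=img_desc)], service_context=service_context
    )
    memory = ChatMemoryBuffer.from_defaults(token_limit=1500)  # assumed limit
    return index.as_chat_engine(
        chat_mode="context",
        memory=memory,
        system_prompt=(
            "You are a chatbot answering questions about the uploaded image, "
            f"which is described as: {img_desc}"
        ),
    )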