Spaces:

awacke1
/

GPT-4o-omni-text-audio-image-video

Running

App Files Files Community

awacke1 committed on Mar 28

Commit

0df46b2

verified ·

1 Parent(s): 6e8578b

Update back.branched.PDFAddedRAG.03282025.app.py

Browse files

Files changed (1) hide show

back.branched.PDFAddedRAG.03282025.app.py +44 -19

back.branched.PDFAddedRAG.03282025.app.py CHANGED Viewed

@@ -99,11 +99,12 @@ def SpeechSynthesis(result):
 def generate_filename(prompt, file_type, original_name=None):
     central = pytz.timezone('US/Central')
     safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
-    if original_name:
         base_name = os.path.splitext(original_name)[0]
-        return f"{safe_date_time}_{base_name}.{file_type}"
-    replaced_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt).strip()[:240]
-    return f"{safe_date_time}_{replaced_prompt}.{file_type}"
 def create_and_save_file(content, file_type="md", prompt=None, original_name=None, should_save=True):
     if not should_save:
@@ -157,9 +158,9 @@ def process_image(image_input, user_prompt):
 # Audio Processing
 def process_audio(audio_input, text_input=''):
     if audio_input:
-        audio_bytes = audio_input.read()
         supported_formats = ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm']
-        file_ext = os.path.splitext(audio_input.name)[1][1:].lower()
         if file_ext not in supported_formats:
             st.error(f"Unsupported format: {file_ext}. Supported formats: {supported_formats}")
             return
@@ -218,8 +219,8 @@ def process_video(video_path, seconds_per_frame=2):
             clip.audio.write_audiofile(audio_path, bitrate="32k")
             clip.audio.close()
         clip.close()
-    except:
-        st.warning("No audio track found in video.")
         audio_path = None
     return base64Frames, audio_path
@@ -324,8 +325,8 @@ def process_rag_query(query, vector_store_id):
         )
         tool_calls = response.choices[0].message.tool_calls if response.choices[0].message.tool_calls else []
         return response.choices[0].message.content, tool_calls
-    except openai.PermissionDeniedError as e:
-        st.error(f"RAG error: {str(e)}. Ensure your project has access to the model.")
         return None, []
 def evaluate_rag(vector_store_id, questions_dict):
@@ -340,7 +341,7 @@ def evaluate_rag(vector_store_id, questions_dict):
         response, tool_calls = process_rag_query(query, vector_store_id)
         if not tool_calls:
             continue
-        retrieved_files = [call.function.arguments.get("file_id", "") for call in tool_calls if "file_search" in call.function.name][:k]
         if expected_file in retrieved_files:
             rank = retrieved_files.index(expected_file) + 1
             correct_retrievals_at_k += 1
@@ -378,8 +379,8 @@ def rag_pdf_gallery():
                     st.markdown(response)
                     st.write("Retrieved chunks:")
                     for call in tool_calls:
-                        if "file_search" in call.function.name:
-                            st.json(call.function.arguments)
         if st.button("Evaluate RAG Performance"):
             with st.spinner("Evaluating..."):
@@ -464,13 +465,29 @@ def main():
     option = st.selectbox("Select Input Type", ("Text", "Image", "Audio", "Video", "ArXiv Search", "RAG PDF Gallery"))
     if option == "Text":
-        text_input = st.text_input("Enter your text:")
-        if text_input:
             with st.spinner("Processing..."):
                 process_text(text_input)
     elif option == "Image":
-        text_input = st.text_input("Image Prompt:", value="Describe this image and list ten facts in a markdown outline with emojis.")
         image_input = st.file_uploader("Upload an image (max 200MB)", type=["png", "jpg", "jpeg"], accept_multiple_files=False)
         if image_input and text_input:
             if image_input.size > 200 * 1024 * 1024:
@@ -480,6 +497,7 @@ def main():
                     image_response = process_image(image_input, text_input)
                     with st.chat_message("ai", avatar="🦖"):
                         st.markdown(image_response)
     elif option == "Audio":
         text_input = st.text_input("Audio Prompt:", value="Summarize this audio transcription in Markdown.")
@@ -488,10 +506,13 @@ def main():
         if audio_bytes:
             with open("recorded_audio.wav", "wb") as f:
                 f.write(audio_bytes)
-            audio_input = open("recorded_audio.wav", "rb")
-        if audio_input and text_input:
             with st.spinner("Processing..."):
                 process_audio(audio_input, text_input)
     elif option == "Video":
         text_input = st.text_input("Video Prompt:", value="Summarize this video and its transcription in Markdown.")
@@ -502,6 +523,7 @@ def main():
             else:
                 with st.spinner("Processing..."):
                     process_audio_and_video(video_input)
     elif option == "ArXiv Search":
         query = st.text_input("AI Search ArXiv Scholarly Articles:")
@@ -509,6 +531,7 @@ def main():
             with st.spinner("Searching ArXiv..."):
                 result = search_arxiv(query)
                 st.markdown(result)
     elif option == "RAG PDF Gallery":
         rag_pdf_gallery()
@@ -519,7 +542,9 @@ for message in st.session_state.messages:
         st.markdown(message["content"])
 if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
-    process_text(prompt)
 FileSidebar()
 main()

 def generate_filename(prompt, file_type, original_name=None):
     central = pytz.timezone('US/Central')
     safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
+    if original_name and file_type == "md":  # For images
         base_name = os.path.splitext(original_name)[0]
+        safe_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt).strip()[:100]
+        return f"{safe_date_time}_{safe_prompt}_{base_name}.{file_type}"
+    safe_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt).strip()[:240]
+    return f"{safe_date_time}_{safe_prompt}.{file_type}"
 def create_and_save_file(content, file_type="md", prompt=None, original_name=None, should_save=True):
     if not should_save:
 # Audio Processing
 def process_audio(audio_input, text_input=''):
     if audio_input:
+        audio_bytes = audio_input if isinstance(audio_input, bytes) else audio_input.read()
         supported_formats = ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm']
+        file_ext = "wav" if isinstance(audio_input, bytes) else os.path.splitext(audio_input.name)[1][1:].lower()
         if file_ext not in supported_formats:
             st.error(f"Unsupported format: {file_ext}. Supported formats: {supported_formats}")
             return
             clip.audio.write_audiofile(audio_path, bitrate="32k")
             clip.audio.close()
         clip.close()
+    except Exception as e:
+        st.warning(f"No audio track found or error: {str(e)}")
         audio_path = None
     return base64Frames, audio_path
         )
         tool_calls = response.choices[0].message.tool_calls if response.choices[0].message.tool_calls else []
         return response.choices[0].message.content, tool_calls
+    except openai.BadRequestError as e:
+        st.error(f"RAG query error: {str(e)}")
         return None, []
 def evaluate_rag(vector_store_id, questions_dict):
         response, tool_calls = process_rag_query(query, vector_store_id)
         if not tool_calls:
             continue
+        retrieved_files = [call.arguments.get("file_id", "") for call in tool_calls if "file_search" in call.type][:k]
         if expected_file in retrieved_files:
             rank = retrieved_files.index(expected_file) + 1
             correct_retrievals_at_k += 1
                     st.markdown(response)
                     st.write("Retrieved chunks:")
                     for call in tool_calls:
+                        if "file_search" in call.type:
+                            st.json(call.arguments)
         if st.button("Evaluate RAG Performance"):
             with st.spinner("Evaluating..."):
     option = st.selectbox("Select Input Type", ("Text", "Image", "Audio", "Video", "ArXiv Search", "RAG PDF Gallery"))
     if option == "Text":
+        default_text = "emojis in markdown. Maybe a buckeyball feature rating comparing them against each other in markdown emoji outline or tables."
+        col1, col2 = st.columns([1, 5])
+        with col1:
+            if st.button("📝 MD", key="md_button"):
+                st.session_state["text_input"] = default_text
+                with st.spinner("Processing..."):
+                    process_text(default_text)
+                st.rerun()
+        with col2:
+            text_input = st.text_input("Enter your text:", value=st.session_state.get("text_input", ""), key="text_input_field")
+        if text_input and text_input != st.session_state.get("text_input", ""):  # Only process if changed
             with st.spinner("Processing..."):
                 process_text(text_input)
     elif option == "Image":
+        col1, col2 = st.columns(2)
+        with col1:
+            if st.button("📝 Describe"):
+                st.session_state["image_prompt"] = "Describe this image and list ten facts in a markdown outline with emojis."
+        with col2:
+            if st.button("🔍 OCR"):
+                st.session_state["image_prompt"] = "Show electronic text of text in the image."
+        text_input = st.text_input("Image Prompt:", value=st.session_state.get("image_prompt", "Describe this image and list ten facts in a markdown outline with emojis."))
         image_input = st.file_uploader("Upload an image (max 200MB)", type=["png", "jpg", "jpeg"], accept_multiple_files=False)
         if image_input and text_input:
             if image_input.size > 200 * 1024 * 1024:
                     image_response = process_image(image_input, text_input)
                     with st.chat_message("ai", avatar="🦖"):
                         st.markdown(image_response)
+                st.rerun()
     elif option == "Audio":
         text_input = st.text_input("Audio Prompt:", value="Summarize this audio transcription in Markdown.")
         if audio_bytes:
             with open("recorded_audio.wav", "wb") as f:
                 f.write(audio_bytes)
+            with st.spinner("Processing..."):
+                process_audio(audio_bytes, text_input)
+            st.rerun()
+        elif audio_input and text_input:
             with st.spinner("Processing..."):
                 process_audio(audio_input, text_input)
+            st.rerun()
     elif option == "Video":
         text_input = st.text_input("Video Prompt:", value="Summarize this video and its transcription in Markdown.")
             else:
                 with st.spinner("Processing..."):
                     process_audio_and_video(video_input)
+                st.rerun()
     elif option == "ArXiv Search":
         query = st.text_input("AI Search ArXiv Scholarly Articles:")
             with st.spinner("Searching ArXiv..."):
                 result = search_arxiv(query)
                 st.markdown(result)
+            st.rerun()
     elif option == "RAG PDF Gallery":
         rag_pdf_gallery()
         st.markdown(message["content"])
 if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
+    with st.spinner("Processing..."):
+        process_text(prompt)
+    st.rerun()
 FileSidebar()
 main()