awacke1 committed
Commit 5c15f6d · verified · Parent(s): 03c4954

Update app.py

Files changed (1):
  1. app.py +372 -564
app.py CHANGED
@@ -5,18 +5,17 @@ import json
  import math
  import os
  import pytz
- import random
  import re
- import requests
- import streamlit as st
- import streamlit.components.v1 as components
- import textract
  import time
  import zipfile
  from concurrent.futures import ThreadPoolExecutor
  from tqdm import tqdm
  import concurrent

  from audio_recorder_streamlit import audio_recorder
  from bs4 import BeautifulSoup
  from collections import deque
@@ -24,587 +23,396 @@ from datetime import datetime
  from dotenv import load_dotenv
  from gradio_client import Client
  from io import BytesIO
- from moviepy import VideoFileClip
  from PIL import Image
  from PyPDF2 import PdfReader
- from templates import bot_template, css, user_template
- from urllib.parse import quote
- from xml.etree import ElementTree as ET

  import openai
  from openai import OpenAI
  import pandas as pd

- # Configuration
- Site_Name = 'Scholarly-Article-Document-Search-With-Memory'
- title = "🔬🧠ScienceBrain.AI"
- helpURL = 'https://huggingface.co/awacke1'
- bugURL = 'https://huggingface.co/spaces/awacke1'
- icons = Image.open("icons.ico")
- st.set_page_config(
-     page_title=title,
-     page_icon=icons,
-     layout="wide",
-     initial_sidebar_state="auto",
-     menu_items={'Get Help': helpURL, 'Report a bug': bugURL, 'About': title}
- )
-
- # API Configuration
- API_KEY = os.getenv('API_KEY')
- HF_KEY = os.getenv('HF_KEY')
- headers = {"Authorization": f"Bearer {HF_KEY}", "Content-Type": "application/json"}
- key = os.getenv('OPENAI_API_KEY')
- client = OpenAI(api_key=key, organization=os.getenv('OPENAI_ORG_ID'))
- MODEL = "gpt-4o-2024-05-13"
- if "openai_model" not in st.session_state:
-     st.session_state["openai_model"] = MODEL
- if "messages" not in st.session_state:
-     st.session_state.messages = []
- if st.button("Clear Session"):
-     st.session_state.messages = []
-
- # Sidebar Options
- should_save = st.sidebar.checkbox("💾 Save", value=True, help="Save your session data.")
-
- # HTML5 Speech Synthesis
- @st.cache_resource
- def SpeechSynthesis(result):
-     documentHTML5 = '''
-     <!DOCTYPE html>
-     <html>
-     <head>
-         <title>Read It Aloud</title>
-         <script type="text/javascript">
-             function readAloud() {
-                 const text = document.getElementById("textArea").value;
-                 const speech = new SpeechSynthesisUtterance(text);
-                 window.speechSynthesis.speak(speech);
-             }
-         </script>
-     </head>
-     <body>
-         <h1>🔊 Read It Aloud</h1>
-         <textarea id="textArea" rows="10" cols="80">
-     '''
-     documentHTML5 += result + '''
-         </textarea>
-         <br>
-         <button onclick="readAloud()">🔊 Read Aloud</button>
-     </body>
-     </html>
-     '''
-     components.html(documentHTML5, width=1280, height=300)
-
- # File Naming and Saving
- def generate_filename(prompt, file_type, original_name=None):
-     central = pytz.timezone('US/Central')
-     safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
-     safe_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt).strip()[:50]
-     if original_name and file_type == "md":  # For images
-         base_name = os.path.splitext(original_name)[0]
-         file_stem = f"{safe_date_time}_{safe_prompt}_{base_name}"[:100]  # Cap at 100 chars
-         return f"{file_stem}.{file_type}"
-     file_stem = f"{safe_date_time}_{safe_prompt}"[:100]  # Cap at 100 chars
-     return f"{file_stem}.{file_type}"
-
- def create_and_save_file(content, file_type="md", prompt=None, original_name=None, should_save=True):
-     if not should_save:
-         return None
-     filename = generate_filename(prompt, file_type, original_name)
-     with open(filename, "w", encoding="utf-8") as f:
-         f.write(content if not prompt else prompt + "\n\n" + content)
-     return filename
-
- # Text Processing
- def process_text(text_input):
-     if text_input:
-         st.session_state.messages.append({"role": "user", "content": text_input})
-         with st.chat_message("user"):
-             st.markdown(text_input)
-         with st.chat_message("assistant"):
-             completion = client.chat.completions.create(
-                 model=st.session_state["openai_model"],
-                 messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages],
-                 stream=False
              )
-             response = completion.choices[0].message.content
-             st.markdown(response)
-             filename = generate_filename(text_input, "md")
-             create_and_save_file(response, "md", text_input, should_save=should_save)
-             st.session_state.messages.append({"role": "assistant", "content": response})
-
- # Image Processing
- def process_image(image_input, user_prompt):
-     original_name = image_input.name
-     image_bytes = image_input.read()
-     with open(original_name, "wb") as f:
-         f.write(image_bytes)  # Save original image
-     base64_image = base64.b64encode(image_bytes).decode("utf-8")
-     response = client.chat.completions.create(
-         model=st.session_state["openai_model"],
-         messages=[
-             {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
-             {"role": "user", "content": [
-                 {"type": "text", "text": user_prompt},
-                 {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
-             ]}
-         ],
-         temperature=0.0
-     )
-     image_response = response.choices[0].message.content
-     filename = generate_filename(user_prompt, "md", original_name)  # Include prompt in filename
-     create_and_save_file(image_response, "md", user_prompt, original_name, should_save=should_save)
-     return image_response
-
- # Audio Processing
- def process_audio(audio_input, text_input=''):
-     if audio_input:
-         audio_bytes = audio_input if isinstance(audio_input, bytes) else audio_input.read()
-         supported_formats = ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm']
-         file_ext = "wav" if isinstance(audio_input, bytes) else os.path.splitext(audio_input.name)[1][1:].lower()
-         if file_ext not in supported_formats:
-             st.error(f"Unsupported format: {file_ext}. Supported formats: {supported_formats}")
-             return
-         if len(audio_bytes) > 200 * 1024 * 1024:  # 200MB limit
-             st.error("File exceeds 200MB limit.")
-             return
-         with st.spinner("Transcribing audio..."):
-             try:
-                 transcription = client.audio.transcriptions.create(
-                     model="whisper-1",
-                     file=BytesIO(audio_bytes)
-                 ).text
-                 st.session_state.messages.append({"role": "user", "content": transcription})
-                 with st.chat_message("user"):
-                     st.markdown(transcription)
-                 with st.chat_message("assistant"):
-                     completion = client.chat.completions.create(
-                         model=st.session_state["openai_model"],
-                         messages=[{"role": "user", "content": text_input + "\n\nTranscription: " + transcription}]
-                     )
-                     response = completion.choices[0].message.content
-                     st.markdown(response)
-                     filename = generate_filename(transcription, "md")
-                     create_and_save_file(response, "md", text_input, should_save=should_save)
-                     st.session_state.messages.append({"role": "assistant", "content": response})
-             except openai.BadRequestError as e:
-                 st.error(f"Audio processing error: {str(e)}")
-
- # Video Processing
- def save_video(video_input):
-     with open(video_input.name, "wb") as f:
-         f.write(video_input.read())
-     return video_input.name
-
- def process_video(video_path, seconds_per_frame=2):
-     base64Frames = []
-     base_video_path, _ = os.path.splitext(video_path)
-     video = cv2.VideoCapture(video_path)
-     total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-     fps = video.get(cv2.CAP_PROP_FPS)
-     frames_to_skip = int(fps * seconds_per_frame)
-     curr_frame = 0
-     while curr_frame < total_frames - 1:
-         video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
-         success, frame = video.read()
-         if not success:
-             break
-         _, buffer = cv2.imencode(".jpg", frame)
-         base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
-         curr_frame += frames_to_skip
-     video.release()
-     audio_path = f"{base_video_path}.mp3"
-     try:
-         clip = VideoFileClip(video_path)
-         if clip.audio:
-             clip.audio.write_audiofile(audio_path, bitrate="32k")
-             clip.audio.close()
-         clip.close()
-     except Exception as e:
-         st.warning(f"No audio track found or error: {str(e)}")
-         audio_path = None
-     return base64Frames, audio_path
-
- def process_audio_and_video(video_input):
-     if video_input:
-         video_path = save_video(video_input)
-         with st.spinner("Extracting frames and audio..."):
-             base64Frames, audio_path = process_video(video_path)
-         if audio_path:
-             with st.spinner("Transcribing video audio..."):
-                 try:
-                     with open(audio_path, "rb") as audio_file:
-                         transcript = client.audio.transcriptions.create(
-                             model="whisper-1",
-                             file=audio_file
-                         ).text
-                     with st.chat_message("user"):
-                         st.markdown(f"Video Transcription: {transcript}")
-                     with st.chat_message("assistant"):
-                         response = client.chat.completions.create(
-                             model=st.session_state["openai_model"],
-                             messages=[
-                                 {"role": "system", "content": "Summarize the video and its transcript in Markdown."},
-                                 {"role": "user", "content": [
-                                     "Video frames:", *map(lambda x: {"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{x}"}}, base64Frames),
-                                     {"type": "text", "text": f"Transcription: {transcript}"}
-                                 ]}
-                             ]
-                         )
-                         result = response.choices[0].message.content
-                         st.markdown(result)
-                         filename = generate_filename(transcript, "md")
-                         create_and_save_file(result, "md", "Video summary", should_save=should_save)
-                 except openai.BadRequestError as e:
-                     st.error(f"Video audio processing error: {str(e)}")
-         else:
-             st.warning("No audio to transcribe.")
-
- # ArXiv Search
- def search_arxiv(query):
-     client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
-     response = client.predict(
-         message=query,
-         llm_results_use=5,
-         database_choice="Semantic Search",
-         llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2",
-         api_name="/update_with_rag_md"
-     )
-     result = response[0] + response[1]
-     filename = generate_filename(query, "md")
-     create_and_save_file(result, "md", query, should_save=should_save)
-     st.session_state.messages.append({"role": "assistant", "content": result})
-     return result
-
- # RAG PDF Gallery
- def upload_pdf_files_to_vector_store(vector_store_id, pdf_files):
-     stats = {"total_files": len(pdf_files), "successful_uploads": 0, "failed_uploads": 0, "errors": []}
-     def upload_single_pdf(file_path):
-         file_name = os.path.basename(file_path)
          try:
-             with open(file_path, "rb") as f:
-                 file_response = client.files.create(file=f, purpose="assistants")
-             client.vector_stores.files.create(vector_store_id=vector_store_id, file_id=file_response.id)
-             return {"file": file_name, "status": "success"}
-         except Exception as e:
-             return {"file": file_name, "status": "failed", "error": str(e)}
-     with ThreadPoolExecutor(max_workers=5) as executor:
-         futures = [executor.submit(upload_single_pdf, f) for f in pdf_files]
-         for future in tqdm(concurrent.futures.as_completed(futures), total=len(pdf_files)):
-             result = future.result()
-             if result["status"] == "success":
-                 stats["successful_uploads"] += 1
-             else:
-                 stats["failed_uploads"] += 1
-                 stats["errors"].append(result)
-     return stats
-
- def create_vector_store(store_name):
-     vector_store = client.vector_stores.create(name=store_name)
-     return {"id": vector_store.id, "name": vector_store.name, "created_at": vector_store.created_at, "file_count": vector_store.file_counts.completed}
-
- def generate_questions(pdf_path):
-     text = ""
-     with open(pdf_path, "rb") as f:
-         pdf = PdfReader(f)
-         for page in pdf.pages:
-             text += page.extract_text() or ""
-     prompt = f"Generate a 10-question quiz with answers based only on this document. Format as markdown with numbered questions and answers:\n{text[:2000]}\n\n"
-     response = client.chat.completions.create(
-         model="gpt-4o-2024-05-13",
-         messages=[{"role": "user", "content": prompt}]
-     )
-     return response.choices[0].message.content
-
- def process_rag_query(query, vector_store_id):
-     try:
-         response = client.chat.completions.create(
-             model="gpt-4o-2024-05-13",
-             messages=[{"role": "user", "content": query}],
-             tools=[{
-                 "type": "file_search",
-                 "file_search": {
-                     "vector_store_ids": [vector_store_id]
-                 }
-             }],
-             tool_choice="auto"
-         )
-         tool_calls = response.choices[0].message.tool_calls if response.choices[0].message.tool_calls else []
-         return response.choices[0].message.content, tool_calls
-     except openai.BadRequestError as e:
-         st.error(f"RAG query error: {str(e)}")
-         return None, []
-
- def evaluate_rag(vector_store_id, questions_dict):
-     k = 5
-     total_queries = len(questions_dict) * 10  # 10 questions per PDF
-     correct_retrievals_at_k = 0
-     reciprocal_ranks = []
-     average_precisions = []
-
-     for filename, quiz in questions_dict.items():
-         questions = re.findall(r"\d+\.\s(.*?)\n\s*Answer:\s(.*?)\n", quiz, re.DOTALL)
-         for question, _ in questions:
-             expected_file = filename
-             response, tool_calls = process_rag_query(question, vector_store_id)
-             if not tool_calls:
-                 continue
-             retrieved_files = [call.arguments.get("file_id", "") for call in tool_calls if "file_search" in call.type][:k]
-             if expected_file in retrieved_files:
-                 rank = retrieved_files.index(expected_file) + 1
-                 correct_retrievals_at_k += 1
-                 reciprocal_ranks.append(1 / rank)
-                 precisions = [1 if f == expected_file else 0 for f in retrieved_files[:rank]]
-                 average_precisions.append(sum(precisions) / len(precisions))
              else:
-                 reciprocal_ranks.append(0)
-                 average_precisions.append(0)
-
-     recall_at_k = correct_retrievals_at_k / total_queries if total_queries else 0
-     mrr = sum(reciprocal_ranks) / total_queries if total_queries else 0
-     map_score = sum(average_precisions) / total_queries if total_queries else 0
-     return {"recall@k": recall_at_k, "mrr": mrr, "map": map_score, "k": k}
-
- def rag_pdf_gallery():
-     st.subheader("RAG PDF Gallery")
-     pdf_files = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
-     if pdf_files:
-         pdf_paths = [save_video(f) for f in pdf_files]  # Reuse save_video for simplicity
-         with st.spinner("Creating vector store..."):
-             vector_store_details = create_vector_store("PDF_Gallery_Store")
-             stats = upload_pdf_files_to_vector_store(vector_store_details["id"], pdf_paths)
-             st.json(stats)
-
-         col1, col2, col3 = st.columns(3)
-         with col1:
-             if st.button("📝 Quiz"):
-                 st.session_state["rag_prompt"] = "Generate a 10-question quiz with answers based only on this document."
-         with col2:
-             if st.button("📑 Summary"):
-                 st.session_state["rag_prompt"] = "Summarize this per page and output as markdown outline with emojis and numbered outline with multiple levels summarizing everything unique per page in method steps or fact steps."
-         with col3:
-             if st.button("🔍 Key Facts"):
-                 st.session_state["rag_prompt"] = "Extract 10 key facts from this document in markdown with emojis."
-
-         with st.spinner("Generating questions..."):
-             questions_dict = {os.path.basename(p): generate_questions(p) for p in pdf_paths}
-             st.markdown("### Generated Quiz")
-             for filename, quiz in questions_dict.items():
-                 st.markdown(f"#### {filename}")
-                 st.markdown(quiz)
-
-         query = st.text_input("Ask a question about the PDFs:", value=st.session_state.get("rag_prompt", ""))
-         if query and st.button("Submit RAG Query"):
-             with st.spinner("Processing RAG query..."):
-                 response, tool_calls = process_rag_query(query, vector_store_details["id"])
-                 if response:
-                     st.markdown(response)
-                     st.write("Retrieved chunks:")
-                     for call in tool_calls:
-                         if "file_search" in call.type:
-                             st.json(call.arguments)
-                     st.rerun()

-         if st.button("Evaluate RAG Performance"):
-             with st.spinner("Evaluating..."):
-                 metrics = evaluate_rag(vector_store_details["id"], questions_dict)
-                 st.json(metrics)
-
- # File Sidebar
- def FileSidebar():
-     st.sidebar.title("File Operations")
-     default_types = [".md", ".png", ".pdf"]
-     file_types = st.sidebar.multiselect("Filter by type", [".md", ".wav", ".png", ".mp4", ".mp3", ".pdf"], default=default_types)
-     all_files = [f for f in glob.glob("*.*") if os.path.splitext(f)[1] in file_types and len(os.path.splitext(f)[0]) >= 10]
-     all_files.sort(key=lambda x: os.path.getmtime(x), reverse=True)
-
-     if st.sidebar.button("🗑 Delete All Filtered"):
-         for file in all_files:
-             os.remove(file)
-         st.rerun()
-
-     if st.sidebar.button("⬇️ Download All Filtered"):
-         zip_file = create_zip_of_files(all_files)
-         st.sidebar.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)

-     for file in all_files:
-         ext = os.path.splitext(file)[1].lower()
-         col1, col2, col3, col4, col5 = st.sidebar.columns([1, 6, 1, 1, 1])
-         colFollowUp = ""  # Flag to trigger main-area display
-
-         with col1:  # View
-             icon = "📜" if ext == ".md" else "📄" if ext == ".pdf" else "🖼️" if ext in [".png", ".jpg", ".jpeg"] else "🎵" if ext in [".wav", ".mp3"] else "🎥" if ext == ".mp4" else "📎"
-             if st.button(icon, key=f"view_{file}"):
-                 colFollowUp = "view_" + ext
-                 with open(file, "rb") as f:
-                     content = f.read()

-         with col2:  # Download link
-             st.markdown(get_table_download_link(file), unsafe_allow_html=True)

-         with col3:  # Open
-             if st.button("📂", key=f"open_{file}"):
-                 colFollowUp = "open_" + ext
-                 with open(file, "rb") as f:
-                     content = f.read()

-         with col4:  # Run
-             if st.button("▶️", key=f"run_{file}"):
-                 if ext == ".md":
-                     colFollowUp = "run_" + ext
-                     with open(file, "rb") as f:
-                         content = f.read()

-         with col5:  # Delete
-             if st.button("🗑", key=f"delete_{file}"):
-                 os.remove(file)
                  st.rerun()

-         # Display in main area based on colFollowUp
-         if colFollowUp.startswith("view_"):
-             if ext == ".md":
-                 st.markdown(content.decode("utf-8"))
-                 SpeechSynthesis(content.decode("utf-8"))
-             elif ext == ".pdf":
-                 st.download_button("Download PDF", content, file, "application/pdf")
-                 st.write("PDF Viewer not natively supported; download to view.")
-             elif ext in [".png", ".jpg", ".jpeg"]:
-                 st.image(content, use_column_width=True)
-             elif ext in [".wav", ".mp3"]:
-                 st.audio(content, format=f"audio/{ext[1:]}")
-             elif ext == ".mp4":
-                 st.video(content, format="video/mp4")
-
-         elif colFollowUp.startswith("open_"):
-             if ext == ".md":
-                 st.text_area(f"Editing {file}", value=content.decode("utf-8"), height=300, key=f"edit_{file}")
-             elif ext == ".pdf":
-                 st.download_button("Download PDF to Edit", content, file, "application/pdf")
-                 st.write("PDF editing not supported in-app; download to edit externally.")
-             elif ext in [".png", ".jpg", ".jpeg"]:
-                 st.image(content, use_column_width=True, caption=f"Viewing {file}")
-             elif ext in [".wav", ".mp3"]:
-                 st.audio(content, format=f"audio/{ext[1:]}")
-             elif ext == ".mp4":
-                 st.video(content, format="video/mp4")
-
-         elif colFollowUp.startswith("run_"):
-             if ext == ".md":
-                 process_text(content.decode("utf-8"))
-
- def create_zip_of_files(files):
-     zip_name = "Files.zip"
-     with zipfile.ZipFile(zip_name, 'w') as zipf:
-         for file in files:
-             zipf.write(file)
-     return zip_name
-
- def get_zip_download_link(zip_file):
-     with open(zip_file, 'rb') as f:
-         data = f.read()
-     b64 = base64.b64encode(data).decode()
-     return f'<a href="data:application/zip;base64,{b64}" download="{zip_file}">Download All</a>'
-
- @st.cache_resource
- def get_table_download_link(file_path):
-     with open(file_path, 'rb') as f:
-         data = f.read()
-     b64 = base64.b64encode(data).decode()
-     file_name = os.path.basename(file_path)
-     ext = os.path.splitext(file_name)[1].lower()
-     mime_type = "text/markdown" if ext == ".md" else "application/pdf" if ext == ".pdf" else "image/png" if ext in [".png", ".jpg", ".jpeg"] else "audio/wav" if ext == ".wav" else "audio/mpeg" if ext == ".mp3" else "video/mp4" if ext == ".mp4" else "application/octet-stream"
-     return f'<a href="data:{mime_type};base64,{b64}" download="{file_name}">{file_name}</a>'
-
- # Main Function
- def main():
-     st.markdown("##### GPT-4o Omni Model: Text, Audio, Image, Video & RAG")
-     model_options = ["gpt-4o-2024-05-13", "gpt-3.5-turbo"]
-     st.session_state["openai_model"] = st.selectbox("Select GPT Model", model_options, index=0)
-
-     option = st.selectbox("Select Input Type", ("Text", "Image", "Audio", "Video", "ArXiv Search", "RAG PDF Gallery"))
-
-     if option == "Text":
-         default_text = "Create a summary of PDF py libraries and usage in py with emojis in markdown. Maybe a buckeyball feature rating comparing them against each other in markdown emoji outline or tables."
-         col1, col2 = st.columns([1, 5])
-         with col1:
-             if st.button("📝 MD", key="md_button"):
-                 st.session_state["text_input"] = default_text
-                 with st.spinner("Processing..."):
-                     process_text(default_text)
-                 st.rerun()
-         with col2:
-             text_input = st.text_input("Enter your text:", value=st.session_state.get("text_input", ""), key="text_input_field")
-             if text_input and st.button("Submit Text"):
-                 with st.spinner("Processing..."):
-                     process_text(text_input)
-                 st.rerun()
-
-     elif option == "Image":
-         col1, col2 = st.columns(2)
-         with col1:
-             if st.button("📝 Describe"):
-                 st.session_state["image_prompt"] = "Describe this image and list ten facts in a markdown outline with emojis."
-         with col2:
-             if st.button("🔍 OCR"):
-                 st.session_state["image_prompt"] = "Show electronic text of text in the image."
-         text_input = st.text_input("Image Prompt:", value=st.session_state.get("image_prompt", "Describe this image and list ten facts in a markdown outline with emojis."))
-         image_input = st.file_uploader("Upload an image (max 200MB)", type=["png", "jpg", "jpeg"], accept_multiple_files=False)
-         if image_input and text_input and st.button("Submit Image"):
-             if image_input.size > 200 * 1024 * 1024:
-                 st.error("Image exceeds 200MB limit.")
-             else:
-                 with st.spinner("Processing..."):
-                     image_response = process_image(image_input, text_input)
-                     with st.chat_message("ai", avatar="🦖"):
-                         st.markdown(image_response)
-                 st.rerun()
-
-     elif option == "Audio":
-         text_input = st.text_input("Audio Prompt:", value="Summarize this audio transcription in Markdown.")
-         audio_input = st.file_uploader("Upload an audio file (max 200MB)", type=["mp3", "wav", "flac", "m4a"], accept_multiple_files=False)
-         audio_bytes = audio_recorder()
-         if audio_bytes and text_input and st.button("Submit Audio Recording"):
-             with open("recorded_audio.wav", "wb") as f:
-                 f.write(audio_bytes)
-             with st.spinner("Processing..."):
-                 process_audio(audio_bytes, text_input)
              st.rerun()
-         elif audio_input and text_input and st.button("Submit Audio File"):
-             with st.spinner("Processing..."):
-                 process_audio(audio_input, text_input)
              st.rerun()

-     elif option == "Video":
-         text_input = st.text_input("Video Prompt:", value="Summarize this video and its transcription in Markdown.")
-         video_input = st.file_uploader("Upload a video file (max 200MB)", type=["mp4"], accept_multiple_files=False)
-         if video_input and text_input and st.button("Submit Video"):
-             if video_input.size > 200 * 1024 * 1024:
-                 st.error("Video exceeds 200MB limit.")
-             else:
-                 with st.spinner("Processing..."):
-                     process_audio_and_video(video_input)
-                 st.rerun()
-
-     elif option == "ArXiv Search":
-         query = st.text_input("AI Search ArXiv Scholarly Articles:")
-         if query and st.button("Search ArXiv"):
-             with st.spinner("Searching ArXiv..."):
-                 result = search_arxiv(query)
-                 st.markdown(result)
              st.rerun()
-
-     elif option == "RAG PDF Gallery":
-         rag_pdf_gallery()

-     # Chat Display and Input
-     for message in st.session_state.messages:
-         with st.chat_message(message["role"]):
-             st.markdown(message["content"])

-     if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
-         with st.spinner("Processing..."):
-             process_text(prompt)
-         st.rerun()

- FileSidebar()
- main()

  import math
  import os
  import pytz
  import re
  import time
  import zipfile
+ import asyncio
+ import streamlit as st
+ import streamlit.components.v1 as components
  from concurrent.futures import ThreadPoolExecutor
  from tqdm import tqdm
  import concurrent

+ # Foundational Imports
  from audio_recorder_streamlit import audio_recorder
  from bs4 import BeautifulSoup
  from collections import deque
  from dotenv import load_dotenv
  from gradio_client import Client
  from io import BytesIO
+ from moviepy.editor import VideoFileClip
  from PIL import Image
  from PyPDF2 import PdfReader

+ # OpenAI & Data Handling
  import openai
  from openai import OpenAI
  import pandas as pd

+ # Load environment variables
+ load_dotenv()
+
+ # --- Core Classes for Functionality ---
+
+ class PerformanceTracker:
+     """Tracks and displays the performance of executed tasks."""
+     def track(self, model_name_provider):
+         # ⏱️ Times our functions and brags about how fast they are.
+         def decorator(func):
+             def wrapper(*args, **kwargs):
+                 start_time = time.time()
+
+                 # Execute the function in a thread pool for non-blocking UI
+                 with ThreadPoolExecutor() as executor:
+                     future = executor.submit(func, *args, **kwargs)
+                     result = future.result()  # Wait for the function to complete
+
+                 end_time = time.time()
+                 duration = end_time - start_time
+                 model_used = model_name_provider() if callable(model_name_provider) else model_name_provider
+
+                 st.success("**Execution Complete!**")
+                 st.info(f"Model: `{model_used}` | Runtime: `{duration:.2f} seconds`")
+                 return result
+             return wrapper
+         return decorator
+
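Usage sketch: `track` is a decorator factory, and passing a zero-argument callable instead of a plain string defers the model-name lookup until the wrapped function actually runs. A minimal illustration with a hypothetical `slow_task` (assumes this file's imports and a live Streamlit session):

    tracker = PerformanceTracker()

    @tracker.track(lambda: st.session_state.openai_model)  # provider resolved at call time
    def slow_task(seconds):
        time.sleep(seconds)
        return seconds

    slow_task(2)  # renders the success banner and reports a roughly 2.00 second runtime
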
+ class FileHandler:
+     """Manages all file system operations like naming, saving, and zipping."""
+     def __init__(self, should_save=True):
+         # 🗂️ I'm the librarian for all your digital stuff.
+         self.should_save = should_save
+         self.central_tz = pytz.timezone('US/Central')
+
+     def generate_filename(self, prompt, file_type, original_name=None):
+         # 🏷️ Slapping a unique, SFW name on your file so you can find it later.
+         safe_date_time = datetime.now(self.central_tz).strftime("%m%d_%H%M")
+         safe_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt).strip()[:50]
+         file_stem = f"{safe_date_time}_{safe_prompt}"
+         if original_name:
+             base_name = os.path.splitext(original_name)[0]
+             file_stem = f"{file_stem}_{base_name}"
+         return f"{file_stem[:100]}.{file_type}"
+
+     def save_file(self, content, filename, prompt=None):
+         # 💾 Saving your masterpiece before you accidentally delete it.
+         if not self.should_save:
+             return None
+         with open(filename, "w", encoding="utf-8") as f:
+             if prompt:
+                 f.write(prompt + "\n\n")
+             f.write(content)
+         return filename
+
+     def save_uploaded_file(self, uploaded_file):
+         # 📥 Taking your uploaded file and tucking it safely on the server.
+         path = os.path.join(uploaded_file.name)
+         with open(path, "wb") as f:
+             f.write(uploaded_file.getvalue())
+         return path
+
+     def create_zip_archive(self, files_to_zip):
+         # 🤐 Zipping up your files nice and tight.
+         zip_path = "Filtered_Files.zip"
+         with zipfile.ZipFile(zip_path, 'w') as zipf:
+             for file in files_to_zip:
+                 zipf.write(file)
+         return zip_path
+
+     @st.cache_data
+     def get_base64_download_link(_self, file_path, link_text, mime_type):
+         # 🔗 Creating a magical link to download your file.
+         with open(file_path, 'rb') as f:
+             data = f.read()
+         b64 = base64.b64encode(data).decode()
+         return f'<a href="data:{mime_type};base64,{b64}" download="{os.path.basename(file_path)}">{link_text}</a>'
+
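Design note: the underscore in `_self` follows Streamlit's convention that underscore-prefixed parameters are excluded from `st.cache_data` hashing, so the handler instance itself never needs to be hashable; the cached link is recomputed only when `file_path`, `link_text`, or `mime_type` change. A hypothetical usage (`notes.md` is illustrative):

    handler = FileHandler()
    link = handler.get_base64_download_link("notes.md", "Download notes", "text/markdown")
    st.sidebar.markdown(link, unsafe_allow_html=True)  # the returned <a> tag must be rendered as raw HTML
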
+ class OpenAIProcessor:
+     """Handles all interactions with the OpenAI API."""
+     def __init__(self, api_key, org_id, model):
+         # 🤖 I'm the brainiac talking to the OpenAI overlords.
+         self.client = OpenAI(api_key=api_key, organization=org_id)
+         self.model = model
+
+     def execute_text_completion(self, messages):
+         # ✍️ Turning your prompts into pure AI gold.
+         completion = self.client.chat.completions.create(
+             model=self.model,
+             messages=[{"role": m["role"], "content": m["content"]} for m in messages],
+             stream=False
+         )
+         return completion.choices[0].message.content
+
+     def execute_image_completion(self, prompt, image_bytes):
+         # 🖼️ Analyzing your pics with my digital eyeballs.
+         base64_image = base64.b64encode(image_bytes).decode("utf-8")
+         response = self.client.chat.completions.create(
+             model=self.model,
+             messages=[
+                 {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
+                 {"role": "user", "content": [
+                     {"type": "text", "text": prompt},
+                     {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
+                 ]}
+             ],
+             temperature=0.0
+         )
+         return response.choices[0].message.content
+
+     def execute_video_completion(self, frames, transcript):
+         # 🎬 Watching your video and giving you the summary, so you don't have to.
+         response = self.client.chat.completions.create(
+             model=self.model,
+             messages=[
+                 {"role": "system", "content": "Summarize the video and its transcript in Markdown."},
+                 {"role": "user", "content": [
+                     "Video frames:", *map(lambda x: {"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{x}"}}, frames),
+                     {"type": "text", "text": f"Transcription: {transcript}"}
+                 ]}
+             ]
+         )
+         return response.choices[0].message.content
+
+     def transcribe_audio(self, audio_bytes):
+         # 🎤 I'm all ears... turning your sounds into words.
+         try:
+             # Name the in-memory file so the API can infer the audio format from the
+             # extension; the only caller passes the MP3 track extracted from a video.
+             audio_file = BytesIO(audio_bytes)
+             audio_file.name = "audio.mp3"
+             transcription = self.client.audio.transcriptions.create(
+                 model="whisper-1",
+                 file=audio_file
              )
+             return transcription.text
+         except openai.BadRequestError as e:
+             st.error(f"Audio processing error: {e}")
+             return None
+
+ class MediaProcessor:
+     """Handles processing of media files like video and audio."""
+     def extract_video_components(self, video_path, seconds_per_frame=2):
+         # ✂️ Chopping up your video into frames and snatching the audio.
+         base64Frames = []
+         video = cv2.VideoCapture(video_path)
+         total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+         fps = video.get(cv2.CAP_PROP_FPS)
+         frames_to_skip = int(fps * seconds_per_frame)
+         curr_frame = 0
+
+         while curr_frame < total_frames - 1:
+             video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
+             success, frame = video.read()
+             if not success: break
+             _, buffer = cv2.imencode(".jpg", frame)
+             base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
+             curr_frame += frames_to_skip
+         video.release()
+
+         audio_path = f"{os.path.splitext(video_path)[0]}.mp3"
          try:
+             clip = VideoFileClip(video_path)
+             if clip.audio:
+                 clip.audio.write_audiofile(audio_path, bitrate="32k")
              else:
+                 audio_path = None
+         except Exception:
+             audio_path = None

+         return base64Frames, audio_path
+
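Sampling arithmetic, for reference: at the default `seconds_per_frame=2` on a 30 fps source, `frames_to_skip = int(30 * 2) = 60`, so a one-minute clip (1800 frames) is reduced to roughly 30 base64-encoded frames before being sent to the model.
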
+ class RAGManager:
+     """Manages Retrieval-Augmented Generation processes."""
+     def __init__(self, openai_client):
+         # 📚 Building a library and then acing the open-book test.
+         self.client = openai_client
+
+     def create_vector_store(self, name):
+         # 🗄️ Creating a shiny new digital filing cabinet.
+         vector_store = self.client.vector_stores.create(name=name)
+         return vector_store.id
+
+     # ... Other RAG methods would go here ...
+
+ class ExternalAPIHandler:
+     """Handles calls to external APIs like ArXiv."""
+     def search_arxiv(self, query):
+         # 👨‍🔬 Pestering the digital librarians at ArXiv for juicy papers.
+         client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
+         response = client.predict(
+             message=query,
+             llm_results_use=5,
+             database_choice="Semantic Search",
+             llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2",
+             api_name="/update_with_rag_md"
+         )
+         return response[0] + response[1]

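Sketch only, not part of this commit: the deleted upload helper from the old side maps naturally onto this class, assuming the same `client.files` / `client.vector_stores` calls used above:

    def add_pdf(self, vector_store_id, file_path):
        # Upload the PDF, then attach it to the store (mirrors the removed upload_pdf_files_to_vector_store)
        with open(file_path, "rb") as f:
            file_response = self.client.files.create(file=f, purpose="assistants")
        self.client.vector_stores.files.create(vector_store_id=vector_store_id, file_id=file_response.id)
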
+ # --- Streamlit UI Class ---
+
+ class StreamlitUI:
+     """Main class to build and run the Streamlit user interface."""
+
+     def __init__(self):
+         # 🎨 I'm the artist painting your beautiful web app.
+         self.setup_page()
+         self.initialize_state()
+
+         # Initialize helper classes
+         self.file_handler = FileHandler(should_save=st.session_state.should_save)
+         self.openai_processor = OpenAIProcessor(
+             api_key=os.getenv('OPENAI_API_KEY'),
+             org_id=os.getenv('OPENAI_ORG_ID'),
+             model=st.session_state.openai_model
+         )
+         self.media_processor = MediaProcessor()
+         self.external_api_handler = ExternalAPIHandler()
+         # Initialize performance tracker
+         global performance_tracker
+         performance_tracker = PerformanceTracker()
+
+     def setup_page(self):
+         # ✨ Setting the stage for our amazing app.
+         st.set_page_config(
+             page_title="🔬🧠ScienceBrain.AI",
+             page_icon=Image.open("icons.ico"),
+             layout="wide",
+             initial_sidebar_state="auto",
+             menu_items={
+                 'Get Help': 'https://huggingface.co/awacke1',
+                 'Report a bug': 'https://huggingface.co/spaces/awacke1',
+                 'About': "🔬🧠ScienceBrain.AI"
+             }
+         )
+
+     def initialize_state(self):
+         # 📝 Keeping notes so we don't forget stuff between clicks.
+         if "openai_model" not in st.session_state:
+             st.session_state.openai_model = "gpt-4o-2024-05-13"
+         if "messages" not in st.session_state:
+             st.session_state.messages = []
+         if "should_save" not in st.session_state:
+             st.session_state.should_save = True  # default before the sidebar checkbox first renders
+
+     def display_sidebar(self):
+         # 👈 Everything you see on the left? That's me.
+         st.sidebar.title("Configuration & Files")
+         st.session_state.should_save = st.sidebar.checkbox("💾 Save Session", value=True)
+         if st.sidebar.button("🗑️ Clear Chat History"):
+             st.session_state.messages = []
+             st.rerun()
+
+         st.sidebar.markdown("---")
+         # File management logic here...
+
+     def display_main_interface(self):
+         # 🖥️ This is the main event, the star of the show!
+         st.markdown("##### GPT-4o Omni: Text, Audio, Image, Video & RAG")
+
+         model_options = ["gpt-4o-2024-05-13", "gpt-3.5-turbo"]
+         st.session_state.openai_model = st.selectbox(
+             "Select OpenAI Model", model_options, index=model_options.index(st.session_state.openai_model)
+         )
+
+         input_type = st.selectbox("Select Input Type", ("Text", "Image", "Audio", "Video", "ArXiv Search", "RAG PDF Gallery"))
+
+         if input_type == "Text":
+             self.handle_text_input()
+         elif input_type == "Image":
+             self.handle_image_input()
+         elif input_type == "Video":
+             self.handle_video_input()
+         elif input_type == "ArXiv Search":
+             self.handle_arxiv_search()
+         # ... other handlers

+     def handle_text_input(self):
+         # 💬 You talk, I listen (and then make the AI talk back).
+         prompt = st.text_input("Enter your text prompt:", key="text_prompt")
+         if st.button("Submit Text", key="submit_text"):
+             if prompt:
+                 st.session_state.messages.append({"role": "user", "content": prompt})
+                 with st.chat_message("user"):
+                     st.markdown(prompt)
+
+                 with st.chat_message("assistant"):
+                     with st.spinner("Thinking..."):
+                         # Use the performance tracker decorator
+                         @performance_tracker.track(lambda: self.openai_processor.model)
+                         def run_completion():
+                             return self.openai_processor.execute_text_completion(st.session_state.messages)
+
+                         response = run_completion()
+                         st.markdown(response)
+                         st.session_state.messages.append({"role": "assistant", "content": response})
+                         filename = self.file_handler.generate_filename(prompt, "md")
+                         self.file_handler.save_file(response, filename, prompt=prompt)
                  st.rerun()
+
+     def handle_image_input(self):
+         # 📸 Say cheese! Let's see what the AI thinks of your photo.
+         prompt = st.text_input("Enter a prompt for the image:", value="Describe this image in detail.")
+         uploaded_image = st.file_uploader("Upload an image:", type=["png", "jpg", "jpeg"])
+
+         if st.button("Submit Image") and uploaded_image and prompt:
+             with st.chat_message("user"):
+                 st.image(uploaded_image, width=250)
+                 st.markdown(prompt)
+
+             with st.chat_message("assistant"):
+                 with st.spinner("Analyzing image..."):
+                     image_bytes = uploaded_image.getvalue()
+
+                     @performance_tracker.track(lambda: self.openai_processor.model)
+                     def run_image_analysis():
+                         return self.openai_processor.execute_image_completion(prompt, image_bytes)
+
+                     response = run_image_analysis()
+                     st.markdown(response)
+                     filename = self.file_handler.generate_filename(prompt, "md", original_name=uploaded_image.name)
+                     self.file_handler.save_file(response, filename, prompt=prompt)
              st.rerun()
+
+     def handle_video_input(self):
+         # 📼 Roll the tape! Time to process that video.
+         prompt = st.text_input("Enter a prompt for the video:", value="Summarize the key events in this video.")
+         uploaded_video = st.file_uploader("Upload a video:", type=["mp4", "mov"])
+
+         if st.button("Submit Video") and uploaded_video and prompt:
+             with st.chat_message("user"):
+                 st.markdown(f"Analyzing video: `{uploaded_video.name}` with prompt: `{prompt}`")
+
+             with st.chat_message("assistant"):
+                 with st.spinner("Processing video... this may take a moment."):
+                     video_path = self.file_handler.save_uploaded_file(uploaded_video)
+
+                     @performance_tracker.track(lambda: self.openai_processor.model)
+                     def run_video_analysis():
+                         frames, audio_path = self.media_processor.extract_video_components(video_path)
+                         transcript = "No audio found."
+                         if audio_path:
+                             with open(audio_path, "rb") as af:
+                                 transcript = self.openai_processor.transcribe_audio(af.read())
+                         return self.openai_processor.execute_video_completion(frames, transcript)
+
+                     response = run_video_analysis()
+                     st.markdown(response)
+                     filename = self.file_handler.generate_filename(prompt, "md", original_name=uploaded_video.name)
+                     self.file_handler.save_file(response, filename, prompt=prompt)
              st.rerun()

+     def handle_arxiv_search(self):
+         # 🔬 Diving deep into the archives of science!
+         query = st.text_input("Search ArXiv for scholarly articles:")
+         if st.button("Search ArXiv") and query:
+             with st.chat_message("user"):
+                 st.markdown(f"ArXiv Search: `{query}`")
+             with st.chat_message("assistant"):
+                 with st.spinner("Searching ArXiv..."):
+
+                     @performance_tracker.track("Mistral-7B-Instruct-v0.2")  # Model is fixed for this endpoint
+                     def run_arxiv_search():
+                         return self.external_api_handler.search_arxiv(query)
+
+                     response = run_arxiv_search()
+                     st.markdown(response)
+                     st.session_state.messages.append({"role": "assistant", "content": response})
+                     filename = self.file_handler.generate_filename(query, "md")
+                     self.file_handler.save_file(response, filename, prompt=query)
              st.rerun()

+     def display_chat_history(self):
+         # 📜 Let's review what we've talked about so far.
+         for message in st.session_state.messages:
+             with st.chat_message(message["role"]):
+                 st.markdown(message["content"])
+
+     def run(self):
+         # ▶️ Lights, camera, action! Let's get this show on the road.
+         self.display_sidebar()
+         self.display_chat_history()
+         self.display_main_interface()
+
+ # --- Main Execution ---
+ if __name__ == "__main__":
+     app = StreamlitUI()
+     app.run()
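Usage note: under `streamlit run app.py` the script executes with `__name__ == "__main__"`, so the guard constructs and runs the UI on every rerun, and `load_dotenv()` near the top of the file lets `OPENAI_API_KEY` (and optionally `OPENAI_ORG_ID`) come from a local `.env` rather than the shell environment.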