import base64
import cv2
import glob
import json
import math
import os
import pytz
import random
import re
import requests
import streamlit as st
import streamlit.components.v1 as components
import textract
import time
import zipfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from collections import deque
from datetime import datetime
from dotenv import load_dotenv
from gradio_client import Client
from io import BytesIO
from moviepy import VideoFileClip
from PIL import Image
from PyPDF2 import PdfReader
from templates import bot_template, css, user_template
from urllib.parse import quote
from xml.etree import ElementTree as ET

import openai
from openai import OpenAI
import pandas as pd

Site_Name = 'Scholarly-Article-Document-Search-With-Memory'
title = "🔬🧠ScienceBrain.AI"
helpURL = 'https://huggingface.co/awacke1'
bugURL = 'https://huggingface.co/spaces/awacke1'
icons = Image.open("icons.ico")
st.set_page_config(
    page_title=title,
    page_icon=icons,
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={'Get Help': helpURL, 'Report a bug': bugURL, 'About': title}
)

load_dotenv()  # read keys from a local .env file before querying the environment
API_KEY = os.getenv('API_KEY')
HF_KEY = os.getenv('HF_KEY')
headers = {"Authorization": f"Bearer {HF_KEY}", "Content-Type": "application/json"}
key = os.getenv('OPENAI_API_KEY')
client = OpenAI(api_key=key, organization=os.getenv('OPENAI_ORG_ID'))
MODEL = "gpt-4o-2024-05-13"
if "openai_model" not in st.session_state:
    st.session_state["openai_model"] = MODEL
if "messages" not in st.session_state:
    st.session_state.messages = []
if st.button("Clear Session"):
    st.session_state.messages = []

should_save = st.sidebar.checkbox("💾 Save", value=True, help="Save your session data.")

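# Expected environment variables (typically supplied via .env): OPENAI_API_KEY
# and, optionally, OPENAI_ORG_ID for the OpenAI client, plus HF_KEY for the
# Hugging Face `headers` above; API_KEY appears reserved for other integrations.
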
# Deliberately uncached: this helper exists for its rendering side effect, and
# caching it would skip the render on reruns that pass the same text.
def SpeechSynthesis(result):
    documentHTML5 = '''
    <!DOCTYPE html>
    <html>
    <head>
        <title>Read It Aloud</title>
        <script type="text/javascript">
            function readAloud() {
                const text = document.getElementById("textArea").value;
                const speech = new SpeechSynthesisUtterance(text);
                window.speechSynthesis.speak(speech);
            }
        </script>
    </head>
    <body>
        <h1>🔊 Read It Aloud</h1>
        <textarea id="textArea" rows="10" cols="80">
    '''
    documentHTML5 += result + '''
        </textarea>
        <br>
        <button onclick="readAloud()">🔊 Read Aloud</button>
    </body>
    </html>
    '''
    components.html(documentHTML5, width=1280, height=300)

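# The widget above relies on the browser's Web Speech API
# (SpeechSynthesisUtterance), so available voices and languages depend on the
# visitor's browser and operating system, not on this server.
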
def generate_filename(prompt, file_type, original_name=None):
    central = pytz.timezone('US/Central')
    safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
    # Guard against a None prompt and strip characters that are illegal in filenames.
    safe_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt or "").strip()[:50]
    if original_name and file_type == "md":
        base_name = os.path.splitext(original_name)[0]
        file_stem = f"{safe_date_time}_{safe_prompt}_{base_name}"[:100]
        return f"{file_stem}.{file_type}"
    file_stem = f"{safe_date_time}_{safe_prompt}"[:100]
    return f"{file_stem}.{file_type}"


def create_and_save_file(content, file_type="md", prompt=None, original_name=None, should_save=True):
    if not should_save:
        return None
    filename = generate_filename(prompt, file_type, original_name)
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content if not prompt else prompt + "\n\n" + content)
    return filename

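# Illustrative example (the timestamp is whatever US/Central time it is now):
#   generate_filename("Explain RAG", "md")  ->  "0513_1430_Explain RAG.md"
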
def process_text(text_input):
    if text_input:
        st.session_state.messages.append({"role": "user", "content": text_input})
        with st.chat_message("user"):
            st.markdown(text_input)
        with st.chat_message("assistant"):
            # Send the full running conversation so the model keeps context.
            completion = client.chat.completions.create(
                model=st.session_state["openai_model"],
                messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages],
                stream=False
            )
            response = completion.choices[0].message.content
            st.markdown(response)
        create_and_save_file(response, "md", text_input, should_save=should_save)
        st.session_state.messages.append({"role": "assistant", "content": response})

def process_image(image_input, user_prompt):
    original_name = image_input.name
    image_bytes = image_input.read()
    with open(original_name, "wb") as f:
        f.write(image_bytes)
    base64_image = base64.b64encode(image_bytes).decode("utf-8")
    response = client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=[
            {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
            {"role": "user", "content": [
                {"type": "text", "text": user_prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
            ]}
        ],
        temperature=0.0
    )
    image_response = response.choices[0].message.content
    create_and_save_file(image_response, "md", user_prompt, original_name, should_save=should_save)
    return image_response

def process_audio(audio_input, text_input=''):
    if audio_input:
        audio_bytes = audio_input if isinstance(audio_input, bytes) else audio_input.read()
        supported_formats = ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm']
        file_ext = "wav" if isinstance(audio_input, bytes) else os.path.splitext(audio_input.name)[1][1:].lower()
        if file_ext not in supported_formats:
            st.error(f"Unsupported format: {file_ext}. Supported formats: {supported_formats}")
            return
        if len(audio_bytes) > 200 * 1024 * 1024:
            st.error("File exceeds 200MB limit.")
            return
        with st.spinner("Transcribing audio..."):
            try:
                # The transcription endpoint infers the format from the file
                # name, so give the in-memory buffer one before uploading.
                audio_file = BytesIO(audio_bytes)
                audio_file.name = f"audio.{file_ext}"
                transcription = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file
                ).text
                st.session_state.messages.append({"role": "user", "content": transcription})
                with st.chat_message("user"):
                    st.markdown(transcription)
                with st.chat_message("assistant"):
                    completion = client.chat.completions.create(
                        model=st.session_state["openai_model"],
                        messages=[{"role": "user", "content": text_input + "\n\nTranscription: " + transcription}]
                    )
                    response = completion.choices[0].message.content
                    st.markdown(response)
                create_and_save_file(response, "md", text_input, should_save=should_save)
                st.session_state.messages.append({"role": "assistant", "content": response})
            except openai.BadRequestError as e:
                st.error(f"Audio processing error: {str(e)}")

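# Note: the OpenAI transcription endpoint itself caps uploads at 25 MB, so the
# 200 MB check above is only a coarse UI guard; larger recordings need to be
# chunked or compressed before transcription will succeed.
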
def save_uploaded_file(uploaded_file):
    # Persist any Streamlit upload (video, PDF, audio, ...) under its original name.
    with open(uploaded_file.name, "wb") as f:
        f.write(uploaded_file.read())
    return uploaded_file.name

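# Files land in the app's working directory under the upload's original name,
# so re-uploading a file with the same name silently overwrites the old copy.
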
def process_video(video_path, seconds_per_frame=2):
    base64Frames = []
    base_video_path, _ = os.path.splitext(video_path)
    video = cv2.VideoCapture(video_path)
    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = video.get(cv2.CAP_PROP_FPS)
    frames_to_skip = int(fps * seconds_per_frame)
    curr_frame = 0
    while curr_frame < total_frames - 1:
        video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
        success, frame = video.read()
        if not success:
            break
        _, buffer = cv2.imencode(".jpg", frame)
        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
        curr_frame += frames_to_skip
    video.release()
    audio_path = f"{base_video_path}.mp3"
    try:
        clip = VideoFileClip(video_path)
        if clip.audio:
            clip.audio.write_audiofile(audio_path, bitrate="32k")
            clip.audio.close()
        else:
            audio_path = None  # no audio track: don't hand back a path that was never written
        clip.close()
    except Exception as e:
        st.warning(f"No audio track found or error: {str(e)}")
        audio_path = None
    return base64Frames, audio_path

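# Sampling arithmetic: frames_to_skip = fps * seconds_per_frame, so at 30 fps
# with the default seconds_per_frame=2 one frame is kept out of every 60 --
# a 2-minute clip yields roughly 60 frames.
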
def process_audio_and_video(video_input):
    if video_input:
        video_path = save_uploaded_file(video_input)
        with st.spinner("Extracting frames and audio..."):
            base64Frames, audio_path = process_video(video_path)
        if audio_path:
            with st.spinner("Transcribing video audio..."):
                try:
                    with open(audio_path, "rb") as audio_file:
                        transcript = client.audio.transcriptions.create(
                            model="whisper-1",
                            file=audio_file
                        ).text
                    with st.chat_message("user"):
                        st.markdown(f"Video Transcription: {transcript}")
                    with st.chat_message("assistant"):
                        response = client.chat.completions.create(
                            model=st.session_state["openai_model"],
                            messages=[
                                {"role": "system", "content": "Summarize the video and its transcript in Markdown."},
                                {"role": "user", "content": [
                                    {"type": "text", "text": "Video frames:"},
                                    *map(lambda x: {"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{x}"}}, base64Frames),
                                    {"type": "text", "text": f"Transcription: {transcript}"}
                                ]}
                            ]
                        )
                        result = response.choices[0].message.content
                        st.markdown(result)
                    create_and_save_file(result, "md", "Video summary", should_save=should_save)
                except openai.BadRequestError as e:
                    st.error(f"Video audio processing error: {str(e)}")
        else:
            st.warning("No audio to transcribe.")

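# Every sampled frame is sent to the model as an image part, so long videos
# translate directly into prompt size and cost; raising seconds_per_frame in
# process_video is the main lever for keeping requests small.
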
def search_arxiv(query):
    # Use a distinct name so the module-level OpenAI `client` is not shadowed.
    arxiv_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    response = arxiv_client.predict(
        message=query,
        llm_results_use=5,
        database_choice="Semantic Search",
        llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2",
        api_name="/update_with_rag_md"
    )
    result = response[0] + response[1]
    create_and_save_file(result, "md", query, should_save=should_save)
    st.session_state.messages.append({"role": "assistant", "content": result})
    return result

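# The Space's /update_with_rag_md endpoint appears to return a two-part tuple
# (generated answer, retrieved references); both halves are concatenated above
# into a single markdown result.
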
def upload_pdf_files_to_vector_store(vector_store_id, pdf_files):
    stats = {"total_files": len(pdf_files), "successful_uploads": 0, "failed_uploads": 0, "errors": []}

    def upload_single_pdf(file_path):
        file_name = os.path.basename(file_path)
        try:
            with open(file_path, "rb") as f:
                file_response = client.files.create(file=f, purpose="assistants")
            client.vector_stores.files.create(vector_store_id=vector_store_id, file_id=file_response.id)
            return {"file": file_name, "status": "success"}
        except Exception as e:
            return {"file": file_name, "status": "failed", "error": str(e)}

    # Upload in parallel; collect results as each future finishes.
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(upload_single_pdf, f) for f in pdf_files]
        for future in tqdm(as_completed(futures), total=len(pdf_files)):
            result = future.result()
            if result["status"] == "success":
                stats["successful_uploads"] += 1
            else:
                stats["failed_uploads"] += 1
                stats["errors"].append(result)
    return stats


def create_vector_store(store_name):
    vector_store = client.vector_stores.create(name=store_name)
    return {"id": vector_store.id, "name": vector_store.name, "created_at": vector_store.created_at, "file_count": vector_store.file_counts.completed}

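# Sketch of the expected flow (store name and file names are illustrative):
#   store = create_vector_store("PDF_Gallery_Store")
#   stats = upload_pdf_files_to_vector_store(store["id"], ["a.pdf", "b.pdf"])
# Failed uploads are recorded in stats["errors"] without aborting the batch.
# Note: on older openai-python releases these endpoints live under
# client.beta.vector_stores rather than client.vector_stores.
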
def generate_questions(pdf_path):
    text = ""
    with open(pdf_path, "rb") as f:
        pdf = PdfReader(f)
        for page in pdf.pages:
            text += page.extract_text() or ""
    prompt = f"Generate a 10-question quiz with answers based only on this document. Format as markdown with numbered questions and answers:\n{text[:2000]}\n\n"
    response = client.chat.completions.create(
        model="gpt-4o-2024-05-13",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content

def process_rag_query(query, vector_store_id):
    # file_search is not a Chat Completions tool; it is exposed through the
    # Responses API, so the query is routed there. `include` asks the API to
    # attach the retrieved chunks to each file_search call.
    try:
        response = client.responses.create(
            model="gpt-4o-2024-05-13",
            input=query,
            tools=[{"type": "file_search", "vector_store_ids": [vector_store_id]}],
            include=["file_search_call.results"]
        )
        tool_calls = [item for item in response.output if item.type == "file_search_call"]
        return response.output_text, tool_calls
    except openai.BadRequestError as e:
        st.error(f"RAG query error: {str(e)}")
        return None, []

def evaluate_rag(vector_store_id, questions_dict):
    k = 5
    total_queries = 0
    correct_retrievals_at_k = 0
    reciprocal_ranks = []
    average_precisions = []

    for filename, quiz in questions_dict.items():
        questions = re.findall(r"\d+\.\s(.*?)\n\s*Answer:\s(.*?)\n", quiz, re.DOTALL)
        for question, _ in questions:
            # Count only questions the regex actually parsed, rather than
            # assuming ten per quiz.
            total_queries += 1
            expected_file = filename
            _, tool_calls = process_rag_query(question, vector_store_id)
            if not tool_calls:
                reciprocal_ranks.append(0)
                average_precisions.append(0)
                continue
            # File names of the retrieved chunks, best match first.
            retrieved_files = [r.filename for call in tool_calls for r in (call.results or [])][:k]
            if expected_file in retrieved_files:
                rank = retrieved_files.index(expected_file) + 1
                correct_retrievals_at_k += 1
                reciprocal_ranks.append(1 / rank)
                precisions = [1 if f == expected_file else 0 for f in retrieved_files[:rank]]
                average_precisions.append(sum(precisions) / len(precisions))
            else:
                reciprocal_ranks.append(0)
                average_precisions.append(0)

    recall_at_k = correct_retrievals_at_k / total_queries if total_queries else 0
    mrr = sum(reciprocal_ranks) / total_queries if total_queries else 0
    map_score = sum(average_precisions) / total_queries if total_queries else 0
    return {"recall@k": recall_at_k, "mrr": mrr, "map": map_score, "k": k}

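# Metric notes (each computed per query, then averaged over all parsed questions):
#   recall@k -- fraction of queries whose source PDF appears in the top-k chunks
#   MRR      -- mean of 1/rank of the first correct file (0 when it never appears)
#   MAP      -- mean of the precision computed over the window up to that rank
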
def rag_pdf_gallery():
    st.subheader("RAG PDF Gallery")
    pdf_files = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
    if pdf_files:
        pdf_paths = [save_uploaded_file(f) for f in pdf_files]
        # Build the store and quiz once per upload set; later reruns reuse the
        # session copies instead of re-uploading on every widget interaction.
        store_key = tuple(sorted(os.path.basename(p) for p in pdf_paths))
        if st.session_state.get("rag_store_key") != store_key:
            with st.spinner("Creating vector store..."):
                vector_store_details = create_vector_store("PDF_Gallery_Store")
                stats = upload_pdf_files_to_vector_store(vector_store_details["id"], pdf_paths)
                st.json(stats)
            with st.spinner("Generating questions..."):
                questions_dict = {os.path.basename(p): generate_questions(p) for p in pdf_paths}
            st.session_state["rag_store_key"] = store_key
            st.session_state["rag_store"] = vector_store_details
            st.session_state["rag_questions"] = questions_dict
        vector_store_details = st.session_state["rag_store"]
        questions_dict = st.session_state["rag_questions"]

        col1, col2, col3 = st.columns(3)
        with col1:
            if st.button("📝 Quiz"):
                st.session_state["rag_prompt"] = "Generate a 10-question quiz with answers based only on this document."
        with col2:
            if st.button("📑 Summary"):
                st.session_state["rag_prompt"] = "Summarize this document page by page as a multi-level markdown outline with emojis, capturing the unique method or fact steps on each page."
        with col3:
            if st.button("🔍 Key Facts"):
                st.session_state["rag_prompt"] = "Extract 10 key facts from this document in markdown with emojis."

        st.markdown("### Generated Quiz")
        for filename, quiz in questions_dict.items():
            st.markdown(f"#### {filename}")
            st.markdown(quiz)

        query = st.text_input("Ask a question about the PDFs:", value=st.session_state.get("rag_prompt", ""))
        if query and st.button("Submit RAG Query"):
            with st.spinner("Processing RAG query..."):
                response, tool_calls = process_rag_query(query, vector_store_details["id"])
                if response:
                    st.markdown(response)
                    st.write("Retrieved chunks:")
                    for call in tool_calls:
                        for chunk in (call.results or []):
                            st.json({"file": chunk.filename, "score": chunk.score})

        if st.button("Evaluate RAG Performance"):
            with st.spinner("Evaluating..."):
                metrics = evaluate_rag(vector_store_details["id"], questions_dict)
                st.json(metrics)

def FileSidebar():
    st.sidebar.title("File Operations")
    default_types = [".md", ".png", ".pdf"]
    file_types = st.sidebar.multiselect("Filter by type", [".md", ".wav", ".png", ".mp4", ".mp3", ".pdf"], default=default_types)
    all_files = [f for f in glob.glob("*.*") if os.path.splitext(f)[1] in file_types and len(os.path.splitext(f)[0]) >= 10]
    all_files.sort(key=lambda x: os.path.getmtime(x), reverse=True)

    if st.sidebar.button("🗑 Delete All Filtered"):
        for file in all_files:
            os.remove(file)
        st.rerun()

    if st.sidebar.button("⬇️ Download All Filtered"):
        zip_file = create_zip_of_files(all_files)
        st.sidebar.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)

    for file in all_files:
        ext = os.path.splitext(file)[1].lower()
        col1, col2, col3, col4, col5 = st.sidebar.columns([1, 6, 1, 1, 1])
        colFollowUp = ""
        content = None

        with col1:
            icon = "📜" if ext == ".md" else "📄" if ext == ".pdf" else "🖼️" if ext in [".png", ".jpg", ".jpeg"] else "🎵" if ext in [".wav", ".mp3"] else "🎥" if ext == ".mp4" else "📎"
            if st.button(icon, key=f"view_{file}"):
                colFollowUp = "view_" + ext
                with open(file, "rb") as f:
                    content = f.read()

        with col2:
            st.markdown(get_table_download_link(file), unsafe_allow_html=True)

        with col3:
            if st.button("📂", key=f"open_{file}"):
                colFollowUp = "open_" + ext
                with open(file, "rb") as f:
                    content = f.read()

        with col4:
            if st.button("▶️", key=f"run_{file}"):
                if ext == ".md":
                    colFollowUp = "run_" + ext
                    with open(file, "rb") as f:
                        content = f.read()

        with col5:
            if st.button("🗑", key=f"delete_{file}"):
                os.remove(file)
                st.rerun()

        # Handle the requested action inside the loop, while `file`, `ext`,
        # and `content` still refer to the row whose button was clicked.
        if colFollowUp.startswith("view_"):
            if ext == ".md":
                st.markdown(content.decode("utf-8"))
                SpeechSynthesis(content.decode("utf-8"))
            elif ext == ".pdf":
                st.download_button("Download PDF", content, file, "application/pdf")
                st.write("PDF Viewer not natively supported; download to view.")
            elif ext in [".png", ".jpg", ".jpeg"]:
                st.image(content, use_container_width=True)
            elif ext in [".wav", ".mp3"]:
                st.audio(content, format=f"audio/{ext[1:]}")
            elif ext == ".mp4":
                st.video(content, format="video/mp4")

        elif colFollowUp.startswith("open_"):
            if ext == ".md":
                st.text_area(f"Editing {file}", value=content.decode("utf-8"), height=300, key=f"edit_{file}")
            elif ext == ".pdf":
                st.download_button("Download PDF to Edit", content, file, "application/pdf")
                st.write("PDF editing not supported in-app; download to edit externally.")
            elif ext in [".png", ".jpg", ".jpeg"]:
                st.image(content, use_container_width=True, caption=f"Viewing {file}")
            elif ext in [".wav", ".mp3"]:
                st.audio(content, format=f"audio/{ext[1:]}")
            elif ext == ".mp4":
                st.video(content, format="video/mp4")

        elif colFollowUp.startswith("run_"):
            if ext == ".md":
                process_text(content.decode("utf-8"))

def create_zip_of_files(files):
    zip_name = "Files.zip"
    with zipfile.ZipFile(zip_name, 'w') as zipf:
        for file in files:
            zipf.write(file)
    return zip_name


def get_zip_download_link(zip_file):
    with open(zip_file, 'rb') as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    return f'<a href="data:application/zip;base64,{b64}" download="{zip_file}">Download All</a>'


@st.cache_data
def get_table_download_link(file_path):
    # cache_data (not cache_resource) fits here: the return value is a plain string.
    with open(file_path, 'rb') as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    file_name = os.path.basename(file_path)
    ext = os.path.splitext(file_name)[1].lower()
    mime_type = (
        "text/markdown" if ext == ".md"
        else "application/pdf" if ext == ".pdf"
        else "image/png" if ext == ".png"
        else "image/jpeg" if ext in [".jpg", ".jpeg"]
        else "audio/wav" if ext == ".wav"
        else "audio/mpeg" if ext == ".mp3"
        else "video/mp4" if ext == ".mp4"
        else "application/octet-stream"
    )
    return f'<a href="data:{mime_type};base64,{b64}" download="{file_name}">{file_name}</a>'

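# Base64 data URLs inflate payloads by roughly a third, so embedding large
# media files this way makes the sidebar markup correspondingly heavy; it is
# fine for the small markdown and image artifacts this app produces.
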
def main():
    st.markdown("##### GPT-4o Omni Model: Text, Audio, Image, Video & RAG")
    model_options = ["gpt-4o-2024-05-13", "gpt-3.5-turbo"]
    st.session_state["openai_model"] = st.selectbox("Select GPT Model", model_options, index=0)

    option = st.selectbox("Select Input Type", ("Text", "Image", "Audio", "Video", "ArXiv Search", "RAG PDF Gallery"))

    if option == "Text":
        default_text = "Create a summary of Python PDF libraries and their usage, in markdown with emojis. Maybe add a feature rating comparing them against each other in a markdown emoji outline or table."
        col1, col2 = st.columns([1, 5])
        with col1:
            if st.button("📝 MD", key="md_button"):
                st.session_state["text_input"] = default_text
                with st.spinner("Processing..."):
                    process_text(default_text)
                st.rerun()
        with col2:
            text_input = st.text_input("Enter your text:", value=st.session_state.get("text_input", ""), key="text_input_field")
            if text_input and st.button("Submit Text"):
                with st.spinner("Processing..."):
                    process_text(text_input)
                st.rerun()

    elif option == "Image":
        col1, col2 = st.columns(2)
        with col1:
            if st.button("📝 Describe"):
                st.session_state["image_prompt"] = "Describe this image and list ten facts in a markdown outline with emojis."
        with col2:
            if st.button("🔍 OCR"):
                st.session_state["image_prompt"] = "Transcribe all text visible in the image."
        text_input = st.text_input("Image Prompt:", value=st.session_state.get("image_prompt", "Describe this image and list ten facts in a markdown outline with emojis."))
        image_input = st.file_uploader("Upload an image (max 200MB)", type=["png", "jpg", "jpeg"], accept_multiple_files=False)
        if image_input and text_input and st.button("Submit Image"):
            if image_input.size > 200 * 1024 * 1024:
                st.error("Image exceeds 200MB limit.")
            else:
                with st.spinner("Processing..."):
                    image_response = process_image(image_input, text_input)
                    with st.chat_message("ai", avatar="🦖"):
                        st.markdown(image_response)
                st.rerun()

    elif option == "Audio":
        text_input = st.text_input("Audio Prompt:", value="Summarize this audio transcription in Markdown.")
        audio_input = st.file_uploader("Upload an audio file (max 200MB)", type=["mp3", "wav", "flac", "m4a"], accept_multiple_files=False)
        audio_bytes = audio_recorder()
        if audio_bytes and text_input and st.button("Submit Audio Recording"):
            with open("recorded_audio.wav", "wb") as f:
                f.write(audio_bytes)
            with st.spinner("Processing..."):
                process_audio(audio_bytes, text_input)
            st.rerun()
        elif audio_input and text_input and st.button("Submit Audio File"):
            with st.spinner("Processing..."):
                process_audio(audio_input, text_input)
            st.rerun()

    elif option == "Video":
        text_input = st.text_input("Video Prompt:", value="Summarize this video and its transcription in Markdown.")
        video_input = st.file_uploader("Upload a video file (max 200MB)", type=["mp4"], accept_multiple_files=False)
        if video_input and text_input and st.button("Submit Video"):
            if video_input.size > 200 * 1024 * 1024:
                st.error("Video exceeds 200MB limit.")
            else:
                with st.spinner("Processing..."):
                    process_audio_and_video(video_input)
                st.rerun()

    elif option == "ArXiv Search":
        query = st.text_input("AI Search ArXiv Scholarly Articles:")
        if query and st.button("Search ArXiv"):
            with st.spinner("Searching ArXiv..."):
                result = search_arxiv(query)
                st.markdown(result)
            st.rerun()

    elif option == "RAG PDF Gallery":
        rag_pdf_gallery()

    # Replay the running conversation, then accept free-form chat input.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
        with st.spinner("Processing..."):
            process_text(prompt)
        st.rerun()


FileSidebar()
main()