awacke1 committed on
Commit
a7ab48f
·
verified ·
1 Parent(s): e82eace

Create app.py

Files changed (1)
app.py +539 -0
app.py ADDED
@@ -0,0 +1,539 @@
+ import base64
+ import cv2
+ import glob
+ import json
+ import math
+ import os
+ import pytz
+ import random
+ import re
+ import requests
+ import streamlit as st
+ import streamlit.components.v1 as components
+ import textract
+ import time
+ import zipfile
+ from concurrent.futures import ThreadPoolExecutor
+ from tqdm import tqdm
+ import concurrent
+
+ from audio_recorder_streamlit import audio_recorder
+ from bs4 import BeautifulSoup
+ from collections import deque
+ from datetime import datetime
+ from dotenv import load_dotenv
+ from gradio_client import Client
+ from io import BytesIO
+ from moviepy import VideoFileClip
+ from PIL import Image
+ from PyPDF2 import PdfReader
+ from templates import bot_template, css, user_template
+ from urllib.parse import quote
+ from xml.etree import ElementTree as ET
+
+ import openai
+ from openai import OpenAI
+ import pandas as pd
+
+ # Configuration
+ load_dotenv()  # load API keys from a local .env file, if present
+ Site_Name = 'Scholarly-Article-Document-Search-With-Memory'
+ title = "🔬🧠ScienceBrain.AI"
+ helpURL = 'https://huggingface.co/awacke1'
+ bugURL = 'https://huggingface.co/spaces/awacke1'
+ icons = Image.open("icons.ico")
+ st.set_page_config(
+     page_title=title,
+     page_icon=icons,
+     layout="wide",
+     initial_sidebar_state="auto",
+     menu_items={'Get Help': helpURL, 'Report a bug': bugURL, 'About': title}
+ )
+
+ # API Configuration
+ API_KEY = os.getenv('API_KEY')
+ HF_KEY = os.getenv('HF_KEY')
+ headers = {"Authorization": f"Bearer {HF_KEY}", "Content-Type": "application/json"}
+ key = os.getenv('OPENAI_API_KEY')
+ client = OpenAI(api_key=key, organization=os.getenv('OPENAI_ORG_ID'))
+ MODEL = "gpt-4o-2024-05-13"
+ if "openai_model" not in st.session_state:
+     st.session_state["openai_model"] = MODEL
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+ if st.button("Clear Session"):
+     st.session_state.messages = []
+
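+ # Expected environment variables (per the os.getenv calls above): OPENAI_API_KEY,
+ # and optionally OPENAI_ORG_ID, HF_KEY, and API_KEY.
+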
+ # Sidebar Options
+ should_save = st.sidebar.checkbox("💾 Save", value=True, help="Save your session data.")
+
+ # HTML5 Speech Synthesis
+ def SpeechSynthesis(result):
+     # Renders an HTML5 page with a textarea and a button that reads the text
+     # aloud via the browser's SpeechSynthesis API. Not cached, since the
+     # component must render on every call.
+     documentHTML5 = '''
+     <!DOCTYPE html>
+     <html>
+     <head>
+         <title>Read It Aloud</title>
+         <script type="text/javascript">
+             function readAloud() {
+                 const text = document.getElementById("textArea").value;
+                 const speech = new SpeechSynthesisUtterance(text);
+                 window.speechSynthesis.speak(speech);
+             }
+         </script>
+     </head>
+     <body>
+         <h1>🔊 Read It Aloud</h1>
+         <textarea id="textArea" rows="10" cols="80">
+     '''
+     documentHTML5 += result + '''
+     </textarea>
+     <br>
+     <button onclick="readAloud()">🔊 Read Aloud</button>
+     </body>
+     </html>
+     '''
+     components.html(documentHTML5, width=1280, height=300)
+
+ # File Naming and Saving
+ def generate_filename(prompt, file_type, original_name=None):
+     central = pytz.timezone('US/Central')
+     safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
+     if original_name and file_type == "md":  # For images
+         base_name = os.path.splitext(original_name)[0]
+         safe_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt).strip()[:100]
+         return f"{safe_date_time}_{safe_prompt}_{base_name}.{file_type}"
+     safe_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt).strip()[:240]
+     return f"{safe_date_time}_{safe_prompt}.{file_type}"
+
+ def create_and_save_file(content, file_type="md", prompt=None, original_name=None, should_save=True):
+     if not should_save:
+         return None
+     filename = generate_filename(prompt, file_type, original_name)
+     with open(filename, "w", encoding="utf-8") as f:
+         f.write(content if not prompt else prompt + "\n\n" + content)
+     return filename
+
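+ # Illustrative example (hypothetical prompt and clock time):
+ #   generate_filename("What is a buckyball?", "md")
+ #   -> "0613_1430_What is a buckyball.md"
+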
+ # Text Processing
+ def process_text(text_input):
+     if text_input:
+         st.session_state.messages.append({"role": "user", "content": text_input})
+         with st.chat_message("user"):
+             st.markdown(text_input)
+         with st.chat_message("assistant"):
+             completion = client.chat.completions.create(
+                 model=st.session_state["openai_model"],
+                 messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages],
+                 stream=False
+             )
+             response = completion.choices[0].message.content
+             st.markdown(response)
+         filename = generate_filename(text_input, "md")
+         create_and_save_file(response, "md", text_input, should_save=should_save)
+         st.session_state.messages.append({"role": "assistant", "content": response})
+         st.rerun()
+
+ # Image Processing
+ def process_image(image_input, user_prompt):
+     original_name = image_input.name
+     image_bytes = image_input.read()
+     with open(original_name, "wb") as f:
+         f.write(image_bytes)  # Save original image
+     base64_image = base64.b64encode(image_bytes).decode("utf-8")
+     response = client.chat.completions.create(
+         model=st.session_state["openai_model"],
+         messages=[
+             {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
+             {"role": "user", "content": [
+                 {"type": "text", "text": user_prompt},
+                 {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
+             ]}
+         ],
+         temperature=0.0
+     )
+     image_response = response.choices[0].message.content
+     filename = generate_filename(user_prompt, "md", original_name)
+     create_and_save_file(image_response, "md", user_prompt, original_name, should_save=should_save)
+     # No st.rerun() here: rerunning would discard the return value before the
+     # caller can display it.
+     return image_response
+
+ # Audio Processing
+ def process_audio(audio_input, text_input=''):
+     if audio_input:
+         audio_bytes = audio_input if isinstance(audio_input, bytes) else audio_input.read()
+         supported_formats = ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm']
+         file_ext = "wav" if isinstance(audio_input, bytes) else os.path.splitext(audio_input.name)[1][1:].lower()
+         if file_ext not in supported_formats:
+             st.error(f"Unsupported format: {file_ext}. Supported formats: {supported_formats}")
+             return
+         if len(audio_bytes) > 200 * 1024 * 1024:  # 200MB limit
+             st.error("File exceeds 200MB limit.")
+             return
+         with st.spinner("Transcribing audio..."):
+             try:
+                 audio_file = BytesIO(audio_bytes)
+                 audio_file.name = f"audio.{file_ext}"  # the API infers the format from the filename
+                 transcription = client.audio.transcriptions.create(
+                     model="whisper-1",
+                     file=audio_file
+                 ).text
+                 st.session_state.messages.append({"role": "user", "content": transcription})
+                 with st.chat_message("user"):
+                     st.markdown(transcription)
+                 with st.chat_message("assistant"):
+                     completion = client.chat.completions.create(
+                         model=st.session_state["openai_model"],
+                         messages=[{"role": "user", "content": text_input + "\n\nTranscription: " + transcription}]
+                     )
+                     response = completion.choices[0].message.content
+                     st.markdown(response)
+                 filename = generate_filename(transcription, "md")
+                 create_and_save_file(response, "md", text_input, should_save=should_save)
+                 st.session_state.messages.append({"role": "assistant", "content": response})
+                 st.rerun()
+             except openai.BadRequestError as e:
+                 st.error(f"Audio processing error: {str(e)}")
+
+ # Video Processing
+ def save_video(video_input):
+     # Persist an uploaded file to disk and return its path (also reused for PDFs below).
+     with open(video_input.name, "wb") as f:
+         f.write(video_input.read())
+     return video_input.name
+
+ def process_video(video_path, seconds_per_frame=2):
+     # Sample one frame every `seconds_per_frame` seconds as base64 JPEGs and
+     # extract the audio track to MP3, if one exists.
+     base64Frames = []
+     base_video_path, _ = os.path.splitext(video_path)
+     video = cv2.VideoCapture(video_path)
+     total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+     fps = video.get(cv2.CAP_PROP_FPS)
+     frames_to_skip = max(1, int(fps * seconds_per_frame))  # at least 1 to guarantee progress
+     curr_frame = 0
+     while curr_frame < total_frames - 1:
+         video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
+         success, frame = video.read()
+         if not success:
+             break
+         _, buffer = cv2.imencode(".jpg", frame)
+         base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
+         curr_frame += frames_to_skip
+     video.release()
+     audio_path = f"{base_video_path}.mp3"
+     try:
+         clip = VideoFileClip(video_path)
+         if clip.audio:
+             clip.audio.write_audiofile(audio_path, bitrate="32k")
+             clip.audio.close()
+         else:
+             audio_path = None  # no audio track to transcribe
+         clip.close()
+     except Exception as e:
+         st.warning(f"No audio track found or error: {str(e)}")
+         audio_path = None
+     return base64Frames, audio_path
+
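+ # Example (illustrative): at 30 fps with seconds_per_frame=2, frames_to_skip is
+ # 60, so roughly one frame per two seconds of video is captured.
+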
+ def process_audio_and_video(video_input):
+     if video_input:
+         video_path = save_video(video_input)
+         with st.spinner("Extracting frames and audio..."):
+             base64Frames, audio_path = process_video(video_path)
+         if audio_path:
+             with st.spinner("Transcribing video audio..."):
+                 try:
+                     with open(audio_path, "rb") as audio_file:
+                         transcript = client.audio.transcriptions.create(
+                             model="whisper-1",
+                             file=audio_file
+                         ).text
+                     with st.chat_message("user"):
+                         st.markdown(f"Video Transcription: {transcript}")
+                     with st.chat_message("assistant"):
+                         response = client.chat.completions.create(
+                             model=st.session_state["openai_model"],
+                             messages=[
+                                 {"role": "system", "content": "Summarize the video and its transcript in Markdown."},
+                                 {"role": "user", "content": [
+                                     {"type": "text", "text": "Video frames:"},
+                                     *map(lambda x: {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{x}"}}, base64Frames),
+                                     {"type": "text", "text": f"Transcription: {transcript}"}
+                                 ]}
+                             ]
+                         )
+                         result = response.choices[0].message.content
+                         st.markdown(result)
+                     filename = generate_filename(transcript, "md")
+                     create_and_save_file(result, "md", "Video summary", should_save=should_save)
+                     st.rerun()
+                 except openai.BadRequestError as e:
+                     st.error(f"Video audio processing error: {str(e)}")
+         else:
+             st.warning("No audio to transcribe.")
+
+ # ArXiv Search
+ def search_arxiv(query):
+     arxiv_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
+     response = arxiv_client.predict(
+         message=query,
+         llm_results_use=5,
+         database_choice="Semantic Search",
+         llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2",
+         api_name="/update_with_rag_md"
+     )
+     result = response[0] + response[1]
+     filename = generate_filename(query, "md")
+     create_and_save_file(result, "md", query, should_save=should_save)
+     st.session_state.messages.append({"role": "assistant", "content": result})
+     st.rerun()  # the result is shown via the chat history after the rerun
+
+ # RAG PDF Gallery
+ def upload_pdf_files_to_vector_store(vector_store_id, pdf_files):
+     stats = {"total_files": len(pdf_files), "successful_uploads": 0, "failed_uploads": 0, "errors": []}
+     def upload_single_pdf(file_path):
+         file_name = os.path.basename(file_path)
+         try:
+             with open(file_path, "rb") as f:
+                 file_response = client.files.create(file=f, purpose="assistants")
+             client.vector_stores.files.create(vector_store_id=vector_store_id, file_id=file_response.id)
+             return {"file": file_name, "status": "success"}
+         except Exception as e:
+             return {"file": file_name, "status": "failed", "error": str(e)}
+     with ThreadPoolExecutor(max_workers=5) as executor:
+         futures = [executor.submit(upload_single_pdf, f) for f in pdf_files]
+         for future in tqdm(concurrent.futures.as_completed(futures), total=len(pdf_files)):
+             result = future.result()
+             if result["status"] == "success":
+                 stats["successful_uploads"] += 1
+             else:
+                 stats["failed_uploads"] += 1
+                 stats["errors"].append(result)
+     return stats
+
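+ # Uploads fan out across up to 5 worker threads; each PDF goes to the Files API
+ # first, then is attached to the vector store by file id.
+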
+ def create_vector_store(store_name):
+     vector_store = client.vector_stores.create(name=store_name)
+     return {"id": vector_store.id, "name": vector_store.name, "created_at": vector_store.created_at, "file_count": vector_store.file_counts.completed}
+
+ def generate_questions(pdf_path):
+     text = ""
+     with open(pdf_path, "rb") as f:
+         pdf = PdfReader(f)
+         for page in pdf.pages:
+             text += page.extract_text() or ""
+     prompt = f"Can you generate a question that can only be answered from this document?:\n{text[:2000]}\n\n"
+     response = client.chat.completions.create(
+         model="gpt-4o-2024-05-13",
+         messages=[{"role": "user", "content": prompt}]
+     )
+     return response.choices[0].message.content
+
+ def process_rag_query(query, vector_store_id):
+     # file_search is not a chat.completions tool, so this uses the Responses API,
+     # which accepts a file_search tool bound to a vector store.
+     try:
+         response = client.responses.create(
+             model="gpt-4o-2024-05-13",
+             input=query,
+             tools=[{"type": "file_search", "vector_store_ids": [vector_store_id]}],
+             include=["file_search_call.results"]
+         )
+         search_calls = [item for item in response.output if item.type == "file_search_call"]
+         return response.output_text, search_calls
+     except openai.BadRequestError as e:
+         st.error(f"RAG query error: {str(e)}")
+         return None, []
+
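+ # Each returned file_search_call is expected to carry a .results list (because
+ # of the include parameter above), whose items have .filename, .file_id, .score,
+ # and .text for the retrieved chunk.
+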
+ def evaluate_rag(vector_store_id, questions_dict):
+     # Retrieval metrics: recall@k, mean reciprocal rank (MRR), and mean average
+     # precision (MAP), scored against the file each question was generated from.
+     k = 5
+     total_queries = len(questions_dict)
+     correct_retrievals_at_k = 0
+     reciprocal_ranks = []
+     average_precisions = []
+
+     for filename, query in questions_dict.items():
+         expected_file = filename
+         response, search_calls = process_rag_query(query, vector_store_id)
+         if not search_calls:
+             continue
+         retrieved_files = [r.filename for call in search_calls for r in (call.results or [])][:k]
+         if expected_file in retrieved_files:
+             rank = retrieved_files.index(expected_file) + 1
+             correct_retrievals_at_k += 1
+             reciprocal_ranks.append(1 / rank)
+             precisions = [1 if f == expected_file else 0 for f in retrieved_files[:rank]]
+             average_precisions.append(sum(precisions) / len(precisions))
+         else:
+             reciprocal_ranks.append(0)
+             average_precisions.append(0)
+
+     recall_at_k = correct_retrievals_at_k / total_queries if total_queries else 0
+     mrr = sum(reciprocal_ranks) / total_queries if total_queries else 0
+     map_score = sum(average_precisions) / total_queries if total_queries else 0
+     return {"recall@k": recall_at_k, "mrr": mrr, "map": map_score, "k": k}
+
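+ # Worked example (illustrative): if the expected file is retrieved at rank 2,
+ # its reciprocal rank is 1/2 and its average precision over the top-2 results
+ # is (0 + 1) / 2 = 0.5, matching the precisions list computed above.
+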
+ def rag_pdf_gallery():
+     st.subheader("RAG PDF Gallery")
+     pdf_files = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
+     if pdf_files:
+         # Cache the expensive setup in session state so it is not redone on every rerun.
+         if "vector_store_details" not in st.session_state:
+             pdf_paths = [save_video(f) for f in pdf_files]  # Reuse save_video; it just writes bytes to disk
+             with st.spinner("Creating vector store..."):
+                 vector_store_details = create_vector_store("PDF_Gallery_Store")
+                 stats = upload_pdf_files_to_vector_store(vector_store_details["id"], pdf_paths)
+                 st.json(stats)
+             with st.spinner("Generating evaluation questions..."):
+                 questions_dict = {os.path.basename(p): generate_questions(p) for p in pdf_paths}
+                 st.json(questions_dict)
+             st.session_state["vector_store_details"] = vector_store_details
+             st.session_state["questions_dict"] = questions_dict
+         vector_store_details = st.session_state["vector_store_details"]
+         questions_dict = st.session_state["questions_dict"]
+
+         query = st.text_input("Ask a question about the PDFs:")
+         if query:
+             with st.spinner("Processing RAG query..."):
+                 response, search_calls = process_rag_query(query, vector_store_details["id"])
+             if response:
+                 st.markdown(response)
+                 st.write("Retrieved chunks:")
+                 for call in search_calls:
+                     for r in (call.results or []):
+                         st.write(f"{r.filename} (score: {r.score})")
+
+         if st.button("Evaluate RAG Performance"):
+             with st.spinner("Evaluating..."):
+                 metrics = evaluate_rag(vector_store_details["id"], questions_dict)
+                 st.json(metrics)
+
+ # File Sidebar
+ def FileSidebar():
+     st.sidebar.title("File Operations")
+     default_types = [".md", ".png", ".pdf"]
+     file_types = st.sidebar.multiselect("Filter by type", [".md", ".wav", ".png", ".mp4", ".mp3", ".pdf"], default=default_types)
+     all_files = [f for f in glob.glob("*.*") if os.path.splitext(f)[1] in file_types and len(os.path.splitext(f)[0]) >= 10]
+     all_files.sort(key=lambda x: os.path.getmtime(x), reverse=True)
+
+     if st.sidebar.button("🗑 Delete All Filtered"):
+         for file in all_files:
+             os.remove(file)
+         st.rerun()
+
+     if st.sidebar.button("⬇️ Download All Filtered"):
+         zip_file = create_zip_of_files(all_files)
+         st.sidebar.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)
+
+     for file in all_files:
+         ext = os.path.splitext(file)[1]
+         col1, col2, col3, col4, col5 = st.sidebar.columns([1, 6, 1, 1, 1])
+         with col1:
+             icon = "📜" if ext == ".md" else "📄" if ext == ".pdf" else "🖼️" if ext == ".png" else "🎵" if ext in [".wav", ".mp3"] else "🎥"
+             if st.button(icon, key=f"view_{file}"):
+                 with open(file, "rb") as f:
+                     content = f.read()
+                 if ext == ".md":
+                     st.markdown(content.decode("utf-8"))
+                     SpeechSynthesis(content.decode("utf-8"))
+                 elif ext == ".pdf":
+                     st.download_button("Download PDF", content, file, "application/pdf")
+                     st.write("PDF Viewer not natively supported; download to view.")
+                 elif ext == ".png":
+                     st.image(content, use_column_width=True)
+         with col2:
+             st.markdown(get_table_download_link(file), unsafe_allow_html=True)
+         with col3:
+             if st.button("📂", key=f"open_{file}"):
+                 st.session_state.update({'filename': file, 'filetext': open(file, "r", encoding="utf-8").read()})
+         with col4:
+             if st.button("▶️", key=f"run_{file}"):
+                 process_text(open(file, "r", encoding="utf-8").read())
+         with col5:
+             if st.button("🗑", key=f"delete_{file}"):
+                 os.remove(file)
+                 st.rerun()
+
+ def create_zip_of_files(files):
+     zip_name = "Files.zip"
+     with zipfile.ZipFile(zip_name, 'w') as zipf:
+         for file in files:
+             zipf.write(file)
+     return zip_name
+
+ def get_zip_download_link(zip_file):
+     with open(zip_file, 'rb') as f:
+         data = f.read()
+     b64 = base64.b64encode(data).decode()
+     return f'<a href="data:application/zip;base64,{b64}" download="{zip_file}">Download All</a>'
+
+ @st.cache_data
+ def get_table_download_link(file_path):
+     # cache_data (not cache_resource) since this returns a serializable string
+     with open(file_path, 'rb') as f:
+         data = f.read()
+     b64 = base64.b64encode(data).decode()
+     file_name = os.path.basename(file_path)
+     ext = os.path.splitext(file_name)[1]
+     mime_type = {
+         ".md": "text/markdown",
+         ".pdf": "application/pdf",
+         ".png": "image/png",
+         ".wav": "audio/wav",
+         ".mp3": "audio/mpeg",
+         ".mp4": "video/mp4",
+     }.get(ext, "application/octet-stream")
+     return f'<a href="data:{mime_type};base64,{b64}" download="{file_name}">{file_name}</a>'
+
+ # Main Function
+ def main():
+     st.markdown("##### GPT-4o Omni Model: Text, Audio, Image, Video & RAG")
+     model_options = ["gpt-4o-2024-05-13", "gpt-3.5-turbo"]
+     st.session_state["openai_model"] = st.selectbox("Select GPT Model", model_options, index=0)
+
+     option = st.selectbox("Select Input Type", ("Text", "Image", "Audio", "Video", "ArXiv Search", "RAG PDF Gallery"))
+
+     if option == "Text":
+         default_text = "Emojis in markdown. Maybe a buckyball feature rating comparing them against each other in a markdown emoji outline or tables."
+         text_input = st.text_input("Enter your text:", value=default_text)
+         # Guard so the same prompt is not re-processed on every rerun.
+         if text_input and text_input != st.session_state.get("last_text_input"):
+             st.session_state["last_text_input"] = text_input
+             with st.spinner("Processing..."):
+                 process_text(text_input)
+
+     elif option == "Image":
+         col1, col2 = st.columns(2)
+         with col1:
+             if st.button("📝 Describe"):
+                 st.session_state["image_prompt"] = "Describe this image and list ten facts in a markdown outline with emojis."
+         with col2:
+             if st.button("🔍 OCR"):
+                 st.session_state["image_prompt"] = "Show electronic text of text in the image."
+         text_input = st.text_input("Image Prompt:", value=st.session_state.get("image_prompt", "Describe this image and list ten facts in a markdown outline with emojis."))
+         image_input = st.file_uploader("Upload an image (max 200MB)", type=["png", "jpg", "jpeg"], accept_multiple_files=False)
+         if image_input and text_input:
+             if image_input.size > 200 * 1024 * 1024:
+                 st.error("Image exceeds 200MB limit.")
+             else:
+                 with st.spinner("Processing..."):
+                     image_response = process_image(image_input, text_input)
+                 with st.chat_message("ai", avatar="🦖"):
+                     st.markdown(image_response)
+
+     elif option == "Audio":
+         text_input = st.text_input("Audio Prompt:", value="Summarize this audio transcription in Markdown.")
+         audio_input = st.file_uploader("Upload an audio file (max 200MB)", type=["mp3", "wav", "flac", "m4a"], accept_multiple_files=False)
+         audio_bytes = audio_recorder()
+         # Guard so the same recording is not re-transcribed on every rerun.
+         if audio_bytes and audio_bytes != st.session_state.get("last_recorded_audio"):
+             st.session_state["last_recorded_audio"] = audio_bytes
+             with open("recorded_audio.wav", "wb") as f:
+                 f.write(audio_bytes)
+             process_audio(audio_bytes, text_input)
+         elif audio_input and text_input:
+             with st.spinner("Processing..."):
+                 process_audio(audio_input, text_input)
+
+     elif option == "Video":
+         text_input = st.text_input("Video Prompt:", value="Summarize this video and its transcription in Markdown.")
+         video_input = st.file_uploader("Upload a video file (max 200MB)", type=["mp4"], accept_multiple_files=False)
+         if video_input and text_input:
+             if video_input.size > 200 * 1024 * 1024:
+                 st.error("Video exceeds 200MB limit.")
+             else:
+                 with st.spinner("Processing..."):
+                     process_audio_and_video(video_input)
+
+     elif option == "ArXiv Search":
+         query = st.text_input("AI Search ArXiv Scholarly Articles:")
+         # Guard so the same query is not re-submitted on every rerun.
+         if query and query != st.session_state.get("last_arxiv_query"):
+             st.session_state["last_arxiv_query"] = query
+             with st.spinner("Searching ArXiv..."):
+                 search_arxiv(query)
+
+     elif option == "RAG PDF Gallery":
+         rag_pdf_gallery()
+
+     # Chat Display and Input
+     for message in st.session_state.messages:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+
+     if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
+         process_text(prompt)
+
+ FileSidebar()
+ main()