awacke1 committed
Commit 03c4954 · verified · Parent: 5fd43ea

Create app.py

Files changed (1): app.py (+610, -0)
app.py ADDED
@@ -0,0 +1,610 @@
import base64
import cv2
import glob
import json
import math
import os
import pytz
import random
import re
import requests
import streamlit as st
import streamlit.components.v1 as components
import textract
import time
import zipfile
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import concurrent.futures  # imported explicitly so concurrent.futures.as_completed is available below

from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from collections import deque
from datetime import datetime
from dotenv import load_dotenv
from gradio_client import Client
from io import BytesIO
from moviepy import VideoFileClip
from PIL import Image
from PyPDF2 import PdfReader
from templates import bot_template, css, user_template
from urllib.parse import quote
from xml.etree import ElementTree as ET

import openai
from openai import OpenAI
import pandas as pd

# Configuration
Site_Name = 'Scholarly-Article-Document-Search-With-Memory'
title = "🔬🧠ScienceBrain.AI"
helpURL = 'https://huggingface.co/awacke1'
bugURL = 'https://huggingface.co/spaces/awacke1'
icons = Image.open("icons.ico")
st.set_page_config(
    page_title=title,
    page_icon=icons,
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={'Get Help': helpURL, 'Report a bug': bugURL, 'About': title}
)

# API Configuration
load_dotenv()  # populate os.environ from a local .env, if present, so the keys below resolve
API_KEY = os.getenv('API_KEY')
HF_KEY = os.getenv('HF_KEY')
headers = {"Authorization": f"Bearer {HF_KEY}", "Content-Type": "application/json"}
key = os.getenv('OPENAI_API_KEY')
client = OpenAI(api_key=key, organization=os.getenv('OPENAI_ORG_ID'))
MODEL = "gpt-4o-2024-05-13"
if "openai_model" not in st.session_state:
    st.session_state["openai_model"] = MODEL
if "messages" not in st.session_state:
    st.session_state.messages = []
if st.button("Clear Session"):
    st.session_state.messages = []

# Sidebar Options
should_save = st.sidebar.checkbox("💾 Save", value=True, help="Save your session data.")

# HTML5 Speech Synthesis
def SpeechSynthesis(result):
    # Rendering happens as a side effect, so this must run on every call; caching it
    # (e.g. with st.cache_resource) would skip re-rendering for repeated text.
    documentHTML5 = '''
<!DOCTYPE html>
<html>
<head>
    <title>Read It Aloud</title>
    <script type="text/javascript">
        function readAloud() {
            const text = document.getElementById("textArea").value;
            const speech = new SpeechSynthesisUtterance(text);
            window.speechSynthesis.speak(speech);
        }
    </script>
</head>
<body>
    <h1>🔊 Read It Aloud</h1>
    <textarea id="textArea" rows="10" cols="80">
'''
    documentHTML5 += result + '''
    </textarea>
    <br>
    <button onclick="readAloud()">🔊 Read Aloud</button>
</body>
</html>
'''
    components.html(documentHTML5, width=1280, height=300)

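# Usage sketch (hypothetical text): SpeechSynthesis("Hello from ScienceBrain") renders the
# widget below the current element and reads the text aloud via the browser's speech API.
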
# File Naming and Saving
def generate_filename(prompt, file_type, original_name=None):
    central = pytz.timezone('US/Central')
    safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
    safe_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt).strip()[:50]
    if original_name and file_type == "md":  # For images
        base_name = os.path.splitext(original_name)[0]
        file_stem = f"{safe_date_time}_{safe_prompt}_{base_name}"[:100]  # Cap at 100 chars
        return f"{file_stem}.{file_type}"
    file_stem = f"{safe_date_time}_{safe_prompt}"[:100]  # Cap at 100 chars
    return f"{file_stem}.{file_type}"

def create_and_save_file(content, file_type="md", prompt=None, original_name=None, should_save=True):
    if not should_save:
        return None
    filename = generate_filename(prompt, file_type, original_name)
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content if not prompt else prompt + "\n\n" + content)
    return filename

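# Example with hypothetical values: a call at 14:30 Central on March 5 yields
#   generate_filename("Explain RAG", "md")           -> "0305_1430_Explain RAG.md"
#   generate_filename("OCR this", "md", "scan.png")  -> "0305_1430_OCR this_scan.md"
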
# Text Processing
def process_text(text_input):
    if text_input:
        st.session_state.messages.append({"role": "user", "content": text_input})
        with st.chat_message("user"):
            st.markdown(text_input)
        with st.chat_message("assistant"):
            completion = client.chat.completions.create(
                model=st.session_state["openai_model"],
                messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages],
                stream=False
            )
            response = completion.choices[0].message.content
            st.markdown(response)
            create_and_save_file(response, "md", text_input, should_save=should_save)
            st.session_state.messages.append({"role": "assistant", "content": response})

# Image Processing
def process_image(image_input, user_prompt):
    original_name = image_input.name
    image_bytes = image_input.read()
    with open(original_name, "wb") as f:
        f.write(image_bytes)  # Save original image
    base64_image = base64.b64encode(image_bytes).decode("utf-8")
    response = client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=[
            {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
            {"role": "user", "content": [
                {"type": "text", "text": user_prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
            ]}
        ],
        temperature=0.0
    )
    image_response = response.choices[0].message.content
    create_and_save_file(image_response, "md", user_prompt, original_name, should_save=should_save)
    return image_response

# Audio Processing
def process_audio(audio_input, text_input=''):
    if audio_input:
        audio_bytes = audio_input if isinstance(audio_input, bytes) else audio_input.read()
        supported_formats = ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm']
        file_ext = "wav" if isinstance(audio_input, bytes) else os.path.splitext(audio_input.name)[1][1:].lower()
        if file_ext not in supported_formats:
            st.error(f"Unsupported format: {file_ext}. Supported formats: {supported_formats}")
            return
        if len(audio_bytes) > 200 * 1024 * 1024:  # 200MB limit
            st.error("File exceeds 200MB limit.")
            return
        with st.spinner("Transcribing audio..."):
            try:
                audio_file = BytesIO(audio_bytes)
                audio_file.name = f"audio.{file_ext}"  # the API infers the format from the filename; a bare BytesIO has none
                transcription = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file
                ).text
                st.session_state.messages.append({"role": "user", "content": transcription})
                with st.chat_message("user"):
                    st.markdown(transcription)
                with st.chat_message("assistant"):
                    completion = client.chat.completions.create(
                        model=st.session_state["openai_model"],
                        messages=[{"role": "user", "content": text_input + "\n\nTranscription: " + transcription}]
                    )
                    response = completion.choices[0].message.content
                    st.markdown(response)
                    create_and_save_file(response, "md", text_input, should_save=should_save)
                    st.session_state.messages.append({"role": "assistant", "content": response})
            except openai.BadRequestError as e:
                st.error(f"Audio processing error: {str(e)}")

# Video Processing
def save_video(video_input):
    # Generic helper: persist any Streamlit upload to disk under its original name.
    with open(video_input.name, "wb") as f:
        f.write(video_input.read())
    return video_input.name

def process_video(video_path, seconds_per_frame=2):
    base64Frames = []
    base_video_path, _ = os.path.splitext(video_path)
    video = cv2.VideoCapture(video_path)
    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = video.get(cv2.CAP_PROP_FPS)
    frames_to_skip = max(1, int(fps * seconds_per_frame))  # at least 1 so the loop always advances
    curr_frame = 0
    while curr_frame < total_frames - 1:
        video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
        success, frame = video.read()
        if not success:
            break
        _, buffer = cv2.imencode(".jpg", frame)
        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
        curr_frame += frames_to_skip
    video.release()
    audio_path = f"{base_video_path}.mp3"
    try:
        clip = VideoFileClip(video_path)
        if clip.audio:
            clip.audio.write_audiofile(audio_path, bitrate="32k")
            clip.audio.close()
        else:
            audio_path = None  # no audio track; signal the caller to skip transcription
        clip.close()
    except Exception as e:
        st.warning(f"No audio track found or error: {str(e)}")
        audio_path = None
    return base64Frames, audio_path

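# Usage sketch (hypothetical file): process_video("talk.mp4", seconds_per_frame=2) samples
# roughly one frame every 2 seconds and writes talk.mp3 beside the video when audio exists.
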
def process_audio_and_video(video_input):
    if video_input:
        video_path = save_video(video_input)
        with st.spinner("Extracting frames and audio..."):
            base64Frames, audio_path = process_video(video_path)
        if audio_path:
            with st.spinner("Transcribing video audio..."):
                try:
                    with open(audio_path, "rb") as audio_file:
                        transcript = client.audio.transcriptions.create(
                            model="whisper-1",
                            file=audio_file
                        ).text
                    with st.chat_message("user"):
                        st.markdown(f"Video Transcription: {transcript}")
                    with st.chat_message("assistant"):
                        response = client.chat.completions.create(
                            model=st.session_state["openai_model"],
                            messages=[
                                {"role": "system", "content": "Summarize the video and its transcript in Markdown."},
                                {"role": "user", "content": [
                                    "Video frames:", *map(lambda x: {"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{x}"}}, base64Frames),
                                    {"type": "text", "text": f"Transcription: {transcript}"}
                                ]}
                            ]
                        )
                        result = response.choices[0].message.content
                        st.markdown(result)
                        create_and_save_file(result, "md", "Video summary", should_save=should_save)
                except openai.BadRequestError as e:
                    st.error(f"Video audio processing error: {str(e)}")
        else:
            st.warning("No audio to transcribe.")

# ArXiv Search
def search_arxiv(query):
    # gradio client for the ArXiv Space; named to avoid shadowing the global OpenAI `client`
    arxiv_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    response = arxiv_client.predict(
        message=query,
        llm_results_use=5,
        database_choice="Semantic Search",
        llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2",
        api_name="/update_with_rag_md"
    )
    result = response[0] + response[1]
    create_and_save_file(result, "md", query, should_save=should_save)
    st.session_state.messages.append({"role": "assistant", "content": result})
    return result

# RAG PDF Gallery
def upload_pdf_files_to_vector_store(vector_store_id, pdf_files):
    stats = {"total_files": len(pdf_files), "successful_uploads": 0, "failed_uploads": 0, "errors": []}
    def upload_single_pdf(file_path):
        file_name = os.path.basename(file_path)
        try:
            with open(file_path, "rb") as f:
                file_response = client.files.create(file=f, purpose="assistants")
            # Note: client.vector_stores requires a recent openai SDK; older releases
            # expose the same endpoints under client.beta.vector_stores.
            client.vector_stores.files.create(vector_store_id=vector_store_id, file_id=file_response.id)
            return {"file": file_name, "status": "success"}
        except Exception as e:
            return {"file": file_name, "status": "failed", "error": str(e)}
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(upload_single_pdf, f) for f in pdf_files]
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(pdf_files)):
            result = future.result()
            if result["status"] == "success":
                stats["successful_uploads"] += 1
            else:
                stats["failed_uploads"] += 1
                stats["errors"].append(result)
    return stats

def create_vector_store(store_name):
    vector_store = client.vector_stores.create(name=store_name)
    return {"id": vector_store.id, "name": vector_store.name, "created_at": vector_store.created_at, "file_count": vector_store.file_counts.completed}

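# Usage sketch (hypothetical paths): store = create_vector_store("PDF_Gallery_Store"),
# then upload_pdf_files_to_vector_store(store["id"], ["a.pdf", "b.pdf"]) returns
# per-file success/failure stats for the batch.
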
def generate_questions(pdf_path):
    text = ""
    with open(pdf_path, "rb") as f:
        pdf = PdfReader(f)
        for page in pdf.pages:
            text += page.extract_text() or ""
    prompt = f"Generate a 10-question quiz with answers based only on this document. Format as markdown with numbered questions and answers:\n{text[:2000]}\n\n"
    response = client.chat.completions.create(
        model="gpt-4o-2024-05-13",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content

def process_rag_query(query, vector_store_id):
    # Note: file_search is documented for the Assistants/Responses APIs; passing it as a
    # chat.completions tool, as done here, is experimental and may need adjusting per SDK version.
    try:
        response = client.chat.completions.create(
            model="gpt-4o-2024-05-13",
            messages=[{"role": "user", "content": query}],
            tools=[{
                "type": "file_search",
                "file_search": {
                    "vector_store_ids": [vector_store_id]
                }
            }],
            tool_choice="auto"
        )
        tool_calls = response.choices[0].message.tool_calls or []
        return response.choices[0].message.content, tool_calls
    except openai.BadRequestError as e:
        st.error(f"RAG query error: {str(e)}")
        return None, []

def evaluate_rag(vector_store_id, questions_dict):
    k = 5
    total_queries = len(questions_dict) * 10  # 10 questions per PDF
    correct_retrievals_at_k = 0
    reciprocal_ranks = []
    average_precisions = []

    for filename, quiz in questions_dict.items():
        questions = re.findall(r"\d+\.\s(.*?)\n\s*Answer:\s(.*?)\n", quiz, re.DOTALL)
        for question, _ in questions:
            expected_file = filename
            response, tool_calls = process_rag_query(question, vector_store_id)
            if not tool_calls:
                continue
            retrieved_files = [call.arguments.get("file_id", "") for call in tool_calls if "file_search" in call.type][:k]
            if expected_file in retrieved_files:
                rank = retrieved_files.index(expected_file) + 1
                correct_retrievals_at_k += 1
                reciprocal_ranks.append(1 / rank)
                precisions = [1 if f == expected_file else 0 for f in retrieved_files[:rank]]
                average_precisions.append(sum(precisions) / len(precisions))
            else:
                reciprocal_ranks.append(0)
                average_precisions.append(0)

    recall_at_k = correct_retrievals_at_k / total_queries if total_queries else 0
    mrr = sum(reciprocal_ranks) / total_queries if total_queries else 0
    map_score = sum(average_precisions) / total_queries if total_queries else 0
    return {"recall@k": recall_at_k, "mrr": mrr, "map": map_score, "k": k}

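# Worked example for the metrics above (hypothetical numbers): with k=5 and a single
# query whose expected file is retrieved at rank 2, recall@5 counts it as a hit (1/1),
# its reciprocal rank is 1/2, and precision over the top 2 is 1/2, so MRR = MAP = 0.5.
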
def rag_pdf_gallery():
    st.subheader("RAG PDF Gallery")
    pdf_files = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
    if pdf_files:
        pdf_paths = [save_video(f) for f in pdf_files]  # save_video simply writes any upload to disk
        with st.spinner("Creating vector store..."):
            vector_store_details = create_vector_store("PDF_Gallery_Store")
            stats = upload_pdf_files_to_vector_store(vector_store_details["id"], pdf_paths)
            st.json(stats)

        col1, col2, col3 = st.columns(3)
        with col1:
            if st.button("📝 Quiz"):
                st.session_state["rag_prompt"] = "Generate a 10-question quiz with answers based only on this document."
        with col2:
            if st.button("📑 Summary"):
                st.session_state["rag_prompt"] = "Summarize this per page and output as markdown outline with emojis and numbered outline with multiple levels summarizing everything unique per page in method steps or fact steps."
        with col3:
            if st.button("🔍 Key Facts"):
                st.session_state["rag_prompt"] = "Extract 10 key facts from this document in markdown with emojis."

        with st.spinner("Generating questions..."):
            questions_dict = {os.path.basename(p): generate_questions(p) for p in pdf_paths}
            st.markdown("### Generated Quiz")
            for filename, quiz in questions_dict.items():
                st.markdown(f"#### {filename}")
                st.markdown(quiz)

        query = st.text_input("Ask a question about the PDFs:", value=st.session_state.get("rag_prompt", ""))
        if query and st.button("Submit RAG Query"):
            with st.spinner("Processing RAG query..."):
                response, tool_calls = process_rag_query(query, vector_store_details["id"])
                if response:
                    st.markdown(response)
                    st.write("Retrieved chunks:")
                    for call in tool_calls:
                        if "file_search" in call.type:
                            st.json(call.arguments)
                    st.rerun()

        if st.button("Evaluate RAG Performance"):
            with st.spinner("Evaluating..."):
                metrics = evaluate_rag(vector_store_details["id"], questions_dict)
                st.json(metrics)

# File Sidebar
def FileSidebar():
    st.sidebar.title("File Operations")
    default_types = [".md", ".png", ".pdf"]
    file_types = st.sidebar.multiselect("Filter by type", [".md", ".wav", ".png", ".mp4", ".mp3", ".pdf"], default=default_types)
    all_files = [f for f in glob.glob("*.*") if os.path.splitext(f)[1] in file_types and len(os.path.splitext(f)[0]) >= 10]
    all_files.sort(key=lambda x: os.path.getmtime(x), reverse=True)

    if st.sidebar.button("🗑 Delete All Filtered"):
        for file in all_files:
            os.remove(file)
        st.rerun()

    if st.sidebar.button("⬇️ Download All Filtered"):
        zip_file = create_zip_of_files(all_files)
        st.sidebar.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)

    for file in all_files:
        ext = os.path.splitext(file)[1].lower()
        col1, col2, col3, col4, col5 = st.sidebar.columns([1, 6, 1, 1, 1])
        colFollowUp = ""  # Flag to trigger main-area display

        with col1:  # View
            icon = "📜" if ext == ".md" else "📄" if ext == ".pdf" else "🖼️" if ext in [".png", ".jpg", ".jpeg"] else "🎵" if ext in [".wav", ".mp3"] else "🎥" if ext == ".mp4" else "📎"
            if st.button(icon, key=f"view_{file}"):
                colFollowUp = "view_" + ext
                with open(file, "rb") as f:
                    content = f.read()

        with col2:  # Download link
            st.markdown(get_table_download_link(file), unsafe_allow_html=True)

        with col3:  # Open
            if st.button("📂", key=f"open_{file}"):
                colFollowUp = "open_" + ext
                with open(file, "rb") as f:
                    content = f.read()

        with col4:  # Run
            if st.button("▶️", key=f"run_{file}"):
                if ext == ".md":
                    colFollowUp = "run_" + ext
                    with open(file, "rb") as f:
                        content = f.read()

        with col5:  # Delete
            if st.button("🗑", key=f"delete_{file}"):
                os.remove(file)
                st.rerun()

        # Display in main area based on colFollowUp
        if colFollowUp.startswith("view_"):
            if ext == ".md":
                st.markdown(content.decode("utf-8"))
                SpeechSynthesis(content.decode("utf-8"))
            elif ext == ".pdf":
                st.download_button("Download PDF", content, file, "application/pdf")
                st.write("PDF Viewer not natively supported; download to view.")
            elif ext in [".png", ".jpg", ".jpeg"]:
                st.image(content, use_column_width=True)
            elif ext in [".wav", ".mp3"]:
                st.audio(content, format=f"audio/{ext[1:]}")
            elif ext == ".mp4":
                st.video(content, format="video/mp4")

        elif colFollowUp.startswith("open_"):
            if ext == ".md":
                st.text_area(f"Editing {file}", value=content.decode("utf-8"), height=300, key=f"edit_{file}")
            elif ext == ".pdf":
                st.download_button("Download PDF to Edit", content, file, "application/pdf")
                st.write("PDF editing not supported in-app; download to edit externally.")
            elif ext in [".png", ".jpg", ".jpeg"]:
                st.image(content, use_column_width=True, caption=f"Viewing {file}")
            elif ext in [".wav", ".mp3"]:
                st.audio(content, format=f"audio/{ext[1:]}")
            elif ext == ".mp4":
                st.video(content, format="video/mp4")

        elif colFollowUp.startswith("run_"):
            if ext == ".md":
                process_text(content.decode("utf-8"))

def create_zip_of_files(files):
    zip_name = "Files.zip"
    with zipfile.ZipFile(zip_name, 'w') as zipf:
        for file in files:
            zipf.write(file)
    return zip_name

def get_zip_download_link(zip_file):
    with open(zip_file, 'rb') as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    return f'<a href="data:application/zip;base64,{b64}" download="{zip_file}">Download All</a>'

@st.cache_resource
def get_table_download_link(file_path):
    with open(file_path, 'rb') as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    file_name = os.path.basename(file_path)
    ext = os.path.splitext(file_name)[1].lower()
    mime_type = "text/markdown" if ext == ".md" else "application/pdf" if ext == ".pdf" else "image/png" if ext in [".png", ".jpg", ".jpeg"] else "audio/wav" if ext == ".wav" else "audio/mpeg" if ext == ".mp3" else "video/mp4" if ext == ".mp4" else "application/octet-stream"
    return f'<a href="data:{mime_type};base64,{b64}" download="{file_name}">{file_name}</a>'

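# Note: these download links inline the whole file as a base64 data URI in the page HTML,
# which is fine for small markdown files but heavy for large media like mp4s.
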
# Main Function
def main():
    st.markdown("##### GPT-4o Omni Model: Text, Audio, Image, Video & RAG")
    model_options = ["gpt-4o-2024-05-13", "gpt-3.5-turbo"]
    st.session_state["openai_model"] = st.selectbox("Select GPT Model", model_options, index=0)

    option = st.selectbox("Select Input Type", ("Text", "Image", "Audio", "Video", "ArXiv Search", "RAG PDF Gallery"))

    if option == "Text":
        default_text = "Create a summary of PDF py libraries and usage in py with emojis in markdown. Maybe a buckyball feature rating comparing them against each other in markdown emoji outline or tables."
        col1, col2 = st.columns([1, 5])
        with col1:
            if st.button("📝 MD", key="md_button"):
                st.session_state["text_input"] = default_text
                with st.spinner("Processing..."):
                    process_text(default_text)
                st.rerun()
        with col2:
            text_input = st.text_input("Enter your text:", value=st.session_state.get("text_input", ""), key="text_input_field")
        if text_input and st.button("Submit Text"):
            with st.spinner("Processing..."):
                process_text(text_input)
            st.rerun()

    elif option == "Image":
        col1, col2 = st.columns(2)
        with col1:
            if st.button("📝 Describe"):
                st.session_state["image_prompt"] = "Describe this image and list ten facts in a markdown outline with emojis."
        with col2:
            if st.button("🔍 OCR"):
                st.session_state["image_prompt"] = "Show electronic text of text in the image."
        text_input = st.text_input("Image Prompt:", value=st.session_state.get("image_prompt", "Describe this image and list ten facts in a markdown outline with emojis."))
        image_input = st.file_uploader("Upload an image (max 200MB)", type=["png", "jpg", "jpeg"], accept_multiple_files=False)
        if image_input and text_input and st.button("Submit Image"):
            if image_input.size > 200 * 1024 * 1024:
                st.error("Image exceeds 200MB limit.")
            else:
                with st.spinner("Processing..."):
                    image_response = process_image(image_input, text_input)
                    with st.chat_message("ai", avatar="🦖"):
                        st.markdown(image_response)
                st.rerun()

    elif option == "Audio":
        text_input = st.text_input("Audio Prompt:", value="Summarize this audio transcription in Markdown.")
        audio_input = st.file_uploader("Upload an audio file (max 200MB)", type=["mp3", "wav", "flac", "m4a"], accept_multiple_files=False)
        audio_bytes = audio_recorder()
        if audio_bytes and text_input and st.button("Submit Audio Recording"):
            with open("recorded_audio.wav", "wb") as f:
                f.write(audio_bytes)
            with st.spinner("Processing..."):
                process_audio(audio_bytes, text_input)
            st.rerun()
        elif audio_input and text_input and st.button("Submit Audio File"):
            with st.spinner("Processing..."):
                process_audio(audio_input, text_input)
            st.rerun()

    elif option == "Video":
        text_input = st.text_input("Video Prompt:", value="Summarize this video and its transcription in Markdown.")
        video_input = st.file_uploader("Upload a video file (max 200MB)", type=["mp4"], accept_multiple_files=False)
        if video_input and text_input and st.button("Submit Video"):
            if video_input.size > 200 * 1024 * 1024:
                st.error("Video exceeds 200MB limit.")
            else:
                with st.spinner("Processing..."):
                    process_audio_and_video(video_input)
                st.rerun()

    elif option == "ArXiv Search":
        query = st.text_input("AI Search ArXiv Scholarly Articles:")
        if query and st.button("Search ArXiv"):
            with st.spinner("Searching ArXiv..."):
                result = search_arxiv(query)
                st.markdown(result)
            st.rerun()

    elif option == "RAG PDF Gallery":
        rag_pdf_gallery()

    # Chat Display and Input
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
        with st.spinner("Processing..."):
            process_text(prompt)
        st.rerun()

FileSidebar()
main()