awacke1 committed
Commit 5c15f6d · verified · Parent(s): 03c4954

Update app.py

Files changed (1):
  1. app.py +372 -564
app.py CHANGED
@@ -5,18 +5,17 @@ import json
  import math
  import os
  import pytz
- import random
  import re
- import requests
- import streamlit as st
- import streamlit.components.v1 as components
- import textract
  import time
  import zipfile
  from concurrent.futures import ThreadPoolExecutor
  from tqdm import tqdm
  import concurrent

  from audio_recorder_streamlit import audio_recorder
  from bs4 import BeautifulSoup
  from collections import deque
@@ -24,587 +23,396 @@ from datetime import datetime
  from dotenv import load_dotenv
  from gradio_client import Client
  from io import BytesIO
- from moviepy import VideoFileClip
  from PIL import Image
  from PyPDF2 import PdfReader
- from templates import bot_template, css, user_template
- from urllib.parse import quote
- from xml.etree import ElementTree as ET

  import openai
  from openai import OpenAI
  import pandas as pd

- # Configuration
- Site_Name = 'Scholarly-Article-Document-Search-With-Memory'
- title = "🔬🧠ScienceBrain.AI"
- helpURL = 'https://huggingface.co/awacke1'
- bugURL = 'https://huggingface.co/spaces/awacke1'
- icons = Image.open("icons.ico")
- st.set_page_config(
-     page_title=title,
-     page_icon=icons,
-     layout="wide",
-     initial_sidebar_state="auto",
-     menu_items={'Get Help': helpURL, 'Report a bug': bugURL, 'About': title}
- )
-
- # API Configuration
- API_KEY = os.getenv('API_KEY')
- HF_KEY = os.getenv('HF_KEY')
- headers = {"Authorization": f"Bearer {HF_KEY}", "Content-Type": "application/json"}
- key = os.getenv('OPENAI_API_KEY')
- client = OpenAI(api_key=key, organization=os.getenv('OPENAI_ORG_ID'))
- MODEL = "gpt-4o-2024-05-13"
- if "openai_model" not in st.session_state:
-     st.session_state["openai_model"] = MODEL
- if "messages" not in st.session_state:
-     st.session_state.messages = []
- if st.button("Clear Session"):
-     st.session_state.messages = []
-
- # Sidebar Options
- should_save = st.sidebar.checkbox("💾 Save", value=True, help="Save your session data.")
-
- # HTML5 Speech Synthesis
- @st.cache_resource
- def SpeechSynthesis(result):
-     documentHTML5 = '''
-     <!DOCTYPE html>
-     <html>
-     <head>
-         <title>Read It Aloud</title>
-         <script type="text/javascript">
-             function readAloud() {
-                 const text = document.getElementById("textArea").value;
-                 const speech = new SpeechSynthesisUtterance(text);
-                 window.speechSynthesis.speak(speech);
-             }
-         </script>
-     </head>
-     <body>
-         <h1>🔊 Read It Aloud</h1>
-         <textarea id="textArea" rows="10" cols="80">
-     '''
-     documentHTML5 += result + '''
-         </textarea>
-         <br>
-         <button onclick="readAloud()">🔊 Read Aloud</button>
-     </body>
-     </html>
-     '''
-     components.html(documentHTML5, width=1280, height=300)
-
- # File Naming and Saving
- def generate_filename(prompt, file_type, original_name=None):
-     central = pytz.timezone('US/Central')
-     safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
-     safe_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt).strip()[:50]
-     if original_name and file_type == "md":  # For images
-         base_name = os.path.splitext(original_name)[0]
-         file_stem = f"{safe_date_time}_{safe_prompt}_{base_name}"[:100]  # Cap at 100 chars
-         return f"{file_stem}.{file_type}"
-     file_stem = f"{safe_date_time}_{safe_prompt}"[:100]  # Cap at 100 chars
-     return f"{file_stem}.{file_type}"
-
- def create_and_save_file(content, file_type="md", prompt=None, original_name=None, should_save=True):
-     if not should_save:
-         return None
-     filename = generate_filename(prompt, file_type, original_name)
-     with open(filename, "w", encoding="utf-8") as f:
-         f.write(content if not prompt else prompt + "\n\n" + content)
-     return filename
-
- # Text Processing
- def process_text(text_input):
-     if text_input:
-         st.session_state.messages.append({"role": "user", "content": text_input})
-         with st.chat_message("user"):
-             st.markdown(text_input)
-         with st.chat_message("assistant"):
-             completion = client.chat.completions.create(
-                 model=st.session_state["openai_model"],
-                 messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages],
-                 stream=False
              )
-             response = completion.choices[0].message.content
-             st.markdown(response)
-             filename = generate_filename(text_input, "md")
-             create_and_save_file(response, "md", text_input, should_save=should_save)
-             st.session_state.messages.append({"role": "assistant", "content": response})
-
- # Image Processing
- def process_image(image_input, user_prompt):
-     original_name = image_input.name
-     image_bytes = image_input.read()
-     with open(original_name, "wb") as f:
-         f.write(image_bytes)  # Save original image
-     base64_image = base64.b64encode(image_bytes).decode("utf-8")
-     response = client.chat.completions.create(
-         model=st.session_state["openai_model"],
-         messages=[
-             {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
-             {"role": "user", "content": [
-                 {"type": "text", "text": user_prompt},
-                 {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
-             ]}
-         ],
-         temperature=0.0
-     )
-     image_response = response.choices[0].message.content
-     filename = generate_filename(user_prompt, "md", original_name)  # Include prompt in filename
-     create_and_save_file(image_response, "md", user_prompt, original_name, should_save=should_save)
-     return image_response
-
- # Audio Processing
- def process_audio(audio_input, text_input=''):
-     if audio_input:
-         audio_bytes = audio_input if isinstance(audio_input, bytes) else audio_input.read()
-         supported_formats = ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm']
-         file_ext = "wav" if isinstance(audio_input, bytes) else os.path.splitext(audio_input.name)[1][1:].lower()
-         if file_ext not in supported_formats:
-             st.error(f"Unsupported format: {file_ext}. Supported formats: {supported_formats}")
-             return
-         if len(audio_bytes) > 200 * 1024 * 1024:  # 200MB limit
-             st.error("File exceeds 200MB limit.")
-             return
-         with st.spinner("Transcribing audio..."):
-             try:
-                 transcription = client.audio.transcriptions.create(
-                     model="whisper-1",
-                     file=BytesIO(audio_bytes)
-                 ).text
-                 st.session_state.messages.append({"role": "user", "content": transcription})
-                 with st.chat_message("user"):
-                     st.markdown(transcription)
-                 with st.chat_message("assistant"):
-                     completion = client.chat.completions.create(
-                         model=st.session_state["openai_model"],
-                         messages=[{"role": "user", "content": text_input + "\n\nTranscription: " + transcription}]
-                     )
-                     response = completion.choices[0].message.content
-                     st.markdown(response)
-                     filename = generate_filename(transcription, "md")
-                     create_and_save_file(response, "md", text_input, should_save=should_save)
-                     st.session_state.messages.append({"role": "assistant", "content": response})
-             except openai.BadRequestError as e:
-                 st.error(f"Audio processing error: {str(e)}")
-
- # Video Processing
- def save_video(video_input):
-     with open(video_input.name, "wb") as f:
-         f.write(video_input.read())
-     return video_input.name
-
- def process_video(video_path, seconds_per_frame=2):
-     base64Frames = []
-     base_video_path, _ = os.path.splitext(video_path)
-     video = cv2.VideoCapture(video_path)
-     total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-     fps = video.get(cv2.CAP_PROP_FPS)
-     frames_to_skip = int(fps * seconds_per_frame)
-     curr_frame = 0
-     while curr_frame < total_frames - 1:
-         video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
-         success, frame = video.read()
-         if not success:
-             break
-         _, buffer = cv2.imencode(".jpg", frame)
-         base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
-         curr_frame += frames_to_skip
-     video.release()
-     audio_path = f"{base_video_path}.mp3"
-     try:
-         clip = VideoFileClip(video_path)
-         if clip.audio:
-             clip.audio.write_audiofile(audio_path, bitrate="32k")
-             clip.audio.close()
-         clip.close()
-     except Exception as e:
-         st.warning(f"No audio track found or error: {str(e)}")
-         audio_path = None
-     return base64Frames, audio_path
-
- def process_audio_and_video(video_input):
-     if video_input:
-         video_path = save_video(video_input)
-         with st.spinner("Extracting frames and audio..."):
-             base64Frames, audio_path = process_video(video_path)
-         if audio_path:
-             with st.spinner("Transcribing video audio..."):
-                 try:
-                     with open(audio_path, "rb") as audio_file:
-                         transcript = client.audio.transcriptions.create(
-                             model="whisper-1",
-                             file=audio_file
-                         ).text
-                     with st.chat_message("user"):
-                         st.markdown(f"Video Transcription: {transcript}")
-                     with st.chat_message("assistant"):
-                         response = client.chat.completions.create(
-                             model=st.session_state["openai_model"],
-                             messages=[
-                                 {"role": "system", "content": "Summarize the video and its transcript in Markdown."},
-                                 {"role": "user", "content": [
-                                     "Video frames:", *map(lambda x: {"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{x}"}}, base64Frames),
-                                     {"type": "text", "text": f"Transcription: {transcript}"}
-                                 ]}
-                             ]
-                         )
-                         result = response.choices[0].message.content
-                         st.markdown(result)
-                         filename = generate_filename(transcript, "md")
-                         create_and_save_file(result, "md", "Video summary", should_save=should_save)
-                 except openai.BadRequestError as e:
-                     st.error(f"Video audio processing error: {str(e)}")
-         else:
-             st.warning("No audio to transcribe.")
-
- # ArXiv Search
- def search_arxiv(query):
-     client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
-     response = client.predict(
-         message=query,
-         llm_results_use=5,
-         database_choice="Semantic Search",
-         llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2",
-         api_name="/update_with_rag_md"
-     )
-     result = response[0] + response[1]
-     filename = generate_filename(query, "md")
-     create_and_save_file(result, "md", query, should_save=should_save)
-     st.session_state.messages.append({"role": "assistant", "content": result})
-     return result
-
- # RAG PDF Gallery
- def upload_pdf_files_to_vector_store(vector_store_id, pdf_files):
-     stats = {"total_files": len(pdf_files), "successful_uploads": 0, "failed_uploads": 0, "errors": []}
-     def upload_single_pdf(file_path):
-         file_name = os.path.basename(file_path)
          try:
-             with open(file_path, "rb") as f:
-                 file_response = client.files.create(file=f, purpose="assistants")
-             client.vector_stores.files.create(vector_store_id=vector_store_id, file_id=file_response.id)
-             return {"file": file_name, "status": "success"}
-         except Exception as e:
-             return {"file": file_name, "status": "failed", "error": str(e)}
-     with ThreadPoolExecutor(max_workers=5) as executor:
-         futures = [executor.submit(upload_single_pdf, f) for f in pdf_files]
-         for future in tqdm(concurrent.futures.as_completed(futures), total=len(pdf_files)):
-             result = future.result()
-             if result["status"] == "success":
-                 stats["successful_uploads"] += 1
-             else:
-                 stats["failed_uploads"] += 1
-                 stats["errors"].append(result)
-     return stats
-
- def create_vector_store(store_name):
-     vector_store = client.vector_stores.create(name=store_name)
-     return {"id": vector_store.id, "name": vector_store.name, "created_at": vector_store.created_at, "file_count": vector_store.file_counts.completed}
-
- def generate_questions(pdf_path):
-     text = ""
-     with open(pdf_path, "rb") as f:
-         pdf = PdfReader(f)
-         for page in pdf.pages:
-             text += page.extract_text() or ""
-     prompt = f"Generate a 10-question quiz with answers based only on this document. Format as markdown with numbered questions and answers:\n{text[:2000]}\n\n"
-     response = client.chat.completions.create(
-         model="gpt-4o-2024-05-13",
-         messages=[{"role": "user", "content": prompt}]
-     )
-     return response.choices[0].message.content
-
- def process_rag_query(query, vector_store_id):
-     try:
-         response = client.chat.completions.create(
-             model="gpt-4o-2024-05-13",
-             messages=[{"role": "user", "content": query}],
-             tools=[{
-                 "type": "file_search",
-                 "file_search": {
-                     "vector_store_ids": [vector_store_id]
-                 }
-             }],
-             tool_choice="auto"
-         )
-         tool_calls = response.choices[0].message.tool_calls if response.choices[0].message.tool_calls else []
-         return response.choices[0].message.content, tool_calls
-     except openai.BadRequestError as e:
-         st.error(f"RAG query error: {str(e)}")
-         return None, []
-
- def evaluate_rag(vector_store_id, questions_dict):
-     k = 5
-     total_queries = len(questions_dict) * 10  # 10 questions per PDF
-     correct_retrievals_at_k = 0
-     reciprocal_ranks = []
-     average_precisions = []
-
-     for filename, quiz in questions_dict.items():
-         questions = re.findall(r"\d+\.\s(.*?)\n\s*Answer:\s(.*?)\n", quiz, re.DOTALL)
-         for question, _ in questions:
-             expected_file = filename
-             response, tool_calls = process_rag_query(question, vector_store_id)
-             if not tool_calls:
-                 continue
-             retrieved_files = [call.arguments.get("file_id", "") for call in tool_calls if "file_search" in call.type][:k]
-             if expected_file in retrieved_files:
-                 rank = retrieved_files.index(expected_file) + 1
-                 correct_retrievals_at_k += 1
-                 reciprocal_ranks.append(1 / rank)
-                 precisions = [1 if f == expected_file else 0 for f in retrieved_files[:rank]]
-                 average_precisions.append(sum(precisions) / len(precisions))
              else:
-                 reciprocal_ranks.append(0)
-                 average_precisions.append(0)
-
-     recall_at_k = correct_retrievals_at_k / total_queries if total_queries else 0
-     mrr = sum(reciprocal_ranks) / total_queries if total_queries else 0
-     map_score = sum(average_precisions) / total_queries if total_queries else 0
-     return {"recall@k": recall_at_k, "mrr": mrr, "map": map_score, "k": k}
-
- def rag_pdf_gallery():
-     st.subheader("RAG PDF Gallery")
-     pdf_files = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
-     if pdf_files:
-         pdf_paths = [save_video(f) for f in pdf_files]  # Reuse save_video for simplicity
-         with st.spinner("Creating vector store..."):
-             vector_store_details = create_vector_store("PDF_Gallery_Store")
-             stats = upload_pdf_files_to_vector_store(vector_store_details["id"], pdf_paths)
-             st.json(stats)
-
-         col1, col2, col3 = st.columns(3)
-         with col1:
-             if st.button("📝 Quiz"):
-                 st.session_state["rag_prompt"] = "Generate a 10-question quiz with answers based only on this document."
-         with col2:
-             if st.button("📑 Summary"):
-                 st.session_state["rag_prompt"] = "Summarize this per page and output as markdown outline with emojis and numbered outline with multiple levels summarizing everything unique per page in method steps or fact steps."
-         with col3:
-             if st.button("🔍 Key Facts"):
-                 st.session_state["rag_prompt"] = "Extract 10 key facts from this document in markdown with emojis."
-
-         with st.spinner("Generating questions..."):
-             questions_dict = {os.path.basename(p): generate_questions(p) for p in pdf_paths}
-             st.markdown("### Generated Quiz")
-             for filename, quiz in questions_dict.items():
-                 st.markdown(f"#### {filename}")
-                 st.markdown(quiz)
-
-         query = st.text_input("Ask a question about the PDFs:", value=st.session_state.get("rag_prompt", ""))
-         if query and st.button("Submit RAG Query"):
-             with st.spinner("Processing RAG query..."):
-                 response, tool_calls = process_rag_query(query, vector_store_details["id"])
-                 if response:
-                     st.markdown(response)
-                     st.write("Retrieved chunks:")
-                     for call in tool_calls:
-                         if "file_search" in call.type:
-                             st.json(call.arguments)
-                     st.rerun()

-         if st.button("Evaluate RAG Performance"):
-             with st.spinner("Evaluating..."):
-                 metrics = evaluate_rag(vector_store_details["id"], questions_dict)
-                 st.json(metrics)
-
- # File Sidebar
- def FileSidebar():
-     st.sidebar.title("File Operations")
-     default_types = [".md", ".png", ".pdf"]
-     file_types = st.sidebar.multiselect("Filter by type", [".md", ".wav", ".png", ".mp4", ".mp3", ".pdf"], default=default_types)
-     all_files = [f for f in glob.glob("*.*") if os.path.splitext(f)[1] in file_types and len(os.path.splitext(f)[0]) >= 10]
-     all_files.sort(key=lambda x: os.path.getmtime(x), reverse=True)
-
-     if st.sidebar.button("🗑 Delete All Filtered"):
-         for file in all_files:
-             os.remove(file)
-         st.rerun()
-
-     if st.sidebar.button("⬇️ Download All Filtered"):
-         zip_file = create_zip_of_files(all_files)
-         st.sidebar.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)

-     for file in all_files:
-         ext = os.path.splitext(file)[1].lower()
-         col1, col2, col3, col4, col5 = st.sidebar.columns([1, 6, 1, 1, 1])
-         colFollowUp = ""  # Flag to trigger main-area display
-
-         with col1:  # View
-             icon = "📜" if ext == ".md" else "📄" if ext == ".pdf" else "🖼️" if ext in [".png", ".jpg", ".jpeg"] else "🎵" if ext in [".wav", ".mp3"] else "🎥" if ext == ".mp4" else "📎"
-             if st.button(icon, key=f"view_{file}"):
-                 colFollowUp = "view_" + ext
-                 with open(file, "rb") as f:
-                     content = f.read()

-         with col2:  # Download link
-             st.markdown(get_table_download_link(file), unsafe_allow_html=True)

-         with col3:  # Open
-             if st.button("📂", key=f"open_{file}"):
-                 colFollowUp = "open_" + ext
-                 with open(file, "rb") as f:
-                     content = f.read()

-         with col4:  # Run
-             if st.button("▶️", key=f"run_{file}"):
-                 if ext == ".md":
-                     colFollowUp = "run_" + ext
-                     with open(file, "rb") as f:
-                         content = f.read()

-         with col5:  # Delete
-             if st.button("🗑", key=f"delete_{file}"):
-                 os.remove(file)
                  st.rerun()

-         # Display in main area based on colFollowUp
-         if colFollowUp.startswith("view_"):
-             if ext == ".md":
-                 st.markdown(content.decode("utf-8"))
-                 SpeechSynthesis(content.decode("utf-8"))
-             elif ext == ".pdf":
-                 st.download_button("Download PDF", content, file, "application/pdf")
-                 st.write("PDF Viewer not natively supported; download to view.")
-             elif ext in [".png", ".jpg", ".jpeg"]:
-                 st.image(content, use_column_width=True)
-             elif ext in [".wav", ".mp3"]:
-                 st.audio(content, format=f"audio/{ext[1:]}")
-             elif ext == ".mp4":
-                 st.video(content, format="video/mp4")
-
-         elif colFollowUp.startswith("open_"):
-             if ext == ".md":
-                 st.text_area(f"Editing {file}", value=content.decode("utf-8"), height=300, key=f"edit_{file}")
-             elif ext == ".pdf":
-                 st.download_button("Download PDF to Edit", content, file, "application/pdf")
-                 st.write("PDF editing not supported in-app; download to edit externally.")
-             elif ext in [".png", ".jpg", ".jpeg"]:
-                 st.image(content, use_column_width=True, caption=f"Viewing {file}")
-             elif ext in [".wav", ".mp3"]:
-                 st.audio(content, format=f"audio/{ext[1:]}")
-             elif ext == ".mp4":
-                 st.video(content, format="video/mp4")
-
-         elif colFollowUp.startswith("run_"):
-             if ext == ".md":
-                 process_text(content.decode("utf-8"))
-
- def create_zip_of_files(files):
-     zip_name = "Files.zip"
-     with zipfile.ZipFile(zip_name, 'w') as zipf:
-         for file in files:
-             zipf.write(file)
-     return zip_name
-
- def get_zip_download_link(zip_file):
-     with open(zip_file, 'rb') as f:
-         data = f.read()
-     b64 = base64.b64encode(data).decode()
-     return f'<a href="data:application/zip;base64,{b64}" download="{zip_file}">Download All</a>'
-
- @st.cache_resource
- def get_table_download_link(file_path):
-     with open(file_path, 'rb') as f:
-         data = f.read()
-     b64 = base64.b64encode(data).decode()
-     file_name = os.path.basename(file_path)
-     ext = os.path.splitext(file_name)[1].lower()
-     mime_type = "text/markdown" if ext == ".md" else "application/pdf" if ext == ".pdf" else "image/png" if ext in [".png", ".jpg", ".jpeg"] else "audio/wav" if ext == ".wav" else "audio/mpeg" if ext == ".mp3" else "video/mp4" if ext == ".mp4" else "application/octet-stream"
-     return f'<a href="data:{mime_type};base64,{b64}" download="{file_name}">{file_name}</a>'
-
- # Main Function
- def main():
-     st.markdown("##### GPT-4o Omni Model: Text, Audio, Image, Video & RAG")
-     model_options = ["gpt-4o-2024-05-13", "gpt-3.5-turbo"]
-     st.session_state["openai_model"] = st.selectbox("Select GPT Model", model_options, index=0)
-
-     option = st.selectbox("Select Input Type", ("Text", "Image", "Audio", "Video", "ArXiv Search", "RAG PDF Gallery"))
-
-     if option == "Text":
-         default_text = "Create a summary of PDF py libraries and usage in py with emojis in markdown. Maybe a buckeyball feature rating comparing them against each other in markdown emoji outline or tables."
-         col1, col2 = st.columns([1, 5])
-         with col1:
-             if st.button("📝 MD", key="md_button"):
-                 st.session_state["text_input"] = default_text
-                 with st.spinner("Processing..."):
-                     process_text(default_text)
-                 st.rerun()
-         with col2:
-             text_input = st.text_input("Enter your text:", value=st.session_state.get("text_input", ""), key="text_input_field")
-             if text_input and st.button("Submit Text"):
-                 with st.spinner("Processing..."):
-                     process_text(text_input)
-                 st.rerun()
-
-     elif option == "Image":
-         col1, col2 = st.columns(2)
-         with col1:
-             if st.button("📝 Describe"):
-                 st.session_state["image_prompt"] = "Describe this image and list ten facts in a markdown outline with emojis."
-         with col2:
-             if st.button("🔍 OCR"):
-                 st.session_state["image_prompt"] = "Show electronic text of text in the image."
-         text_input = st.text_input("Image Prompt:", value=st.session_state.get("image_prompt", "Describe this image and list ten facts in a markdown outline with emojis."))
-         image_input = st.file_uploader("Upload an image (max 200MB)", type=["png", "jpg", "jpeg"], accept_multiple_files=False)
-         if image_input and text_input and st.button("Submit Image"):
-             if image_input.size > 200 * 1024 * 1024:
-                 st.error("Image exceeds 200MB limit.")
-             else:
-                 with st.spinner("Processing..."):
-                     image_response = process_image(image_input, text_input)
-                     with st.chat_message("ai", avatar="🦖"):
-                         st.markdown(image_response)
-                 st.rerun()
-
-     elif option == "Audio":
-         text_input = st.text_input("Audio Prompt:", value="Summarize this audio transcription in Markdown.")
-         audio_input = st.file_uploader("Upload an audio file (max 200MB)", type=["mp3", "wav", "flac", "m4a"], accept_multiple_files=False)
-         audio_bytes = audio_recorder()
-         if audio_bytes and text_input and st.button("Submit Audio Recording"):
-             with open("recorded_audio.wav", "wb") as f:
-                 f.write(audio_bytes)
-             with st.spinner("Processing..."):
-                 process_audio(audio_bytes, text_input)
              st.rerun()
-         elif audio_input and text_input and st.button("Submit Audio File"):
-             with st.spinner("Processing..."):
-                 process_audio(audio_input, text_input)
              st.rerun()

-     elif option == "Video":
-         text_input = st.text_input("Video Prompt:", value="Summarize this video and its transcription in Markdown.")
-         video_input = st.file_uploader("Upload a video file (max 200MB)", type=["mp4"], accept_multiple_files=False)
-         if video_input and text_input and st.button("Submit Video"):
-             if video_input.size > 200 * 1024 * 1024:
-                 st.error("Video exceeds 200MB limit.")
-             else:
-                 with st.spinner("Processing..."):
-                     process_audio_and_video(video_input)
-                 st.rerun()
-
-     elif option == "ArXiv Search":
-         query = st.text_input("AI Search ArXiv Scholarly Articles:")
-         if query and st.button("Search ArXiv"):
-             with st.spinner("Searching ArXiv..."):
-                 result = search_arxiv(query)
-                 st.markdown(result)
              st.rerun()
-
-     elif option == "RAG PDF Gallery":
-         rag_pdf_gallery()

-     # Chat Display and Input
-     for message in st.session_state.messages:
-         with st.chat_message(message["role"]):
-             st.markdown(message["content"])

-     if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
-         with st.spinner("Processing..."):
-             process_text(prompt)
-         st.rerun()

- FileSidebar()
- main()

  import math
  import os
  import pytz
  import re
  import time
  import zipfile
+ import asyncio
+ import streamlit as st
+ import streamlit.components.v1 as components
  from concurrent.futures import ThreadPoolExecutor
  from tqdm import tqdm
  import concurrent

+ # Foundational Imports
  from audio_recorder_streamlit import audio_recorder
  from bs4 import BeautifulSoup
  from collections import deque
  from dotenv import load_dotenv
  from gradio_client import Client
  from io import BytesIO
+ from moviepy.editor import VideoFileClip
  from PIL import Image
  from PyPDF2 import PdfReader

+ # OpenAI & Data Handling
  import openai
  from openai import OpenAI
  import pandas as pd

+ # Load environment variables
+ load_dotenv()
+
+ # --- Core Classes for Functionality ---
+
+ class PerformanceTracker:
+     """Tracks and displays the performance of executed tasks."""
+     def track(self, model_name_provider):
+         # ⏱️ Times our functions and brags about how fast they are.
+         def decorator(func):
+             def wrapper(*args, **kwargs):
+                 start_time = time.time()
+
+                 # Execute the function in a thread pool for non-blocking UI
+                 with ThreadPoolExecutor() as executor:
+                     future = executor.submit(func, *args, **kwargs)
+                     result = future.result()  # Wait for the function to complete
+
+                 end_time = time.time()
+                 duration = end_time - start_time
+                 model_used = model_name_provider() if callable(model_name_provider) else model_name_provider
+
+                 st.success("**Execution Complete!**")
+                 st.info(f"Model: `{model_used}` | Runtime: `{duration:.2f} seconds`")
+                 return result
+             return wrapper
+         return decorator
+
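Usage sketch: `track` is a decorator factory, and passing a zero-argument callable instead of a plain string defers the model-name lookup until the wrapped function actually runs. A minimal illustration with a hypothetical `slow_task` (assumes this file's imports and a live Streamlit session):

    tracker = PerformanceTracker()

    @tracker.track(lambda: st.session_state.openai_model)  # provider resolved at call time
    def slow_task(seconds):
        time.sleep(seconds)
        return seconds

    slow_task(2)  # renders the success banner and reports a roughly 2.00 second runtime
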
+ class FileHandler:
+     """Manages all file system operations like naming, saving, and zipping."""
+     def __init__(self, should_save=True):
+         # 🗂️ I'm the librarian for all your digital stuff.
+         self.should_save = should_save
+         self.central_tz = pytz.timezone('US/Central')
+
+     def generate_filename(self, prompt, file_type, original_name=None):
+         # 🏷️ Slapping a unique, SFW name on your file so you can find it later.
+         safe_date_time = datetime.now(self.central_tz).strftime("%m%d_%H%M")
+         safe_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt).strip()[:50]
+         file_stem = f"{safe_date_time}_{safe_prompt}"
+         if original_name:
+             base_name = os.path.splitext(original_name)[0]
+             file_stem = f"{file_stem}_{base_name}"
+         return f"{file_stem[:100]}.{file_type}"
+
+     def save_file(self, content, filename, prompt=None):
+         # 💾 Saving your masterpiece before you accidentally delete it.
+         if not self.should_save:
+             return None
+         with open(filename, "w", encoding="utf-8") as f:
+             if prompt:
+                 f.write(prompt + "\n\n")
+             f.write(content)
+         return filename
+
+     def save_uploaded_file(self, uploaded_file):
+         # 📥 Taking your uploaded file and tucking it safely on the server.
+         path = os.path.join(uploaded_file.name)
+         with open(path, "wb") as f:
+             f.write(uploaded_file.getvalue())
+         return path
+
+     def create_zip_archive(self, files_to_zip):
+         # 🤐 Zipping up your files nice and tight.
+         zip_path = "Filtered_Files.zip"
+         with zipfile.ZipFile(zip_path, 'w') as zipf:
+             for file in files_to_zip:
+                 zipf.write(file)
+         return zip_path
+
+     @st.cache_data
+     def get_base64_download_link(_self, file_path, link_text, mime_type):
+         # 🔗 Creating a magical link to download your file.
+         with open(file_path, 'rb') as f:
+             data = f.read()
+         b64 = base64.b64encode(data).decode()
+         return f'<a href="data:{mime_type};base64,{b64}" download="{os.path.basename(file_path)}">{link_text}</a>'
+
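Design note: the underscore in `_self` follows Streamlit's convention that underscore-prefixed parameters are excluded from `st.cache_data` hashing, so the handler instance itself never needs to be hashable; the cached link is recomputed only when `file_path`, `link_text`, or `mime_type` change. A hypothetical usage (`notes.md` is illustrative):

    handler = FileHandler()
    link = handler.get_base64_download_link("notes.md", "Download notes", "text/markdown")
    st.sidebar.markdown(link, unsafe_allow_html=True)  # the returned <a> tag must be rendered as raw HTML
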
+ class OpenAIProcessor:
+     """Handles all interactions with the OpenAI API."""
+     def __init__(self, api_key, org_id, model):
+         # 🤖 I'm the brainiac talking to the OpenAI overlords.
+         self.client = OpenAI(api_key=api_key, organization=org_id)
+         self.model = model
+
+     def execute_text_completion(self, messages):
+         # ✍️ Turning your prompts into pure AI gold.
+         completion = self.client.chat.completions.create(
+             model=self.model,
+             messages=[{"role": m["role"], "content": m["content"]} for m in messages],
+             stream=False
+         )
+         return completion.choices[0].message.content
+
+     def execute_image_completion(self, prompt, image_bytes):
+         # 🖼️ Analyzing your pics with my digital eyeballs.
+         base64_image = base64.b64encode(image_bytes).decode("utf-8")
+         response = self.client.chat.completions.create(
+             model=self.model,
+             messages=[
+                 {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
+                 {"role": "user", "content": [
+                     {"type": "text", "text": prompt},
+                     {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
+                 ]}
+             ],
+             temperature=0.0
+         )
+         return response.choices[0].message.content
+
+     def execute_video_completion(self, frames, transcript):
+         # 🎬 Watching your video and giving you the summary, so you don't have to.
+         response = self.client.chat.completions.create(
+             model=self.model,
+             messages=[
+                 {"role": "system", "content": "Summarize the video and its transcript in Markdown."},
+                 {"role": "user", "content": [
+                     "Video frames:", *map(lambda x: {"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{x}"}}, frames),
+                     {"type": "text", "text": f"Transcription: {transcript}"}
+                 ]}
+             ]
+         )
+         return response.choices[0].message.content
+
+     def transcribe_audio(self, audio_bytes):
+         # 🎤 I'm all ears... turning your sounds into words.
+         try:
+             # Name the in-memory file so the API can infer the audio format from the
+             # extension; the only caller passes the MP3 track extracted from a video.
+             audio_file = BytesIO(audio_bytes)
+             audio_file.name = "audio.mp3"
+             transcription = self.client.audio.transcriptions.create(
+                 model="whisper-1",
+                 file=audio_file
              )
+             return transcription.text
+         except openai.BadRequestError as e:
+             st.error(f"Audio processing error: {e}")
+             return None
+
+ class MediaProcessor:
+     """Handles processing of media files like video and audio."""
+     def extract_video_components(self, video_path, seconds_per_frame=2):
+         # ✂️ Chopping up your video into frames and snatching the audio.
+         base64Frames = []
+         video = cv2.VideoCapture(video_path)
+         total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+         fps = video.get(cv2.CAP_PROP_FPS)
+         frames_to_skip = int(fps * seconds_per_frame)
+         curr_frame = 0
+
+         while curr_frame < total_frames - 1:
+             video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
+             success, frame = video.read()
+             if not success: break
+             _, buffer = cv2.imencode(".jpg", frame)
+             base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
+             curr_frame += frames_to_skip
+         video.release()
+
+         audio_path = f"{os.path.splitext(video_path)[0]}.mp3"
          try:
+             clip = VideoFileClip(video_path)
+             if clip.audio:
+                 clip.audio.write_audiofile(audio_path, bitrate="32k")
              else:
+                 audio_path = None
+         except Exception:
+             audio_path = None

+         return base64Frames, audio_path
+
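Sampling arithmetic, for reference: at the default `seconds_per_frame=2` on a 30 fps source, `frames_to_skip = int(30 * 2) = 60`, so a one-minute clip (1800 frames) is reduced to roughly 30 base64-encoded frames before being sent to the model.
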
+ class RAGManager:
+     """Manages Retrieval-Augmented Generation processes."""
+     def __init__(self, openai_client):
+         # 📚 Building a library and then acing the open-book test.
+         self.client = openai_client
+
+     def create_vector_store(self, name):
+         # 🗄️ Creating a shiny new digital filing cabinet.
+         vector_store = self.client.vector_stores.create(name=name)
+         return vector_store.id
+
+     # ... Other RAG methods would go here ...
+
+ class ExternalAPIHandler:
+     """Handles calls to external APIs like ArXiv."""
+     def search_arxiv(self, query):
+         # 👨‍🔬 Pestering the digital librarians at ArXiv for juicy papers.
+         client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
+         response = client.predict(
+             message=query,
+             llm_results_use=5,
+             database_choice="Semantic Search",
+             llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2",
+             api_name="/update_with_rag_md"
+         )
+         return response[0] + response[1]

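Sketch only, not part of this commit: the deleted upload helper from the old side maps naturally onto this class, assuming the same `client.files` / `client.vector_stores` calls used above:

    def add_pdf(self, vector_store_id, file_path):
        # Upload the PDF, then attach it to the store (mirrors the removed upload_pdf_files_to_vector_store)
        with open(file_path, "rb") as f:
            file_response = self.client.files.create(file=f, purpose="assistants")
        self.client.vector_stores.files.create(vector_store_id=vector_store_id, file_id=file_response.id)
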
+ # --- Streamlit UI Class ---
+
+ class StreamlitUI:
+     """Main class to build and run the Streamlit user interface."""
+
+     def __init__(self):
+         # 🎨 I'm the artist painting your beautiful web app.
+         self.setup_page()
+         self.initialize_state()
+
+         # Initialize helper classes
+         self.file_handler = FileHandler(should_save=st.session_state.should_save)
+         self.openai_processor = OpenAIProcessor(
+             api_key=os.getenv('OPENAI_API_KEY'),
+             org_id=os.getenv('OPENAI_ORG_ID'),
+             model=st.session_state.openai_model
+         )
+         self.media_processor = MediaProcessor()
+         self.external_api_handler = ExternalAPIHandler()
+         # Initialize performance tracker
+         global performance_tracker
+         performance_tracker = PerformanceTracker()
+
+     def setup_page(self):
+         # ✨ Setting the stage for our amazing app.
+         st.set_page_config(
+             page_title="🔬🧠ScienceBrain.AI",
+             page_icon=Image.open("icons.ico"),
+             layout="wide",
+             initial_sidebar_state="auto",
+             menu_items={
+                 'Get Help': 'https://huggingface.co/awacke1',
+                 'Report a bug': 'https://huggingface.co/spaces/awacke1',
+                 'About': "🔬🧠ScienceBrain.AI"
+             }
+         )
+
+     def initialize_state(self):
+         # 📝 Keeping notes so we don't forget stuff between clicks.
+         if "openai_model" not in st.session_state:
+             st.session_state.openai_model = "gpt-4o-2024-05-13"
+         if "messages" not in st.session_state:
+             st.session_state.messages = []
+         if "should_save" not in st.session_state:
+             st.session_state.should_save = True  # default before the sidebar checkbox first renders
+
+     def display_sidebar(self):
+         # 👈 Everything you see on the left? That's me.
+         st.sidebar.title("Configuration & Files")
+         st.session_state.should_save = st.sidebar.checkbox("💾 Save Session", value=True)
+         if st.sidebar.button("🗑️ Clear Chat History"):
+             st.session_state.messages = []
+             st.rerun()
+
+         st.sidebar.markdown("---")
+         # File management logic here...
+
+     def display_main_interface(self):
+         # 🖥️ This is the main event, the star of the show!
+         st.markdown("##### GPT-4o Omni: Text, Audio, Image, Video & RAG")
+
+         model_options = ["gpt-4o-2024-05-13", "gpt-3.5-turbo"]
+         st.session_state.openai_model = st.selectbox(
+             "Select OpenAI Model", model_options, index=model_options.index(st.session_state.openai_model)
+         )
+
+         input_type = st.selectbox("Select Input Type", ("Text", "Image", "Audio", "Video", "ArXiv Search", "RAG PDF Gallery"))
+
+         if input_type == "Text":
+             self.handle_text_input()
+         elif input_type == "Image":
+             self.handle_image_input()
+         elif input_type == "Video":
+             self.handle_video_input()
+         elif input_type == "ArXiv Search":
+             self.handle_arxiv_search()
+         # ... other handlers

+     def handle_text_input(self):
+         # 💬 You talk, I listen (and then make the AI talk back).
+         prompt = st.text_input("Enter your text prompt:", key="text_prompt")
+         if st.button("Submit Text", key="submit_text"):
+             if prompt:
+                 st.session_state.messages.append({"role": "user", "content": prompt})
+                 with st.chat_message("user"):
+                     st.markdown(prompt)
+
+                 with st.chat_message("assistant"):
+                     with st.spinner("Thinking..."):
+                         # Use the performance tracker decorator
+                         @performance_tracker.track(lambda: self.openai_processor.model)
+                         def run_completion():
+                             return self.openai_processor.execute_text_completion(st.session_state.messages)
+
+                         response = run_completion()
+                         st.markdown(response)
+                         st.session_state.messages.append({"role": "assistant", "content": response})
+                         filename = self.file_handler.generate_filename(prompt, "md")
+                         self.file_handler.save_file(response, filename, prompt=prompt)
                  st.rerun()
+
+     def handle_image_input(self):
+         # 📸 Say cheese! Let's see what the AI thinks of your photo.
+         prompt = st.text_input("Enter a prompt for the image:", value="Describe this image in detail.")
+         uploaded_image = st.file_uploader("Upload an image:", type=["png", "jpg", "jpeg"])
+
+         if st.button("Submit Image") and uploaded_image and prompt:
+             with st.chat_message("user"):
+                 st.image(uploaded_image, width=250)
+                 st.markdown(prompt)
+
+             with st.chat_message("assistant"):
+                 with st.spinner("Analyzing image..."):
+                     image_bytes = uploaded_image.getvalue()
+
+                     @performance_tracker.track(lambda: self.openai_processor.model)
+                     def run_image_analysis():
+                         return self.openai_processor.execute_image_completion(prompt, image_bytes)
+
+                     response = run_image_analysis()
+                     st.markdown(response)
+                     filename = self.file_handler.generate_filename(prompt, "md", original_name=uploaded_image.name)
+                     self.file_handler.save_file(response, filename, prompt=prompt)
              st.rerun()
+
+     def handle_video_input(self):
+         # 📼 Roll the tape! Time to process that video.
+         prompt = st.text_input("Enter a prompt for the video:", value="Summarize the key events in this video.")
+         uploaded_video = st.file_uploader("Upload a video:", type=["mp4", "mov"])
+
+         if st.button("Submit Video") and uploaded_video and prompt:
+             with st.chat_message("user"):
+                 st.markdown(f"Analyzing video: `{uploaded_video.name}` with prompt: `{prompt}`")
+
+             with st.chat_message("assistant"):
+                 with st.spinner("Processing video... this may take a moment."):
+                     video_path = self.file_handler.save_uploaded_file(uploaded_video)
+
+                     @performance_tracker.track(lambda: self.openai_processor.model)
+                     def run_video_analysis():
+                         frames, audio_path = self.media_processor.extract_video_components(video_path)
+                         transcript = "No audio found."
+                         if audio_path:
+                             with open(audio_path, "rb") as af:
+                                 transcript = self.openai_processor.transcribe_audio(af.read())
+                         return self.openai_processor.execute_video_completion(frames, transcript)
+
+                     response = run_video_analysis()
+                     st.markdown(response)
+                     filename = self.file_handler.generate_filename(prompt, "md", original_name=uploaded_video.name)
+                     self.file_handler.save_file(response, filename, prompt=prompt)
              st.rerun()

+     def handle_arxiv_search(self):
+         # 🔬 Diving deep into the archives of science!
+         query = st.text_input("Search ArXiv for scholarly articles:")
+         if st.button("Search ArXiv") and query:
+             with st.chat_message("user"):
+                 st.markdown(f"ArXiv Search: `{query}`")
+             with st.chat_message("assistant"):
+                 with st.spinner("Searching ArXiv..."):
+
+                     @performance_tracker.track("Mistral-7B-Instruct-v0.2")  # Model is fixed for this endpoint
+                     def run_arxiv_search():
+                         return self.external_api_handler.search_arxiv(query)
+
+                     response = run_arxiv_search()
+                     st.markdown(response)
+                     st.session_state.messages.append({"role": "assistant", "content": response})
+                     filename = self.file_handler.generate_filename(query, "md")
+                     self.file_handler.save_file(response, filename, prompt=query)
              st.rerun()

+     def display_chat_history(self):
+         # 📜 Let's review what we've talked about so far.
+         for message in st.session_state.messages:
+             with st.chat_message(message["role"]):
+                 st.markdown(message["content"])
+
+     def run(self):
+         # ▶️ Lights, camera, action! Let's get this show on the road.
+         self.display_sidebar()
+         self.display_chat_history()
+         self.display_main_interface()
+
+ # --- Main Execution ---
+ if __name__ == "__main__":
+     app = StreamlitUI()
+     app.run()
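Usage note: under `streamlit run app.py` the script executes with `__name__ == "__main__"`, so the guard constructs and runs the UI on every rerun, and `load_dotenv()` near the top of the file lets `OPENAI_API_KEY` (and optionally `OPENAI_ORG_ID`) come from a local `.env` rather than the shell environment.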