awacke1 commited on
Commit
6e8c55b
·
verified ·
1 Parent(s): e04becd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +388 -561
app.py CHANGED
@@ -1,14 +1,9 @@
1
  import aiofiles
2
  import asyncio
3
  import base64
4
- import cv2
5
  import fitz
6
  import glob
7
- import io
8
- import json
9
  import logging
10
- import math
11
- import mistune
12
  import os
13
  import pandas as pd
14
  import pytz
@@ -17,34 +12,26 @@ import re
17
  import requests
18
  import shutil
19
  import streamlit as st
20
- import streamlit.components.v1 as components
21
- import sys
22
  import time
23
  import torch
24
  import zipfile
25
 
26
- from audio_recorder_streamlit import audio_recorder
27
- from bs4 import BeautifulSoup
28
- from collections import deque
29
- from contextlib import redirect_stdout
30
  from dataclasses import dataclass
31
  from datetime import datetime
32
  from diffusers import StableDiffusionPipeline
33
- from dotenv import load_dotenv
34
- from gradio_client import Client, handle_file
35
- from huggingface_hub import InferenceClient
36
  from io import BytesIO
37
- from moviepy import VideoFileClip
38
  from openai import OpenAI
39
  from PIL import Image
40
- from PyPDF2 import PdfReader
41
  from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
42
  from typing import Optional
43
- from urllib.parse import quote
44
- from xml.etree import ElementTree as ET
45
 
46
- client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
 
 
 
 
47
 
 
48
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
49
  logger = logging.getLogger(__name__)
50
  log_records = []
@@ -53,62 +40,37 @@ class LogCaptureHandler(logging.Handler):
53
  log_records.append(record)
54
  logger.addHandler(LogCaptureHandler())
55
 
 
56
  st.set_page_config(
57
- page_title="AI Multimodal Titan 🚀",
58
  page_icon="🤖",
59
  layout="wide",
60
  initial_sidebar_state="expanded",
61
  menu_items={
62
  'Get Help': 'https://huggingface.co/awacke1',
63
  'Report a Bug': 'https://huggingface.co/spaces/awacke1',
64
- 'About': "AI Multimodal Titan: PDFs, OCR, Image Gen, Audio/Video, Code Execution, and More! 🌌"
65
  }
66
  )
67
 
68
- for key in ['history', 'messages', 'processing', 'asset_checkboxes', 'downloaded_pdfs', 'unique_counter', 'search_queries']:
69
- st.session_state.setdefault(key, [] if key in ['history', 'messages', 'search_queries'] else {} if key in ['asset_checkboxes', 'downloaded_pdfs', 'processing'] else 0 if key == 'unique_counter' else None)
70
- st.session_state.setdefault('builder', None)
71
- st.session_state.setdefault('model_loaded', False)
 
 
 
 
72
  st.session_state.setdefault('selected_model_type', "Causal LM")
73
  st.session_state.setdefault('selected_model', "None")
74
- st.session_state.setdefault('gallery_size', 2)
75
- st.session_state.setdefault('asset_gallery_container', st.sidebar.empty())
76
  st.session_state.setdefault('cam0_file', None)
77
  st.session_state.setdefault('cam1_file', None)
78
- st.session_state.setdefault('openai_model', "gpt-4o-2024-05-13")
79
-
80
- def get_gpu_info():
81
- if torch.cuda.is_available():
82
- gpu_name = torch.cuda.get_device_name(0)
83
- total_memory = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
84
- reserved_memory = torch.cuda.memory_reserved(0) / (1024 ** 3)
85
- allocated_memory = torch.cuda.memory_allocated(0) / (1024 ** 3)
86
- free_memory = total_memory - allocated_memory
87
- utilization = torch.cuda.utilization(0)
88
- return {
89
- "GPU Name": gpu_name,
90
- "Total Memory (GB)": f"{total_memory:.2f}",
91
- "Reserved Memory (GB)": f"{reserved_memory:.2f}",
92
- "Allocated Memory (GB)": f"{allocated_memory:.2f}",
93
- "Free Memory (GB)": f"{free_memory:.2f}",
94
- "Utilization (%)": utilization
95
- }
96
- else:
97
- return {"Status": "No GPU detected"}
98
 
99
- def display_gpu_info():
100
- gpu_info = get_gpu_info()
101
- st.sidebar.subheader("GPU Status 📊")
102
- if "Status" in gpu_info and gpu_info["Status"] == "No GPU detected":
103
- st.sidebar.warning("No GPU detected. Running on CPU.")
104
- else:
105
- for key, value in gpu_info.items():
106
- st.sidebar.write(f"{key}: {value}")
107
- memory_usage_percent = (float(gpu_info["Allocated Memory (GB)"]) / float(gpu_info["Total Memory (GB)"])) * 100
108
- st.sidebar.progress(min(memory_usage_percent / 100, 1.0))
109
- st.sidebar.caption(f"Memory Usage: {memory_usage_percent:.1f}%")
110
 
111
- @dataclass
112
  class ModelConfig:
113
  name: str
114
  base_model: str
@@ -119,7 +81,7 @@ class ModelConfig:
119
  def model_path(self):
120
  return f"models/{self.name}"
121
 
122
- @dataclass
123
  class DiffusionConfig:
124
  name: str
125
  base_model: str
@@ -137,8 +99,8 @@ class ModelBuilder:
137
  self.jokes = [
138
  "Why did the AI go to therapy? Too many layers to unpack! 😂",
139
  "Training complete! Time for a binary coffee break. ☕",
140
- "I told my neural network a joke; it couldnt stop dropping bits! 🤖",
141
- "I asked the AI for a pun, and it said, 'Im punning on parallel processing!' 😄",
142
  "Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
143
  ]
144
  def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
@@ -149,9 +111,8 @@ class ModelBuilder:
149
  self.tokenizer.pad_token = self.tokenizer.eos_token
150
  if config:
151
  self.config = config
152
- device = "cuda" if torch.cuda.is_available() else "cpu"
153
- self.model.to(device)
154
- st.success(f"Model loaded on {device}! 🎉 {random.choice(self.jokes)}")
155
  return self
156
  def save_model(self, path: str):
157
  with st.spinner("Saving model... 💾"):
@@ -179,32 +140,24 @@ class DiffusionBuilder:
179
  def generate(self, prompt: str):
180
  return self.pipeline(prompt, num_inference_steps=20).images[0]
181
 
182
- def generate_filename(sequence, ext="png", prompt=None):
183
- central = pytz.timezone('US/Central')
184
- safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
185
- if prompt:
186
- safe_prompt = re.sub(r'[<>:"/\\|?*\n]', '_', prompt)[:240]
187
- return f"{safe_date_time}_{safe_prompt}.{ext}"
188
  return f"{sequence}_{time.strftime('%d%m%Y%H%M%S')}.{ext}"
189
 
190
  def pdf_url_to_filename(url):
191
  return re.sub(r'[<>:"/\\|?*]', '_', url) + ".pdf"
192
 
193
  def get_download_link(file_path, mime_type="application/pdf", label="Download"):
194
- with open(file_path, "rb") as f:
195
- data = base64.b64encode(f.read()).decode()
196
- return f'<a href="data:{mime_type};base64,{data}" download="{os.path.basename(file_path)}">{label}</a>'
197
 
198
  def zip_directory(directory_path, zip_path):
199
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
200
- for root, _, files in os.walk(directory_path):
201
- for file in files:
202
- zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), os.path.dirname(directory_path)))
203
 
204
  def get_model_files(model_type="causal_lm"):
205
  return [d for d in glob.glob("models/*" if model_type == "causal_lm" else "diffusion_models/*") if os.path.isdir(d)] or ["None"]
206
 
207
- def get_gallery_files(file_types=["png", "pdf", "md", "wav", "mp4"]):
208
  return sorted(list({f for ext in file_types for f in glob.glob(f"*.{ext}")}))
209
 
210
  def get_pdf_files():
@@ -217,11 +170,15 @@ def download_pdf(url, output_path):
217
  with open(output_path, "wb") as f:
218
  for chunk in response.iter_content(chunk_size=8192):
219
  f.write(chunk)
220
- return True
 
 
221
  except requests.RequestException as e:
222
  logger.error(f"Failed to download {url}: {e}")
223
- return False
 
224
 
 
225
  async def process_pdf_snapshot(pdf_path, mode="single"):
226
  start_time = time.time()
227
  status = st.empty()
@@ -236,21 +193,10 @@ async def process_pdf_snapshot(pdf_path, mode="single"):
236
  pix.save(output_file)
237
  output_files.append(output_file)
238
  elif mode == "twopage":
239
- if len(doc) >= 2:
240
- pix1 = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
241
- pix2 = doc[1].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
242
- img1 = Image.frombytes("RGB", [pix1.width, pix1.height], pix1.samples)
243
- img2 = Image.frombytes("RGB", [pix2.width, pix2.height], pix2.samples)
244
- combined_img = Image.new("RGB", (pix1.width + pix2.width, max(pix1.height, pix2.height)))
245
- combined_img.paste(img1, (0, 0))
246
- combined_img.paste(img2, (pix1.width, 0))
247
- output_file = generate_filename("twopage", "png")
248
- combined_img.save(output_file)
249
- output_files.append(output_file)
250
- else:
251
- page = doc[0]
252
  pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
253
- output_file = generate_filename("single", "png")
254
  pix.save(output_file)
255
  output_files.append(output_file)
256
  elif mode == "allpages":
@@ -268,46 +214,57 @@ async def process_pdf_snapshot(pdf_path, mode="single"):
268
  status.error(f"Failed to process PDF: {str(e)}")
269
  return []
270
 
 
271
  async def process_ocr(image, output_file):
272
  start_time = time.time()
273
  status = st.empty()
274
  status.text("Processing GOT-OCR2_0... (0s)")
275
  tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
276
- model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32)
277
- device = "cuda" if torch.cuda.is_available() else "cpu"
278
- model.to(device).eval()
279
- temp_file = generate_filename("temp", "png")
280
  image.save(temp_file)
281
  result = model.chat(tokenizer, temp_file, ocr_type='ocr')
282
  os.remove(temp_file)
283
  elapsed = int(time.time() - start_time)
284
- status.text(f"GOT-OCR2_0 completed in {elapsed}s on {device}!")
285
  async with aiofiles.open(output_file, "w") as f:
286
  await f.write(result)
287
  return result
288
 
 
289
  async def process_image_gen(prompt, output_file):
290
  start_time = time.time()
291
  status = st.empty()
292
  status.text("Processing Image Gen... (0s)")
293
- pipeline = (st.session_state['builder'].pipeline if st.session_state.get('builder') and isinstance(st.session_state['builder'], DiffusionBuilder) and st.session_state['builder'].pipeline else StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu"))
 
 
 
294
  gen_image = pipeline(prompt, num_inference_steps=20).images[0]
295
  elapsed = int(time.time() - start_time)
296
  status.text(f"Image Gen completed in {elapsed}s!")
297
  gen_image.save(output_file)
298
  return gen_image
299
 
 
300
  def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto"):
301
  buffered = BytesIO()
302
  image.save(buffered, format="PNG")
303
  img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
304
- messages = [{"role": "user", "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": detail}}]}]
 
 
 
 
 
 
305
  try:
306
  response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
307
  return response.choices[0].message.content
308
  except Exception as e:
309
  return f"Error processing image with GPT: {str(e)}"
310
 
 
311
  def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
312
  messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
313
  try:
@@ -316,540 +273,410 @@ def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
316
  except Exception as e:
317
  return f"Error processing text with GPT: {str(e)}"
318
 
319
- def process_text(text_input):
320
- if text_input:
321
- st.session_state.messages.append({"role": "user", "content": text_input})
322
- with st.chat_message("user"):
323
- st.markdown(text_input)
324
- with st.chat_message("assistant"):
325
- completion = client.chat.completions.create(model=st.session_state["openai_model"], messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages], stream=False)
326
- return_text = completion.choices[0].message.content
327
- st.write("Assistant: " + return_text)
328
- filename = generate_filename(text_input, "md")
329
- with open(filename, "w", encoding="utf-8") as f:
330
- f.write(text_input + "\n\n" + return_text)
331
- st.session_state.messages.append({"role": "assistant", "content": return_text})
332
- return return_text
333
-
334
- def process_audio(audio_input, text_input=''):
335
- if isinstance(audio_input, str):
336
- with open(audio_input, "rb") as file:
337
- audio_input = file.read()
338
- transcription = client.audio.transcriptions.create(model="whisper-1", file=audio_input)
339
- st.session_state.messages.append({"role": "user", "content": transcription.text})
340
- with st.chat_message("assistant"):
341
- st.markdown(transcription.text)
342
- SpeechSynthesis(transcription.text)
343
- filename = generate_filename(transcription.text, "wav")
344
- create_audio_file(filename, audio_input, True)
345
- filename = generate_filename(transcription.text, "md")
346
- with open(filename, "w", encoding="utf-8") as f:
347
- f.write(transcription.text + "\n\n" + transcription.text)
348
- return transcription.text
349
-
350
- def process_video(video_path, user_prompt):
351
- base64Frames, audio_path = process_video_frames(video_path)
352
- with open(video_path, "rb") as file:
353
- transcription = client.audio.transcriptions.create(model="whisper-1", file=file)
354
- response = client.chat.completions.create(
355
- model=st.session_state["openai_model"],
356
- messages=[
357
- {"role": "system", "content": "You are generating a video summary. Create a summary of the provided video and its transcript. Respond in Markdown"},
358
- {"role": "user", "content": [
359
- "These are the frames from the video.",
360
- *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames),
361
- {"type": "text", "text": f"The audio transcription is: {transcription.text}\n\n{user_prompt}"}
362
- ]}
363
- ],
364
- temperature=0,
365
- )
366
- video_response = response.choices[0].message.content
367
- filename_md = generate_filename(video_path + '- ' + video_response, "md")
368
- with open(filename_md, "w", encoding="utf-8") as f:
369
- f.write(video_response)
370
- return video_response
371
-
372
- def process_video_frames(video_path, seconds_per_frame=2):
373
- base64Frames = []
374
- base_video_path, _ = os.path.splitext(video_path)
375
- base_video_path = video_path
376
- video = cv2.VideoCapture(video_path)
377
- total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
378
- fps = video.get(cv2.CAP_PROP_FPS)
379
- frames_to_skip = int(fps * seconds_per_frame)
380
- curr_frame = 0
381
- while curr_frame < total_frames - 1:
382
- video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
383
- success, frame = video.read()
384
- if not success:
385
- break
386
- _, buffer = cv2.imencode(".jpg", frame)
387
- base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
388
- curr_frame += frames_to_skip
389
- video.release()
390
- audio_path = f"{base_video_path}.mp3"
391
- try:
392
- clip = VideoFileClip(video_path)
393
- clip.audio.write_audiofile(audio_path, bitrate="32k")
394
- clip.audio.close()
395
- clip.close()
396
- except:
397
- logger.info("No audio track found in video.")
398
- return base64Frames, audio_path
399
-
400
- def execute_code(code):
401
- buffer = io.StringIO()
402
- try:
403
- with redirect_stdout(buffer):
404
- exec(code, {}, {})
405
- return buffer.getvalue(), None
406
- except Exception as e:
407
- return None, str(e)
408
- finally:
409
- buffer.close()
410
-
411
- def extract_python_code(markdown_text):
412
- pattern = r"```python\s*(.*?)\s*```"
413
- matches = re.findall(pattern, markdown_text, re.DOTALL)
414
- return matches
415
-
416
- def SpeechSynthesis(result):
417
- documentHTML5 = f'''
418
- <!DOCTYPE html>
419
- <html>
420
- <head>
421
- <title>Read It Aloud</title>
422
- <script type="text/javascript">
423
- function readAloud() {{
424
- const text = document.getElementById("textArea").value;
425
- const speech = new SpeechSynthesisUtterance(text);
426
- window.speechSynthesis.speak(speech);
427
- }}
428
- </script>
429
- </head>
430
- <body>
431
- <h1>🔊 Read It Aloud</h1>
432
- <textarea id="textArea" rows="10" cols="80">{result}</textarea>
433
- <br>
434
- <button onclick="readAloud()">🔊 Read Aloud</button>
435
- </body>
436
- </html>
437
- '''
438
- components.html(documentHTML5, width=1280, height=300)
439
-
440
- def search_arxiv(query):
441
- start_time = time.strftime("%Y-%m-%d %H:%M:%S")
442
- client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
443
- response1 = client.predict(message="Hello!!", llm_results_use=5, database_choice="Semantic Search", llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2", api_name="/update_with_rag_md")
444
- Question = f'### 🔎 {query}\r\n'
445
- References = response1[0]
446
- References2 = response1[1]
447
- filename = generate_filename(query, "md")
448
- with open(filename, "w", encoding="utf-8") as f:
449
- f.write(Question + References + References2)
450
- st.session_state.messages.append({"role": "assistant", "content": References + References2})
451
- response2 = client.predict(query, "mistralai/Mixtral-8x7B-Instruct-v0.1", True, api_name="/ask_llm")
452
- if len(response2) > 10:
453
- Answer = response2
454
- SpeechSynthesis(Answer)
455
- results = Question + '\r\n' + Answer + '\r\n' + References + '\r\n' + References2
456
- return results
457
- return References + References2
458
-
459
- roleplaying_glossary = {
460
- "🤖 AI Concepts": {
461
- "MoE (Mixture of Experts) 🧠": [
462
- "As a leading AI health researcher, provide an overview of MoE, MAS, memory, and mirroring in healthcare applications.",
463
- "Explain how MoE and MAS can be leveraged to create AGI and AMI systems for healthcare, as an AI architect."
464
- ],
465
- "Multi Agent Systems (MAS) 🤝": [
466
- "As a renowned MAS researcher, describe the key characteristics of distributed, autonomous, and cooperative MAS.",
467
- "Discuss how MAS is applied in robotics, simulations, and decentralized problem-solving, as an AI engineer."
468
- ]
469
- }
470
- }
471
 
472
- def display_glossary_grid(roleplaying_glossary):
473
- search_urls = {
474
- "🚀🌌ArXiv": lambda k: f"/?q={quote(k)}",
475
- "📖": lambda k: f"https://en.wikipedia.org/wiki/{quote(k)}",
476
- "🔍": lambda k: f"https://www.google.com/search?q={quote(k)}"
477
- }
478
- for category, details in roleplaying_glossary.items():
479
- st.write(f"### {category}")
480
- cols = st.columns(len(details))
481
- for idx, (game, terms) in enumerate(details.items()):
482
- with cols[idx]:
483
- st.markdown(f"#### {game}")
484
- for term in terms:
485
- links_md = ' '.join([f"[{emoji}]({url(term)})" for emoji, url in search_urls.items()])
486
- st.markdown(f"**{term}** <small>{links_md}</small>", unsafe_allow_html=True)
487
-
488
- def create_zip_of_files(files):
489
- zip_name = "assets.zip"
490
- with zipfile.ZipFile(zip_name, 'w') as zipf:
491
- for file in files:
492
- zipf.write(file)
493
- return zip_name
494
-
495
- def get_zip_download_link(zip_file):
496
- with open(zip_file, 'rb') as f:
497
- data = f.read()
498
- b64 = base64.b64encode(data).decode()
499
- return f'<a href="data:application/zip;base64,{b64}" download="{zip_file}">Download All</a>'
500
-
501
- def FileSidebar():
502
- all_files = glob.glob("*.md")
503
- all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10]
504
- all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)
505
- Files1, Files2 = st.sidebar.columns(2)
506
- with Files1:
507
- if st.button("🗑 Delete All"):
508
- for file in all_files:
509
- os.remove(file)
510
- st.rerun()
511
- with Files2:
512
- if st.button("⬇️ Download"):
513
- zip_file = create_zip_of_files(all_files)
514
- st.sidebar.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)
515
- file_contents = ''
516
- file_name = ''
517
- next_action = ''
518
- for file in all_files:
519
- col1, col2, col3, col4, col5 = st.sidebar.columns([1, 6, 1, 1, 1])
520
- with col1:
521
- if st.button("🌐", key=f"md_{file}"):
522
- with open(file, "r", encoding='utf-8') as f:
523
- file_contents = f.read()
524
- file_name = file
525
- next_action = 'md'
526
- st.session_state['next_action'] = next_action
527
- with col2:
528
- st.markdown(get_download_link(file, "text/markdown", file))
529
- with col3:
530
- if st.button("📂", key=f"open_{file}"):
531
- with open(file, "r", encoding='utf-8') as f:
532
- file_contents = f.read()
533
- file_name = file
534
- next_action = 'open'
535
- st.session_state['lastfilename'] = file
536
- st.session_state['filename'] = file
537
- st.session_state['filetext'] = file_contents
538
- st.session_state['next_action'] = next_action
539
- with col4:
540
- if st.button("▶️", key=f"read_{file}"):
541
- with open(file, "r", encoding='utf-8') as f:
542
- file_contents = f.read()
543
- file_name = file
544
- next_action = 'search'
545
- st.session_state['next_action'] = next_action
546
- with col5:
547
- if st.button("🗑", key=f"delete_{file}"):
548
- os.remove(file)
549
- file_name = file
550
- st.rerun()
551
- next_action = 'delete'
552
- st.session_state['next_action'] = next_action
553
- if len(file_contents) > 0:
554
- if next_action == 'open':
555
- if 'lastfilename' not in st.session_state:
556
- st.session_state['lastfilename'] = ''
557
- if 'filename' not in st.session_state:
558
- st.session_state['filename'] = ''
559
- if 'filetext' not in st.session_state:
560
- st.session_state['filetext'] = ''
561
- open1, open2 = st.columns([.8, .2])
562
- with open1:
563
- file_name_input = st.text_input(key='file_name_input', label="File Name:", value=file_name)
564
- file_content_area = st.text_area(key='file_content_area', label="File Contents:", value=file_contents, height=300)
565
- if file_name_input != file_name:
566
- os.rename(file_name, file_name_input)
567
- st.markdown(f'Renamed file {file_name} to {file_name_input}.')
568
- if file_content_area != file_contents:
569
- with open(file_name_input, 'w', encoding='utf-8') as f:
570
- f.write(file_content_area)
571
- st.markdown(f'Saved {file_name_input}.')
572
- if next_action == 'search':
573
- st.text_area("File Contents:", file_contents, height=500)
574
- filesearch = "Create a streamlit python user app with full code listing: " + file_contents
575
- st.markdown(filesearch)
576
- if st.button(key='rerun', label='🔍Re-Code'):
577
- result = search_arxiv(filesearch)
578
- st.markdown(result)
579
- if next_action == 'md':
580
- st.markdown(file_contents)
581
- SpeechSynthesis(file_contents)
582
-
583
- FileSidebar()
584
-
585
- tabs = st.tabs(["Camera 📷", "Download 📥", "OCR 🔍", "Build 🌱", "Image Gen 🎨", "PDF 📄", "Image 🖼️", "Audio 🎵", "Video 🎥", "Code 🧑‍💻", "Gallery 📚", "Search 🔎"])
586
- (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf, tab_image, tab_audio, tab_video, tab_code, tab_gallery, tab_search) = tabs
587
 
 
588
  with tab_camera:
589
  st.header("Camera Snap 📷")
 
590
  cols = st.columns(2)
591
- for i, cam_key in enumerate(["cam0", "cam1"]):
592
- with cols[i]:
593
- cam_img = st.camera_input(f"Take a picture - Cam {i}", key=cam_key)
594
- if cam_img:
595
- filename = generate_filename(f"cam{i}", "png")
596
- if st.session_state[f'cam{i}_file'] and os.path.exists(st.session_state[f'cam{i}_file']):
597
- os.remove(st.session_state[f'cam{i}_file'])
598
- with open(filename, "wb") as f:
599
- f.write(cam_img.getvalue())
600
- st.session_state[f'cam{i}_file'] = filename
601
- st.session_state['history'].append(f"Snapshot from Cam {i}: {filename}")
602
- st.image(Image.open(filename), caption=f"Camera {i}", use_container_width=True)
603
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
  with tab_download:
605
  st.header("Download PDFs 📥")
606
  if st.button("Examples 📚"):
607
- example_urls = ["https://arxiv.org/pdf/2308.03892", "https://arxiv.org/pdf/1912.01703"]
 
 
 
 
 
 
 
 
 
 
 
 
 
608
  st.session_state['pdf_urls'] = "\n".join(example_urls)
609
  url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200)
610
  if st.button("Robo-Download 🤖"):
611
  urls = url_input.strip().split("\n")
612
  progress_bar = st.progress(0)
 
 
 
613
  for idx, url in enumerate(urls):
614
  if url:
615
  output_path = pdf_url_to_filename(url)
616
- if download_pdf(url, output_path):
 
 
 
 
 
 
 
 
 
 
 
617
  st.session_state['downloaded_pdfs'][url] = output_path
618
- st.session_state['history'].append(f"Downloaded PDF: {output_path}")
619
- st.session_state['asset_checkboxes'][output_path] = True
620
- progress_bar.progress((idx + 1) / len(urls))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
621
 
 
622
  with tab_ocr:
623
  st.header("Test OCR 🔍")
624
  all_files = get_gallery_files()
625
  if all_files:
626
- ocr_files = [f for f in all_files if f.endswith(('.png', '.pdf'))]
627
  if st.button("OCR All Assets 🚀"):
628
  full_text = "# OCR Results\n\n"
629
- for file in ocr_files:
630
  if file.endswith('.png'):
631
  image = Image.open(file)
632
  else:
633
- try:
634
- doc = fitz.open(file)
635
- pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
636
- image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
637
- doc.close()
638
- except Exception as e:
639
- st.error(f"Failed to process {file}: {str(e)}")
640
- continue
641
  output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
642
  result = asyncio.run(process_ocr(image, output_file))
643
  full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
644
- st.session_state['history'].append(f"OCR Test: {file} -> {output_file}")
645
- md_output_file = generate_filename("full_ocr", "md")
 
646
  with open(md_output_file, "w") as f:
647
  f.write(full_text)
648
  st.success(f"Full OCR saved to {md_output_file}")
649
  st.markdown(get_download_link(md_output_file, "text/markdown", "Download Full OCR Markdown"), unsafe_allow_html=True)
650
- selected_file = st.selectbox("Select Image or PDF", ocr_files, key="ocr_select")
651
  if selected_file:
652
  if selected_file.endswith('.png'):
653
  image = Image.open(selected_file)
654
  else:
655
- try:
656
- doc = fitz.open(selected_file)
657
- pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
659
- doc.close()
660
- except Exception as e:
661
- st.error(f"Cannot process {selected_file}: {str(e)}. Please select a PNG or PDF file.")
662
- image = None
663
- if image:
664
- st.image(image, caption="Input Image", use_container_width=True)
665
- if st.button("Run OCR 🚀", key="ocr_run"):
666
- output_file = generate_filename("ocr_output", "txt")
667
  result = asyncio.run(process_ocr(image, output_file))
668
- st.text_area("OCR Result", result, height=200)
669
- st.session_state['history'].append(f"OCR Test: {selected_file} -> {output_file}")
 
 
 
 
 
 
670
  else:
671
  st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
672
 
 
673
  with tab_build:
674
  st.header("Build Titan 🌱")
675
  model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
676
- base_model = st.selectbox("Select Model", ["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"] if model_type == "Causal LM" else ["OFA-Sys/small-stable-diffusion-v0", "stabilityai/stable-diffusion-2-base"])
 
 
 
 
677
  model_name = st.text_input("Model Name", f"tiny-titan-{int(time.time())}")
 
678
  if st.button("Download Model ⬇️"):
679
- config = (ModelConfig if model_type == "Causal LM" else DiffusionConfig)(name=model_name, base_model=base_model, size="small")
 
 
680
  builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
681
  builder.load_model(base_model, config)
682
  builder.save_model(config.model_path)
683
  st.session_state['builder'] = builder
684
  st.session_state['model_loaded'] = True
685
-
 
 
 
 
 
 
 
686
  with tab_imggen:
687
  st.header("Test Image Gen 🎨")
688
- prompt = st.text_area("Prompt", "Generate a futuristic cityscape")
689
- if st.button("Run Image Gen 🚀"):
690
- output_file = generate_filename("gen_output", "png", prompt=prompt)
691
- result = asyncio.run(process_image_gen(prompt, output_file))
692
- st.image(result, caption="Generated Image", use_container_width=True)
693
- st.session_state['history'].append(f"Image Gen Test: {prompt} -> {output_file}")
694
-
695
- with tab_pdf:
696
- st.header("PDF Process 📄")
697
- uploaded_pdfs = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
698
- view_mode = st.selectbox("View Mode", ["Single Page", "Two Pages"], key="pdf_view_mode")
699
- if st.button("Process PDFs"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
700
  for pdf_file in uploaded_pdfs:
701
- pdf_path = generate_filename(pdf_file.name, "pdf")
702
- with open(pdf_path, "wb") as f:
703
- f.write(pdf_file.read())
704
- snapshots = asyncio.run(process_pdf_snapshot(pdf_path, "twopage" if view_mode == "Two Pages" else "single"))
705
- for snapshot in snapshots:
706
- st.image(Image.open(snapshot), caption=snapshot)
707
- text = process_image_with_prompt(Image.open(snapshot), "Extract the electronic text from image")
708
- st.text_area(f"Extracted Text from {snapshot}", text)
709
- code_prompt = f"Generate Python code based on this text:\n\n{text}"
710
- code = process_text_with_prompt(text, code_prompt)
711
- st.code(code, language="python")
712
- if st.button(f"Execute Code from {snapshot}"):
713
- output, error = execute_code(code)
714
- if error:
715
- st.error(f"Error: {error}")
716
- else:
717
- st.success(f"Output: {output or 'No output'}")
718
-
719
- with tab_image:
720
- st.header("Image Process 🖼️")
721
- uploaded_images = st.file_uploader("Upload Images", type=["png", "jpg"], accept_multiple_files=True)
722
- prompt = st.text_input("Prompt", "Extract the electronic text from image")
723
- if st.button("Process Images"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
724
  for img_file in uploaded_images:
725
- img = Image.open(img_file)
726
- st.image(img, caption=img_file.name)
727
- result = process_image_with_prompt(img, prompt)
728
- st.text_area(f"Result for {img_file.name}", result)
729
-
730
- with tab_audio:
731
- st.header("Audio Process 🎵")
732
- audio_bytes = audio_recorder()
733
- if audio_bytes:
734
- filename = generate_filename("recording", "wav")
735
- with open(filename, "wb") as f:
736
- f.write(audio_bytes)
737
- st.audio(filename)
738
- process_audio(filename)
739
-
740
- with tab_video:
741
- st.header("Video Process 🎥")
742
- video_input = st.file_uploader("Upload Video", type=["mp4"])
743
- if video_input:
744
- video_path = generate_filename(video_input.name, "mp4")
745
- with open(video_path, "wb") as f:
746
- f.write(video_input.read())
747
- st.video(video_path)
748
- result = process_video(video_path, "Summarize this video in markdown")
749
- st.markdown(result)
750
-
751
- with tab_code:
752
- st.header("Code Executor 🧑‍💻")
753
- uploaded_file = st.file_uploader("📤 Upload a Python (.py) or Markdown (.md) file", type=['py', 'md'])
754
- if 'code' not in st.session_state:
755
- st.session_state.code = '''import streamlit as st\nst.write("Hello, World!")'''
756
- if uploaded_file:
757
- content = uploaded_file.getvalue().decode()
758
- if uploaded_file.type == "text/markdown":
759
- code_blocks = extract_python_code(content)
760
- code_input = code_blocks[0] if code_blocks else ""
761
- else:
762
- code_input = content
763
- else:
764
- code_input = st.text_area("Python Code", value=st.session_state.code, height=400)
765
- col1, col2 = st.columns([1, 1])
766
- with col1:
767
- if st.button("▶️ Run Code"):
768
- output, error = execute_code(code_input)
769
- if error:
770
- st.error(f"Error: {error}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
771
  else:
772
- st.success(f"Output: {output or 'No output'}")
773
- with col2:
774
- if st.button("🗑️ Clear Code"):
775
- st.session_state.code = ""
776
- st.rerun()
777
-
778
- with tab_gallery:
779
- st.header("Gallery 📚")
780
- all_files = get_gallery_files()
781
- for file in all_files:
782
- if file.endswith('.png'):
783
- st.image(Image.open(file), caption=file)
784
- elif file.endswith('.pdf'):
785
- doc = fitz.open(file)
786
- pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
787
- st.image(Image.frombytes("RGB", [pix.width, pix.height], pix.samples), caption=file)
788
- doc.close()
789
- elif file.endswith('.md'):
790
- with open(file, "r") as f:
791
- st.markdown(f.read())
792
- elif file.endswith('.wav'):
793
- st.audio(file)
794
- elif file.endswith('.mp4'):
795
- st.video(file)
796
-
797
- with tab_search:
798
- st.header("ArXiv Search 🔎")
799
- query = st.text_input("Search ArXiv", "")
800
- if query:
801
- result = search_arxiv(query)
802
- st.markdown(result)
803
-
804
- st.sidebar.subheader("Gallery Settings")
805
- st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
806
- display_gpu_info()
807
- st.sidebar.subheader("Action Logs 📜")
808
- for record in log_records:
809
- st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
810
- st.sidebar.subheader("History 📜")
811
- for entry in st.session_state.get("history", []):
812
- if entry:
813
- st.sidebar.write(entry)
814
 
 
 
815
  def update_gallery():
816
  container = st.session_state['asset_gallery_container']
817
- container.empty()
818
  all_files = get_gallery_files()
819
  if all_files:
820
  container.markdown("### Asset Gallery 📸📖")
821
  cols = container.columns(2)
822
  for idx, file in enumerate(all_files[:st.session_state['gallery_size']]):
823
  with cols[idx % 2]:
 
 
824
  if file.endswith('.png'):
825
- st.image(Image.open(file), caption=os.path.basename(file))
826
- elif file.endswith('.pdf'):
827
  doc = fitz.open(file)
828
  pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
829
- st.image(Image.frombytes("RGB", [pix.width, pix.height], pix.samples), caption=os.path.basename(file))
 
830
  doc.close()
831
- st.checkbox("Select", key=f"asset_{file}", value=st.session_state['asset_checkboxes'].get(file, False))
832
- st.markdown(get_download_link(file, "application/octet-stream", "Download"), unsafe_allow_html=True)
833
- if st.button("Delete", key=f"delete_{file}"):
 
 
834
  os.remove(file)
835
  st.session_state['asset_checkboxes'].pop(file, None)
 
836
  st.experimental_rerun()
837
 
 
838
  update_gallery()
839
 
840
- if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
841
- st.session_state.messages.append({"role": "user", "content": prompt})
842
- with st.chat_message("user"):
843
- st.markdown(prompt)
844
- with st.chat_message("assistant"):
845
- completion = client.chat.completions.create(model=st.session_state["openai_model"], messages=st.session_state.messages, stream=True)
846
- response = ""
847
- for chunk in completion:
848
- if chunk.choices[0].delta.content:
849
- response += chunk.choices[0].delta.content
850
- st.write(response)
851
- st.session_state.messages.append({"role": "assistant", "content": response})
852
-
853
- def create_audio_file(filename, audio_input, flag):
854
- with open(filename, "wb") as f:
855
- f.write(audio_input)
 
1
  import aiofiles
2
  import asyncio
3
  import base64
 
4
  import fitz
5
  import glob
 
 
6
  import logging
 
 
7
  import os
8
  import pandas as pd
9
  import pytz
 
12
  import requests
13
  import shutil
14
  import streamlit as st
 
 
15
  import time
16
  import torch
17
  import zipfile
18
 
 
 
 
 
19
  from dataclasses import dataclass
20
  from datetime import datetime
21
  from diffusers import StableDiffusionPipeline
 
 
 
22
  from io import BytesIO
 
23
  from openai import OpenAI
24
  from PIL import Image
 
25
  from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
26
  from typing import Optional
 
 
27
 
28
+ # 🤖 OpenAI wizardry: Summon your API magic!
29
+ client = OpenAI(
30
+ api_key=os.getenv('OPENAI_API_KEY'),
31
+ organization=os.getenv('OPENAI_ORG_ID')
32
+ )
33
 
34
+ # 📜 Logging activated: Capturing chaos and calm!
35
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
36
  logger = logging.getLogger(__name__)
37
  log_records = []
 
40
  log_records.append(record)
41
  logger.addHandler(LogCaptureHandler())
42
 
43
+ # 🎨 Streamlit styling: Designing a cosmic interface!
44
  st.set_page_config(
45
+ page_title="AI Vision & SFT Titans 🚀",
46
  page_icon="🤖",
47
  layout="wide",
48
  initial_sidebar_state="expanded",
49
  menu_items={
50
  'Get Help': 'https://huggingface.co/awacke1',
51
  'Report a Bug': 'https://huggingface.co/spaces/awacke1',
52
+ 'About': "AI Vision & SFT Titans: PDFs, OCR, Image Gen, Line Drawings, Custom Diffusion, and SFT on CPU! 🌌"
53
  }
54
  )
55
 
56
+ # Set up default session state values.
57
+ st.session_state.setdefault('history', []) # History: starting fresh if empty!
58
+ st.session_state.setdefault('builder', None) # Builder: set up if missing.
59
+ st.session_state.setdefault('model_loaded', False) # Model Loaded: not loaded by default.
60
+ st.session_state.setdefault('processing', {}) # Processing: initialize as an empty dict.
61
+ st.session_state.setdefault('asset_checkboxes', {}) # Asset Checkboxes: default to an empty dict.
62
+ st.session_state.setdefault('downloaded_pdfs', {}) # Downloaded PDFs: start with none.
63
+ st.session_state.setdefault('unique_counter', 0) # Unique Counter: initialize to zero.
64
  st.session_state.setdefault('selected_model_type', "Causal LM")
65
  st.session_state.setdefault('selected_model', "None")
 
 
66
  st.session_state.setdefault('cam0_file', None)
67
  st.session_state.setdefault('cam1_file', None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
+ # Create a single container for the asset gallery in the sidebar.
70
+ if 'asset_gallery_container' not in st.session_state:
71
+ st.session_state['asset_gallery_container'] = st.sidebar.empty()
 
 
 
 
 
 
 
 
72
 
73
+ @dataclass # ModelConfig: A blueprint for model configurations.
74
  class ModelConfig:
75
  name: str
76
  base_model: str
 
81
  def model_path(self):
82
  return f"models/{self.name}"
83
 
84
+ @dataclass # DiffusionConfig: Where diffusion magic takes shape.
85
  class DiffusionConfig:
86
  name: str
87
  base_model: str
 
99
  self.jokes = [
100
  "Why did the AI go to therapy? Too many layers to unpack! 😂",
101
  "Training complete! Time for a binary coffee break. ☕",
102
+ "I told my neural network a joke; it couldn't stop dropping bits! 🤖",
103
+ "I asked the AI for a pun, and it said, 'I'm punning on parallel processing!' 😄",
104
  "Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
105
  ]
106
  def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
 
111
  self.tokenizer.pad_token = self.tokenizer.eos_token
112
  if config:
113
  self.config = config
114
+ self.model.to("cuda" if torch.cuda.is_available() else "cpu")
115
+ st.success(f"Model loaded! 🎉 {random.choice(self.jokes)}")
 
116
  return self
117
  def save_model(self, path: str):
118
  with st.spinner("Saving model... 💾"):
 
140
  def generate(self, prompt: str):
141
  return self.pipeline(prompt, num_inference_steps=20).images[0]
142
 
143
+ def generate_filename(sequence, ext="png"):
 
 
 
 
 
144
  return f"{sequence}_{time.strftime('%d%m%Y%H%M%S')}.{ext}"
145
 
146
  def pdf_url_to_filename(url):
147
  return re.sub(r'[<>:"/\\|?*]', '_', url) + ".pdf"
148
 
149
  def get_download_link(file_path, mime_type="application/pdf", label="Download"):
150
+ return f'<a href="data:{mime_type};base64,{base64.b64encode(open(file_path, "rb").read()).decode()}" download="{os.path.basename(file_path)}">{label}</a>'
 
 
151
 
152
  def zip_directory(directory_path, zip_path):
153
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
154
+ [zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), os.path.dirname(directory_path)))
155
+ for root, _, files in os.walk(directory_path) for file in files]
 
156
 
157
  def get_model_files(model_type="causal_lm"):
158
  return [d for d in glob.glob("models/*" if model_type == "causal_lm" else "diffusion_models/*") if os.path.isdir(d)] or ["None"]
159
 
160
+ def get_gallery_files(file_types=["png", "pdf"]):
161
  return sorted(list({f for ext in file_types for f in glob.glob(f"*.{ext}")}))
162
 
163
  def get_pdf_files():
 
170
  with open(output_path, "wb") as f:
171
  for chunk in response.iter_content(chunk_size=8192):
172
  f.write(chunk)
173
+ ret = True
174
+ else:
175
+ ret = False
176
  except requests.RequestException as e:
177
  logger.error(f"Failed to download {url}: {e}")
178
+ ret = False
179
+ return ret
180
 
181
+ # Async PDF Snapshot: Snap your PDF pages without blocking.
182
  async def process_pdf_snapshot(pdf_path, mode="single"):
183
  start_time = time.time()
184
  status = st.empty()
 
193
  pix.save(output_file)
194
  output_files.append(output_file)
195
  elif mode == "twopage":
196
+ for i in range(min(2, len(doc))):
197
+ page = doc[i]
 
 
 
 
 
 
 
 
 
 
 
198
  pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
199
+ output_file = generate_filename(f"twopage_{i}", "png")
200
  pix.save(output_file)
201
  output_files.append(output_file)
202
  elif mode == "allpages":
 
214
  status.error(f"Failed to process PDF: {str(e)}")
215
  return []
216
 
217
+ # Async OCR: Convert images to text.
218
  async def process_ocr(image, output_file):
219
  start_time = time.time()
220
  status = st.empty()
221
  status.text("Processing GOT-OCR2_0... (0s)")
222
  tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
223
+ model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
224
+ temp_file = f"temp_{int(time.time())}.png"
 
 
225
  image.save(temp_file)
226
  result = model.chat(tokenizer, temp_file, ocr_type='ocr')
227
  os.remove(temp_file)
228
  elapsed = int(time.time() - start_time)
229
+ status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
230
  async with aiofiles.open(output_file, "w") as f:
231
  await f.write(result)
232
  return result
233
 
234
+ # Async Image Gen: Your image genie.
235
  async def process_image_gen(prompt, output_file):
236
  start_time = time.time()
237
  status = st.empty()
238
  status.text("Processing Image Gen... (0s)")
239
+ pipeline = (st.session_state['builder'].pipeline
240
+ if st.session_state.get('builder') and isinstance(st.session_state['builder'], DiffusionBuilder)
241
+ and st.session_state['builder'].pipeline
242
+ else StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu"))
243
  gen_image = pipeline(prompt, num_inference_steps=20).images[0]
244
  elapsed = int(time.time() - start_time)
245
  status.text(f"Image Gen completed in {elapsed}s!")
246
  gen_image.save(output_file)
247
  return gen_image
248
 
249
+ # GPT-Image Interpreter: Turning pixels into prose!
250
  def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto"):
251
  buffered = BytesIO()
252
  image.save(buffered, format="PNG")
253
  img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
254
+ messages = [{
255
+ "role": "user",
256
+ "content": [
257
+ {"type": "text", "text": prompt},
258
+ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": detail}}
259
+ ]
260
+ }]
261
  try:
262
  response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
263
  return response.choices[0].message.content
264
  except Exception as e:
265
  return f"Error processing image with GPT: {str(e)}"
266
 
267
+ # GPT-Text Alchemist: Merging prompt and text.
268
  def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
269
  messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
270
  try:
 
273
  except Exception as e:
274
  return f"Error processing text with GPT: {str(e)}"
275
 
276
+ # ----------------- SIDEBAR UPDATES -----------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
+ # Sidebar: Gallery Settings
279
+ st.sidebar.subheader("Gallery Settings")
280
+ st.session_state.setdefault('gallery_size', 2)
281
+ st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
282
+
283
+ # ----------------- TAB SETUP -----------------
284
+ tabs = st.tabs([
285
+ "Camera Snap 📷", "Download PDFs 📥", "Test OCR 🔍", "Build Titan 🌱",
286
+ "Test Image Gen 🎨", "PDF Process 📄", "Image Process 🖼️", "MD Gallery 📚"
287
+ ])
288
+ (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf_process, tab_image_process, tab_md_gallery) = tabs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
 
290
+ # ----------------- TAB: Camera Snap -----------------
291
  with tab_camera:
292
  st.header("Camera Snap 📷")
293
+ st.subheader("Single Capture")
294
  cols = st.columns(2)
295
+ with cols[0]:
296
+ cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
297
+ if cam0_img:
298
+ filename = generate_filename("cam0")
299
+ if st.session_state['cam0_file'] and os.path.exists(st.session_state['cam0_file']):
300
+ os.remove(st.session_state['cam0_file'])
301
+ with open(filename, "wb") as f:
302
+ f.write(cam0_img.getvalue())
303
+ st.session_state['cam0_file'] = filename
304
+ entry = f"Snapshot from Cam 0: {filename}"
305
+ st.session_state['history'].append(entry)
306
+ st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
307
+ logger.info(f"Saved snapshot from Camera 0: {filename}")
308
+ with cols[1]:
309
+ cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
310
+ if cam1_img:
311
+ filename = generate_filename("cam1")
312
+ if st.session_state['cam1_file'] and os.path.exists(st.session_state['cam1_file']):
313
+ os.remove(st.session_state['cam1_file'])
314
+ with open(filename, "wb") as f:
315
+ f.write(cam1_img.getvalue())
316
+ st.session_state['cam1_file'] = filename
317
+ entry = f"Snapshot from Cam 1: {filename}"
318
+ st.session_state['history'].append(entry)
319
+ st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
320
+ logger.info(f"Saved snapshot from Camera 1: {filename}")
321
+
322
+ # ----------------- TAB: Download PDFs -----------------
323
  with tab_download:
324
  st.header("Download PDFs 📥")
325
  if st.button("Examples 📚"):
326
+ example_urls = [
327
+ "https://arxiv.org/pdf/2308.03892",
328
+ "https://arxiv.org/pdf/1912.01703",
329
+ "https://arxiv.org/pdf/2408.11039",
330
+ "https://arxiv.org/pdf/2109.10282",
331
+ "https://arxiv.org/pdf/2112.10752",
332
+ "https://arxiv.org/pdf/2308.11236",
333
+ "https://arxiv.org/pdf/1706.03762",
334
+ "https://arxiv.org/pdf/2006.11239",
335
+ "https://arxiv.org/pdf/2305.11207",
336
+ "https://arxiv.org/pdf/2106.09685",
337
+ "https://arxiv.org/pdf/2005.11401",
338
+ "https://arxiv.org/pdf/2106.10504"
339
+ ]
340
  st.session_state['pdf_urls'] = "\n".join(example_urls)
341
  url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200)
342
  if st.button("Robo-Download 🤖"):
343
  urls = url_input.strip().split("\n")
344
  progress_bar = st.progress(0)
345
+ status_text = st.empty()
346
+ total_urls = len(urls)
347
+ existing_pdfs = get_pdf_files()
348
  for idx, url in enumerate(urls):
349
  if url:
350
  output_path = pdf_url_to_filename(url)
351
+ status_text.text(f"Fetching {idx + 1}/{total_urls}: {os.path.basename(output_path)}...")
352
+ if output_path not in existing_pdfs:
353
+ if download_pdf(url, output_path):
354
+ st.session_state['downloaded_pdfs'][url] = output_path
355
+ logger.info(f"Downloaded PDF from {url} to {output_path}")
356
+ entry = f"Downloaded PDF: {output_path}"
357
+ st.session_state['history'].append(entry)
358
+ st.session_state['asset_checkboxes'][output_path] = True
359
+ else:
360
+ st.error(f"Failed to nab {url} 😿")
361
+ else:
362
+ st.info(f"Already got {os.path.basename(output_path)}! Skipping... 🐾")
363
  st.session_state['downloaded_pdfs'][url] = output_path
364
+ progress_bar.progress((idx + 1) / total_urls)
365
+ status_text.text("Robo-Download complete! 🚀")
366
+ mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode")
367
+ if st.button("Snapshot Selected 📸"):
368
+ selected_pdfs = [path for path in get_gallery_files() if path.endswith('.pdf') and st.session_state['asset_checkboxes'].get(path, False)]
369
+ if selected_pdfs:
370
+ for pdf_path in selected_pdfs:
371
+ if not os.path.exists(pdf_path):
372
+ st.warning(f"File not found: {pdf_path}. Skipping.")
373
+ continue
374
+ mode_key = {"Single Page (High-Res)": "single",
375
+ "Two Pages (High-Res)": "twopage",
376
+ "All Pages (High-Res)": "allpages"}[mode]
377
+ snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
378
+ for snapshot in snapshots:
379
+ st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
380
+ st.session_state['asset_checkboxes'][snapshot] = True
381
+ # No update_gallery() call here; will update once later.
382
+ else:
383
+ st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar.")
384
 
385
+ # ----------------- TAB: Test OCR -----------------
386
  with tab_ocr:
387
  st.header("Test OCR 🔍")
388
  all_files = get_gallery_files()
389
  if all_files:
 
390
  if st.button("OCR All Assets 🚀"):
391
  full_text = "# OCR Results\n\n"
392
+ for file in all_files:
393
  if file.endswith('.png'):
394
  image = Image.open(file)
395
  else:
396
+ doc = fitz.open(file)
397
+ pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
398
+ image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
399
+ doc.close()
 
 
 
 
400
  output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
401
  result = asyncio.run(process_ocr(image, output_file))
402
  full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
403
+ entry = f"OCR Test: {file} -> {output_file}"
404
+ st.session_state['history'].append(entry)
405
+ md_output_file = f"full_ocr_{int(time.time())}.md"
406
  with open(md_output_file, "w") as f:
407
  f.write(full_text)
408
  st.success(f"Full OCR saved to {md_output_file}")
409
  st.markdown(get_download_link(md_output_file, "text/markdown", "Download Full OCR Markdown"), unsafe_allow_html=True)
410
+ selected_file = st.selectbox("Select Image or PDF", all_files, key="ocr_select")
411
  if selected_file:
412
  if selected_file.endswith('.png'):
413
  image = Image.open(selected_file)
414
  else:
415
+ doc = fitz.open(selected_file)
416
+ pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
417
+ image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
418
+ doc.close()
419
+ st.image(image, caption="Input Image", use_container_width=True)
420
+ if st.button("Run OCR 🚀", key="ocr_run"):
421
+ output_file = generate_filename("ocr_output", "txt")
422
+ st.session_state['processing']['ocr'] = True
423
+ result = asyncio.run(process_ocr(image, output_file))
424
+ entry = f"OCR Test: {selected_file} -> {output_file}"
425
+ st.session_state['history'].append(entry)
426
+ st.text_area("OCR Result", result, height=200, key="ocr_result")
427
+ st.success(f"OCR output saved to {output_file}")
428
+ st.session_state['processing']['ocr'] = False
429
+ if selected_file.endswith('.pdf') and st.button("OCR All Pages 🚀", key="ocr_all_pages"):
430
+ doc = fitz.open(selected_file)
431
+ full_text = f"# OCR Results for {os.path.basename(selected_file)}\n\n"
432
+ for i in range(len(doc)):
433
+ pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
434
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
435
+ output_file = generate_filename(f"ocr_page_{i}", "txt")
 
 
 
 
 
 
 
436
  result = asyncio.run(process_ocr(image, output_file))
437
+ full_text += f"## Page {i + 1}\n\n{result}\n\n"
438
+ entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
439
+ st.session_state['history'].append(entry)
440
+ md_output_file = f"full_ocr_{os.path.basename(selected_file)}_{int(time.time())}.md"
441
+ with open(md_output_file, "w") as f:
442
+ f.write(full_text)
443
+ st.success(f"Full OCR saved to {md_output_file}")
444
+ st.markdown(get_download_link(md_output_file, "text/markdown", "Download Full OCR Markdown"), unsafe_allow_html=True)
445
  else:
446
  st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
447
 
448
+ # ----------------- TAB: Build Titan -----------------
449
  with tab_build:
450
  st.header("Build Titan 🌱")
451
  model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
452
+ base_model = st.selectbox(
453
+ "Select Tiny Model",
454
+ ["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"] if model_type == "Causal LM"
455
+ else ["OFA-Sys/small-stable-diffusion-v0", "stabilityai/stable-diffusion-2-base"]
456
+ )
457
  model_name = st.text_input("Model Name", f"tiny-titan-{int(time.time())}")
458
+ domain = st.text_input("Target Domain", "general")
459
  if st.button("Download Model ⬇️"):
460
+ config = (ModelConfig if model_type == "Causal LM" else DiffusionConfig)(
461
+ name=model_name, base_model=base_model, size="small", domain=domain
462
+ )
463
  builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
464
  builder.load_model(base_model, config)
465
  builder.save_model(config.model_path)
466
  st.session_state['builder'] = builder
467
  st.session_state['model_loaded'] = True
468
+ st.session_state['selected_model_type'] = model_type
469
+ st.session_state['selected_model'] = config.model_path
470
+ entry = f"Built {model_type} model: {model_name}"
471
+ st.session_state['history'].append(entry)
472
+ st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
473
+ st.experimental_rerun()
474
+
475
+ # ----------------- TAB: Test Image Gen -----------------
476
  with tab_imggen:
477
  st.header("Test Image Gen 🎨")
478
+ all_files = get_gallery_files()
479
+ if all_files:
480
+ selected_file = st.selectbox("Select Image or PDF", all_files, key="gen_select")
481
+ if selected_file:
482
+ if selected_file.endswith('.png'):
483
+ image = Image.open(selected_file)
484
+ else:
485
+ doc = fitz.open(selected_file)
486
+ pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
487
+ image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
488
+ doc.close()
489
+ st.image(image, caption="Reference Image", use_container_width=True)
490
+ prompt = st.text_area("Prompt", "Generate a neon superhero version of this image", key="gen_prompt")
491
+ if st.button("Run Image Gen 🚀", key="gen_run"):
492
+ output_file = generate_filename("gen_output", "png")
493
+ st.session_state['processing']['gen'] = True
494
+ result = asyncio.run(process_image_gen(prompt, output_file))
495
+ entry = f"Image Gen Test: {prompt} -> {output_file}"
496
+ st.session_state['history'].append(entry)
497
+ st.image(result, caption="Generated Image", use_container_width=True)
498
+ st.success(f"Image saved to {output_file}")
499
+ st.session_state['processing']['gen'] = False
500
+ else:
501
+ st.warning("No images or PDFs in gallery yet. Use Camera Snap or Download PDFs!")
502
+
503
+ # ----------------- TAB: PDF Process -----------------
504
+ with tab_pdf_process:
505
+ st.header("PDF Process")
506
+ st.subheader("Upload PDFs for GPT-based text extraction")
507
+ gpt_models = ["gpt-4o", "gpt-4o-mini"]
508
+ selected_gpt_model = st.selectbox("Select GPT Model", gpt_models, key="pdf_gpt_model")
509
+ detail_level = st.selectbox("Detail Level", ["auto", "low", "high"], key="pdf_detail_level")
510
+ uploaded_pdfs = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True, key="pdf_process_uploader")
511
+ view_mode = st.selectbox("View Mode", ["Single Page", "Double Page"], key="pdf_view_mode")
512
+ if st.button("Process Uploaded PDFs", key="process_pdfs"):
513
+ combined_text = ""
514
  for pdf_file in uploaded_pdfs:
515
+ pdf_bytes = pdf_file.read()
516
+ temp_pdf_path = f"temp_{pdf_file.name}"
517
+ with open(temp_pdf_path, "wb") as f:
518
+ f.write(pdf_bytes)
519
+ try:
520
+ doc = fitz.open(temp_pdf_path)
521
+ st.write(f"Processing {pdf_file.name} with {len(doc)} pages")
522
+ if view_mode == "Single Page":
523
+ for i, page in enumerate(doc):
524
+ pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
525
+ img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
526
+ st.image(img, caption=f"{pdf_file.name} Page {i+1}")
527
+ gpt_text = process_image_with_prompt(img, "Extract the electronic text from image", model=selected_gpt_model, detail=detail_level)
528
+ combined_text += f"\n## {pdf_file.name} - Page {i+1}\n\n{gpt_text}\n"
529
+ else:
530
+ pages = list(doc)
531
+ for i in range(0, len(pages), 2):
532
+ if i+1 < len(pages):
533
+ pix1 = pages[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
534
+ img1 = Image.frombytes("RGB", [pix1.width, pix1.height], pix1.samples)
535
+ pix2 = pages[i+1].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
536
+ img2 = Image.frombytes("RGB", [pix2.width, pix2.height], pix2.samples)
537
+ total_width = img1.width + img2.width
538
+ max_height = max(img1.height, img2.height)
539
+ combined_img = Image.new("RGB", (total_width, max_height))
540
+ combined_img.paste(img1, (0, 0))
541
+ combined_img.paste(img2, (img1.width, 0))
542
+ st.image(combined_img, caption=f"{pdf_file.name} Pages {i+1}-{i+2}")
543
+ gpt_text = process_image_with_prompt(combined_img, "Extract the electronic text from image", model=selected_gpt_model, detail=detail_level)
544
+ combined_text += f"\n## {pdf_file.name} - Pages {i+1}-{i+2}\n\n{gpt_text}\n"
545
+ else:
546
+ pix = pages[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
547
+ img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
548
+ st.image(img, caption=f"{pdf_file.name} Page {i+1}")
549
+ gpt_text = process_image_with_prompt(img, "Extract the electronic text from image", model=selected_gpt_model, detail=detail_level)
550
+ combined_text += f"\n## {pdf_file.name} - Page {i+1}\n\n{gpt_text}\n"
551
+ doc.close()
552
+ except Exception as e:
553
+ st.error(f"Error processing {pdf_file.name}: {str(e)}")
554
+ finally:
555
+ os.remove(temp_pdf_path)
556
+ output_filename = generate_filename("processed_pdf", "md")
557
+ with open(output_filename, "w", encoding="utf-8") as f:
558
+ f.write(combined_text)
559
+ st.success(f"PDF processing complete. MD file saved as {output_filename}")
560
+ st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed PDF MD"), unsafe_allow_html=True)
561
+
562
+ # ----------------- TAB: Image Process -----------------
563
+ with tab_image_process:
564
+ st.header("Image Process")
565
+ st.subheader("Upload Images for GPT-based OCR")
566
+ gpt_models = ["gpt-4o", "gpt-4o-mini"]
567
+ selected_gpt_model = st.selectbox("Select GPT Model", gpt_models, key="img_gpt_model")
568
+ detail_level = st.selectbox("Detail Level", ["auto", "low", "high"], key="img_detail_level")
569
+ prompt_img = st.text_input("Enter prompt for image processing", "Extract the electronic text from image", key="img_process_prompt")
570
+ uploaded_images = st.file_uploader("Upload image files", type=["png", "jpg", "jpeg"], accept_multiple_files=True, key="image_process_uploader")
571
+ if st.button("Process Uploaded Images", key="process_images"):
572
+ combined_text = ""
573
  for img_file in uploaded_images:
574
+ try:
575
+ img = Image.open(img_file)
576
+ st.image(img, caption=img_file.name)
577
+ gpt_text = process_image_with_prompt(img, prompt_img, model=selected_gpt_model, detail=detail_level)
578
+ combined_text += f"\n## {img_file.name}\n\n{gpt_text}\n"
579
+ except Exception as e:
580
+ st.error(f"Error processing image {img_file.name}: {str(e)}")
581
+ output_filename = generate_filename("processed_image", "md")
582
+ with open(output_filename, "w", encoding="utf-8") as f:
583
+ f.write(combined_text)
584
+ st.success(f"Image processing complete. MD file saved as {output_filename}")
585
+ st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed Image MD"), unsafe_allow_html=True)
586
+
587
+ # ----------------- TAB: MD Gallery -----------------
588
+ with tab_md_gallery:
589
+ st.header("MD Gallery and GPT Processing")
590
+ gpt_models = ["gpt-4o", "gpt-4o-mini"]
591
+ selected_gpt_model = st.selectbox("Select GPT Model", gpt_models, key="md_gpt_model")
592
+ md_files = sorted(glob.glob("*.md"))
593
+ if md_files:
594
+ st.subheader("Individual File Processing")
595
+ cols = st.columns(2)
596
+ for idx, md_file in enumerate(md_files):
597
+ with cols[idx % 2]:
598
+ st.write(md_file)
599
+ if st.button(f"Process {md_file}", key=f"process_md_{md_file}"):
600
+ try:
601
+ with open(md_file, "r", encoding="utf-8") as f:
602
+ content = f.read()
603
+ prompt_md = "Summarize this into markdown outline with emojis and number the topics 1..12"
604
+ result_text = process_text_with_prompt(content, prompt_md, model=selected_gpt_model)
605
+ st.markdown(result_text)
606
+ output_filename = generate_filename(f"processed_{os.path.splitext(md_file)[0]}", "md")
607
+ with open(output_filename, "w", encoding="utf-8") as f:
608
+ f.write(result_text)
609
+ st.markdown(get_download_link(output_filename, "text/markdown", f"Download {output_filename}"), unsafe_allow_html=True)
610
+ except Exception as e:
611
+ st.error(f"Error processing {md_file}: {str(e)}")
612
+ st.subheader("Batch Processing")
613
+ st.write("Select MD files to combine and process:")
614
+ selected_md = {}
615
+ for md_file in md_files:
616
+ selected_md[md_file] = st.checkbox(md_file, key=f"checkbox_md_{md_file}")
617
+ batch_prompt = st.text_input("Enter batch processing prompt", "Summarize this into markdown outline with emojis and number the topics 1..12", key="batch_prompt")
618
+ if st.button("Process Selected MD Files", key="process_batch_md"):
619
+ combined_content = ""
620
+ for md_file, selected in selected_md.items():
621
+ if selected:
622
+ try:
623
+ with open(md_file, "r", encoding="utf-8") as f:
624
+ combined_content += f"\n## {md_file}\n" + f.read() + "\n"
625
+ except Exception as e:
626
+ st.error(f"Error reading {md_file}: {str(e)}")
627
+ if combined_content:
628
+ result_text = process_text_with_prompt(combined_content, batch_prompt, model=selected_gpt_model)
629
+ st.markdown(result_text)
630
+ output_filename = generate_filename("batch_processed_md", "md")
631
+ with open(output_filename, "w", encoding="utf-8") as f:
632
+ f.write(result_text)
633
+ st.success(f"Batch processing complete. MD file saved as {output_filename}")
634
+ st.markdown(get_download_link(output_filename, "text/markdown", "Download Batch Processed MD"), unsafe_allow_html=True)
635
  else:
636
+ st.warning("No MD files selected.")
637
+ else:
638
+ st.warning("No MD files found.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
639
 
640
+ # ----------------- FINAL SIDEBAR UPDATE -----------------
641
+ # Update the asset gallery once (using its container).
642
  def update_gallery():
643
  container = st.session_state['asset_gallery_container']
644
+ container.empty() # Clear previous gallery content.
645
  all_files = get_gallery_files()
646
  if all_files:
647
  container.markdown("### Asset Gallery 📸📖")
648
  cols = container.columns(2)
649
  for idx, file in enumerate(all_files[:st.session_state['gallery_size']]):
650
  with cols[idx % 2]:
651
+ st.session_state['unique_counter'] += 1
652
+ unique_id = st.session_state['unique_counter']
653
  if file.endswith('.png'):
654
+ st.image(Image.open(file), caption=os.path.basename(file), use_container_width=True)
655
+ else:
656
  doc = fitz.open(file)
657
  pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
658
+ img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
659
+ st.image(img, caption=os.path.basename(file), use_container_width=True)
660
  doc.close()
661
+ checkbox_key = f"asset_{file}_{unique_id}"
662
+ st.session_state['asset_checkboxes'][file] = st.checkbox("Use for SFT/Input", value=st.session_state['asset_checkboxes'].get(file, False), key=checkbox_key)
663
+ mime_type = "image/png" if file.endswith('.png') else "application/pdf"
664
+ st.markdown(get_download_link(file, mime_type, "Snag It! 📥"), unsafe_allow_html=True)
665
+ if st.button("Zap It! 🗑️", key=f"delete_{file}_{unique_id}"):
666
  os.remove(file)
667
  st.session_state['asset_checkboxes'].pop(file, None)
668
+ st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
669
  st.experimental_rerun()
670
 
671
+ # Call the gallery update once after all tabs have been processed.
672
  update_gallery()
673
 
674
+ # Finally, update the Action Logs and History in the sidebar.
675
+ st.sidebar.subheader("Action Logs 📜")
676
+ for record in log_records:
677
+ st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
678
+
679
+ st.sidebar.subheader("History 📜")
680
+ for entry in st.session_state.get("history", []):
681
+ if entry is not None:
682
+ st.sidebar.write(entry)