awacke1 committed
Commit 2cbf123 · verified · 1 Parent(s): 79b3496

Create app.py

Files changed (1)
  1. app.py +658 -0
app.py ADDED
@@ -0,0 +1,658 @@
import aiofiles
import asyncio
import base64
import fitz
import glob
import logging
import os
import pandas as pd
import pytz
import random
import re
import requests
import shutil
import streamlit as st
import time
import torch
import zipfile

from dataclasses import dataclass
from datetime import datetime
from diffusers import StableDiffusionPipeline
from io import BytesIO
from openai import OpenAI
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
from typing import Optional

# OpenAI client initialization
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))

# Logging setup
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
log_records = []

class LogCaptureHandler(logging.Handler):
    def emit(self, record):
        log_records.append(record)

logger.addHandler(LogCaptureHandler())

# Streamlit configuration
st.set_page_config(
    page_title="AI Vision & SFT Titans 🚀",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a Bug': 'https://huggingface.co/spaces/awacke1',
        'About': "AI Vision & SFT Titans: PDFs, OCR, Image Gen, Line Drawings, Custom Diffusion, and SFT on CPU! 🌌"
    }
)

# Session state initialization
st.session_state.setdefault('history', [])
st.session_state.setdefault('builder', None)
st.session_state.setdefault('model_loaded', False)
st.session_state.setdefault('processing', {})
st.session_state.setdefault('asset_checkboxes', {})
st.session_state.setdefault('downloaded_pdfs', {})
st.session_state.setdefault('unique_counter', 0)
st.session_state.setdefault('selected_model_type', "Causal LM")
st.session_state.setdefault('selected_model', "None")
st.session_state.setdefault('cam0_file', None)
st.session_state.setdefault('cam1_file', None)
if 'asset_gallery_container' not in st.session_state:
    st.session_state['asset_gallery_container'] = st.sidebar.empty()

@dataclass
class ModelConfig:
    name: str
    base_model: str
    size: str
    domain: Optional[str] = None
    model_type: str = "causal_lm"

    @property
    def model_path(self):
        return f"models/{self.name}"

@dataclass
class DiffusionConfig:
    name: str
    base_model: str
    size: str
    domain: Optional[str] = None

    @property
    def model_path(self):
        return f"diffusion_models/{self.name}"

class ModelBuilder:
    def __init__(self):
        self.config = None
        self.model = None
        self.tokenizer = None
        self.jokes = [
            "Why did the AI go to therapy? Too many layers to unpack! 😂",
            "Training complete! Time for a binary coffee break. ☕",
            "I told my neural network a joke; it couldn't stop dropping bits! 🤖",
            "I asked the AI for a pun, and it said, 'I'm punning on parallel processing!' 😄",
            "Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
        ]

    def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
        with st.spinner(f"Loading {model_path}... ⏳"):
            self.model = AutoModelForCausalLM.from_pretrained(model_path)
            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
            if config:
                self.config = config
            self.model.to("cuda" if torch.cuda.is_available() else "cpu")
            st.success(f"Model loaded! 🎉 {random.choice(self.jokes)}")
        return self

    def save_model(self, path: str):
        with st.spinner("Saving model... 💾"):
            os.makedirs(os.path.dirname(path), exist_ok=True)
            self.model.save_pretrained(path)
            self.tokenizer.save_pretrained(path)
            st.success(f"Model saved at {path}! ✅")

class DiffusionBuilder:
    def __init__(self):
        self.config = None
        self.pipeline = None

    def load_model(self, model_path: str, config: Optional[DiffusionConfig] = None):
        with st.spinner(f"Loading diffusion model {model_path}... ⏳"):
            self.pipeline = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float32).to("cpu")
            if config:
                self.config = config
            st.success("Diffusion model loaded! 🎨")
        return self

    def save_model(self, path: str):
        with st.spinner("Saving diffusion model... 💾"):
            os.makedirs(os.path.dirname(path), exist_ok=True)
            self.pipeline.save_pretrained(path)
            st.success(f"Diffusion model saved at {path}! ✅")

    def generate(self, prompt: str):
        return self.pipeline(prompt, num_inference_steps=20).images[0]

def generate_filename(sequence, ext="png"):
    return f"{sequence}_{time.strftime('%d%m%Y%H%M%S')}.{ext}"

def pdf_url_to_filename(url):
    return re.sub(r'[<>:"/\\|?*]', '_', url) + ".pdf"

def get_download_link(file_path, mime_type="application/pdf", label="Download"):
    return f'<a href="data:{mime_type};base64,{base64.b64encode(open(file_path, "rb").read()).decode()}" download="{os.path.basename(file_path)}">{label}</a>'

def zip_directory(directory_path, zip_path):
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        [zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), os.path.dirname(directory_path)))
         for root, _, files in os.walk(directory_path) for file in files]

def get_model_files(model_type="causal_lm"):
    return [d for d in glob.glob("models/*" if model_type == "causal_lm" else "diffusion_models/*") if os.path.isdir(d)] or ["None"]

def get_gallery_files(file_types=["png", "pdf"]):
    return sorted(list({f for ext in file_types for f in glob.glob(f"*.{ext}")}))

def get_pdf_files():
    return sorted(glob.glob("*.pdf"))

def download_pdf(url, output_path):
    try:
        response = requests.get(url, stream=True, timeout=10)
        if response.status_code == 200:
            with open(output_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            ret = True
        else:
            ret = False
    except requests.RequestException as e:
        logger.error(f"Failed to download {url}: {e}")
        ret = False
    return ret

async def process_pdf_snapshot(pdf_path, mode="single"):
    start_time = time.time()
    status = st.empty()
    status.text(f"Processing PDF Snapshot ({mode})... (0s)")
    try:
        doc = fitz.open(pdf_path)
        output_files = []
        if mode == "single":
            page = doc[0]
            pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
            output_file = generate_filename("single", "png")
            pix.save(output_file)
            output_files.append(output_file)
        elif mode == "twopage":
            for i in range(min(2, len(doc))):
                page = doc[i]
                pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                output_file = generate_filename(f"twopage_{i}", "png")
                pix.save(output_file)
                output_files.append(output_file)
        elif mode == "allpages":
            for i in range(len(doc)):
                page = doc[i]
                pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                output_file = generate_filename(f"page_{i}", "png")
                pix.save(output_file)
                output_files.append(output_file)
        doc.close()
        elapsed = int(time.time() - start_time)
        status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
        return output_files
    except Exception as e:
        status.error(f"Failed to process PDF: {str(e)}")
        return []

async def process_ocr(image, output_file):
    start_time = time.time()
    status = st.empty()
    status.text("Processing GOT-OCR2_0... (0s)")
    tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
    # Force CPU usage to avoid CUDA error until GPU setup is fixed
    model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
    temp_file = f"temp_{int(time.time())}.png"
    image.save(temp_file)
    result = model.chat(tokenizer, temp_file, ocr_type='ocr')
    os.remove(temp_file)
    elapsed = int(time.time() - start_time)
    status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
    async with aiofiles.open(output_file, "w") as f:
        await f.write(result)
    return result

async def process_image_gen(prompt, output_file):
    start_time = time.time()
    status = st.empty()
    status.text("Processing Image Gen... (0s)")
    pipeline = (st.session_state['builder'].pipeline
                if st.session_state.get('builder') and isinstance(st.session_state['builder'], DiffusionBuilder)
                and st.session_state['builder'].pipeline
                else StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu"))
    gen_image = pipeline(prompt, num_inference_steps=20).images[0]
    elapsed = int(time.time() - start_time)
    status.text(f"Image Gen completed in {elapsed}s!")
    gen_image.save(output_file)
    return gen_image

def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto"):
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": detail}}
        ]
    }]
    try:
        response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
        return response.choices[0].message.content
    except Exception as e:
        return f"Error processing image with GPT: {str(e)}"

def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
    messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
    try:
        response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
        return response.choices[0].message.content
    except Exception as e:
        return f"Error processing text with GPT: {str(e)}"

# Sidebar: Gallery Settings
st.sidebar.subheader("Gallery Settings")
st.session_state.setdefault('gallery_size', 2)
st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")

# Tabs setup
tabs = st.tabs([
    "Camera Snap 📷", "Download PDFs 📥", "Test OCR 🔍", "Build Titan 🌱",
    "Test Image Gen 🎨", "PDF Process 📄", "Image Process 🖼️", "MD Gallery 📚"
])
(tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf_process, tab_image_process, tab_md_gallery) = tabs

with tab_camera:
    st.header("Camera Snap 📷")
    st.subheader("Single Capture")
    cols = st.columns(2)
    with cols[0]:
        cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
        if cam0_img:
            filename = generate_filename("cam0")
            if st.session_state['cam0_file'] and os.path.exists(st.session_state['cam0_file']):
                os.remove(st.session_state['cam0_file'])
            with open(filename, "wb") as f:
                f.write(cam0_img.getvalue())
            st.session_state['cam0_file'] = filename
            entry = f"Snapshot from Cam 0: {filename}"
            st.session_state['history'].append(entry)
            st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
            logger.info(f"Saved snapshot from Camera 0: {filename}")
    with cols[1]:
        cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
        if cam1_img:
            filename = generate_filename("cam1")
            if st.session_state['cam1_file'] and os.path.exists(st.session_state['cam1_file']):
                os.remove(st.session_state['cam1_file'])
            with open(filename, "wb") as f:
                f.write(cam1_img.getvalue())
            st.session_state['cam1_file'] = filename
            entry = f"Snapshot from Cam 1: {filename}"
            st.session_state['history'].append(entry)
            st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
            logger.info(f"Saved snapshot from Camera 1: {filename}")

with tab_download:
    st.header("Download PDFs 📥")
    if st.button("Examples 📚"):
        example_urls = [
            "https://arxiv.org/pdf/2308.03892",
            "https://arxiv.org/pdf/1912.01703",
            "https://arxiv.org/pdf/2408.11039",
            "https://arxiv.org/pdf/2109.10282",
            "https://arxiv.org/pdf/2112.10752",
            "https://arxiv.org/pdf/2308.11236",
            "https://arxiv.org/pdf/1706.03762",
            "https://arxiv.org/pdf/2006.11239",
            "https://arxiv.org/pdf/2305.11207",
            "https://arxiv.org/pdf/2106.09685",
            "https://arxiv.org/pdf/2005.11401",
            "https://arxiv.org/pdf/2106.10504"
        ]
        st.session_state['pdf_urls'] = "\n".join(example_urls)
    url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200)
    if st.button("Robo-Download 🤖"):
        urls = url_input.strip().split("\n")
        progress_bar = st.progress(0)
        status_text = st.empty()
        total_urls = len(urls)
        existing_pdfs = get_pdf_files()
        for idx, url in enumerate(urls):
            if url:
                output_path = pdf_url_to_filename(url)
                status_text.text(f"Fetching {idx + 1}/{total_urls}: {os.path.basename(output_path)}...")
                if output_path not in existing_pdfs:
                    if download_pdf(url, output_path):
                        st.session_state['downloaded_pdfs'][url] = output_path
                        logger.info(f"Downloaded PDF from {url} to {output_path}")
                        entry = f"Downloaded PDF: {output_path}"
                        st.session_state['history'].append(entry)
                        st.session_state['asset_checkboxes'][output_path] = True
                    else:
                        st.error(f"Failed to nab {url} 😿")
                else:
                    st.info(f"Already got {os.path.basename(output_path)}! Skipping... 🐾")
                    st.session_state['downloaded_pdfs'][url] = output_path
            progress_bar.progress((idx + 1) / total_urls)
        status_text.text("Robo-Download complete! 🚀")
    mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode")
    if st.button("Snapshot Selected 📸"):
        selected_pdfs = [path for path in get_gallery_files() if path.endswith('.pdf') and st.session_state['asset_checkboxes'].get(path, False)]
        if selected_pdfs:
            for pdf_path in selected_pdfs:
                if not os.path.exists(pdf_path):
                    st.warning(f"File not found: {pdf_path}. Skipping.")
                    continue
                mode_key = {"Single Page (High-Res)": "single",
                            "Two Pages (High-Res)": "twopage",
                            "All Pages (High-Res)": "allpages"}[mode]
                snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
                for snapshot in snapshots:
                    st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
                    st.session_state['asset_checkboxes'][snapshot] = True
        else:
            st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar.")

with tab_ocr:
    st.header("Test OCR 🔍")
    all_files = get_gallery_files()
    if all_files:
        if st.button("OCR All Assets 🚀"):
            full_text = "# OCR Results\n\n"
            for file in all_files:
                if file.endswith('.png'):
                    image = Image.open(file)
                else:
                    doc = fitz.open(file)
                    pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                    image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                    doc.close()
                output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
                result = asyncio.run(process_ocr(image, output_file))
                full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
                entry = f"OCR Test: {file} -> {output_file}"
                st.session_state['history'].append(entry)
            md_output_file = f"full_ocr_{int(time.time())}.md"
            with open(md_output_file, "w") as f:
                f.write(full_text)
            st.success(f"Full OCR saved to {md_output_file}")
            st.markdown(get_download_link(md_output_file, "text/markdown", "Download Full OCR Markdown"), unsafe_allow_html=True)
        selected_file = st.selectbox("Select Image or PDF", all_files, key="ocr_select")
        if selected_file:
            if selected_file.endswith('.png'):
                image = Image.open(selected_file)
            else:
                doc = fitz.open(selected_file)
                pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                doc.close()
            st.image(image, caption="Input Image", use_container_width=True)
            if st.button("Run OCR 🚀", key="ocr_run"):
                output_file = generate_filename("ocr_output", "txt")
                st.session_state['processing']['ocr'] = True
                result = asyncio.run(process_ocr(image, output_file))
                entry = f"OCR Test: {selected_file} -> {output_file}"
                st.session_state['history'].append(entry)
                st.text_area("OCR Result", result, height=200, key="ocr_result")
                st.success(f"OCR output saved to {output_file}")
                st.session_state['processing']['ocr'] = False
            if selected_file.endswith('.pdf') and st.button("OCR All Pages 🚀", key="ocr_all_pages"):
                doc = fitz.open(selected_file)
                full_text = f"# OCR Results for {os.path.basename(selected_file)}\n\n"
                for i in range(len(doc)):
                    pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                    image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                    output_file = generate_filename(f"ocr_page_{i}", "txt")
                    result = asyncio.run(process_ocr(image, output_file))
                    full_text += f"## Page {i + 1}\n\n{result}\n\n"
                    entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
                    st.session_state['history'].append(entry)
                md_output_file = f"full_ocr_{os.path.basename(selected_file)}_{int(time.time())}.md"
                with open(md_output_file, "w") as f:
                    f.write(full_text)
                st.success(f"Full OCR saved to {md_output_file}")
                st.markdown(get_download_link(md_output_file, "text/markdown", "Download Full OCR Markdown"), unsafe_allow_html=True)
    else:
        st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")

with tab_build:
    st.header("Build Titan 🌱")
    model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
    base_model = st.selectbox(
        "Select Tiny Model",
        ["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"] if model_type == "Causal LM"
        else ["OFA-Sys/small-stable-diffusion-v0", "stabilityai/stable-diffusion-2-base"]
    )
    model_name = st.text_input("Model Name", f"tiny-titan-{int(time.time())}")
    domain = st.text_input("Target Domain", "general")
    if st.button("Download Model ⬇️"):
        config = (ModelConfig if model_type == "Causal LM" else DiffusionConfig)(
            name=model_name, base_model=base_model, size="small", domain=domain
        )
        builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
        builder.load_model(base_model, config)
        builder.save_model(config.model_path)
        st.session_state['builder'] = builder
        st.session_state['model_loaded'] = True
        st.session_state['selected_model_type'] = model_type
        st.session_state['selected_model'] = config.model_path
        entry = f"Built {model_type} model: {model_name}"
        st.session_state['history'].append(entry)
        st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
        st.experimental_rerun()

with tab_imggen:
    st.header("Test Image Gen 🎨")
    all_files = get_gallery_files()
    if all_files:
        selected_file = st.selectbox("Select Image or PDF", all_files, key="gen_select")
        if selected_file:
            if selected_file.endswith('.png'):
                image = Image.open(selected_file)
            else:
                doc = fitz.open(selected_file)
                pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                doc.close()
            st.image(image, caption="Reference Image", use_container_width=True)
            prompt = st.text_area("Prompt", "Generate a neon superhero version of this image", key="gen_prompt")
            if st.button("Run Image Gen 🚀", key="gen_run"):
                output_file = generate_filename("gen_output", "png")
                st.session_state['processing']['gen'] = True
                result = asyncio.run(process_image_gen(prompt, output_file))
                entry = f"Image Gen Test: {prompt} -> {output_file}"
                st.session_state['history'].append(entry)
                st.image(result, caption="Generated Image", use_container_width=True)
                st.success(f"Image saved to {output_file}")
                st.session_state['processing']['gen'] = False
    else:
        st.warning("No images or PDFs in gallery yet. Use Camera Snap or Download PDFs!")

with tab_pdf_process:
    st.header("PDF Process")
    st.subheader("Upload PDFs for GPT-based text extraction")
    gpt_models = ["gpt-4o", "gpt-4o-mini"]
    selected_gpt_model = st.selectbox("Select GPT Model", gpt_models, key="pdf_gpt_model")
    detail_level = st.selectbox("Detail Level", ["auto", "low", "high"], key="pdf_detail_level")
    uploaded_pdfs = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True, key="pdf_process_uploader")
    view_mode = st.selectbox("View Mode", ["Single Page", "Double Page"], key="pdf_view_mode")
    if st.button("Process Uploaded PDFs", key="process_pdfs"):
        combined_text = ""
        for pdf_file in uploaded_pdfs:
            pdf_bytes = pdf_file.read()
            temp_pdf_path = f"temp_{pdf_file.name}"
            with open(temp_pdf_path, "wb") as f:
                f.write(pdf_bytes)
            try:
                doc = fitz.open(temp_pdf_path)
                st.write(f"Processing {pdf_file.name} with {len(doc)} pages")
                if view_mode == "Single Page":
                    for i, page in enumerate(doc):
                        pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                        st.image(img, caption=f"{pdf_file.name} Page {i+1}")
                        gpt_text = process_image_with_prompt(img, "Extract the electronic text from image", model=selected_gpt_model, detail=detail_level)
                        combined_text += f"\n## {pdf_file.name} - Page {i+1}\n\n{gpt_text}\n"
                else:
                    pages = list(doc)
                    for i in range(0, len(pages), 2):
                        if i+1 < len(pages):
                            pix1 = pages[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                            img1 = Image.frombytes("RGB", [pix1.width, pix1.height], pix1.samples)
                            pix2 = pages[i+1].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                            img2 = Image.frombytes("RGB", [pix2.width, pix2.height], pix2.samples)
                            total_width = img1.width + img2.width
                            max_height = max(img1.height, img2.height)
                            combined_img = Image.new("RGB", (total_width, max_height))
                            combined_img.paste(img1, (0, 0))
                            combined_img.paste(img2, (img1.width, 0))
                            st.image(combined_img, caption=f"{pdf_file.name} Pages {i+1}-{i+2}")
                            gpt_text = process_image_with_prompt(combined_img, "Extract the electronic text from image", model=selected_gpt_model, detail=detail_level)
                            combined_text += f"\n## {pdf_file.name} - Pages {i+1}-{i+2}\n\n{gpt_text}\n"
                        else:
                            pix = pages[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                            st.image(img, caption=f"{pdf_file.name} Page {i+1}")
                            gpt_text = process_image_with_prompt(img, "Extract the electronic text from image", model=selected_gpt_model, detail=detail_level)
                            combined_text += f"\n## {pdf_file.name} - Page {i+1}\n\n{gpt_text}\n"
                doc.close()
            except Exception as e:
                st.error(f"Error processing {pdf_file.name}: {str(e)}")
            finally:
                os.remove(temp_pdf_path)
        output_filename = generate_filename("processed_pdf", "md")
        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(combined_text)
        st.success(f"PDF processing complete. MD file saved as {output_filename}")
        st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed PDF MD"), unsafe_allow_html=True)

with tab_image_process:
    st.header("Image Process")
    st.subheader("Upload Images for GPT-based OCR")
    gpt_models = ["gpt-4o", "gpt-4o-mini"]
    selected_gpt_model = st.selectbox("Select GPT Model", gpt_models, key="img_gpt_model")
    detail_level = st.selectbox("Detail Level", ["auto", "low", "high"], key="img_detail_level")
    prompt_img = st.text_input("Enter prompt for image processing", "Extract the electronic text from image", key="img_process_prompt")
    uploaded_images = st.file_uploader("Upload image files", type=["png", "jpg", "jpeg"], accept_multiple_files=True, key="image_process_uploader")
    if st.button("Process Uploaded Images", key="process_images"):
        combined_text = ""
        for img_file in uploaded_images:
            try:
                img = Image.open(img_file)
                st.image(img, caption=img_file.name)
                gpt_text = process_image_with_prompt(img, prompt_img, model=selected_gpt_model, detail=detail_level)
                combined_text += f"\n## {img_file.name}\n\n{gpt_text}\n"
            except Exception as e:
                st.error(f"Error processing image {img_file.name}: {str(e)}")
        output_filename = generate_filename("processed_image", "md")
        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(combined_text)
        st.success(f"Image processing complete. MD file saved as {output_filename}")
        st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed Image MD"), unsafe_allow_html=True)

with tab_md_gallery:
    st.header("MD Gallery and GPT Processing")
    gpt_models = ["gpt-4o", "gpt-4o-mini"]
    selected_gpt_model = st.selectbox("Select GPT Model", gpt_models, key="md_gpt_model")
    md_files = sorted(glob.glob("*.md"))
    if md_files:
        st.subheader("Individual File Processing")
        cols = st.columns(2)
        for idx, md_file in enumerate(md_files):
            with cols[idx % 2]:
                st.write(md_file)
                if st.button(f"Process {md_file}", key=f"process_md_{md_file}"):
                    try:
                        with open(md_file, "r", encoding="utf-8") as f:
                            content = f.read()
                        prompt_md = "Summarize this into markdown outline with emojis and number the topics 1..12"
                        result_text = process_text_with_prompt(content, prompt_md, model=selected_gpt_model)
                        st.markdown(result_text)
                        output_filename = generate_filename(f"processed_{os.path.splitext(md_file)[0]}", "md")
                        with open(output_filename, "w", encoding="utf-8") as f:
                            f.write(result_text)
                        st.markdown(get_download_link(output_filename, "text/markdown", f"Download {output_filename}"), unsafe_allow_html=True)
                    except Exception as e:
                        st.error(f"Error processing {md_file}: {str(e)}")
        st.subheader("Batch Processing")
        st.write("Select MD files to combine and process:")
        selected_md = {}
        for md_file in md_files:
            selected_md[md_file] = st.checkbox(md_file, key=f"checkbox_md_{md_file}")
        batch_prompt = st.text_input("Enter batch processing prompt", "Summarize this into markdown outline with emojis and number the topics 1..12", key="batch_prompt")
        if st.button("Process Selected MD Files", key="process_batch_md"):
            combined_content = ""
            for md_file, selected in selected_md.items():
                if selected:
                    try:
                        with open(md_file, "r", encoding="utf-8") as f:
                            combined_content += f"\n## {md_file}\n" + f.read() + "\n"
                    except Exception as e:
                        st.error(f"Error reading {md_file}: {str(e)}")
            if combined_content:
                result_text = process_text_with_prompt(combined_content, batch_prompt, model=selected_gpt_model)
                st.markdown(result_text)
                output_filename = generate_filename("batch_processed_md", "md")
                with open(output_filename, "w", encoding="utf-8") as f:
                    f.write(result_text)
                st.success(f"Batch processing complete. MD file saved as {output_filename}")
                st.markdown(get_download_link(output_filename, "text/markdown", "Download Batch Processed MD"), unsafe_allow_html=True)
            else:
                st.warning("No MD files selected.")
    else:
        st.warning("No MD files found.")

def update_gallery():
    container = st.session_state['asset_gallery_container']
    container.empty()
    all_files = get_gallery_files()
    if all_files:
        container.markdown("### Asset Gallery 📸📖")
        cols = container.columns(2)
        for idx, file in enumerate(all_files[:st.session_state['gallery_size']]):
            with cols[idx % 2]:
                st.session_state['unique_counter'] += 1
                unique_id = st.session_state['unique_counter']
                if file.endswith('.png'):
                    st.image(Image.open(file), caption=os.path.basename(file), use_container_width=True)
                else:
                    doc = fitz.open(file)
                    pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
                    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                    st.image(img, caption=os.path.basename(file), use_container_width=True)
                    doc.close()
                checkbox_key = f"asset_{file}_{unique_id}"
                st.session_state['asset_checkboxes'][file] = st.checkbox("Use for SFT/Input", value=st.session_state['asset_checkboxes'].get(file, False), key=checkbox_key)
                mime_type = "image/png" if file.endswith('.png') else "application/pdf"
                st.markdown(get_download_link(file, mime_type, "Snag It! 📥"), unsafe_allow_html=True)
                if st.button("Zap It! 🗑️", key=f"delete_{file}_{unique_id}"):
                    os.remove(file)
                    st.session_state['asset_checkboxes'].pop(file, None)
                    st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
                    st.experimental_rerun()

update_gallery()

st.sidebar.subheader("Action Logs 📜")
for record in log_records:
    # asctime/message are only populated by a Formatter, so build the line from the raw record fields
    timestamp = datetime.fromtimestamp(record.created).strftime("%Y-%m-%d %H:%M:%S")
    st.sidebar.write(f"{timestamp} - {record.levelname} - {record.getMessage()}")

st.sidebar.subheader("History 📜")
for entry in st.session_state.get("history", []):
    if entry is not None:
        st.sidebar.write(entry)