awacke1 commited on
Commit
4301b1c
·
verified ·
1 Parent(s): a57b24e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +161 -146
app.py CHANGED
@@ -53,23 +53,24 @@ st.set_page_config(
53
  }
54
  )
55
 
56
- st.session_state.setdefault('history', []) # 🌱 History: starting fresh if empty!
57
- st.session_state.setdefault('builder', None) # 🛠️ Builder: set up the builder if it's missing!
58
- st.session_state.setdefault('model_loaded', False) # 🚦 Model Loaded: mark as not loaded by default!
59
- st.session_state.setdefault('processing', {}) # Processing: initialize processing state as an empty dict!
60
- st.session_state.setdefault('asset_checkboxes', {}) # ✅ Asset Checkboxes: default to an empty dictionary!
61
- st.session_state.setdefault('downloaded_pdfs', {}) # 📄 Downloaded PDFs: start with no PDFs downloaded!
62
- st.session_state.setdefault('unique_counter', 0) # 🔢 Unique Counter: initialize the counter to zero!
63
- st.session_state.setdefault('selected_model_type', "Causal LM") # 🧠 Selected Model Type: default to "Causal LM"!
64
- st.session_state.setdefault('selected_model', "None") # 🤖 Selected Model: set to "None" if not already set!
65
- st.session_state.setdefault('cam0_file', None) # 📸 Cam0 File: no file loaded by default!
66
- st.session_state.setdefault('cam1_file', None) # 📸 Cam1 File: no file loaded by default!
67
-
68
- # Create a single placeholder for the asset gallery in the sidebar.
 
69
  if 'asset_gallery_container' not in st.session_state:
70
  st.session_state['asset_gallery_container'] = st.sidebar.empty()
71
 
72
- @dataclass # 🎨 ModelConfig: A blueprint for model configurations!
73
  class ModelConfig:
74
  name: str
75
  base_model: str
@@ -77,46 +78,48 @@ class ModelConfig:
77
  domain: Optional[str] = None
78
  model_type: str = "causal_lm"
79
  @property
80
- def model_path(self): return f"models/{self.name}" # 🚀 Model Path: Home base for brilliance!
 
81
 
82
- @dataclass # 🎨 DiffusionConfig: Where diffusion magic takes shape!
83
  class DiffusionConfig:
84
  name: str
85
  base_model: str
86
  size: str
87
  domain: Optional[str] = None
88
  @property
89
- def model_path(self): return f"diffusion_models/{self.name}" # 🚀 Diffusion Path: Let the diffusion begin!
90
-
91
- class ModelBuilder: # 🔧 ModelBuilder: Crafting AI wonders with wit!
92
- def __init__(self): # 🚀 Initialize: Setting up the AI factory!
93
- self.config = None # No config yet—waiting for genius!
94
- self.model = None # Model not built until the magic happens!
95
- self.tokenizer = None # Tokenizer: Ready to speak in AI!
96
- self.jokes = [ # 🤣 Jokes to keep the circuits laughing!
 
97
  "Why did the AI go to therapy? Too many layers to unpack! 😂",
98
  "Training complete! Time for a binary coffee break. ☕",
99
  "I told my neural network a joke; it couldn't stop dropping bits! 🤖",
100
  "I asked the AI for a pun, and it said, 'I'm punning on parallel processing!' 😄",
101
  "Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
102
  ]
103
- def load_model(self, model_path: str, config: Optional[ModelConfig] = None): # 🔄 load_model: Booting up genius!
104
- with st.spinner(f"Loading {model_path}... ⏳"): # ⏳ Spinner: Genius loading...
105
  self.model = AutoModelForCausalLM.from_pretrained(model_path)
106
  self.tokenizer = AutoTokenizer.from_pretrained(model_path)
107
- if self.tokenizer.pad_token is None:
108
- self.tokenizer.pad_token = self.tokenizer.eos_token # 🔧 Fix pad token if missing!
109
- if config:
110
- self.config = config # 🛠️ Config loaded—setting the stage!
111
- self.model.to("cuda" if torch.cuda.is_available() else "cpu") # 💻 Deploying the model to its device!
112
- st.success(f"Model loaded! 🎉 {random.choice(self.jokes)}") # 🎉 Success: Model is now in orbit!
113
  return self
114
- def save_model(self, path: str): # 💾 save_model: Securing your masterpiece!
115
- with st.spinner("Saving model... 💾"): # ⏳ Spinner: Saving brilliance...
116
  os.makedirs(os.path.dirname(path), exist_ok=True)
117
  self.model.save_pretrained(path)
118
- self.tokenizer.save_pretrained(path) # 📂 Directory magic: Creating and saving!
119
- st.success(f"Model saved at {path}! ✅") # ✅ Success: Your model is safely stored!
120
 
121
  class DiffusionBuilder:
122
  def __init__(self):
@@ -137,32 +140,31 @@ class DiffusionBuilder:
137
  def generate(self, prompt: str):
138
  return self.pipeline(prompt, num_inference_steps=20).images[0]
139
 
140
- def generate_filename(sequence, ext="png"):
141
- return f"{sequence}_{time.strftime('%d%m%Y%H%M%S')}.{ext}" # ⏳ Generate filename with timestamp magic!
142
 
143
  def pdf_url_to_filename(url):
144
- return re.sub(r'[<>:"/\\|?*]', '_', url) + ".pdf" # 📄 Convert URL to a safe PDF filename – no hackers allowed!
145
 
146
  def get_download_link(file_path, mime_type="application/pdf", label="Download"):
147
- return f'<a href="data:{mime_type};base64,{base64.b64encode(open(file_path, "rb").read()).decode()}" download="{os.path.basename(file_path)}">{label}</a>' # 🔗 Create a download link – click it like it's hot!
148
 
149
- def zip_directory(directory_path, zip_path):
150
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
151
  [zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), os.path.dirname(directory_path)))
152
- for root, _, files in os.walk(directory_path) for file in files] # 🎁 Zip directory: Packing files faster than Santa on Christmas Eve!
153
 
154
  def get_model_files(model_type="causal_lm"):
155
- return [d for d in glob.glob("models/*" if model_type == "causal_lm" else "diffusion_models/*") if os.path.isdir(d)] or ["None"] # 📂 Get model files: Hunting directories like a pro!
156
 
157
  def get_gallery_files(file_types=["png", "pdf"]):
158
- return sorted(list({f for ext in file_types for f in glob.glob(f"*.{ext}")})) # 🖼️ Get gallery files: Finding art in a digital haystack!
159
 
160
  def get_pdf_files():
161
- return sorted(glob.glob("*.pdf")) # 📄 Get PDF files: Sorted and served – no paper cuts here!
162
 
163
- # 📥 Download PDF: Delivering docs faster than a caffeinated courier!
164
  def download_pdf(url, output_path):
165
- try:
166
  response = requests.get(url, stream=True, timeout=10)
167
  if response.status_code == 200:
168
  with open(output_path, "wb") as f:
@@ -171,13 +173,13 @@ def download_pdf(url, output_path):
171
  ret = True
172
  else:
173
  ret = False
174
- except requests.RequestException as e:
175
  logger.error(f"Failed to download {url}: {e}")
176
  ret = False
177
- return ret
178
 
179
- # 📚 Async PDF Snapshot: Snap your PDF pages without blocking—juggle pages like a ninja! 🥷
180
- async def process_pdf_snapshot(pdf_path, mode="single"):
181
  start_time = time.time()
182
  status = st.empty()
183
  status.text(f"Processing PDF Snapshot ({mode})... (0s)")
@@ -207,14 +209,13 @@ async def process_pdf_snapshot(pdf_path, mode="single"):
207
  doc.close()
208
  elapsed = int(time.time() - start_time)
209
  status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
210
- update_gallery()
211
  return output_files
212
  except Exception as e:
213
  status.error(f"Failed to process PDF: {str(e)}")
214
  return []
215
 
216
- # 😎 Async OCR: Convert images to text while your app keeps on groovin'—no blocking, just rocking! 🎸
217
- async def process_ocr(image, output_file):
218
  start_time = time.time()
219
  status = st.empty()
220
  status.text("Processing GOT-OCR2_0... (0s)")
@@ -228,97 +229,69 @@ async def process_ocr(image, output_file):
228
  status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
229
  async with aiofiles.open(output_file, "w") as f:
230
  await f.write(result)
231
- update_gallery()
232
  return result
233
 
234
- # 🧞 Async Image Gen: Your image genie—wishing up pictures while the event loop keeps the party going! 🎉
235
- async def process_image_gen(prompt, output_file):
236
  start_time = time.time()
237
  status = st.empty()
238
  status.text("Processing Image Gen... (0s)")
239
- pipeline = st.session_state['builder'].pipeline if st.session_state.get('builder') and isinstance(st.session_state['builder'], DiffusionBuilder) and st.session_state['builder'].pipeline else StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu")
 
 
 
240
  gen_image = pipeline(prompt, num_inference_steps=20).images[0]
241
  elapsed = int(time.time() - start_time)
242
  status.text(f"Image Gen completed in {elapsed}s!")
243
  gen_image.save(output_file)
244
- update_gallery()
245
  return gen_image
246
 
247
- # 🖼️ GPT-Image Interpreter: Turning pixels into prose!
248
- def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto"):
249
  buffered = BytesIO()
250
- image.save(buffered, format="PNG") # 💾 Save the image in-memory as PNG—no hard drives harmed!
251
- img_str = base64.b64encode(buffered.getvalue()).decode("utf-8") # 🔐 Encode image data in Base64 for secure, inline transmission!
252
- messages = [{"role": "user", "content": [
253
- {"type": "text", "text": prompt},
254
- {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": detail}}
255
- ]}]
 
 
 
256
  try:
257
  response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
258
  return response.choices[0].message.content
259
  except Exception as e:
260
  return f"Error processing image with GPT: {str(e)}"
261
 
262
- # 📝 GPT-Text Alchemist: Merging your prompt and text into digital gold!
263
- def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
264
  messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
265
- try:
266
  response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
267
  return response.choices[0].message.content
268
  except Exception as e:
269
  return f"Error processing text with GPT: {str(e)}"
270
 
271
- st.sidebar.subheader("Gallery Settings") # 🎨 Sidebar Gallery: Customize your creative space!
272
- st.session_state.setdefault('gallery_size', 2) # 🔧 Setting default gallery size to 2 if it's missing!
273
- st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider") # 🎚️ Slide to adjust your gallery size and bring balance to your art!
274
 
275
- # 📸 Gallery Updater: Making your assets dazzle and disappear faster than a magician's rabbit! 🐇✨
276
- def update_gallery():
277
- container = st.session_state['asset_gallery_container']
278
- container.empty() # Clear previous gallery content
279
- all_files = get_gallery_files() # 🔍 Grab all gallery files like a digital treasure hunt!
280
- if all_files:
281
- with container:
282
- st.sidebar.subheader("Asset Gallery 📸📖")
283
- cols = st.sidebar.columns(2)
284
- for idx, file in enumerate(all_files[:st.session_state['gallery_size']]):
285
- with cols[idx % 2]:
286
- st.session_state['unique_counter'] += 1
287
- unique_id = st.session_state['unique_counter']
288
- if file.endswith('.png'):
289
- st.image(Image.open(file), caption=os.path.basename(file), use_container_width=True)
290
- else:
291
- doc = fitz.open(file)
292
- pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
293
- img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
294
- st.image(img, caption=os.path.basename(file), use_container_width=True)
295
- doc.close()
296
- checkbox_key = f"asset_{file}_{unique_id}"
297
- st.session_state['asset_checkboxes'][file] = st.checkbox("Use for SFT/Input", value=st.session_state['asset_checkboxes'].get(file, False), key=checkbox_key)
298
- mime_type = "image/png" if file.endswith('.png') else "application/pdf"
299
- st.markdown(get_download_link(file, mime_type, "Snag It! 📥"), unsafe_allow_html=True)
300
- if st.button("Zap It! 🗑️", key=f"delete_{file}_{unique_id}"):
301
- os.remove(file)
302
- st.session_state['asset_checkboxes'].pop(file, None)
303
- st.sidebar.success(f"Asset {os.path.basename(file)} vaporized! 💨")
304
- st.rerun()
305
-
306
- st.sidebar.subheader("Action Logs 📜") # 📝 Action Logs: Where our system whispers its secrets!
307
- with st.sidebar:
308
- [st.write(f"{record.asctime} - {record.levelname} - {record.message}") for record in log_records]
309
-
310
- st.sidebar.subheader("History 📜") # 🕰️ History: A walk down memory lane, one log at a time!
311
- with st.sidebar:
312
- [st.write(entry) for entry in st.session_state['history']]
313
-
314
- tabs = st.tabs(["Camera Snap 📷", "Download PDFs 📥", "Test OCR 🔍", "Build Titan 🌱", "Test Image Gen 🎨", "PDF Process 📄", "Image Process 🖼️", "MD Gallery 📚"])
315
  (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf_process, tab_image_process, tab_md_gallery) = tabs
316
 
 
317
  with tab_camera:
318
- st.header("Camera Snap 📷") # 🎥 Header: Let’s capture those Kodak moments!
319
- st.subheader("Single Capture") # 📸 Subheader: One snap at a time, no double exposure!
320
  cols = st.columns(2)
321
-
322
  with cols[0]:
323
  cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
324
  if cam0_img:
@@ -329,12 +302,9 @@ with tab_camera:
329
  f.write(cam0_img.getvalue())
330
  st.session_state['cam0_file'] = filename
331
  entry = f"Snapshot from Cam 0: {filename}"
332
- if entry not in st.session_state['history']:
333
- st.session_state['history'] = [e for e in st.session_state['history'] if not e.startswith("Snapshot from Cam 0:")] + [entry]
334
  st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
335
  logger.info(f"Saved snapshot from Camera 0: {filename}")
336
- update_gallery()
337
-
338
  with cols[1]:
339
  cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
340
  if cam1_img:
@@ -345,12 +315,11 @@ with tab_camera:
345
  f.write(cam1_img.getvalue())
346
  st.session_state['cam1_file'] = filename
347
  entry = f"Snapshot from Cam 1: {filename}"
348
- if entry not in st.session_state['history']:
349
- st.session_state['history'] = [e for e in st.session_state['history'] if not e.startswith("Snapshot from Cam 1:")] + [entry]
350
  st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
351
  logger.info(f"Saved snapshot from Camera 1: {filename}")
352
- update_gallery()
353
 
 
354
  with tab_download:
355
  st.header("Download PDFs 📥")
356
  if st.button("Examples 📚"):
@@ -369,7 +338,6 @@ with tab_download:
369
  "https://arxiv.org/pdf/2106.10504"
370
  ]
371
  st.session_state['pdf_urls'] = "\n".join(example_urls)
372
-
373
  url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200)
374
  if st.button("Robo-Download 🤖"):
375
  urls = url_input.strip().split("\n")
@@ -386,8 +354,7 @@ with tab_download:
386
  st.session_state['downloaded_pdfs'][url] = output_path
387
  logger.info(f"Downloaded PDF from {url} to {output_path}")
388
  entry = f"Downloaded PDF: {output_path}"
389
- if entry not in st.session_state['history']:
390
- st.session_state['history'].append(entry)
391
  st.session_state['asset_checkboxes'][output_path] = True
392
  else:
393
  st.error(f"Failed to nab {url} 😿")
@@ -396,8 +363,6 @@ with tab_download:
396
  st.session_state['downloaded_pdfs'][url] = output_path
397
  progress_bar.progress((idx + 1) / total_urls)
398
  status_text.text("Robo-Download complete! 🚀")
399
- update_gallery()
400
-
401
  mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode")
402
  if st.button("Snapshot Selected 📸"):
403
  selected_pdfs = [path for path in get_gallery_files() if path.endswith('.pdf') and st.session_state['asset_checkboxes'].get(path, False)]
@@ -406,15 +371,18 @@ with tab_download:
406
  if not os.path.exists(pdf_path):
407
  st.warning(f"File not found: {pdf_path}. Skipping.")
408
  continue
409
- mode_key = {"Single Page (High-Res)": "single", "Two Pages (High-Res)": "twopage", "All Pages (High-Res)": "allpages"}[mode]
 
 
410
  snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
411
  for snapshot in snapshots:
412
  st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
413
  st.session_state['asset_checkboxes'][snapshot] = True
414
- update_gallery()
415
  else:
416
  st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar.")
417
 
 
418
  with tab_ocr:
419
  st.header("Test OCR 🔍")
420
  all_files = get_gallery_files()
@@ -433,8 +401,7 @@ with tab_ocr:
433
  result = asyncio.run(process_ocr(image, output_file))
434
  full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
435
  entry = f"OCR Test: {file} -> {output_file}"
436
- if entry not in st.session_state['history']:
437
- st.session_state['history'].append(entry)
438
  md_output_file = f"full_ocr_{int(time.time())}.md"
439
  with open(md_output_file, "w") as f:
440
  f.write(full_text)
@@ -455,8 +422,7 @@ with tab_ocr:
455
  st.session_state['processing']['ocr'] = True
456
  result = asyncio.run(process_ocr(image, output_file))
457
  entry = f"OCR Test: {selected_file} -> {output_file}"
458
- if entry not in st.session_state['history']:
459
- st.session_state['history'].append(entry)
460
  st.text_area("OCR Result", result, height=200, key="ocr_result")
461
  st.success(f"OCR output saved to {output_file}")
462
  st.session_state['processing']['ocr'] = False
@@ -470,8 +436,7 @@ with tab_ocr:
470
  result = asyncio.run(process_ocr(image, output_file))
471
  full_text += f"## Page {i + 1}\n\n{result}\n\n"
472
  entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
473
- if entry not in st.session_state['history']:
474
- st.session_state['history'].append(entry)
475
  md_output_file = f"full_ocr_{os.path.basename(selected_file)}_{int(time.time())}.md"
476
  with open(md_output_file, "w") as f:
477
  f.write(full_text)
@@ -480,12 +445,13 @@ with tab_ocr:
480
  else:
481
  st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
482
 
 
483
  with tab_build:
484
  st.header("Build Titan 🌱")
485
  model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
486
  base_model = st.selectbox(
487
  "Select Tiny Model",
488
- ["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"] if model_type == "Causal LM"
489
  else ["OFA-Sys/small-stable-diffusion-v0", "stabilityai/stable-diffusion-2-base"]
490
  )
491
  model_name = st.text_input("Model Name", f"tiny-titan-{int(time.time())}")
@@ -495,14 +461,18 @@ with tab_build:
495
  name=model_name, base_model=base_model, size="small", domain=domain
496
  )
497
  builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
498
- builder.load_model(base_model, config); builder.save_model(config.model_path)
499
- st.session_state['builder'] = builder; st.session_state['model_loaded'] = True
500
- st.session_state['selected_model_type'] = model_type; st.session_state['selected_model'] = config.model_path
 
 
 
501
  entry = f"Built {model_type} model: {model_name}"
502
- if entry not in st.session_state['history']:
503
- st.session_state['history'].append(entry)
504
- st.success(f"Model downloaded and saved to {config.model_path}! 🎉"); st.rerun()
505
 
 
506
  with tab_imggen:
507
  st.header("Test Image Gen 🎨")
508
  all_files = get_gallery_files()
@@ -523,15 +493,14 @@ with tab_imggen:
523
  st.session_state['processing']['gen'] = True
524
  result = asyncio.run(process_image_gen(prompt, output_file))
525
  entry = f"Image Gen Test: {prompt} -> {output_file}"
526
- if entry not in st.session_state['history']:
527
- st.session_state['history'].append(entry)
528
  st.image(result, caption="Generated Image", use_container_width=True)
529
  st.success(f"Image saved to {output_file}")
530
  st.session_state['processing']['gen'] = False
531
  else:
532
  st.warning("No images or PDFs in gallery yet. Use Camera Snap or Download PDFs!")
533
- update_gallery()
534
 
 
535
  with tab_pdf_process:
536
  st.header("PDF Process")
537
  st.subheader("Upload PDFs for GPT-based text extraction")
@@ -590,6 +559,7 @@ with tab_pdf_process:
590
  st.success(f"PDF processing complete. MD file saved as {output_filename}")
591
  st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed PDF MD"), unsafe_allow_html=True)
592
 
 
593
  with tab_image_process:
594
  st.header("Image Process")
595
  st.subheader("Upload Images for GPT-based OCR")
@@ -614,6 +584,7 @@ with tab_image_process:
614
  st.success(f"Image processing complete. MD file saved as {output_filename}")
615
  st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed Image MD"), unsafe_allow_html=True)
616
 
 
617
  with tab_md_gallery:
618
  st.header("MD Gallery and GPT Processing")
619
  gpt_models = ["gpt-4o", "gpt-4o-mini"]
@@ -665,3 +636,47 @@ with tab_md_gallery:
665
  st.warning("No MD files selected.")
666
  else:
667
  st.warning("No MD files found.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  }
54
  )
55
 
56
+ # Set up default session state values.
57
+ st.session_state.setdefault('history', []) # History: starting fresh if empty!
58
+ st.session_state.setdefault('builder', None) # Builder: set up if missing.
59
+ st.session_state.setdefault('model_loaded', False) # Model Loaded: not loaded by default.
60
+ st.session_state.setdefault('processing', {}) # Processing: initialize as an empty dict.
61
+ st.session_state.setdefault('asset_checkboxes', {}) # Asset Checkboxes: default to an empty dict.
62
+ st.session_state.setdefault('downloaded_pdfs', {}) # Downloaded PDFs: start with none.
63
+ st.session_state.setdefault('unique_counter', 0) # Unique Counter: initialize to zero.
64
+ st.session_state.setdefault('selected_model_type', "Causal LM")
65
+ st.session_state.setdefault('selected_model', "None")
66
+ st.session_state.setdefault('cam0_file', None)
67
+ st.session_state.setdefault('cam1_file', None)
68
+
69
+ # Create a single container for the asset gallery in the sidebar.
70
  if 'asset_gallery_container' not in st.session_state:
71
  st.session_state['asset_gallery_container'] = st.sidebar.empty()
72
 
73
+ @dataclass # ModelConfig: A blueprint for model configurations.
74
  class ModelConfig:
75
  name: str
76
  base_model: str
 
78
  domain: Optional[str] = None
79
  model_type: str = "causal_lm"
80
  @property
81
+ def model_path(self):
82
+ return f"models/{self.name}"
83
 
84
+ @dataclass # DiffusionConfig: Where diffusion magic takes shape.
85
  class DiffusionConfig:
86
  name: str
87
  base_model: str
88
  size: str
89
  domain: Optional[str] = None
90
  @property
91
+ def model_path(self):
92
+ return f"diffusion_models/{self.name}"
93
+
94
+ class ModelBuilder:
95
+ def __init__(self):
96
+ self.config = None
97
+ self.model = None
98
+ self.tokenizer = None
99
+ self.jokes = [
100
  "Why did the AI go to therapy? Too many layers to unpack! 😂",
101
  "Training complete! Time for a binary coffee break. ☕",
102
  "I told my neural network a joke; it couldn't stop dropping bits! 🤖",
103
  "I asked the AI for a pun, and it said, 'I'm punning on parallel processing!' 😄",
104
  "Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
105
  ]
106
+ def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
107
+ with st.spinner(f"Loading {model_path}... ⏳"):
108
  self.model = AutoModelForCausalLM.from_pretrained(model_path)
109
  self.tokenizer = AutoTokenizer.from_pretrained(model_path)
110
+ if self.tokenizer.pad_token is None:
111
+ self.tokenizer.pad_token = self.tokenizer.eos_token
112
+ if config:
113
+ self.config = config
114
+ self.model.to("cuda" if torch.cuda.is_available() else "cpu")
115
+ st.success(f"Model loaded! 🎉 {random.choice(self.jokes)}")
116
  return self
117
+ def save_model(self, path: str):
118
+ with st.spinner("Saving model... 💾"):
119
  os.makedirs(os.path.dirname(path), exist_ok=True)
120
  self.model.save_pretrained(path)
121
+ self.tokenizer.save_pretrained(path)
122
+ st.success(f"Model saved at {path}! ✅")
123
 
124
  class DiffusionBuilder:
125
  def __init__(self):
 
140
  def generate(self, prompt: str):
141
  return self.pipeline(prompt, num_inference_steps=20).images[0]
142
 
143
+ def generate_filename(sequence, ext="png"):
144
+ return f"{sequence}_{time.strftime('%d%m%Y%H%M%S')}.{ext}"
145
 
146
  def pdf_url_to_filename(url):
147
+ return re.sub(r'[<>:"/\\|?*]', '_', url) + ".pdf"
148
 
149
  def get_download_link(file_path, mime_type="application/pdf", label="Download"):
150
+ return f'<a href="data:{mime_type};base64,{base64.b64encode(open(file_path, "rb").read()).decode()}" download="{os.path.basename(file_path)}">{label}</a>'
151
 
152
+ def zip_directory(directory_path, zip_path):
153
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
154
  [zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), os.path.dirname(directory_path)))
155
+ for root, _, files in os.walk(directory_path) for file in files]
156
 
157
  def get_model_files(model_type="causal_lm"):
158
+ return [d for d in glob.glob("models/*" if model_type == "causal_lm" else "diffusion_models/*") if os.path.isdir(d)] or ["None"]
159
 
160
  def get_gallery_files(file_types=["png", "pdf"]):
161
+ return sorted(list({f for ext in file_types for f in glob.glob(f"*.{ext}")}))
162
 
163
  def get_pdf_files():
164
+ return sorted(glob.glob("*.pdf"))
165
 
 
166
  def download_pdf(url, output_path):
167
+ try:
168
  response = requests.get(url, stream=True, timeout=10)
169
  if response.status_code == 200:
170
  with open(output_path, "wb") as f:
 
173
  ret = True
174
  else:
175
  ret = False
176
+ except requests.RequestException as e:
177
  logger.error(f"Failed to download {url}: {e}")
178
  ret = False
179
+ return ret
180
 
181
+ # Async PDF Snapshot: Snap your PDF pages without blocking.
182
+ async def process_pdf_snapshot(pdf_path, mode="single"):
183
  start_time = time.time()
184
  status = st.empty()
185
  status.text(f"Processing PDF Snapshot ({mode})... (0s)")
 
209
  doc.close()
210
  elapsed = int(time.time() - start_time)
211
  status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
 
212
  return output_files
213
  except Exception as e:
214
  status.error(f"Failed to process PDF: {str(e)}")
215
  return []
216
 
217
+ # Async OCR: Convert images to text.
218
+ async def process_ocr(image, output_file):
219
  start_time = time.time()
220
  status = st.empty()
221
  status.text("Processing GOT-OCR2_0... (0s)")
 
229
  status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
230
  async with aiofiles.open(output_file, "w") as f:
231
  await f.write(result)
 
232
  return result
233
 
234
+ # Async Image Gen: Your image genie.
235
+ async def process_image_gen(prompt, output_file):
236
  start_time = time.time()
237
  status = st.empty()
238
  status.text("Processing Image Gen... (0s)")
239
+ pipeline = (st.session_state['builder'].pipeline
240
+ if st.session_state.get('builder') and isinstance(st.session_state['builder'], DiffusionBuilder)
241
+ and st.session_state['builder'].pipeline
242
+ else StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu"))
243
  gen_image = pipeline(prompt, num_inference_steps=20).images[0]
244
  elapsed = int(time.time() - start_time)
245
  status.text(f"Image Gen completed in {elapsed}s!")
246
  gen_image.save(output_file)
 
247
  return gen_image
248
 
249
+ # GPT-Image Interpreter: Turning pixels into prose!
250
+ def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto"):
251
  buffered = BytesIO()
252
+ image.save(buffered, format="PNG")
253
+ img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
254
+ messages = [{
255
+ "role": "user",
256
+ "content": [
257
+ {"type": "text", "text": prompt},
258
+ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": detail}}
259
+ ]
260
+ }]
261
  try:
262
  response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
263
  return response.choices[0].message.content
264
  except Exception as e:
265
  return f"Error processing image with GPT: {str(e)}"
266
 
267
+ # GPT-Text Alchemist: Merging prompt and text.
268
+ def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
269
  messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
270
+ try:
271
  response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
272
  return response.choices[0].message.content
273
  except Exception as e:
274
  return f"Error processing text with GPT: {str(e)}"
275
 
276
+ # ----------------- SIDEBAR UPDATES -----------------
 
 
277
 
278
+ # Sidebar: Gallery Settings
279
+ st.sidebar.subheader("Gallery Settings")
280
+ st.session_state.setdefault('gallery_size', 2)
281
+ st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
282
+
283
+ # ----------------- TAB SETUP -----------------
284
+ tabs = st.tabs([
285
+ "Camera Snap 📷", "Download PDFs 📥", "Test OCR 🔍", "Build Titan 🌱",
286
+ "Test Image Gen 🎨", "PDF Process 📄", "Image Process 🖼️", "MD Gallery 📚"
287
+ ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf_process, tab_image_process, tab_md_gallery) = tabs
289
 
290
+ # ----------------- TAB: Camera Snap -----------------
291
  with tab_camera:
292
+ st.header("Camera Snap 📷")
293
+ st.subheader("Single Capture")
294
  cols = st.columns(2)
 
295
  with cols[0]:
296
  cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
297
  if cam0_img:
 
302
  f.write(cam0_img.getvalue())
303
  st.session_state['cam0_file'] = filename
304
  entry = f"Snapshot from Cam 0: {filename}"
305
+ st.session_state['history'].append(entry)
 
306
  st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
307
  logger.info(f"Saved snapshot from Camera 0: {filename}")
 
 
308
  with cols[1]:
309
  cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
310
  if cam1_img:
 
315
  f.write(cam1_img.getvalue())
316
  st.session_state['cam1_file'] = filename
317
  entry = f"Snapshot from Cam 1: {filename}"
318
+ st.session_state['history'].append(entry)
 
319
  st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
320
  logger.info(f"Saved snapshot from Camera 1: {filename}")
 
321
 
322
+ # ----------------- TAB: Download PDFs -----------------
323
  with tab_download:
324
  st.header("Download PDFs 📥")
325
  if st.button("Examples 📚"):
 
338
  "https://arxiv.org/pdf/2106.10504"
339
  ]
340
  st.session_state['pdf_urls'] = "\n".join(example_urls)
 
341
  url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200)
342
  if st.button("Robo-Download 🤖"):
343
  urls = url_input.strip().split("\n")
 
354
  st.session_state['downloaded_pdfs'][url] = output_path
355
  logger.info(f"Downloaded PDF from {url} to {output_path}")
356
  entry = f"Downloaded PDF: {output_path}"
357
+ st.session_state['history'].append(entry)
 
358
  st.session_state['asset_checkboxes'][output_path] = True
359
  else:
360
  st.error(f"Failed to nab {url} 😿")
 
363
  st.session_state['downloaded_pdfs'][url] = output_path
364
  progress_bar.progress((idx + 1) / total_urls)
365
  status_text.text("Robo-Download complete! 🚀")
 
 
366
  mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode")
367
  if st.button("Snapshot Selected 📸"):
368
  selected_pdfs = [path for path in get_gallery_files() if path.endswith('.pdf') and st.session_state['asset_checkboxes'].get(path, False)]
 
371
  if not os.path.exists(pdf_path):
372
  st.warning(f"File not found: {pdf_path}. Skipping.")
373
  continue
374
+ mode_key = {"Single Page (High-Res)": "single",
375
+ "Two Pages (High-Res)": "twopage",
376
+ "All Pages (High-Res)": "allpages"}[mode]
377
  snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
378
  for snapshot in snapshots:
379
  st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
380
  st.session_state['asset_checkboxes'][snapshot] = True
381
+ # Deliberately no update_gallery() call here; the sidebar gallery is refreshed once at the end of the script.
382
  else:
383
  st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar.")
384
 
385
+ # ----------------- TAB: Test OCR -----------------
386
  with tab_ocr:
387
  st.header("Test OCR 🔍")
388
  all_files = get_gallery_files()
 
401
  result = asyncio.run(process_ocr(image, output_file))
402
  full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
403
  entry = f"OCR Test: {file} -> {output_file}"
404
+ st.session_state['history'].append(entry)
 
405
  md_output_file = f"full_ocr_{int(time.time())}.md"
406
  with open(md_output_file, "w") as f:
407
  f.write(full_text)
 
422
  st.session_state['processing']['ocr'] = True
423
  result = asyncio.run(process_ocr(image, output_file))
424
  entry = f"OCR Test: {selected_file} -> {output_file}"
425
+ st.session_state['history'].append(entry)
 
426
  st.text_area("OCR Result", result, height=200, key="ocr_result")
427
  st.success(f"OCR output saved to {output_file}")
428
  st.session_state['processing']['ocr'] = False
 
436
  result = asyncio.run(process_ocr(image, output_file))
437
  full_text += f"## Page {i + 1}\n\n{result}\n\n"
438
  entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
439
+ st.session_state['history'].append(entry)
 
440
  md_output_file = f"full_ocr_{os.path.basename(selected_file)}_{int(time.time())}.md"
441
  with open(md_output_file, "w") as f:
442
  f.write(full_text)
 
445
  else:
446
  st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
447
 
448
+ # ----------------- TAB: Build Titan -----------------
449
  with tab_build:
450
  st.header("Build Titan 🌱")
451
  model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
452
  base_model = st.selectbox(
453
  "Select Tiny Model",
454
+ ["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"] if model_type == "Causal LM"
455
  else ["OFA-Sys/small-stable-diffusion-v0", "stabilityai/stable-diffusion-2-base"]
456
  )
457
  model_name = st.text_input("Model Name", f"tiny-titan-{int(time.time())}")
 
461
  name=model_name, base_model=base_model, size="small", domain=domain
462
  )
463
  builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
464
+ builder.load_model(base_model, config)
465
+ builder.save_model(config.model_path)
466
+ st.session_state['builder'] = builder
467
+ st.session_state['model_loaded'] = True
468
+ st.session_state['selected_model_type'] = model_type
469
+ st.session_state['selected_model'] = config.model_path
470
  entry = f"Built {model_type} model: {model_name}"
471
+ st.session_state['history'].append(entry)
472
+ st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
473
+ st.experimental_rerun()
474
 
475
+ # ----------------- TAB: Test Image Gen -----------------
476
  with tab_imggen:
477
  st.header("Test Image Gen 🎨")
478
  all_files = get_gallery_files()
 
493
  st.session_state['processing']['gen'] = True
494
  result = asyncio.run(process_image_gen(prompt, output_file))
495
  entry = f"Image Gen Test: {prompt} -> {output_file}"
496
+ st.session_state['history'].append(entry)
 
497
  st.image(result, caption="Generated Image", use_container_width=True)
498
  st.success(f"Image saved to {output_file}")
499
  st.session_state['processing']['gen'] = False
500
  else:
501
  st.warning("No images or PDFs in gallery yet. Use Camera Snap or Download PDFs!")
 
502
 
503
+ # ----------------- TAB: PDF Process -----------------
504
  with tab_pdf_process:
505
  st.header("PDF Process")
506
  st.subheader("Upload PDFs for GPT-based text extraction")
 
559
  st.success(f"PDF processing complete. MD file saved as {output_filename}")
560
  st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed PDF MD"), unsafe_allow_html=True)
561
 
562
+ # ----------------- TAB: Image Process -----------------
563
  with tab_image_process:
564
  st.header("Image Process")
565
  st.subheader("Upload Images for GPT-based OCR")
 
584
  st.success(f"Image processing complete. MD file saved as {output_filename}")
585
  st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed Image MD"), unsafe_allow_html=True)
586
 
587
+ # ----------------- TAB: MD Gallery -----------------
588
  with tab_md_gallery:
589
  st.header("MD Gallery and GPT Processing")
590
  gpt_models = ["gpt-4o", "gpt-4o-mini"]
 
636
  st.warning("No MD files selected.")
637
  else:
638
  st.warning("No MD files found.")
639
+
640
+ # ----------------- FINAL SIDEBAR UPDATE -----------------
641
+ # Update the asset gallery once (using its container).
642
def update_gallery():
    """Render the asset gallery into the sidebar placeholder.

    Reads the placeholder stored under
    ``st.session_state['asset_gallery_container']``, clears it, and, when
    ``get_gallery_files()`` returns anything, draws a two-column gallery.
    Each asset gets a preview (PNG files are shown directly, everything
    else is opened with PyMuPDF and its first page rendered at half scale),
    a "Use for SFT/Input" checkbox whose value is written back to
    ``st.session_state['asset_checkboxes']``, a download link, and a delete
    button that removes the file from disk and reruns the script.

    Returns:
        None. All output goes to the Streamlit UI and session state.
    """
    placeholder = st.session_state['asset_gallery_container']
    placeholder.empty()  # Clear previous gallery content.
    # NOTE(review): the placeholder is created once and kept in session
    # state; confirm Streamlit still renders into it on later reruns.

    all_files = get_gallery_files()
    if not all_files:
        return

    # An st.empty() placeholder holds a single element, so writing the
    # header and then the columns straight into it would make the columns
    # replace the header. Nest a container to hold both.
    gallery = placeholder.container()
    gallery.markdown("### Asset Gallery 📸📖")
    cols = gallery.columns(2)

    # A missing 'gallery_size' yields None, and a slice bound of None keeps
    # the whole list instead of raising KeyError.
    limit = st.session_state.get('gallery_size')

    for idx, file in enumerate(all_files[:limit]):
        display_name = os.path.basename(file)
        is_png = file.endswith('.png')
        with cols[idx % 2]:
            # NOTE(review): this counter only grows here, so the widget keys
            # below differ on every rerun. Streamlit ties widget state to
            # the key; verify that checkbox toggles and delete clicks
            # actually survive the rerun they trigger.
            st.session_state['unique_counter'] += 1
            unique_id = st.session_state['unique_counter']

            if is_png:
                # Context manager releases the file handle once rendered.
                with Image.open(file) as preview:
                    st.image(preview, caption=display_name, use_container_width=True)
            else:
                doc = fitz.open(file)
                try:
                    # Half-scale render of the first page as the thumbnail.
                    pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
                    img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                    st.image(img, caption=display_name, use_container_width=True)
                finally:
                    # Close the document even if rendering fails.
                    doc.close()

            checkbox_key = f"asset_{file}_{unique_id}"
            st.session_state['asset_checkboxes'][file] = st.checkbox(
                "Use for SFT/Input",
                value=st.session_state['asset_checkboxes'].get(file, False),
                key=checkbox_key,
            )

            mime_type = "image/png" if is_png else "application/pdf"
            st.markdown(get_download_link(file, mime_type, "Snag It! 📥"), unsafe_allow_html=True)

            if st.button("Zap It! 🗑️", key=f"delete_{file}_{unique_id}"):
                os.remove(file)
                st.session_state['asset_checkboxes'].pop(file, None)
                st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
                # st.rerun() is the supported replacement for the deprecated
                # st.experimental_rerun().
                st.rerun()
670
+
671
# Call the gallery update once after all tabs have been processed, so assets
# created by any tab during this run show up in the sidebar.
update_gallery()

# Finally, update the Action Logs and History in the sidebar.
st.sidebar.subheader("Action Logs 📜")
for record in log_records:
    # NOTE(review): assumes log_records holds logging.LogRecord objects —
    # confirm against where the list is filled. On a LogRecord, `asctime`
    # and `message` only exist after a Formatter has processed it, so fall
    # back to the always-present `created` timestamp and getMessage().
    timestamp = getattr(record, "asctime", None) or time.strftime(
        "%Y-%m-%d %H:%M:%S", time.localtime(record.created)
    )
    message = getattr(record, "message", None) or record.getMessage()
    st.sidebar.write(f"{timestamp} - {record.levelname} - {message}")

st.sidebar.subheader("History 📜")
for entry in st.session_state.get("history", []):
    # Skip None placeholders; every other entry is written as-is.
    if entry is not None:
        st.sidebar.write(entry)