Spaces:

awacke1
/

TorchTransformers-CV-SFT

Running

App Files Community

awacke1 committed on Mar 26

Commit

4301b1c

verified ·

1 Parent(s): a57b24e

Update app.py

Browse files

Files changed (1) hide show

app.py +161 -146

app.py CHANGED Viewed

@@ -53,23 +53,24 @@ st.set_page_config(
     }
 )
-st.session_state.setdefault('history', [])  # 🌱 History: starting fresh if empty!
-st.session_state.setdefault('builder', None)  # 🛠️ Builder: set up the builder if it's missing!
-st.session_state.setdefault('model_loaded', False)  # 🚦 Model Loaded: mark as not loaded by default!
-st.session_state.setdefault('processing', {})  # ⏳ Processing: initialize processing state as an empty dict!
-st.session_state.setdefault('asset_checkboxes', {})  # ✅ Asset Checkboxes: default to an empty dictionary!
-st.session_state.setdefault('downloaded_pdfs', {})  # 📄 Downloaded PDFs: start with no PDFs downloaded!
-st.session_state.setdefault('unique_counter', 0)  # 🔢 Unique Counter: initialize the counter to zero!
-st.session_state.setdefault('selected_model_type', "Causal LM")  # 🧠 Selected Model Type: default to "Causal LM"!
-st.session_state.setdefault('selected_model', "None")  # 🤖 Selected Model: set to "None" if not already set!
-st.session_state.setdefault('cam0_file', None)  # 📸 Cam0 File: no file loaded by default!
-st.session_state.setdefault('cam1_file', None)  # 📸 Cam1 File: no file loaded by default!
-# Create a single placeholder for the asset gallery in the sidebar.
 if 'asset_gallery_container' not in st.session_state:
     st.session_state['asset_gallery_container'] = st.sidebar.empty()
-@dataclass  # 🎨 ModelConfig: A blueprint for model configurations!
 class ModelConfig:
     name: str
     base_model: str
@@ -77,46 +78,48 @@ class ModelConfig:
     domain: Optional[str] = None
     model_type: str = "causal_lm"
     @property
-    def model_path(self): return f"models/{self.name}"  # 🚀 Model Path: Home base for brilliance!
-@dataclass  # 🎨 DiffusionConfig: Where diffusion magic takes shape!
 class DiffusionConfig:
     name: str
     base_model: str
     size: str
     domain: Optional[str] = None
     @property
-    def model_path(self): return f"diffusion_models/{self.name}"  # 🚀 Diffusion Path: Let the diffusion begin!
-class ModelBuilder:  # 🔧 ModelBuilder: Crafting AI wonders with wit!
-    def __init__(self):  # 🚀 Initialize: Setting up the AI factory!
-        self.config = None  # No config yet—waiting for genius!
-        self.model = None  # Model not built until the magic happens!
-        self.tokenizer = None  # Tokenizer: Ready to speak in AI!
-        self.jokes = [  # 🤣 Jokes to keep the circuits laughing!
             "Why did the AI go to therapy? Too many layers to unpack! 😂",
             "Training complete! Time for a binary coffee break. ☕",
             "I told my neural network a joke; it couldn't stop dropping bits! 🤖",
             "I asked the AI for a pun, and it said, 'I'm punning on parallel processing!' 😄",
             "Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
         ]
-    def load_model(self, model_path: str, config: Optional[ModelConfig] = None):  # 🔄 load_model: Booting up genius!
-        with st.spinner(f"Loading {model_path}... ⏳"):  # ⏳ Spinner: Genius loading...
             self.model = AutoModelForCausalLM.from_pretrained(model_path)
             self.tokenizer = AutoTokenizer.from_pretrained(model_path)
-            if self.tokenizer.pad_token is None:
-                self.tokenizer.pad_token = self.tokenizer.eos_token  # 🔧 Fix pad token if missing!
-            if config:
-                self.config = config  # 🛠️ Config loaded—setting the stage!
-            self.model.to("cuda" if torch.cuda.is_available() else "cpu")  # 💻 Deploying the model to its device!
-        st.success(f"Model loaded! 🎉 {random.choice(self.jokes)}")  # 🎉 Success: Model is now in orbit!
         return self
-    def save_model(self, path: str):  # 💾 save_model: Securing your masterpiece!
-        with st.spinner("Saving model... 💾"):  # ⏳ Spinner: Saving brilliance...
             os.makedirs(os.path.dirname(path), exist_ok=True)
             self.model.save_pretrained(path)
-            self.tokenizer.save_pretrained(path)  # 📂 Directory magic: Creating and saving!
-        st.success(f"Model saved at {path}! ✅")  # ✅ Success: Your model is safely stored!
 class DiffusionBuilder:
     def __init__(self):
@@ -137,32 +140,31 @@ class DiffusionBuilder:
     def generate(self, prompt: str):
         return self.pipeline(prompt, num_inference_steps=20).images[0]
-def generate_filename(sequence, ext="png"):
-    return f"{sequence}_{time.strftime('%d%m%Y%H%M%S')}.{ext}"  # ⏳ Generate filename with timestamp magic!
 def pdf_url_to_filename(url):
-    return re.sub(r'[<>:"/\\|?*]', '_', url) + ".pdf"  # 📄 Convert URL to a safe PDF filename – no hackers allowed!
 def get_download_link(file_path, mime_type="application/pdf", label="Download"):
-    return f'<a href="data:{mime_type};base64,{base64.b64encode(open(file_path, "rb").read()).decode()}" download="{os.path.basename(file_path)}">{label}</a>'  # 🔗 Create a download link – click it like it's hot!
-def zip_directory(directory_path, zip_path):
     with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
         [zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), os.path.dirname(directory_path)))
-         for root, _, files in os.walk(directory_path) for file in files]  # 🎁 Zip directory: Packing files faster than Santa on Christmas Eve!
 def get_model_files(model_type="causal_lm"):
-    return [d for d in glob.glob("models/*" if model_type == "causal_lm" else "diffusion_models/*") if os.path.isdir(d)] or ["None"]  # 📂 Get model files: Hunting directories like a pro!
 def get_gallery_files(file_types=["png", "pdf"]):
-    return sorted(list({f for ext in file_types for f in glob.glob(f"*.{ext}")}))  # 🖼️ Get gallery files: Finding art in a digital haystack!
 def get_pdf_files():
-    return sorted(glob.glob("*.pdf"))  # 📄 Get PDF files: Sorted and served – no paper cuts here!
-# 📥 Download PDF: Delivering docs faster than a caffeinated courier!
 def download_pdf(url, output_path):
-    try:
         response = requests.get(url, stream=True, timeout=10)
         if response.status_code == 200:
             with open(output_path, "wb") as f:
@@ -171,13 +173,13 @@ def download_pdf(url, output_path):
             ret = True
         else:
             ret = False
-    except requests.RequestException as e:
         logger.error(f"Failed to download {url}: {e}")
         ret = False
-    return ret
-# 📚 Async PDF Snapshot: Snap your PDF pages without blocking—juggle pages like a ninja! 🥷
-async def process_pdf_snapshot(pdf_path, mode="single"):
     start_time = time.time()
     status = st.empty()
     status.text(f"Processing PDF Snapshot ({mode})... (0s)")
@@ -207,14 +209,13 @@ async def process_pdf_snapshot(pdf_path, mode="single"):
         doc.close()
         elapsed = int(time.time() - start_time)
         status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
-        update_gallery()
         return output_files
     except Exception as e:
         status.error(f"Failed to process PDF: {str(e)}")
         return []
-# 😎 Async OCR: Convert images to text while your app keeps on groovin'—no blocking, just rocking! 🎸
-async def process_ocr(image, output_file):
     start_time = time.time()
     status = st.empty()
     status.text("Processing GOT-OCR2_0... (0s)")
@@ -228,97 +229,69 @@ async def process_ocr(image, output_file):
     status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
     async with aiofiles.open(output_file, "w") as f:
         await f.write(result)
-    update_gallery()
     return result
-# 🧞 Async Image Gen: Your image genie—wishing up pictures while the event loop keeps the party going! 🎉
-async def process_image_gen(prompt, output_file):
     start_time = time.time()
     status = st.empty()
     status.text("Processing Image Gen... (0s)")
-    pipeline = st.session_state['builder'].pipeline if st.session_state.get('builder') and isinstance(st.session_state['builder'], DiffusionBuilder) and st.session_state['builder'].pipeline else StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu")
     gen_image = pipeline(prompt, num_inference_steps=20).images[0]
     elapsed = int(time.time() - start_time)
     status.text(f"Image Gen completed in {elapsed}s!")
     gen_image.save(output_file)
-    update_gallery()
     return gen_image
-# 🖼️ GPT-Image Interpreter: Turning pixels into prose!
-def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto"):
     buffered = BytesIO()
-    image.save(buffered, format="PNG")  # 💾 Save the image in-memory as PNG—no hard drives harmed!
-    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")  # 🔐 Encode image data in Base64 for secure, inline transmission!
-    messages = [{"role": "user", "content": [
-                    {"type": "text", "text": prompt},
-                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": detail}}
-                ]}]
     try:
         response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
         return response.choices[0].message.content
     except Exception as e:
         return f"Error processing image with GPT: {str(e)}"
-# 📝 GPT-Text Alchemist: Merging your prompt and text into digital gold!
-def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
     messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
-    try:
         response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
         return response.choices[0].message.content
     except Exception as e:
         return f"Error processing text with GPT: {str(e)}"
-st.sidebar.subheader("Gallery Settings")  # 🎨 Sidebar Gallery: Customize your creative space!
-st.session_state.setdefault('gallery_size', 2)  # 🔧 Setting default gallery size to 2 if it's missing!
-st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")  # 🎚️ Slide to adjust your gallery size and bring balance to your art!
-# 📸 Gallery Updater: Making your assets dazzle and disappear faster than a magician's rabbit! 🐇✨
-def update_gallery():
-    container = st.session_state['asset_gallery_container']
-    container.empty()  # Clear previous gallery content
-    all_files = get_gallery_files()  # 🔍 Grab all gallery files like a digital treasure hunt!
-    if all_files:
-        with container:
-            st.sidebar.subheader("Asset Gallery 📸📖")
-            cols = st.sidebar.columns(2)
-            for idx, file in enumerate(all_files[:st.session_state['gallery_size']]):
-                with cols[idx % 2]:
-                    st.session_state['unique_counter'] += 1
-                    unique_id = st.session_state['unique_counter']
-                    if file.endswith('.png'):
-                        st.image(Image.open(file), caption=os.path.basename(file), use_container_width=True)
-                    else:
-                        doc = fitz.open(file)
-                        pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
-                        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
-                        st.image(img, caption=os.path.basename(file), use_container_width=True)
-                        doc.close()
-                    checkbox_key = f"asset_{file}_{unique_id}"
-                    st.session_state['asset_checkboxes'][file] = st.checkbox("Use for SFT/Input", value=st.session_state['asset_checkboxes'].get(file, False), key=checkbox_key)
-                    mime_type = "image/png" if file.endswith('.png') else "application/pdf"
-                    st.markdown(get_download_link(file, mime_type, "Snag It! 📥"), unsafe_allow_html=True)
-                    if st.button("Zap It! 🗑️", key=f"delete_{file}_{unique_id}"):
-                        os.remove(file)
-                        st.session_state['asset_checkboxes'].pop(file, None)
-                        st.sidebar.success(f"Asset {os.path.basename(file)} vaporized! 💨")
-                        st.rerun()
-st.sidebar.subheader("Action Logs 📜")  # 📝 Action Logs: Where our system whispers its secrets!
-with st.sidebar:
-    [st.write(f"{record.asctime} - {record.levelname} - {record.message}") for record in log_records]
-st.sidebar.subheader("History 📜")  # 🕰️ History: A walk down memory lane, one log at a time!
-with st.sidebar:
-    [st.write(entry) for entry in st.session_state['history']]
-tabs = st.tabs(["Camera Snap 📷", "Download PDFs 📥", "Test OCR 🔍", "Build Titan 🌱", "Test Image Gen 🎨", "PDF Process 📄", "Image Process 🖼️", "MD Gallery 📚"])
 (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf_process, tab_image_process, tab_md_gallery) = tabs
 with tab_camera:
-    st.header("Camera Snap 📷")  # 🎥 Header: Let’s capture those Kodak moments!
-    st.subheader("Single Capture")  # 📸 Subheader: One snap at a time, no double exposure!
     cols = st.columns(2)
     with cols[0]:
         cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
         if cam0_img:
@@ -329,12 +302,9 @@ with tab_camera:
                 f.write(cam0_img.getvalue())
             st.session_state['cam0_file'] = filename
             entry = f"Snapshot from Cam 0: {filename}"
-            if entry not in st.session_state['history']:
-                st.session_state['history'] = [e for e in st.session_state['history'] if not e.startswith("Snapshot from Cam 0:")] + [entry]
             st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
             logger.info(f"Saved snapshot from Camera 0: {filename}")
-            update_gallery()
     with cols[1]:
         cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
         if cam1_img:
@@ -345,12 +315,11 @@ with tab_camera:
                 f.write(cam1_img.getvalue())
             st.session_state['cam1_file'] = filename
             entry = f"Snapshot from Cam 1: {filename}"
-            if entry not in st.session_state['history']:
-                st.session_state['history'] = [e for e in st.session_state['history'] if not e.startswith("Snapshot from Cam 1:")] + [entry]
             st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
             logger.info(f"Saved snapshot from Camera 1: {filename}")
-            update_gallery()
 with tab_download:
     st.header("Download PDFs 📥")
     if st.button("Examples 📚"):
@@ -369,7 +338,6 @@ with tab_download:
             "https://arxiv.org/pdf/2106.10504"
         ]
         st.session_state['pdf_urls'] = "\n".join(example_urls)
     url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200)
     if st.button("Robo-Download 🤖"):
         urls = url_input.strip().split("\n")
@@ -386,8 +354,7 @@ with tab_download:
                         st.session_state['downloaded_pdfs'][url] = output_path
                         logger.info(f"Downloaded PDF from {url} to {output_path}")
                         entry = f"Downloaded PDF: {output_path}"
-                        if entry not in st.session_state['history']:
-                            st.session_state['history'].append(entry)
                         st.session_state['asset_checkboxes'][output_path] = True
                     else:
                         st.error(f"Failed to nab {url} 😿")
@@ -396,8 +363,6 @@ with tab_download:
                     st.session_state['downloaded_pdfs'][url] = output_path
                 progress_bar.progress((idx + 1) / total_urls)
         status_text.text("Robo-Download complete! 🚀")
-        update_gallery()
     mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode")
     if st.button("Snapshot Selected 📸"):
         selected_pdfs = [path for path in get_gallery_files() if path.endswith('.pdf') and st.session_state['asset_checkboxes'].get(path, False)]
@@ -406,15 +371,18 @@ with tab_download:
                 if not os.path.exists(pdf_path):
                     st.warning(f"File not found: {pdf_path}. Skipping.")
                     continue
-                mode_key = {"Single Page (High-Res)": "single", "Two Pages (High-Res)": "twopage", "All Pages (High-Res)": "allpages"}[mode]
                 snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
                 for snapshot in snapshots:
                     st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
                     st.session_state['asset_checkboxes'][snapshot] = True
-            update_gallery()
         else:
             st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar.")
 with tab_ocr:
     st.header("Test OCR 🔍")
     all_files = get_gallery_files()
@@ -433,8 +401,7 @@ with tab_ocr:
                 result = asyncio.run(process_ocr(image, output_file))
                 full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
                 entry = f"OCR Test: {file} -> {output_file}"
-                if entry not in st.session_state['history']:
-                    st.session_state['history'].append(entry)
             md_output_file = f"full_ocr_{int(time.time())}.md"
             with open(md_output_file, "w") as f:
                 f.write(full_text)
@@ -455,8 +422,7 @@ with tab_ocr:
                 st.session_state['processing']['ocr'] = True
                 result = asyncio.run(process_ocr(image, output_file))
                 entry = f"OCR Test: {selected_file} -> {output_file}"
-                if entry not in st.session_state['history']:
-                    st.session_state['history'].append(entry)
                 st.text_area("OCR Result", result, height=200, key="ocr_result")
                 st.success(f"OCR output saved to {output_file}")
                 st.session_state['processing']['ocr'] = False
@@ -470,8 +436,7 @@ with tab_ocr:
                     result = asyncio.run(process_ocr(image, output_file))
                     full_text += f"## Page {i + 1}\n\n{result}\n\n"
                     entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
-                    if entry not in st.session_state['history']:
-                        st.session_state['history'].append(entry)
                 md_output_file = f"full_ocr_{os.path.basename(selected_file)}_{int(time.time())}.md"
                 with open(md_output_file, "w") as f:
                     f.write(full_text)
@@ -480,12 +445,13 @@ with tab_ocr:
     else:
         st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
 with tab_build:
     st.header("Build Titan 🌱")
     model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
     base_model = st.selectbox(
         "Select Tiny Model",
-        ["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"] if model_type == "Causal LM"
         else ["OFA-Sys/small-stable-diffusion-v0", "stabilityai/stable-diffusion-2-base"]
     )
     model_name = st.text_input("Model Name", f"tiny-titan-{int(time.time())}")
@@ -495,14 +461,18 @@ with tab_build:
             name=model_name, base_model=base_model, size="small", domain=domain
         )
         builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
-        builder.load_model(base_model, config); builder.save_model(config.model_path)
-        st.session_state['builder'] = builder; st.session_state['model_loaded'] = True
-        st.session_state['selected_model_type'] = model_type; st.session_state['selected_model'] = config.model_path
         entry = f"Built {model_type} model: {model_name}"
-        if entry not in st.session_state['history']:
-            st.session_state['history'].append(entry)
-        st.success(f"Model downloaded and saved to {config.model_path}! 🎉"); st.rerun()
 with tab_imggen:
     st.header("Test Image Gen 🎨")
     all_files = get_gallery_files()
@@ -523,15 +493,14 @@ with tab_imggen:
                 st.session_state['processing']['gen'] = True
                 result = asyncio.run(process_image_gen(prompt, output_file))
                 entry = f"Image Gen Test: {prompt} -> {output_file}"
-                if entry not in st.session_state['history']:
-                    st.session_state['history'].append(entry)
                 st.image(result, caption="Generated Image", use_container_width=True)
                 st.success(f"Image saved to {output_file}")
                 st.session_state['processing']['gen'] = False
     else:
         st.warning("No images or PDFs in gallery yet. Use Camera Snap or Download PDFs!")
-    update_gallery()
 with tab_pdf_process:
     st.header("PDF Process")
     st.subheader("Upload PDFs for GPT-based text extraction")
@@ -590,6 +559,7 @@ with tab_pdf_process:
         st.success(f"PDF processing complete. MD file saved as {output_filename}")
         st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed PDF MD"), unsafe_allow_html=True)
 with tab_image_process:
     st.header("Image Process")
     st.subheader("Upload Images for GPT-based OCR")
@@ -614,6 +584,7 @@ with tab_image_process:
         st.success(f"Image processing complete. MD file saved as {output_filename}")
         st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed Image MD"), unsafe_allow_html=True)
 with tab_md_gallery:
     st.header("MD Gallery and GPT Processing")
     gpt_models = ["gpt-4o", "gpt-4o-mini"]
@@ -665,3 +636,47 @@ with tab_md_gallery:
                 st.warning("No MD files selected.")
     else:
         st.warning("No MD files found.")

     }
 )
+# Set up default session state values.
+st.session_state.setdefault('history', [])             # History: starting fresh if empty!
+st.session_state.setdefault('builder', None)             # Builder: set up if missing.
+st.session_state.setdefault('model_loaded', False)       # Model Loaded: not loaded by default.
+st.session_state.setdefault('processing', {})            # Processing: initialize as an empty dict.
+st.session_state.setdefault('asset_checkboxes', {})      # Asset Checkboxes: default to an empty dict.
+st.session_state.setdefault('downloaded_pdfs', {})       # Downloaded PDFs: start with none.
+st.session_state.setdefault('unique_counter', 0)         # Unique Counter: initialize to zero.
+st.session_state.setdefault('selected_model_type', "Causal LM")
+st.session_state.setdefault('selected_model', "None")
+st.session_state.setdefault('cam0_file', None)
+st.session_state.setdefault('cam1_file', None)
+# Create a single container for the asset gallery in the sidebar.
 if 'asset_gallery_container' not in st.session_state:
     st.session_state['asset_gallery_container'] = st.sidebar.empty()
+@dataclass  # ModelConfig: A blueprint for model configurations.
 class ModelConfig:
     name: str
     base_model: str
     domain: Optional[str] = None
     model_type: str = "causal_lm"
     @property
+    def model_path(self):
+        return f"models/{self.name}"
+@dataclass  # DiffusionConfig: Where diffusion magic takes shape.
 class DiffusionConfig:
     name: str
     base_model: str
     size: str
     domain: Optional[str] = None
     @property
+    def model_path(self):
+        return f"diffusion_models/{self.name}"
+class ModelBuilder:
+    def __init__(self):
+        self.config = None
+        self.model = None
+        self.tokenizer = None
+        self.jokes = [
             "Why did the AI go to therapy? Too many layers to unpack! 😂",
             "Training complete! Time for a binary coffee break. ☕",
             "I told my neural network a joke; it couldn't stop dropping bits! 🤖",
             "I asked the AI for a pun, and it said, 'I'm punning on parallel processing!' 😄",
             "Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
         ]
+    def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
+        with st.spinner(f"Loading {model_path}... ⏳"):
             self.model = AutoModelForCausalLM.from_pretrained(model_path)
             self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+            if config:
+                self.config = config
+            self.model.to("cuda" if torch.cuda.is_available() else "cpu")
+        st.success(f"Model loaded! 🎉 {random.choice(self.jokes)}")
         return self
+    def save_model(self, path: str):
+        with st.spinner("Saving model... 💾"):
             os.makedirs(os.path.dirname(path), exist_ok=True)
             self.model.save_pretrained(path)
+            self.tokenizer.save_pretrained(path)
+        st.success(f"Model saved at {path}! ✅")
 class DiffusionBuilder:
     def __init__(self):
     def generate(self, prompt: str):
         return self.pipeline(prompt, num_inference_steps=20).images[0]
+def generate_filename(sequence, ext="png"):
+    return f"{sequence}_{time.strftime('%d%m%Y%H%M%S')}.{ext}"
 def pdf_url_to_filename(url):
+    return re.sub(r'[<>:"/\\|?*]', '_', url) + ".pdf"
 def get_download_link(file_path, mime_type="application/pdf", label="Download"):
+    return f'<a href="data:{mime_type};base64,{base64.b64encode(open(file_path, "rb").read()).decode()}" download="{os.path.basename(file_path)}">{label}</a>'
+def zip_directory(directory_path, zip_path):
     with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
         [zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), os.path.dirname(directory_path)))
+         for root, _, files in os.walk(directory_path) for file in files]
 def get_model_files(model_type="causal_lm"):
+    return [d for d in glob.glob("models/*" if model_type == "causal_lm" else "diffusion_models/*") if os.path.isdir(d)] or ["None"]
 def get_gallery_files(file_types=["png", "pdf"]):
+    return sorted(list({f for ext in file_types for f in glob.glob(f"*.{ext}")}))
 def get_pdf_files():
+    return sorted(glob.glob("*.pdf"))
 def download_pdf(url, output_path):
+    try:
         response = requests.get(url, stream=True, timeout=10)
         if response.status_code == 200:
             with open(output_path, "wb") as f:
             ret = True
         else:
             ret = False
+    except requests.RequestException as e:
         logger.error(f"Failed to download {url}: {e}")
         ret = False
+    return ret
+# Async PDF Snapshot: Snap your PDF pages without blocking.
+async def process_pdf_snapshot(pdf_path, mode="single"):
     start_time = time.time()
     status = st.empty()
     status.text(f"Processing PDF Snapshot ({mode})... (0s)")
         doc.close()
         elapsed = int(time.time() - start_time)
         status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
         return output_files
     except Exception as e:
         status.error(f"Failed to process PDF: {str(e)}")
         return []
+# Async OCR: Convert images to text.
+async def process_ocr(image, output_file):
     start_time = time.time()
     status = st.empty()
     status.text("Processing GOT-OCR2_0... (0s)")
     status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
     async with aiofiles.open(output_file, "w") as f:
         await f.write(result)
     return result
+# Async Image Gen: Your image genie.
+async def process_image_gen(prompt, output_file):
     start_time = time.time()
     status = st.empty()
     status.text("Processing Image Gen... (0s)")
+    pipeline = (st.session_state['builder'].pipeline
+                if st.session_state.get('builder') and isinstance(st.session_state['builder'], DiffusionBuilder)
+                and st.session_state['builder'].pipeline
+                else StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu"))
     gen_image = pipeline(prompt, num_inference_steps=20).images[0]
     elapsed = int(time.time() - start_time)
     status.text(f"Image Gen completed in {elapsed}s!")
     gen_image.save(output_file)
     return gen_image
+# GPT-Image Interpreter: Turning pixels into prose!
+def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto"):
     buffered = BytesIO()
+    image.save(buffered, format="PNG")
+    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    messages = [{
+        "role": "user",
+        "content": [
+            {"type": "text", "text": prompt},
+            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": detail}}
+        ]
+    }]
     try:
         response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
         return response.choices[0].message.content
     except Exception as e:
         return f"Error processing image with GPT: {str(e)}"
+# GPT-Text Alchemist: Merging prompt and text.
+def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
     messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
+    try:
         response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
         return response.choices[0].message.content
     except Exception as e:
         return f"Error processing text with GPT: {str(e)}"
+# ----------------- SIDEBAR UPDATES -----------------
+# Sidebar: Gallery Settings
+st.sidebar.subheader("Gallery Settings")
+st.session_state.setdefault('gallery_size', 2)
+st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
+# ----------------- TAB SETUP -----------------
+tabs = st.tabs([
+    "Camera Snap 📷", "Download PDFs 📥", "Test OCR 🔍", "Build Titan 🌱",
+    "Test Image Gen 🎨", "PDF Process 📄", "Image Process 🖼️", "MD Gallery 📚"
+])
 (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf_process, tab_image_process, tab_md_gallery) = tabs
+# ----------------- TAB: Camera Snap -----------------
 with tab_camera:
+    st.header("Camera Snap 📷")
+    st.subheader("Single Capture")
     cols = st.columns(2)
     with cols[0]:
         cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
         if cam0_img:
                 f.write(cam0_img.getvalue())
             st.session_state['cam0_file'] = filename
             entry = f"Snapshot from Cam 0: {filename}"
+            st.session_state['history'].append(entry)
             st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
             logger.info(f"Saved snapshot from Camera 0: {filename}")
     with cols[1]:
         cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
         if cam1_img:
                 f.write(cam1_img.getvalue())
             st.session_state['cam1_file'] = filename
             entry = f"Snapshot from Cam 1: {filename}"
+            st.session_state['history'].append(entry)
             st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
             logger.info(f"Saved snapshot from Camera 1: {filename}")
+# ----------------- TAB: Download PDFs -----------------
 with tab_download:
     st.header("Download PDFs 📥")
     if st.button("Examples 📚"):
             "https://arxiv.org/pdf/2106.10504"
         ]
         st.session_state['pdf_urls'] = "\n".join(example_urls)
     url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200)
     if st.button("Robo-Download 🤖"):
         urls = url_input.strip().split("\n")
                         st.session_state['downloaded_pdfs'][url] = output_path
                         logger.info(f"Downloaded PDF from {url} to {output_path}")
                         entry = f"Downloaded PDF: {output_path}"
+                        st.session_state['history'].append(entry)
                         st.session_state['asset_checkboxes'][output_path] = True
                     else:
                         st.error(f"Failed to nab {url} 😿")
                     st.session_state['downloaded_pdfs'][url] = output_path
                 progress_bar.progress((idx + 1) / total_urls)
         status_text.text("Robo-Download complete! 🚀")
     mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode")
     if st.button("Snapshot Selected 📸"):
         selected_pdfs = [path for path in get_gallery_files() if path.endswith('.pdf') and st.session_state['asset_checkboxes'].get(path, False)]
                 if not os.path.exists(pdf_path):
                     st.warning(f"File not found: {pdf_path}. Skipping.")
                     continue
+                mode_key = {"Single Page (High-Res)": "single",
+                            "Two Pages (High-Res)": "twopage",
+                            "All Pages (High-Res)": "allpages"}[mode]
                 snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
                 for snapshot in snapshots:
                     st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
                     st.session_state['asset_checkboxes'][snapshot] = True
+            # No update_gallery() call here; will update once later.
         else:
             st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar.")
+# ----------------- TAB: Test OCR -----------------
 with tab_ocr:
     st.header("Test OCR 🔍")
     all_files = get_gallery_files()
                 result = asyncio.run(process_ocr(image, output_file))
                 full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
                 entry = f"OCR Test: {file} -> {output_file}"
+                st.session_state['history'].append(entry)
             md_output_file = f"full_ocr_{int(time.time())}.md"
             with open(md_output_file, "w") as f:
                 f.write(full_text)
                 st.session_state['processing']['ocr'] = True
                 result = asyncio.run(process_ocr(image, output_file))
                 entry = f"OCR Test: {selected_file} -> {output_file}"
+                st.session_state['history'].append(entry)
                 st.text_area("OCR Result", result, height=200, key="ocr_result")
                 st.success(f"OCR output saved to {output_file}")
                 st.session_state['processing']['ocr'] = False
                     result = asyncio.run(process_ocr(image, output_file))
                     full_text += f"## Page {i + 1}\n\n{result}\n\n"
                     entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
+                    st.session_state['history'].append(entry)
                 md_output_file = f"full_ocr_{os.path.basename(selected_file)}_{int(time.time())}.md"
                 with open(md_output_file, "w") as f:
                     f.write(full_text)
     else:
         st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
+# ----------------- TAB: Build Titan -----------------
 with tab_build:
     st.header("Build Titan 🌱")
     model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
     base_model = st.selectbox(
         "Select Tiny Model",
+        ["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"] if model_type == "Causal LM"
         else ["OFA-Sys/small-stable-diffusion-v0", "stabilityai/stable-diffusion-2-base"]
     )
     model_name = st.text_input("Model Name", f"tiny-titan-{int(time.time())}")
             name=model_name, base_model=base_model, size="small", domain=domain
         )
         builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
+        builder.load_model(base_model, config)
+        builder.save_model(config.model_path)
+        st.session_state['builder'] = builder
+        st.session_state['model_loaded'] = True
+        st.session_state['selected_model_type'] = model_type
+        st.session_state['selected_model'] = config.model_path
         entry = f"Built {model_type} model: {model_name}"
+        st.session_state['history'].append(entry)
+        st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
+        st.experimental_rerun()
+# ----------------- TAB: Test Image Gen -----------------
 with tab_imggen:
     st.header("Test Image Gen 🎨")
     all_files = get_gallery_files()
                 st.session_state['processing']['gen'] = True
                 result = asyncio.run(process_image_gen(prompt, output_file))
                 entry = f"Image Gen Test: {prompt} -> {output_file}"
+                st.session_state['history'].append(entry)
                 st.image(result, caption="Generated Image", use_container_width=True)
                 st.success(f"Image saved to {output_file}")
                 st.session_state['processing']['gen'] = False
     else:
         st.warning("No images or PDFs in gallery yet. Use Camera Snap or Download PDFs!")
+# ----------------- TAB: PDF Process -----------------
 with tab_pdf_process:
     st.header("PDF Process")
     st.subheader("Upload PDFs for GPT-based text extraction")
         st.success(f"PDF processing complete. MD file saved as {output_filename}")
         st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed PDF MD"), unsafe_allow_html=True)
+# ----------------- TAB: Image Process -----------------
 with tab_image_process:
     st.header("Image Process")
     st.subheader("Upload Images for GPT-based OCR")
         st.success(f"Image processing complete. MD file saved as {output_filename}")
         st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed Image MD"), unsafe_allow_html=True)
+# ----------------- TAB: MD Gallery -----------------
 with tab_md_gallery:
     st.header("MD Gallery and GPT Processing")
     gpt_models = ["gpt-4o", "gpt-4o-mini"]
                 st.warning("No MD files selected.")
     else:
         st.warning("No MD files found.")
+# ----------------- FINAL SIDEBAR UPDATE -----------------
+# Update the asset gallery once (using its container).
+def update_gallery():
+    container = st.session_state['asset_gallery_container']
+    container.empty()  # Clear previous gallery content.
+    all_files = get_gallery_files()
+    if all_files:
+        container.markdown("### Asset Gallery 📸📖")
+        cols = container.columns(2)
+        for idx, file in enumerate(all_files[:st.session_state['gallery_size']]):
+            with cols[idx % 2]:
+                st.session_state['unique_counter'] += 1
+                unique_id = st.session_state['unique_counter']
+                if file.endswith('.png'):
+                    st.image(Image.open(file), caption=os.path.basename(file), use_container_width=True)
+                else:
+                    doc = fitz.open(file)
+                    pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
+                    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+                    st.image(img, caption=os.path.basename(file), use_container_width=True)
+                    doc.close()
+                checkbox_key = f"asset_{file}_{unique_id}"
+                st.session_state['asset_checkboxes'][file] = st.checkbox("Use for SFT/Input", value=st.session_state['asset_checkboxes'].get(file, False), key=checkbox_key)
+                mime_type = "image/png" if file.endswith('.png') else "application/pdf"
+                st.markdown(get_download_link(file, mime_type, "Snag It! 📥"), unsafe_allow_html=True)
+                if st.button("Zap It! 🗑️", key=f"delete_{file}_{unique_id}"):
+                    os.remove(file)
+                    st.session_state['asset_checkboxes'].pop(file, None)
+                    st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
+                    st.experimental_rerun()
+# Call the gallery update once after all tabs have been processed.
+update_gallery()
+# Finally, update the Action Logs and History in the sidebar.
+st.sidebar.subheader("Action Logs 📜")
+for record in log_records:
+    st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
+st.sidebar.subheader("History 📜")
+for entry in st.session_state.get("history", []):
+    if entry is not None:
+        st.sidebar.write(entry)