Spaces:

awacke1
/

TorchTransformers-CV-SFT

Running

App Files Community

awacke1 committed on Mar 21

Commit

01d7524

verified ·

1 Parent(s): 292333e

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -21

app.py CHANGED Viewed

@@ -5,12 +5,11 @@ import base64
 import streamlit as st
 import pandas as pd
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 from torch.utils.data import Dataset, DataLoader
 import csv
 import time
 from dataclasses import dataclass
-from typing import Optional, Tuple
 import zipfile
 import math
 from PIL import Image
@@ -18,7 +17,7 @@ import random
 import logging
 import numpy as np
 import cv2
-from diffusers import DiffusionPipeline  # For FLUX.1 and LDM
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
@@ -42,7 +41,6 @@ st.set_page_config(
     }
 )
-# Session State Setup 🌍 - Persistent playground for our tiny titans!
 if 'captured_images' not in st.session_state:
     st.session_state['captured_images'] = []
 if 'cv_builder' not in st.session_state:
@@ -62,7 +60,6 @@ class DiffusionConfig:
     def model_path(self):
         return f"diffusion_models/{self.name}"
-# Datasets 🎲 - Feeding our titans with pixel snacks and text treats!
 class DiffusionDataset(Dataset):
     """Pixel party platter 🍕 - Images and text for diffusion delight!"""
     def __init__(self, images, texts):
@@ -82,8 +79,7 @@ class MicroDiffusionBuilder:
     def load_model(self, model_path: str, config: Optional[DiffusionConfig] = None):
         try:
             with st.spinner(f"Loading {model_path}... ⏳ (Tiny titan powering up!)"):
-                # Micro Diffusion isn’t on HF yet; use a small U-Net placeholder from diffusers
-                self.pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", custom_pipeline="small_diffusion")
                 self.pipeline.to("cuda" if torch.cuda.is_available() else "cpu")
                 if config:
                     self.config = config
@@ -154,7 +150,7 @@ class LatentDiffusionBuilder:
     def load_model(self, model_path: str, config: Optional[DiffusionConfig] = None):
         try:
             with st.spinner(f"Loading {model_path}... ⏳ (Latent titan rising!)"):
-                self.pipeline = DiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
                 self.pipeline.unet = torch.nn.Sequential(*list(self.pipeline.unet.children())[:2])  # Scale down U-Net
                 self.pipeline.to("cuda" if torch.cuda.is_available() else "cpu")
                 if config:
@@ -226,7 +222,7 @@ class FluxDiffusionBuilder:
     def load_model(self, model_path: str, config: Optional[DiffusionConfig] = None):
         try:
             with st.spinner(f"Loading {model_path}... ⏳ (Flux titan charging!)"):
-                self.pipeline = DiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
                 self.pipeline.to("cuda" if torch.cuda.is_available() else "cpu")
                 if config:
                     self.config = config
@@ -348,11 +344,11 @@ def update_gallery():
                 st.markdown(get_download_link(file, "image/png", "Download Snap 📸"), unsafe_allow_html=True)
 def get_available_video_devices():
-    """Camera roll call 🎥 - Who’s ready to shine?"""
-    video_devices = [f"Camera {i} 🎥" for i in range(6)]  # 6 cams as per your setup
     try:
         detected = []
-        for i in range(10):
             cap = cv2.VideoCapture(i, cv2.CAP_V4L2)
             if not cap.isOpened():
                 cap = cv2.VideoCapture(i)
@@ -361,9 +357,11 @@ def get_available_video_devices():
                 logger.info(f"Detected camera at index {i}")
                 cap.release()
         if detected:
-            video_devices = detected[:6]  # Cap at 6
     except Exception as e:
-        logger.error(f"Error detecting cameras: {str(e)}")
     return video_devices
 st.title("SFT Tiny Titans 🚀 (Small Diffusion Delight!)")
@@ -445,7 +443,7 @@ with tab1:
     st.header("Build Titan 🌱")
     model_type = st.selectbox("Diffusion Type", ["Micro Diffusion", "Latent Diffusion", "FLUX.1 Distilled"], key="build_type")
     base_model = st.selectbox("Select Tiny Model",
-        ["sony/micro-diffusion" if model_type == "Micro Diffusion" else "runwayml/stable-diffusion-v1-5" if model_type == "Latent Diffusion" else "black-forest-labs/flux.1-distilled"])
     model_name = st.text_input("Model Name", f"tiny-titan-{int(time.time())}")
     if st.button("Download Model ⬇️"):
         config = DiffusionConfig(name=model_name, base_model=base_model, size="small")
@@ -469,7 +467,6 @@ with tab2:
     st.write(f"🎉 Detected Cameras: {', '.join(video_devices)}")
     st.info("Switch cams in your browser settings (e.g., Chrome > Privacy > Camera) since I’m a browser star! 🌟")
-    # Camera 0 Settings
     st.subheader("Camera 0 🎬 - Lights, Camera, Action!")
     cam0_cols = st.columns(4)
     with cam0_cols[0]:
@@ -481,7 +478,6 @@ with tab2:
     with cam0_cols[3]:
         cam0_vis = st.selectbox("Show 🖼️", ["visible", "hidden", "collapsed"], index=0, key="cam0_vis", help="Label vibes: Visible, Sneaky, or Gone!")
-    # Camera 1 Settings
     st.subheader("Camera 1 🎥 - Roll the Film!")
     cam1_cols = st.columns(4)
     with cam1_cols[0]:
@@ -493,7 +489,6 @@ with tab2:
     with cam1_cols[3]:
         cam1_vis = st.selectbox("Show 🖼️", ["visible", "hidden", "collapsed"], index=0, key="cam1_vis", help="Label style: Show it, Hide it, Poof!")
-    # Capture Widgets
     cols = st.columns(2)
     with cols[0]:
         st.subheader(f"Camera 0 ({cam0_device}) 🎬")
@@ -539,7 +534,6 @@ with tab3:
     else:
         captured_images = get_gallery_files(["png"])
         if len(captured_images) >= 2:
-            # Use Case 1: Denoising (Micro Diffusion)
             st.subheader("Use Case 1: Denoise Snapshots 🌟")
             denoising_data = [{"image": img, "text": f"Denoised {os.path.basename(img).split('-')[4]} snap"} for img in captured_images[:min(len(captured_images), 10)]]
             denoising_edited = st.data_editor(pd.DataFrame(denoising_data), num_rows="dynamic", help="Craft denoising pairs! 🌟")
@@ -564,7 +558,6 @@ with tab3:
                     writer.writerow([row["image"], row["text"]])
             st.markdown(get_download_link(denoising_csv, "text/csv", "Download Denoising CSV 📜"), unsafe_allow_html=True)
-            # Use Case 2: Stylization (Latent Diffusion)
             st.subheader("Use Case 2: Stylize Snapshots 🎨")
             stylize_data = [{"image": img, "text": f"Neon {os.path.basename(img).split('-')[4]} style"} for img in captured_images[:min(len(captured_images), 10)]]
             stylize_edited = st.data_editor(pd.DataFrame(stylize_data), num_rows="dynamic", help="Craft stylized pairs! 🎨")
@@ -588,7 +581,6 @@ with tab3:
                     f.write(f"- `{row['image']}`: {row['text']}\n")
             st.markdown(get_download_link(stylize_md, "text/markdown", "Download Stylization MD 📝"), unsafe_allow_html=True)
-            # Use Case 3: Multi-Angle Generation (FLUX.1)
             st.subheader("Use Case 3: Multi-Angle Snapshots 🌐")
             multiangle_data = [{"image": img, "text": f"View from {os.path.basename(img).split('-')[4]}"} for img in captured_images[:min(len(captured_images), 10)]]
             multiangle_edited = st.data_editor(pd.DataFrame(multiangle_data), num_rows="dynamic", help="Craft multi-angle pairs! 🌐")

 import streamlit as st
 import pandas as pd
 import torch
 from torch.utils.data import Dataset, DataLoader
 import csv
 import time
 from dataclasses import dataclass
+from typing import Optional
 import zipfile
 import math
 from PIL import Image
 import logging
 import numpy as np
 import cv2
+from diffusers import DiffusionPipeline
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
     }
 )
 if 'captured_images' not in st.session_state:
     st.session_state['captured_images'] = []
 if 'cv_builder' not in st.session_state:
     def model_path(self):
         return f"diffusion_models/{self.name}"
 class DiffusionDataset(Dataset):
     """Pixel party platter 🍕 - Images and text for diffusion delight!"""
     def __init__(self, images, texts):
     def load_model(self, model_path: str, config: Optional[DiffusionConfig] = None):
         try:
             with st.spinner(f"Loading {model_path}... ⏳ (Tiny titan powering up!)"):
+                self.pipeline = DiffusionPipeline.from_pretrained(model_path, low_cpu_mem_usage=True)
                 self.pipeline.to("cuda" if torch.cuda.is_available() else "cpu")
                 if config:
                     self.config = config
     def load_model(self, model_path: str, config: Optional[DiffusionConfig] = None):
         try:
             with st.spinner(f"Loading {model_path}... ⏳ (Latent titan rising!)"):
+                self.pipeline = DiffusionPipeline.from_pretrained(model_path, low_cpu_mem_usage=True)
                 self.pipeline.unet = torch.nn.Sequential(*list(self.pipeline.unet.children())[:2])  # Scale down U-Net
                 self.pipeline.to("cuda" if torch.cuda.is_available() else "cpu")
                 if config:
     def load_model(self, model_path: str, config: Optional[DiffusionConfig] = None):
         try:
             with st.spinner(f"Loading {model_path}... ⏳ (Flux titan charging!)"):
+                self.pipeline = DiffusionPipeline.from_pretrained(model_path, low_cpu_mem_usage=True)
                 self.pipeline.to("cuda" if torch.cuda.is_available() else "cpu")
                 if config:
                     self.config = config
                 st.markdown(get_download_link(file, "image/png", "Download Snap 📸"), unsafe_allow_html=True)
 def get_available_video_devices():
+    """Camera roll call 🎥 - Who’s ready to shine? Fallback if OpenCV flops!"""
+    video_devices = [f"Camera {i} 🎥" for i in range(6)]  # Default to 6 cams
     try:
         detected = []
+        for i in range(6):  # Limit to 6 as per your setup
             cap = cv2.VideoCapture(i, cv2.CAP_V4L2)
             if not cap.isOpened():
                 cap = cv2.VideoCapture(i)
                 logger.info(f"Detected camera at index {i}")
                 cap.release()
         if detected:
+            video_devices = detected
+        else:
+            logger.warning("No cameras detected by OpenCV; using defaults")
     except Exception as e:
+        logger.error(f"Error detecting cameras: {str(e)} - Falling back to defaults")
     return video_devices
 st.title("SFT Tiny Titans 🚀 (Small Diffusion Delight!)")
     st.header("Build Titan 🌱")
     model_type = st.selectbox("Diffusion Type", ["Micro Diffusion", "Latent Diffusion", "FLUX.1 Distilled"], key="build_type")
     base_model = st.selectbox("Select Tiny Model",
+        ["CompVis/ldm-text2im-large-256" if model_type == "Micro Diffusion" else "runwayml/stable-diffusion-v1-5" if model_type == "Latent Diffusion" else "black-forest-labs/flux.1-distilled"])
     model_name = st.text_input("Model Name", f"tiny-titan-{int(time.time())}")
     if st.button("Download Model ⬇️"):
         config = DiffusionConfig(name=model_name, base_model=base_model, size="small")
     st.write(f"🎉 Detected Cameras: {', '.join(video_devices)}")
     st.info("Switch cams in your browser settings (e.g., Chrome > Privacy > Camera) since I’m a browser star! 🌟")
     st.subheader("Camera 0 🎬 - Lights, Camera, Action!")
     cam0_cols = st.columns(4)
     with cam0_cols[0]:
     with cam0_cols[3]:
         cam0_vis = st.selectbox("Show 🖼️", ["visible", "hidden", "collapsed"], index=0, key="cam0_vis", help="Label vibes: Visible, Sneaky, or Gone!")
     st.subheader("Camera 1 🎥 - Roll the Film!")
     cam1_cols = st.columns(4)
     with cam1_cols[0]:
     with cam1_cols[3]:
         cam1_vis = st.selectbox("Show 🖼️", ["visible", "hidden", "collapsed"], index=0, key="cam1_vis", help="Label style: Show it, Hide it, Poof!")
     cols = st.columns(2)
     with cols[0]:
         st.subheader(f"Camera 0 ({cam0_device}) 🎬")
     else:
         captured_images = get_gallery_files(["png"])
         if len(captured_images) >= 2:
             st.subheader("Use Case 1: Denoise Snapshots 🌟")
             denoising_data = [{"image": img, "text": f"Denoised {os.path.basename(img).split('-')[4]} snap"} for img in captured_images[:min(len(captured_images), 10)]]
             denoising_edited = st.data_editor(pd.DataFrame(denoising_data), num_rows="dynamic", help="Craft denoising pairs! 🌟")
                     writer.writerow([row["image"], row["text"]])
             st.markdown(get_download_link(denoising_csv, "text/csv", "Download Denoising CSV 📜"), unsafe_allow_html=True)
             st.subheader("Use Case 2: Stylize Snapshots 🎨")
             stylize_data = [{"image": img, "text": f"Neon {os.path.basename(img).split('-')[4]} style"} for img in captured_images[:min(len(captured_images), 10)]]
             stylize_edited = st.data_editor(pd.DataFrame(stylize_data), num_rows="dynamic", help="Craft stylized pairs! 🎨")
                     f.write(f"- `{row['image']}`: {row['text']}\n")
             st.markdown(get_download_link(stylize_md, "text/markdown", "Download Stylization MD 📝"), unsafe_allow_html=True)
             st.subheader("Use Case 3: Multi-Angle Snapshots 🌐")
             multiangle_data = [{"image": img, "text": f"View from {os.path.basename(img).split('-')[4]}"} for img in captured_images[:min(len(captured_images), 10)]]
             multiangle_edited = st.data_editor(pd.DataFrame(multiangle_data), num_rows="dynamic", help="Craft multi-angle pairs! 🌐")