Spaces:

ignitariumcloud
/

Gas_Pipe_Quality_Monitor

Runtime error

App Files Files Community

arjunanand13 committed on Jul 14

Commit

22307f0

verified ·

1 Parent(s): a9c397d

Update app.py

Browse files

Files changed (1) hide show

app.py +114 -151

app.py CHANGED Viewed

@@ -1,62 +1,29 @@
-# app.py - Hugging Face Spaces App
-import gradio as gr
 import torch
-import cv2
 import numpy as np
 from PIL import Image
-from transformers import AutoProcessor, LlavaNextForConditionalGeneration, BitsAndBytesConfig
-from peft import PeftModel
-import tempfile
-import os
-# Model configuration
-MODEL_ID = "llava-hf/llava-v1.6-mistral-7b-hf"
-PEFT_MODEL_ID = "arjunanand13/gas_pipe_llava_finetunedv2"
-@torch.no_grad()
-def load_model():
-    """Load the fine-tuned PEFT model"""
-    print("Loading PEFT model...")
-    # Quantization config
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.float16,
-        bnb_4bit_use_double_quant=True,
-        bnb_4bit_quant_storage=torch.uint8
-    )
-    # Load processor
-    processor = AutoProcessor.from_pretrained(PEFT_MODEL_ID)
-    # Load base model
-    base_model = LlavaNextForConditionalGeneration.from_pretrained(
-        MODEL_ID,
-        torch_dtype=torch.float16,
-        quantization_config=bnb_config,
-        device_map="auto",
-        low_cpu_mem_usage=True,
-        trust_remote_code=True
-    )
-    # Load PEFT adapters
-    model = PeftModel.from_pretrained(base_model, PEFT_MODEL_ID)
-    print("Model loaded successfully!")
-    return processor, model
-# Load model once at startup
-processor, model = load_model()
-def preprocess_video(video_path):
-    """Extract 4 frames and create 2x2 grid"""
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
         raise ValueError(f"Cannot open video: {video_path}")
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    frame_indices = np.linspace(0, max(0, total_frames - 1), 4, dtype=int)
     frames = []
     for frame_idx in frame_indices:
@@ -64,8 +31,10 @@ def preprocess_video(video_path):
         ret, frame = cap.read()
         if ret:
             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            frame_pil = Image.fromarray(frame_rgb).resize((112, 112))
-            frames.append(frame_pil)
     cap.release()
     while len(frames) < 4:
@@ -74,124 +43,118 @@ def preprocess_video(video_path):
         else:
             frames.append(Image.new('RGB', (112, 112), color='black'))
-    grid_image = Image.new('RGB', (224, 224))
-    positions = [(0, 0), (112, 0), (0, 112), (112, 112)]
-    for i, frame in enumerate(frames[:4]):
-        grid_image.paste(frame, positions[i])
     return grid_image
-def predict_cheating(video_file):
-    """Analyze video for gas pipe testing compliance"""
-    if video_file is None:
-        return "Please upload a video file", None
     try:
-        # Process video
-        grid_image = preprocess_video(video_file)
-        # Use forced JSON prompt
-        prompt = "[INST] <image>\nGas pipe test result? [/INST] {\"cheating\":"
-        inputs = processor(text=prompt, images=grid_image, return_tensors="pt")
-        # Move to device
-        inputs = {k: v.to(model.device) if hasattr(v, 'to') else v for k, v in inputs.items()}
-        # Generate prediction
-        generated_ids = model.generate(
-            **inputs,
-            max_new_tokens=16,
-            do_sample=False,
-            pad_token_id=processor.tokenizer.eos_token_id
-        )
-        # Decode result
-        result = processor.decode(generated_ids[0][len(inputs['input_ids'][0]):], skip_special_tokens=True)
-        # Parse result
-        full_result = '{"cheating":' + result
-        if "true" in result.lower():
-            prediction = "CHEATING DETECTED"
-            explanation = "The system detected violations in the testing procedure."
-            status = "🚨"
-        else:
-            prediction = "COMPLIANT PROCEDURE"
-            explanation = "The testing procedure appears to follow proper protocols."
-            status = "✅"
-        formatted_result = f"{status} **{prediction}**\n\n{explanation}\n\nModel output: {full_result}"
-        return formatted_result, grid_image
     except Exception as e:
-        return f"Error processing video: {str(e)}", None
-# Gradio Interface
-with gr.Blocks(
-    title="Gas Pipe Quality Control Detection",
-    theme=gr.themes.Soft(),
-    css="""
-    .gradio-container {
-        max-width: 1200px;
-        margin: auto;
-    }
-    """
-) as demo:
-    gr.HTML("""
-    <div style="text-align: center; margin-bottom: 20px;">
-        <h1>🔧 Gas Pipe Quality Control Detection</h1>
-        <p>AI-powered detection of compliance violations in gas pipe testing procedures</p>
-    </div>
-    """)
-    with gr.Row():
-        with gr.Column(scale=2):
-            video_input = gr.Video(
-                label="Upload Gas Pipe Testing Video",
-                height=320
-            )
-            analyze_btn = gr.Button(
-                "Analyze Video",
-                variant="primary",
-                size="lg"
-            )
-            gr.Markdown("""
-            **Supported formats:** MP4, AVI, MOV, WMV
-            **Max duration:** 60 seconds recommended
-            """)
-        with gr.Column(scale=2):
-            result_text = gr.Textbox(
-                label="Detection Result",
-                lines=8,
-                max_lines=12
-            )
-            processed_image = gr.Image(
-                label="Processed Video Frames (2×2 Grid)",
-                height=320
-            )
-    analyze_btn.click(
-        fn=predict_cheating,
-        inputs=[video_input],
-        outputs=[result_text, processed_image]
-    )
-    gr.HTML("""
-    <div style="text-align: center; margin-top: 20px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
-        <p><strong>Model:</strong> Fine-tuned LLaVA v1.6 Mistral 7B with LoRA adapters</p>
-        <p><strong>Repository:</strong> <a href="https://huggingface.co/arjunanand13/gas_pipe_llava_finetunedv2" target="_blank">arjunanand13/gas_pipe_llava_finetunedv2</a></p>
-        <p><strong>Developed by:</strong> Arjun Anand</p>
-    </div>
-    """)
 if __name__ == "__main__":
-    demo.launch()

 import torch
+import gradio as gr
 import numpy as np
+import cv2
 from PIL import Image
+from transformers import BitsAndBytesConfig, LlavaNextForConditionalGeneration, AutoProcessor
+import gc
+MODEL_ID = "arjunanand13/gas_pipe_llava_finetunedv3"
+def clear_memory():
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()
+def extract_frames_from_video(video_path, num_frames=4):
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
         raise ValueError(f"Cannot open video: {video_path}")
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    if total_frames < num_frames:
+        num_frames = min(total_frames, num_frames)
+    frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
     frames = []
     for frame_idx in frame_indices:
         ret, frame = cap.read()
         if ret:
             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            frame_pil = Image.fromarray(frame_rgb)
+            frame_resized = frame_pil.resize((112, 112), Image.Resampling.LANCZOS)
+            frames.append(frame_resized)
     cap.release()
     while len(frames) < 4:
         else:
             frames.append(Image.new('RGB', (112, 112), color='black'))
+    return frames[:4]
+def create_frame_grid(frames, grid_size=(2, 2)):
+    cols, rows = grid_size
+    frame_size = 112
+    grid_width = frame_size * cols
+    grid_height = frame_size * rows
+    grid_image = Image.new('RGB', (grid_width, grid_height))
+    for i, frame in enumerate(frames):
+        row = i // cols
+        col = i % cols
+        x = col * frame_size
+        y = row * frame_size
+        grid_image.paste(frame, (x, y))
     return grid_image
+@torch.no_grad()
+def load_model():
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_storage=torch.uint8
+    )
+    processor = AutoProcessor.from_pretrained(MODEL_ID)
+    processor.tokenizer.padding_side = "right"
+    processor.tokenizer.pad_token = processor.tokenizer.eos_token
+    model = LlavaNextForConditionalGeneration.from_pretrained(
+        MODEL_ID,
+        torch_dtype=torch.float16,
+        quantization_config=bnb_config,
+        device_map="auto",
+        low_cpu_mem_usage=True,
+        trust_remote_code=True
+    )
+    model.config.use_cache = False
+    model.eval()
+    return model, processor
+# Loaded once at import time; predict_gas_pipe_quality reads these module-level names on every call.
+model, processor = load_model()
+def predict_gas_pipe_quality(video_path):
     try:
+        frames = extract_frames_from_video(video_path, num_frames=4)
+        grid_image = create_frame_grid(frames, grid_size=(2, 2))
+        prompt = "[INST] <image>\nGas pipe test result? [/INST]"
+        inputs = processor(text=prompt, images=grid_image, return_tensors="pt")
+        if torch.cuda.is_available():
+            inputs = {k: v.to(model.device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
+        with torch.no_grad():
+            generated_ids = model.generate(
+                input_ids=inputs["input_ids"],
+                attention_mask=inputs["attention_mask"],
+                pixel_values=inputs["pixel_values"],
+                image_sizes=inputs["image_sizes"],
+                max_new_tokens=16,
+                do_sample=False,
+                pad_token_id=processor.tokenizer.eos_token_id
+            )
+        prediction = processor.batch_decode(
+            generated_ids[:, inputs["input_ids"].size(1):],
+            skip_special_tokens=True
+        )[0].strip()
+        clear_memory()
+        return grid_image, prediction
     except Exception as e:
+        clear_memory()
+        return None, f"Error: {str(e)}"
+def create_interface():
+    with gr.Blocks(title="Gas Pipe Quality Control") as iface:
+        gr.Markdown("# Gas Pipe Quality Control")
+        with gr.Row():
+            with gr.Column():
+                video_input = gr.Video(label="Upload Video")
+                analyze_btn = gr.Button("Analyze", variant="primary")
+            with gr.Column():
+                frame_grid = gr.Image(label="Extracted Frames")
+                result_output = gr.Textbox(label="Model Output", lines=3)
+        analyze_btn.click(
+            fn=predict_gas_pipe_quality,
+            inputs=video_input,
+            outputs=[frame_grid, result_output]
+        )
+        video_input.change(
+            fn=predict_gas_pipe_quality,
+            inputs=video_input,
+            outputs=[frame_grid, result_output]
+        )
+    return iface
 if __name__ == "__main__":
+    iface = create_interface()
+    iface.launch(share=True)