sohamnk committed on
Commit 4b7d3dc · verified · 1 Parent(s): 97c0463

Update pipeline/logic.py

Files changed (1)
  1. pipeline/logic.py +19 -11
pipeline/logic.py CHANGED
@@ -24,26 +24,26 @@ def download_image_from_url(image_url: str) -> Image.Image:
     print(" [Download] ✅ Image downloaded and standardized to RGB.")
     return image_rgb
 
-# In pipeline/logic.py
-
 def detect_and_crop(image: Image.Image, object_name: str, models: dict) -> Image.Image:
     print(f"\n [Detect & Crop] Starting detection for object: '{object_name}'")
     image_np = np.array(image.convert("RGB"))
     height, width = image_np.shape[:2]
     prompt = [[f"a {object_name}"]]
     inputs = models['processor_gnd'](images=image, text=prompt, return_tensors="pt").to(models['device'])
+
     with torch.no_grad():
-        outputs = models['model_gnd'](**inputs)
+        outputs = models['model_gnd'](
+            **inputs,
+            box_threshold=0.4,
+            text_threshold=0.3
+        )
 
-    # THIS IS THE CORRECTED LINE:
     results = models['processor_gnd'].post_process_grounded_object_detection(
-        outputs,
-        inputs.input_ids,
-        object_detection_threshold=0.4,  # Renamed from box_threshold
-        text_detection_threshold=0.3,  # Renamed from text_threshold
+        outputs=outputs,
+        input_ids=inputs.input_ids,
         target_sizes=[(height, width)]
     )
-
+
     if not results or len(results[0]['boxes']) == 0:
         print(" [Detect & Crop] ⚠ Warning: Grounding DINO did not detect the object. Using full image.")
         return image
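For reference, a minimal standalone sketch of the same detect-and-crop step against the stock transformers Grounding DINO API, where the confidence thresholds are applied at post-processing time; the checkpoint name and threshold values are illustrative, and the exact keyword names can vary between transformers versions:

# A minimal sketch, not the project's code: zero-shot detection + crop with Grounding DINO.
# The checkpoint name and thresholds are assumptions for illustration.
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained("IDEA-Research/grounding-dino-tiny")
model = AutoModelForZeroShotObjectDetection.from_pretrained("IDEA-Research/grounding-dino-tiny").to(device)

def crop_object(image: Image.Image, object_name: str) -> Image.Image:
    # Grounding DINO expects lower-cased, period-terminated text queries.
    inputs = processor(images=image, text=f"a {object_name}.", return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    # In the stock API the thresholds belong to post-processing
    # (keyword names differ slightly across transformers versions).
    results = processor.post_process_grounded_object_detection(
        outputs,
        inputs.input_ids,
        box_threshold=0.4,
        text_threshold=0.3,
        target_sizes=[image.size[::-1]],  # (height, width)
    )
    if not results or len(results[0]["boxes"]) == 0:
        return image  # fall back to the full image, as the pipeline does
    x_min, y_min, x_max, y_max = results[0]["boxes"][0].tolist()
    return image.crop((x_min, y_min, x_max, y_max))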
@@ -72,7 +72,15 @@ def extract_features(segmented_image: Image.Image) -> dict:
     mask = a
     gray = cv2.cvtColor(image_rgb, cv2.COLOR_BGR2GRAY)
     contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    hu_moments = cv2.HuMoments(cv2.moments(contours[0])).flatten() if contours else np.zeros(7)
+    if not contours:
+        # If no contours are found, return zero-filled features
+        print(" [Features] ⚠ Warning: No contours found in segmented image. Returning zero features.")
+        return {
+            "shape_features": [0.0] * 7,
+            "color_features": [0.0] * 512,  # 8*8*8
+            "texture_features": [0.0] * 26
+        }
+    hu_moments = cv2.HuMoments(cv2.moments(contours[0])).flatten()
     color_hist = cv2.calcHist([image_rgb], [0, 1, 2], mask, [8, 8, 8], [0, 256, 0, 256, 0, 256])
     cv2.normalize(color_hist, color_hist)
     color_hist = color_hist.flatten()
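The hunk above only shows the new guard. As a self-contained illustration, assuming the segmented image is RGBA with the object on a transparent background (so the alpha channel is the mask), the shape and colour features can be computed as below; the 26-dimensional texture features fall outside this hunk and are omitted:

# A minimal sketch, assuming an RGBA cut-out whose alpha channel marks the object.
import cv2
import numpy as np
from PIL import Image

def shape_and_color_features(segmented_image: Image.Image) -> dict:
    rgba = np.array(segmented_image.convert("RGBA"))
    image_rgb = np.ascontiguousarray(rgba[:, :, :3])
    mask = np.ascontiguousarray(rgba[:, :, 3])  # alpha channel: 0 = background, >0 = object
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Mirror the commit: zero-filled features when nothing was segmented.
        return {"shape_features": [0.0] * 7, "color_features": [0.0] * 512}
    # Seven Hu moments of the first external contour: rotation/scale-invariant shape descriptors.
    hu_moments = cv2.HuMoments(cv2.moments(contours[0])).flatten()
    # 8x8x8 colour histogram restricted to the masked pixels, then normalized and flattened.
    color_hist = cv2.calcHist([image_rgb], [0, 1, 2], mask, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    cv2.normalize(color_hist, color_hist)
    return {"shape_features": hu_moments.tolist(), "color_features": color_hist.flatten().tolist()}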
@@ -89,7 +97,7 @@ def extract_features(segmented_image: Image.Image) -> dict:
 
 def get_text_embedding(text: str, models: dict) -> list:
     print(f" [Embedding] Generating text embedding for: '{text[:50]}...'")
-    text_with_instruction = f"Represent this sentence for searching relevant passages: {text}"
+    text_with_instruction = f"Represent this description of a lost item for similarity search: {text}"
     inputs = models['tokenizer_text'](text_with_instruction, return_tensors='pt', padding=True, truncation=True, max_length=512).to(models['device'])
     with torch.no_grad():
         outputs = models['model_text'](**inputs)
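The replaced prefix, "Represent this sentence for searching relevant passages:", is the stock BGE retrieval instruction; the commit swaps it for a prompt tailored to lost-item descriptions. For context, a minimal standalone sketch of instruction-prefixed embedding with CLS pooling and L2 normalization; the checkpoint here is an assumption, since the diff does not show which model backs models['model_text']:

# A minimal sketch, assuming a BGE-style encoder (the actual checkpoint is not shown in this commit).
import torch
from transformers import AutoModel, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-base-en-v1.5")
model = AutoModel.from_pretrained("BAAI/bge-base-en-v1.5").to(device).eval()

def embed_text(text: str) -> list:
    prefixed = f"Represent this description of a lost item for similarity search: {text}"
    inputs = tokenizer(prefixed, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    # CLS-token pooling followed by L2 normalization, the usual recipe for BGE-style embeddings.
    embedding = torch.nn.functional.normalize(outputs.last_hidden_state[:, 0], p=2, dim=1)
    return embedding[0].cpu().tolist()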