Update pipeline/routes.py

pipeline/routes.py  (+11 -22)
@@ -6,6 +6,9 @@ from flask import request, jsonify
 # Import app, models, and logic functions
 from pipeline import app, models, logic
 
+# This constant should be at the top level, after imports
+TOP_N_CANDIDATES = 20
+
 @app.route('/process', methods=['POST'])
 def process_item():
     print("\n" + "="*50)
@@ -16,7 +19,7 @@ def process_item():
 
         object_name = data.get('objectName')
         description = data.get('objectDescription')
-        image_url = data.get('objectImage')
+        image_url = data.get('objectImage')
 
         if not all([object_name, description]):
             return jsonify({"error": "objectName and objectDescription are required."}), 400
@@ -47,10 +50,6 @@ def process_item():
         traceback.print_exc()
         return jsonify({"error": str(e)}), 500
 
-@app.route('/compare', methods=['POST'])
-# Add a new constant at the top of the file
-TOP_N_CANDIDATES = 20  # The number of items to re-rank
-
 @app.route('/compare', methods=['POST'])
 def compare_items():
     print("\n" + "="*50)
@@ -64,19 +63,16 @@ def compare_items():
 
         if not all([query_item, search_list]):
            return jsonify({"error": "queryItem and searchList are required."}), 400
-
+
         # === STAGE 1: FAST RETRIEVAL (using Bi-Encoder) ===
         print(f"--- Stage 1: Retrieving top candidates from {len(search_list)} items... ---")
-
+
         initial_candidates = []
         query_text_emb = np.array(query_item['text_embedding'])
 
         for item in search_list:
             text_emb_found = np.array(item['text_embedding'])
             text_score = logic.cosine_similarity(query_text_emb, text_emb_found)
-
-            # For now, just use the text_score as the initial score
-            # We will calculate the full score later for the top candidates
             initial_candidates.append({"item": item, "initial_score": text_score})
 
         # Sort by the initial score and keep the best ones
@@ -91,10 +87,10 @@ def compare_items():
 
         print(f"\n--- Stage 2: Re-ranking top {len(top_candidates)} candidates... ---")
         query_description = query_item['objectDescription']
-
+
         # Create pairs of [query, candidate_description] for the cross-encoder
         rerank_pairs = [(query_description, cand['item']['objectDescription']) for cand in top_candidates]
-
+
         # Get new, highly accurate scores from the cross-encoder
         cross_encoder_scores = models['cross_encoder'].predict(rerank_pairs)
 
@@ -109,9 +105,8 @@ def compare_items():
             # Now we calculate the final image and combined score, just like before
            has_query_image = 'shape_features' in query_item and query_item['shape_features']
            has_item_image = 'shape_features' in item and item['shape_features']
-
+
            if has_query_image and has_item_image:
-                # (This image scoring logic is the same as your old code)
                from pipeline import FEATURE_WEIGHTS
                query_shape = np.array(query_item['shape_features'])
                query_color = np.array(query_item['color_features']).astype("float32")
@@ -127,11 +122,10 @@
                                    FEATURE_WEIGHTS["color"] * color_score +
                                    FEATURE_WEIGHTS["texture"] * texture_score)
                image_score = logic.stretch_image_score(raw_image_score)
-
-                final_score = 0.4 * image_score + 0.6 * cross_score
+                final_score = 0.4 * image_score + 0.6 * cross_score
                print(f" - Image Score: {image_score:.4f} | Final Re-ranked Score: {final_score:.4f}")
            else:
-                final_score = cross_score
+                final_score = cross_score
 
            from pipeline import FINAL_SCORE_THRESHOLD
            if final_score >= FINAL_SCORE_THRESHOLD:
@@ -151,11 +145,6 @@
        print("="*50)
        return jsonify({"matches": final_results}), 200
 
-    except Exception as e:
-        print(f"❌ Error in /compare: {e}")
-        traceback.print_exc()
-        return jsonify({"error": str(e)}), 500
-
    except Exception as e:
        print(f"❌ Error in /compare: {e}")
        traceback.print_exc()
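Note on Stage 1: /compare now runs a cheap retrieval pass first, scoring every entry in searchList with cosine similarity over the precomputed text embeddings and keeping only the TOP_N_CANDIDATES best items for re-ranking. Below is a minimal standalone sketch of that pass; cosine_similarity is a stand-in for the repo's logic.cosine_similarity, and retrieve_candidates is an illustrative name rather than a function that exists in routes.py.

import numpy as np

TOP_N_CANDIDATES = 20  # same cap the commit moves to module level

def cosine_similarity(a, b):
    # Plain cosine similarity; the repo's logic.cosine_similarity may differ in detail.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-12))

def retrieve_candidates(query_item, search_list):
    # Stage 1: score everything cheaply against the query's bi-encoder embedding...
    query_emb = np.array(query_item['text_embedding'])
    scored = [{"item": item,
               "initial_score": cosine_similarity(query_emb, np.array(item['text_embedding']))}
              for item in search_list]
    # ...then keep only the strongest candidates for the expensive cross-encoder stage.
    scored.sort(key=lambda c: c["initial_score"], reverse=True)
    return scored[:TOP_N_CANDIDATES]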
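Note on Stage 2: the surviving candidates are re-scored with the cross-encoder on (query description, candidate description) pairs, and when both sides carry image features the route blends 0.4 * image_score with 0.6 * cross_score before applying FINAL_SCORE_THRESHOLD. The sketch below covers only the text-only fallback path and assumes models['cross_encoder'] is a sentence-transformers CrossEncoder; the model name and the default threshold are placeholders, not values taken from this repo.

from sentence_transformers import CrossEncoder

# Hypothetical checkpoint; the commit does not say which cross-encoder it loads.
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

def rerank(query_description, top_candidates, score_threshold=0.5):
    # Score each (query, candidate) pair jointly; this is the slow but accurate step.
    pairs = [(query_description, c["item"]["objectDescription"]) for c in top_candidates]
    cross_scores = cross_encoder.predict(pairs)

    matches = []
    for cand, cross_score in zip(top_candidates, cross_scores):
        # Text-only path: the real route would blend in the image score here
        # (0.4 * image_score + 0.6 * cross_score) when both items have image features.
        final_score = float(cross_score)
        if final_score >= score_threshold:  # placeholder for FINAL_SCORE_THRESHOLD
            matches.append({"item": cand["item"], "score": final_score})
    return sorted(matches, key=lambda m: m["score"], reverse=True)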