Spaces:

selvaonline
/

shopping-assistant-demo

Running

App Files Community

selvaonline committed 25 days ago

Commit

e667020

verified ·

1 Parent(s): 6f89f62

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +62 -18

app.py CHANGED Viewed

@@ -83,20 +83,40 @@ def process_deals_data(deals_data):
     return processed_deals
-# Load the model and tokenizer
-model_id = "selvaonline/shopping-assistant"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForSequenceClassification.from_pretrained(model_id)
-# Load the categories
 try:
-    from huggingface_hub import hf_hub_download
     categories_path = hf_hub_download(repo_id=model_id, filename="categories.json")
     with open(categories_path, "r") as f:
-        categories = json.load(f)
-except Exception as e:
-    print(f"Error loading categories: {str(e)}")
-    categories = ["electronics", "clothing", "home", "kitchen", "toys", "other"]
 # Global variable to store deals data
 deals_cache = None
@@ -113,13 +133,37 @@ def classify_text(text, fetch_deals=True):
     # Get the model prediction
     with torch.no_grad():
         outputs = model(**inputs)
-        predictions = torch.sigmoid(outputs.logits)
-    # Get the top categories
-    top_categories = []
-    for i, score in enumerate(predictions[0]):
-        if score > 0.5:  # Threshold for multi-label classification
-            top_categories.append((categories[i], score.item()))
     # Sort by score
     top_categories.sort(key=lambda x: x[1], reverse=True)

     return processed_deals
+# Load the e-commerce specific model and tokenizer
 try:
+    # Try to load the e-commerce BERT model
+    tokenizer = AutoTokenizer.from_pretrained("prithivida/ecommerce-bert-base-uncased")
+    model = AutoModelForSequenceClassification.from_pretrained("prithivida/ecommerce-bert-base-uncased")
+    # E-commerce BERT categories
+    categories = [
+        "electronics", "computers", "mobile_phones", "accessories",
+        "clothing", "footwear", "watches", "jewelry",
+        "home", "kitchen", "furniture", "decor",
+        "beauty", "personal_care", "health", "wellness",
+        "toys", "games", "sports", "outdoors",
+        "books", "stationery", "music", "movies"
+    ]
+    print("Using e-commerce BERT model")
+except Exception as e:
+    # Fall back to local model if e-commerce BERT fails to load
+    print(f"Error loading e-commerce BERT model: {str(e)}")
+    print("Falling back to local model")
+    model_id = "selvaonline/shopping-assistant"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForSequenceClassification.from_pretrained(model_id)
+    # Load the local categories
+    try:
+        from huggingface_hub import hf_hub_download
     categories_path = hf_hub_download(repo_id=model_id, filename="categories.json")
     with open(categories_path, "r") as f:
+            categories = json.load(f)
+    except Exception as e:
+        print(f"Error loading categories: {str(e)}")
+        categories = ["electronics", "clothing", "home", "kitchen", "toys", "other"]
 # Global variable to store deals data
 deals_cache = None
     # Get the model prediction
     with torch.no_grad():
         outputs = model(**inputs)
+        # Handle different model output formats
+        if hasattr(outputs, 'logits'):
+            # For models that return logits
+            if outputs.logits.shape[1] == len(categories):
+                # Multi-label classification
+                predictions = torch.sigmoid(outputs.logits)
+                # Get the top categories
+                top_categories = []
+                for i, score in enumerate(predictions[0]):
+                    if score > 0.3:  # Lower threshold for e-commerce model
+                        top_categories.append((categories[i], score.item()))
+            else:
+                # Single-label classification
+                probabilities = torch.softmax(outputs.logits, dim=1)
+                values, indices = torch.topk(probabilities, 3)
+                top_categories = []
+                for i, idx in enumerate(indices[0]):
+                    if idx < len(categories):
+                        top_categories.append((categories[idx.item()], values[0][i].item()))
+        else:
+            # Fallback for other model formats
+            predictions = torch.sigmoid(outputs[0])
+            # Get the top categories
+            top_categories = []
+            for i, score in enumerate(predictions[0]):
+                if score > 0.5:
+                    top_categories.append((categories[i], score.item()))
     # Sort by score
     top_categories.sort(key=lambda x: x[1], reverse=True)