lisekarimi commited on
Commit
8366946
·
1 Parent(s): ba52162

Deploy version 0.1.0

Browse files
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # Install uv
4
+ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
5
+
6
+ WORKDIR /app
7
+
8
+ # Copy dependency files first (changes rarely)
9
+ COPY pyproject.toml uv.lock ./
10
+
11
+ # Put venv outside of /app so it won't be affected by volume mounts
12
+ ENV UV_PROJECT_ENVIRONMENT=/opt/venv
13
+
14
+ # Install dependencies (this will now create venv at /opt/venv)
15
+ RUN uv sync --locked --no-group docs --no-group notebook
16
+
17
+ # Create a non-root user
18
+ RUN useradd -m appuser
19
+
20
+ # Copy all source code
21
+ COPY . .
22
+
23
+ # Set ownership of writable dirs
24
+ RUN mkdir -p /app/memory && chown -R appuser:appuser /app/memory
25
+
26
+ # Switch to non-root user
27
+ USER appuser
28
+
29
+ # Set environment variables
30
+ ENV PYTHONUNBUFFERED=1
31
+
32
+ # Default command - use uv run to execute with the virtual environment
33
+ CMD ["bash", "-c", "set -e && uv run modal deploy -m src.modal_services.entry && uv run main.py"]
34
+ # CMD ["uv", "run", "main.py"]
README.md CHANGED
@@ -1,11 +1,56 @@
1
  ---
2
  title: Snapr
3
- emoji: 📈
4
  colorFrom: yellow
5
- colorTo: green
6
  sdk: docker
7
  pinned: false
8
- short_description: 🏷️ Multi-AI agent system trained to snap the best deals
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Snapr
3
+ emoji: 🏷️
4
  colorFrom: yellow
5
+ colorTo: blue
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
+ # 🏷️ Snapr - Multi-AI Agent Snaps Best Deals
11
+
12
+ - [🚀 Live Demo on Hugging Face](https://huggingface.co/spaces/lisekarimi/snapr)
13
+ - [📚 Full Documentation](https://lisekarimi.github.io/snapr)
14
+
15
+ 📷 **Screenshots**
16
+
17
+ <a href="docs/styles/assets/ui/full_app.png">
18
+ <img src="docs/styles/assets/ui/full_app.png" width="400">
19
+ </a>
20
+
21
+ ---
22
+
23
+ ## 📖 Overview
24
+ **Snapr** is an AI-powered system that finds the best online deals using a pipeline of collaborative agents powered by LLMs, traditional ML models, embeddings, and retrieval-augmented generation (RAG).
25
+
26
+ ### ⚙️ Key Capabilities
27
+ - Central agent orchestrates deal processing with specialized agents
28
+ - Real-time deal fetching and price prediction using LLMs and traditional models
29
+ - Automatic filtering and memory to avoid duplicates
30
+ - Remote execution on Modal with live activity logs
31
+ - DevOps: Docker, CI/CD with GitHub Actions, pre-commit hooks
32
+ - Unit testing, security checks, and code quality enforcement
33
+ - Auto-generated changelogs with LLM and MkDocs-based documentation
34
+
35
+ 👉 For full details, see [Core Features and Development & DevOps.](https://lisekarimi.github.io/snapr).
36
+
37
+ ---
38
+
39
+ ## ⚙️ Setup & Installation
40
+
41
+ To set up locally and install the app, see the "Local Deployment" section in the [technical documentation](https://lisekarimi.github.io/snapr/technical/localdev/).
42
+
43
+ ---
44
+
45
+ ## 📚 Docs & Architecture
46
+
47
+ - Documentation includes:
48
+
49
+ - Technical and functional docs
50
+ - Schematics of technical architecture, functional workflow, and user flow
51
+
52
+ ---
53
+
54
+ ## 🪪 License
55
+
56
+ MIT
main.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ """Entry point for the application."""
2
+
3
+ from src.ui.gradio_app import build_ui
4
+
5
+ demo = build_ui()
6
+
7
+ if __name__ == "__main__":
8
+ demo.launch(server_name="0.0.0.0", server_port=7860)
pyproject.toml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "snapr"
3
+ version = "0.1.0"
4
+ description = "Multi-AI agent system trained to snap the best deals"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11, <3.12"
7
+ dependencies = [
8
+ "beautifulsoup4>=4.13.4",
9
+ "feedparser>=6.0.11",
10
+ "gradio==5.29.1",
11
+ "modal==0.74.20",
12
+ "openai==1.65.5",
13
+ "python-dotenv>=1.1.0",
14
+ "requests>=2.32.3",
15
+ "rich>=14.0.0",
16
+ ]
17
+
18
+ [dependency-groups]
19
+ docs = [
20
+ "mkdocs-glightbox>=0.4.0",
21
+ "mkdocs-macros-plugin>=1.3.7",
22
+ "mkdocs-material>=9.6.14",
23
+ ]
24
+ notebook = [
25
+ "datasets==2.21.0",
26
+ "ipykernel>=6.29.5",
27
+ "ipywidgets>=8.1.7",
28
+ "matplotlib>=3.10.3",
29
+ "numpy>=2.2.6",
30
+ "pandas>=2.2.3",
31
+ "transformers>=4.52.3",
32
+ ]
33
+
34
+ [tool.pytest.ini_options]
35
+ pythonpath = ["."]
36
+ filterwarnings = [
37
+ "ignore::DeprecationWarning:websockets.legacy",
38
+ ]
39
+
40
+ [tool.ruff]
41
+ target-version = "py39"
42
+ line-length = 88
43
+
44
+ [tool.ruff.lint]
45
+ select = ["E", "F", "I"]
46
+ ignore = ["F821"]
47
+
48
+ [tool.ruff.lint.per-file-ignores]
49
+ "src/agents/*.py" = ["ANN"]
50
+ "tests/*.py" = ["ANN"]
51
+ "notebooks/**/*" = ["ALL"]
52
+ "**/*.ipynb" = ["ALL"]
src/__init__.py ADDED
File without changes
src/agents/__init__.py ADDED
File without changes
src/agents/base_agent.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Base class for Agents, providing colorful logging with Rich."""
2
+
3
+ from src.config.logging_queue import log_queue
4
+ from src.utils.logger import console
5
+
6
+
7
+ class Agent:
8
+ """Abstract superclass for Agents, with colorful Rich logging."""
9
+
10
+ name: str = ""
11
+ color: str = "white"
12
+
13
+ def log(self, message: str) -> None:
14
+ """Print log with colored message using Rich."""
15
+ # Terminal (Rich)
16
+ console.print(f"[{self.color} on black][{self.name}] {message}[/]")
17
+
18
+ # Gradio UI (HTML)
19
+ log_queue.put(
20
+ f"<span style='color:{self.color}'>[{self.name}] {message}</span><br>"
21
+ )
src/agents/deal_scanner_agent.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Scan deals and return top 5 via OpenAI.
2
+
3
+ 1. Fetch deals from RSS feeds.
4
+ 2. Prompt OpenAI with the deal list.
5
+ 3. Return top 5 detailed, clearly priced deals as structured JSON.
6
+ """
7
+
8
+ import json
9
+ import os
10
+ from typing import Any, Dict, List, Optional
11
+
12
+ from src.agents.base_agent import Agent
13
+ from src.config.constants import DEALS_FILE
14
+ from src.deals.raw_deals import ScrapedDeal
15
+ from src.deals.structured_deals import OpportunitiesCollection
16
+ from src.models.frontier_model import OPENAI_MODEL, openai
17
+
18
+
19
+ class DealScannerAgent(Agent):
20
+ """Agent for scanning and filtering deals.
21
+
22
+ Processes them via OpenAI.
23
+ """
24
+
25
+ name = "Deal Scanner Agent"
26
+ color = "green"
27
+
28
+ SYSTEM_PROMPT = """
29
+ You are a deal filtering assistant.
30
+
31
+ Your task is to identify the 5 deals with the most detailed product descriptions
32
+ and clearly stated prices. Focus only on the product itself — not the deal terms,
33
+ discounts, or promotions.
34
+
35
+ Only include deals where the price is explicitly mentioned and easy to extract.
36
+ Avoid entries with phrases like "$XXX off" or "reduced by $XXX" — those are not
37
+ valid prices. Only include deals when you are confident about the actual
38
+ product price.
39
+
40
+ Respond strictly in JSON with no explanation, using the following format:
41
+
42
+ {
43
+ "deals": [
44
+ {
45
+ "product_description": "A clear, 4–5 sentence summary of the product.",
46
+ "price": 99.99,
47
+ "url": "..."
48
+ },
49
+ ...
50
+ ]
51
+ }"""
52
+
53
+ def __init__(self, memory_path: str = DEALS_FILE) -> None:
54
+ """Initialize OpenAI client."""
55
+ self.openai = openai
56
+ self.memory_path = memory_path
57
+ self.log("is ready")
58
+
59
+ def _load_memory(self) -> Dict[str, List[Dict[str, Any]]]:
60
+ """Load memory from file, returning seen URLs and the full memory."""
61
+ if os.path.exists(self.memory_path):
62
+ try:
63
+ with open(self.memory_path, "r") as f:
64
+ memory_json = json.load(f)
65
+ seen_urls = [op["url"] for op in memory_json.get("opportunities", [])]
66
+ return {
67
+ "seen_urls": seen_urls,
68
+ "memory": memory_json.get("opportunities", []),
69
+ }
70
+ except (json.JSONDecodeError, KeyError, IOError) as e:
71
+ self.log(f"Error loading memory: {e}. Creating new memory.")
72
+ return {"seen_urls": [], "memory": []}
73
+ else:
74
+ self.log("No memory file found. Assuming first run")
75
+ return {"seen_urls": [], "memory": []}
76
+
77
+ def fetch_deals(self, categories: List[str]) -> List[ScrapedDeal]:
78
+ """Fetch new RSS deals not present in memory."""
79
+ self.log("is fetching deals from RSS feed")
80
+
81
+ # Load memory to get seen URLs
82
+ memory_data = self._load_memory()
83
+ seen_urls = set(memory_data["seen_urls"])
84
+
85
+ # Fetch all deals and filter out seen ones
86
+ try:
87
+ scraped = ScrapedDeal.fetch(categories)
88
+ result = [deal for deal in scraped if deal.url not in seen_urls]
89
+ overlap = [deal for deal in scraped if deal.url in seen_urls]
90
+ self.log(f"{len(overlap)} deals skipped")
91
+ self.log(f"{len(result)} new deals fetched")
92
+ return result
93
+ except Exception as e:
94
+ self.log(f"Error fetching deals: {e}")
95
+ return []
96
+
97
+ def make_user_prompt(self, scraped: List[ScrapedDeal]) -> str:
98
+ """Build the full user prompt for OpenAI."""
99
+ return (
100
+ "Select the 5 best deals with the clearest product descriptions "
101
+ "and exact prices. Here is the list:\n\n"
102
+ + "\n\n".join(deal.describe() for deal in scraped)
103
+ )
104
+
105
+ def scan(self, categories: List[str]) -> Optional[OpportunitiesCollection]:
106
+ """Return top 5 new deals."""
107
+ # Step 1: Fetch new deals not already in memory
108
+ scraped = self.fetch_deals(categories)
109
+ if not scraped:
110
+ self.log("❌ found no new deals to process ")
111
+ return None
112
+
113
+ # Step 2: Construct prompt with all new deals
114
+ user_prompt = self.make_user_prompt(scraped)
115
+
116
+ # Step 3: Call OpenAI - allow RuntimeError to propagate
117
+ result = self._call_openai(user_prompt)
118
+
119
+ # Step 4: Filter out invalid deals
120
+ filtered_result = self._filter_invalid_deals(result)
121
+
122
+ return filtered_result if filtered_result.opportunities else None
123
+
124
+ def _call_openai(self, user_prompt: str) -> OpportunitiesCollection:
125
+ """Call OpenAI API to get the processed deals."""
126
+ self.log("📞 is calling OpenAI")
127
+ try:
128
+ result = self.openai.beta.chat.completions.parse(
129
+ model=OPENAI_MODEL,
130
+ messages=[
131
+ {"role": "system", "content": self.SYSTEM_PROMPT},
132
+ {"role": "user", "content": user_prompt},
133
+ ],
134
+ response_format=OpportunitiesCollection,
135
+ )
136
+ except Exception as e:
137
+ self.log(f"[ERROR] OpenAI call failed: {e}")
138
+ raise RuntimeError(
139
+ "DealScannerAgent failed to get response from OpenAI."
140
+ ) from e
141
+ return result
142
+
143
+ def _filter_invalid_deals(
144
+ self, result: OpportunitiesCollection
145
+ ) -> OpportunitiesCollection:
146
+ """Filter out deals with invalid prices."""
147
+ result = result.choices[0].message.parsed
148
+ result.opportunities = [op for op in result.opportunities if op.price > 0]
149
+ self.log(f"✅ received {len(result.opportunities)} valid opportunities ")
150
+ return result
src/agents/ensemble_price_agent.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Agent calling a remote ensemble model on Modal.
2
+
3
+ Computes final price from multiple predictions.
4
+ """
5
+
6
+ import modal
7
+
8
+ from src.agents.base_agent import Agent
9
+ from src.agents.ft_price_agent import FTPriceAgent
10
+ from src.agents.rag_price_agent import RAGPriceAgent
11
+ from src.agents.xgb_price_agent import XGBoostPriceAgent
12
+ from src.config.constants import CURRENCY
13
+ from src.modal_services.app_config import APP_NAME
14
+
15
+
16
+ class EnsemblePriceAgent(Agent):
17
+ """Agent that aggregates FT, RAG, and XGB predictions.
18
+
19
+ Sends them to the remote EnsemblePricer on Modal.
20
+ """
21
+
22
+ name = "EnsemblePrice Agent"
23
+ color = "magenta"
24
+
25
+ def __init__(self) -> None:
26
+ """Initialize the agent."""
27
+ self._modal_called = False
28
+ self.ft_agent = FTPriceAgent()
29
+ self.rag_agent = RAGPriceAgent()
30
+ self.xgb_agent = XGBoostPriceAgent()
31
+ remote_ensemble = modal.Cls.from_name(APP_NAME, "EnsemblePricer")
32
+ self.ensemble = remote_ensemble()
33
+ self.log("is ready")
34
+
35
+ def price(self, description: str) -> float:
36
+ """Get individual predictions and pass them to the ensemble model."""
37
+ ft_pred = self.ft_agent.price(description)
38
+ rag_pred = self.rag_agent.price(description)
39
+ xgb_pred = self.xgb_agent.price(description)
40
+
41
+ if not self._modal_called:
42
+ self.log("📡 Connecting to Modal — Loading trained linear model...")
43
+ self._modal_called = True
44
+
45
+ self.log(
46
+ f"Predictions — FT={CURRENCY}{ft_pred}, "
47
+ f"RAG={CURRENCY}{rag_pred}, "
48
+ f"XGB={CURRENCY}{xgb_pred}"
49
+ )
50
+
51
+ try:
52
+ result = self.ensemble.price.remote(ft_pred, rag_pred, xgb_pred)
53
+ self.log(f"Final estimate: {CURRENCY}{result:.2f}")
54
+ return result
55
+ except Exception as e:
56
+ self.log(f"[ERROR] Remote EnsemblePricer failed: {e}")
57
+ raise RuntimeError("EnsemblePriceAgent failed to get final price.") from e
src/agents/ft_price_agent.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FTPriceAgent uses a remote fine-tuned LLM on Modal.
2
+
3
+ Predicts item prices from descriptions.
4
+ """
5
+
6
+ import modal
7
+
8
+ from src.agents.base_agent import Agent
9
+ from src.modal_services.app_config import APP_NAME
10
+
11
+
12
+ class FTPriceAgent(Agent):
13
+ """Agent for running a fine-tuned LLM remotely."""
14
+
15
+ name = "FTPrice Agent"
16
+ color = "red"
17
+
18
+ def __init__(self) -> None:
19
+ """Initialize agent with Modal class instance."""
20
+ self._modal_called = False
21
+ ft_pricer = modal.Cls.from_name(APP_NAME, "FTPricer")
22
+ self.ftpricer = ft_pricer()
23
+ self.log("is ready")
24
+
25
+ def price(self, description: str) -> float:
26
+ """Remote call to estimate price, with error handling."""
27
+ if not self._modal_called:
28
+ self.log("🧠 Calling Modal's fine-tuned LLM...")
29
+ self._modal_called = True
30
+ try:
31
+ result = self.ftpricer.price.remote(
32
+ description
33
+ ) # 2nd API call: run price method
34
+ # self.log(f"predicting ${result:.2f} ✅")
35
+ return result
36
+ except Exception as e:
37
+ self.log(f"[ERROR] Remote pricing failed: {e}")
38
+ raise RuntimeError("FTPriceAgent failed to get price from Modal.") from e
src/agents/pipeline.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Executes planning agent and streams logs/results.
2
+
3
+ Integrates with the Gradio UI.
4
+ """
5
+
6
+ import queue
7
+ import threading
8
+ import time
9
+ import traceback
10
+ from typing import Any, Generator, List, Optional, Tuple, Union
11
+
12
+ import gradio as gr
13
+
14
+ from src.agents.planning_agent import PlanningAgent
15
+ from src.config.constants import (
16
+ DEALS_FILE,
17
+ IS_DEMO_VERSION,
18
+ MAX_CATEGORY_SELECTION,
19
+ )
20
+ from src.config.logging_queue import log_queue
21
+ from src.ui.formatting import format_deals_table, html_for
22
+ from src.utils.cleanup import delete_if_old
23
+ from src.utils.state_manager import can_run_app, get_state, update_state
24
+
25
+
26
+ def run_pipeline(log_queue: queue.Queue, selected_categories: List[str]) -> None:
27
+ """Runs the planning agent pipeline and stores accepted deals."""
28
+ try:
29
+ delete_if_old(DEALS_FILE)
30
+ agent = PlanningAgent()
31
+ results = agent.plan(selected_categories)
32
+
33
+ global accepted_deals
34
+ accepted_deals = [
35
+ [
36
+ opp.product_description,
37
+ f"${opp.price:.2f}",
38
+ f"${opp.estimate:.2f}",
39
+ f"${opp.discount:.2f}",
40
+ opp.url,
41
+ ]
42
+ for opp in results
43
+ ]
44
+
45
+ except Exception as e:
46
+ log_queue.put(
47
+ f"<span style='color:red'>❌ Error during pipeline execution: "
48
+ f"{str(e)}</span>"
49
+ )
50
+ log_queue.put(f"<pre>{traceback.format_exc()}</pre>")
51
+
52
+
53
+ def validate_categories(
54
+ selected_categories: Union[str, List[str]],
55
+ ) -> Tuple[bool, Optional[str]]:
56
+ """Validates the selected categories."""
57
+ if isinstance(selected_categories, str):
58
+ selected_categories = [selected_categories]
59
+
60
+ if not selected_categories:
61
+ return False, "⚠️ Please select at least one category before running."
62
+
63
+ if len(selected_categories) > MAX_CATEGORY_SELECTION:
64
+ return (
65
+ False,
66
+ f"⚠️ You can select up to {MAX_CATEGORY_SELECTION} categories only.",
67
+ )
68
+
69
+ return True, None
70
+
71
+
72
+ def check_demo_restrictions() -> Tuple[bool, Optional[str], Optional[str]]:
73
+ """Checks if the app can run under demo restrictions."""
74
+ can_run, message = can_run_app()
75
+
76
+ if not can_run:
77
+ return False, f"⚠️ {message}", None
78
+
79
+ if IS_DEMO_VERSION:
80
+ # Just update the run count, but use the message from can_run_app
81
+ update_state({"run_count": get_state()["run_count"] + 1})
82
+ # Get fresh message after updating the state
83
+ _, status_msg = can_run_app()
84
+ else:
85
+ status_msg = ""
86
+
87
+ return True, None, status_msg
88
+
89
+
90
+ def initial_ui_update(
91
+ log_data: List[str], status_msg: str
92
+ ) -> Tuple[str, str, Any, str]:
93
+ """Returns initial UI state for the app."""
94
+ disable_btn = gr.update(
95
+ interactive=False, elem_classes=["run-button", "btn-disabled"]
96
+ )
97
+ return html_for(log_data), format_deals_table([]), disable_btn, status_msg
98
+
99
+
100
+ def run_pipeline_threaded(
101
+ selected_categories: List[str],
102
+ log_data: List[str],
103
+ status_msg: str,
104
+ enable_btn: Any, # noqa: ANN401
105
+ ) -> Generator[Tuple[str, str, Any, str], None, None]:
106
+ """Runs pipeline in background thread with log streaming.
107
+
108
+ Yields UI updates until completion.
109
+
110
+ """
111
+ thread = threading.Thread(
112
+ target=run_pipeline, args=(log_queue, selected_categories)
113
+ )
114
+ thread.start()
115
+
116
+ disable_btn = gr.update(
117
+ interactive=False, elem_classes=["run-button", "btn-disabled"]
118
+ )
119
+
120
+ while thread.is_alive() or not log_queue.empty():
121
+ while not log_queue.empty():
122
+ log_msg = log_queue.get()
123
+ log_data.append(log_msg)
124
+ yield (
125
+ html_for(log_data),
126
+ format_deals_table(accepted_deals),
127
+ disable_btn,
128
+ status_msg,
129
+ )
130
+
131
+ if thread.is_alive():
132
+ time.sleep(0.2)
133
+ yield (
134
+ html_for(log_data),
135
+ format_deals_table(accepted_deals),
136
+ disable_btn,
137
+ status_msg,
138
+ )
139
+
140
+ # Final UI update after thread finishes
141
+ yield html_for(log_data), format_deals_table(accepted_deals), enable_btn, status_msg
142
+
143
+
144
+ def handle_pipeline_error(
145
+ e: Exception,
146
+ log_data: List[str],
147
+ enable_btn: gr.components.Component,
148
+ status_msg: str,
149
+ ) -> Tuple[str, str, gr.components.Component, str]:
150
+ """Handles exceptions and appends error logs.
151
+
152
+ Returns the final UI update tuple.
153
+ """
154
+ log_data.append(f"<span style='color:red'>❌ Unexpected error: {str(e)}</span>")
155
+ log_data.append(f"<pre>{traceback.format_exc()}</pre>")
156
+ return html_for(log_data), format_deals_table([]), enable_btn, status_msg
157
+
158
+
159
+ def run_and_stream_logs(
160
+ selected_categories: Union[str, List[str]],
161
+ ) -> Generator[Tuple[str, str, bool, str], None, None]:
162
+ """Runs pipeline in a thread, streaming logs and results to the UI.
163
+
164
+ Returns HTML logs, deal table, button state, and status message.
165
+ """
166
+ global accepted_deals
167
+ accepted_deals = []
168
+ log_data = []
169
+
170
+ # Step 1: Validate categories
171
+ is_valid, error_msg = validate_categories(selected_categories)
172
+ if not is_valid:
173
+ yield None, None, gr.update(interactive=True), error_msg
174
+ return
175
+
176
+ # Step 2: Check demo restrictions
177
+ can_run, error_msg, status_msg = check_demo_restrictions()
178
+ if not can_run:
179
+ yield (
180
+ html_for([error_msg]),
181
+ format_deals_table([]),
182
+ gr.update(interactive=True),
183
+ error_msg,
184
+ )
185
+ return
186
+
187
+ # Step 3: Initial UI update showing we're starting
188
+ enable_btn = gr.update(interactive=True, elem_classes=["run-button"])
189
+ yield initial_ui_update(log_data, status_msg)
190
+
191
+ try:
192
+ # Step 4: Run the pipeline in a thread
193
+ yield from run_pipeline_threaded(
194
+ selected_categories, log_data, status_msg, enable_btn
195
+ )
196
+
197
+ except Exception as e:
198
+ yield handle_pipeline_error(e, log_data, enable_btn, status_msg)
src/agents/planning_agent.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """PlanningAgent coordinates deal scanning and enrichment."""
2
+
3
+ import json
4
+ from typing import List
5
+
6
+ from rich import print_json
7
+
8
+ from src.agents.base_agent import Agent
9
+ from src.agents.deal_scanner_agent import DealScannerAgent
10
+ from src.agents.ensemble_price_agent import EnsemblePriceAgent
11
+ from src.config.constants import CURRENCY, DEAL_THRESHOLD
12
+ from src.deals.structured_deals import OpportunitiesCollection, Opportunity
13
+ from src.utils.logger import console
14
+ from src.utils.memory_utils import save_opportunities_to_memory
15
+
16
+
17
+ class PlanningAgent(Agent):
18
+ """Create instances of the Agents that this planner coordinates across."""
19
+
20
+ name = "Planning Agent"
21
+ color = "cyan"
22
+
23
+ def __init__(self) -> None:
24
+ """Initialize agents."""
25
+ self.log("🧠 Let’s wake up the agents — time to sniff out some sweet deals!")
26
+ self.log("is ready")
27
+ self.scanner = DealScannerAgent()
28
+ self.ensemble = EnsemblePriceAgent()
29
+ self.log("🚀 All AI Agents are caffeinated, calibrated, and ready to hustle..")
30
+
31
+ def scan_deals(self, categories: List[str]) -> List[Opportunity]:
32
+ """Scans deals and returns GPT-processed opportunities."""
33
+ result = self.scanner.scan(categories)
34
+ if result is None:
35
+ self.log("❌ No valid deals found.")
36
+ return []
37
+ return result.opportunities
38
+
39
+ def enrich(self, opportunity: Opportunity) -> Opportunity:
40
+ """Add estimated market price and discount to an opportunity."""
41
+ estimate = self.ensemble.price(opportunity.product_description)
42
+ discount = round(estimate - opportunity.price, 2)
43
+ opportunity.estimate = estimate
44
+ opportunity.discount = discount
45
+ return opportunity
46
+
47
+ def _log_result(self, idx: int, opportunity: Opportunity) -> None:
48
+ """Logs if a deal was accepted or rejected.
49
+
50
+ Decision is based on discount vs. threshold.
51
+
52
+ """
53
+ if opportunity.discount >= DEAL_THRESHOLD:
54
+ self.log(
55
+ f"✅ Deal #{idx} accepted — discount: "
56
+ f"{CURRENCY}{opportunity.discount:.2f}"
57
+ )
58
+ else:
59
+ self.log(
60
+ f"❌ Deal #{idx} rejected — discount below threshold: "
61
+ f"{CURRENCY}{opportunity.discount:.2f}"
62
+ )
63
+
64
+ def _report_summary(self, enriched: List[Opportunity]) -> None:
65
+ """Display a summary of accepted opportunities after enrichment."""
66
+ if not enriched:
67
+ self.log("❌ No opportunities met the discount threshold.")
68
+ else:
69
+ for opp in enriched:
70
+ console.print(
71
+ f"- {opp.product_description}\n"
72
+ f" Price: {CURRENCY}{opp.price:.2f} | "
73
+ f"AI Estimate: {CURRENCY}{opp.estimate:.2f} | "
74
+ f"Discount: {CURRENCY}{opp.discount:.2f}\n"
75
+ f" URL: {opp.url}\n"
76
+ )
77
+
78
+ def plan(self, categories: List[str]) -> List[Opportunity]:
79
+ """Full pipeline: scan → enrich → filter → save."""
80
+ self.log(
81
+ "************** SCANNING INITIATED — HUNTING JUICY DEALS...**************"
82
+ )
83
+
84
+ deals = self.scan_deals(categories)
85
+ if not deals:
86
+ self.log("❌ No deals found from scanner.")
87
+ return []
88
+ print_json(
89
+ data=json.loads(
90
+ OpportunitiesCollection(opportunities=deals).model_dump_json()
91
+ )
92
+ ) # For debugging/inspection
93
+
94
+ self.log(
95
+ "************** SCANNING COMPLETE — STARTING ENRICHMENT **************"
96
+ )
97
+
98
+ enriched = []
99
+ for idx, deal in enumerate(deals, start=1):
100
+ opportunity = self.enrich(deal)
101
+ self._log_result(idx, opportunity)
102
+ if opportunity.discount >= DEAL_THRESHOLD:
103
+ enriched.append(opportunity)
104
+
105
+ self.log(
106
+ "************** ENRICHMENT COMPLETE — SAVING OPPORTUNITIES **************"
107
+ )
108
+ save_opportunities_to_memory([opp.model_dump() for opp in enriched])
109
+ self.log(f"💾 {len(enriched)} top deals saved to memory.")
110
+
111
+ self._report_summary(enriched)
112
+ self.log(
113
+ "************** ✅ MISSION COMPLETE — BEST DEALS LOCKED IN **************"
114
+ )
115
+
116
+ return enriched
src/agents/rag_price_agent.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Handles the integration of RAG model with Modal to predict item prices."""
2
+
3
+ import modal
4
+
5
+ from src.agents.base_agent import Agent
6
+ from src.modal_services.app_config import APP_NAME
7
+
8
+
9
+ class RAGPriceAgent(Agent):
10
+ """RAGPriceAgent connects to a remote Modal container.
11
+
12
+ Uses a RAG pipeline to predict item prices.
13
+ """
14
+
15
+ name = "RAGPrice Agent"
16
+ color = "blue"
17
+
18
+ def __init__(self) -> None:
19
+ """Initialize the agent."""
20
+ self._modal_called = False
21
+ remote_rag_pricer = modal.Cls.from_name(APP_NAME, "RAGPricer")
22
+ self.rag = remote_rag_pricer()
23
+ self.log("is ready")
24
+
25
+ def price(self, description: str) -> float:
26
+ """Call the remote RAGPricer to estimate price."""
27
+ if not self._modal_called:
28
+ self.log("📡 Connecting to Modal — loading embedding model and ChromaDB...")
29
+ self._modal_called = True
30
+ try:
31
+ result = self.rag.price.remote(description)
32
+ # self.log(f"predicting ${result:.2f} ✅")
33
+ return result
34
+ except Exception as e:
35
+ self.log(f"[ERROR] Remote RAGPricer failed: {e}")
36
+ raise RuntimeError("RAGPriceAgent failed to get price from Modal.") from e
src/agents/xgb_price_agent.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Handles the integration of XGBoost model with Modal to predict item prices."""
2
+
3
+ import modal
4
+
5
+ from src.agents.base_agent import Agent
6
+ from src.modal_services.app_config import APP_NAME
7
+
8
+
9
+ class XGBoostPriceAgent(Agent):
10
+ """XGBoostPriceAgent connects to a remote Modal container.
11
+
12
+ Uses E5 and XGBoost to predict prices from descriptions.
13
+ """
14
+
15
+ name = "XGBPrice Agent"
16
+ color = "yellow"
17
+
18
+ def __init__(self) -> None:
19
+ """Initialize the agent."""
20
+ self._modal_called = False
21
+ remote_xgb_pricer = modal.Cls.from_name(APP_NAME, "XGBPricer")
22
+ self.xgb = remote_xgb_pricer()
23
+ self.log("is ready")
24
+
25
+ def price(self, description: str) -> float:
26
+ """Call the remote XGBPricer to estimate price."""
27
+ if not self._modal_called:
28
+ self.log("📡 Connecting to Modal — loading XGBoost and embedding model...")
29
+ self._modal_called = True
30
+ try:
31
+ result = self.xgb.price.remote(description)
32
+ # self.log(f"predicting ${result:.2f} ✅")
33
+ return result
34
+ except Exception as e:
35
+ self.log(f"[ERROR] Remote XGBPricer failed: {e}")
36
+ raise RuntimeError(
37
+ "XGBoostPriceAgent failed to get price from Modal."
38
+ ) from e
src/config/constants.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/config/constants.py
2
+ """Constants for configuration across the project."""
3
+
4
+ from pathlib import Path
5
+
6
+ import tomllib
7
+
8
+ # ==================== PROJECT METADATA ====================
9
+ root = Path(__file__).parent.parent.parent
10
+ with open(root / "pyproject.toml", "rb") as f:
11
+ pyproject = tomllib.load(f)
12
+
13
+ PROJECT_NAME = pyproject["project"]["name"]
14
+ VERSION = pyproject["project"]["version"]
15
+
16
+ # ==================== ENVIRONMENT CONFIG ====================
17
+ ENV = "PROD" # or "PROD"
18
+ IS_PROD = ENV == "PROD"
19
+ IS_DEMO_VERSION = IS_PROD # True if PROD, False if DEV
20
+
21
+ # App naming for Modal
22
+ APP_NAME = f"{PROJECT_NAME}-{ENV.lower()}-{VERSION}"
23
+
24
+ # ==================== APPLICATION LIMITS ====================
25
+ MAX_DEMO_RUNS_PER_DAY = 5
26
+ MAX_LOG_LINES = 50
27
+ MAX_DEALS_PER_FEED = 20
28
+ MAX_CATEGORY_SELECTION = 3
29
+ MEMORY_EXPIRATION_DAYS = 5
30
+
31
+ # ==================== BUSINESS LOGIC ====================
32
+ CURRENCY = "$"
33
+ DEAL_THRESHOLD = 50
34
+
35
+ # ==================== PATHS ====================
36
+ BASE_DIR = Path(__file__).resolve().parent.parent.parent
37
+ MEMORY_DIR = BASE_DIR / "memory"
38
+ STATE_FILE = MEMORY_DIR / "demo_state.json"
39
+ DEALS_FILE = MEMORY_DIR / "memory.json"
src/config/feeds.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Maps deal categories to RSS feed URLs.
2
+
3
+ Shared across UI, agents, and scrapers.
4
+ """
5
+
6
+ CATEGORY_FEEDS = {
7
+ "Home & Garden": "https://www.dealnews.com/c196/Home-Garden/?rss=1",
8
+ "Clothing & Accessories": "https://www.dealnews.com/c202/Clothing-Accessories/?rss=1",
9
+ "Electronics": "https://www.dealnews.com/c142/Electronics/?rss=1",
10
+ "Health & Beauty": "https://www.dealnews.com/c756/Health-Beauty/?rss=1",
11
+ "Computers": "https://www.dealnews.com/c39/Computers/?rss=1",
12
+ "Sports & Fitness": "https://www.dealnews.com/c211/Sports-Fitness/?rss=1",
13
+ "Gaming & Toys": "https://www.dealnews.com/c186/Gaming-Toys/?rss=1",
14
+ "Automotive": "https://www.dealnews.com/c238/Automotive/?rss=1",
15
+ "Movies, Music & Books": "https://www.dealnews.com/c178/Movies-Music-Books/?rss=1",
16
+ "Office & School Supplies": "https://www.dealnews.com/c182/Office-School-Supplies/?rss=1",
17
+ "Special Occasion": "https://www.dealnews.com/c636/Special-Occasion/?rss=1",
18
+ }
src/config/logging_queue.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ """Initializes a thread-safe global log queue.
2
+
3
+ Used to manage log messages across threads.
4
+ """
5
+
6
+ import queue
7
+
8
+ log_queue = queue.Queue()
src/deals/__init__.py ADDED
File without changes
src/deals/raw_deals.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Handles scraping and preprocessing logic before OpenAI interaction."""
2
+
3
+ import time
4
+ from typing import Dict, List, Self
5
+
6
+ import feedparser
7
+ import requests
8
+ from bs4 import BeautifulSoup
9
+
10
+ from src.config.constants import MAX_DEALS_PER_FEED
11
+ from src.config.feeds import CATEGORY_FEEDS
12
+ from src.utils.logger import console
13
+
14
+
15
+ def extract(html_snippet: str) -> str:
16
+ """Cleans text from messy HTML with fallback handling."""
17
+ soup = BeautifulSoup(html_snippet, "html.parser")
18
+ snippet = soup.find("div", class_="snippet summary")
19
+
20
+ # Extract inner HTML or fallback to full snippet
21
+ raw_html = snippet.decode_contents() if snippet else html_snippet
22
+
23
+ # Parse again to clean any nested/malformed HTML
24
+ clean_soup = BeautifulSoup(raw_html, "html.parser")
25
+ text = clean_soup.get_text(" ", strip=True)
26
+
27
+ return text.replace("\n", " ")
28
+
29
+
30
+ class ScrapedDeal:
31
+ """Represents a deal from an RSS feed.
32
+
33
+ Flow: fetch() → __init__ → _load_content() → use methods.
34
+ """
35
+
36
+ category: str # Deal type
37
+ title: str # Deal title
38
+ summary: str # RSS summary
39
+ url: str # Deal link
40
+ details: str # Full description
41
+ features: str # Feature list
42
+
43
+ def __init__(self, entry: Dict[str, str]) -> None:
44
+ """Initialize deal from RSS entry and fetch content."""
45
+ # Basic metadata from RSS
46
+ self.title = entry["title"]
47
+ self.summary = extract(entry["summary"])
48
+ self.url = entry["links"][0]["href"]
49
+
50
+ # Initialize placeholders
51
+ self.details = ""
52
+ self.features = ""
53
+
54
+ # Fetch and parse full deal content
55
+ self._load_content()
56
+
57
+ def _load_content(self) -> None:
58
+ """Fetches and parses deal content; raises on failure to skip."""
59
+ try:
60
+ res = requests.get(self.url, timeout=5)
61
+ res.raise_for_status()
62
+
63
+ soup = BeautifulSoup(res.content, "html.parser")
64
+ content = soup.find("div", class_="content-section")
65
+
66
+ if content:
67
+ text = content.get_text().replace("\nmore", "").replace("\n", " ")
68
+ if "Features" in text:
69
+ self.details, self.features = text.split("Features", 1)
70
+ else:
71
+ self.details = text
72
+ self.features = ""
73
+ else:
74
+ raise ValueError("No content section found.")
75
+
76
+ except Exception as e:
77
+ raise RuntimeError(f"Failed to load deal content from {self.url}: {e}")
78
+
79
+ def __repr__(self) -> str:
80
+ """Quick string representation of the deal."""
81
+ return f"<{self.title}>"
82
+
83
+ def describe(self) -> str:
84
+ """Detailed description of the deal."""
85
+ return (
86
+ f"Title: {self.title.strip()}\n"
87
+ f"Details: {self.details.strip()}\n"
88
+ f"Features: {self.features.strip()}\n"
89
+ f"URL: {self.url.strip()}"
90
+ )
91
+
92
+ @classmethod
93
+ def fetch(cls, selected_categories: List[str]) -> List[Self]:
94
+ """Parses RSS feeds into ScrapedDeal instances.
95
+
96
+ Skips failed deals; stops app if all fail.
97
+ """
98
+ deals = []
99
+ feed_urls = [
100
+ CATEGORY_FEEDS[cat] for cat in selected_categories if cat in CATEGORY_FEEDS
101
+ ]
102
+
103
+ for feed_url in feed_urls:
104
+ feed = cls._parse_feed(feed_url)
105
+ if feed is None:
106
+ continue
107
+
108
+ console.print(
109
+ f"[bold blue]DEBUG[/] {len(feed.entries)} entries found in feed: "
110
+ f"{feed_url}"
111
+ )
112
+
113
+ for entry in feed.entries[:MAX_DEALS_PER_FEED]:
114
+ cls._process_deal(entry, deals)
115
+
116
+ # Throttle requests to avoid hitting servers too fast
117
+ time.sleep(0.5)
118
+
119
+ if not deals:
120
+ raise RuntimeError("❌ All deals failed to load. Stopping.")
121
+
122
+ return deals
123
+
124
+ @staticmethod
125
+ def _parse_feed(feed_url: str) -> feedparser.FeedParserDict | None:
126
+ """Helper method to parse the RSS feed and return the feed data."""
127
+ feed = feedparser.parse(feed_url)
128
+ if feed.bozo:
129
+ console.print(
130
+ f"[bold red]ERROR[/] Failed to parse RSS feed: {feed_url} "
131
+ f"({feed.bozo_exception})"
132
+ )
133
+ return None
134
+ return feed
135
+
136
+ @staticmethod
137
+ def _process_deal(entry: Dict[str, str], deals: List[Self]) -> None:
138
+ """Helper method to process each RSS entry and add valid deals."""
139
+ try:
140
+ deal = ScrapedDeal(entry)
141
+ deals.append(deal)
142
+ except Exception as e:
143
+ console.print(
144
+ f"[bold yellow]WARN[/] Skipped deal "
145
+ f"'{entry.get('title', 'Unknown')}' due to error: {e}"
146
+ )
src/deals/structured_deals.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Defines post-GPT deal classes.
2
+
3
+ Used for further analysis as structured opportunities.
4
+ """
5
+
6
+ from typing import List, Optional
7
+
8
+ from pydantic import BaseModel
9
+
10
+
11
+ class Opportunity(BaseModel):
12
+ """A single opportunity (final deal) after GPT response."""
13
+
14
+ product_description: str # Full description of the product
15
+ price: float # Listed price from the deal feed
16
+ url: str # Link to the product
17
+ estimate: Optional[float] = None # predicted price
18
+ discount: Optional[float] = None # estimate - price
19
+
20
+
21
+ class OpportunitiesCollection(BaseModel):
22
+ """A list of top opportunities selected by GPT."""
23
+
24
+ opportunities: List[Opportunity] # High-quality final deals
src/modal_services/__init__.py ADDED
File without changes
src/modal_services/app_config.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Defines shared Modal configuration.
2
+
3
+ Includes constants, image, volume, secrets, and app setup.
4
+ """
5
+
6
+ from modal import App, Image, Secret, Volume
7
+
8
+ from src.config.constants import APP_NAME
9
+
10
+ CACHE_PATH = "/cache"
11
+ GPU = "T4"
12
+
13
+ # Modal image, volume, and secrets
14
+ image = (
15
+ Image.debian_slim()
16
+ .pip_install(
17
+ "huggingface",
18
+ "torch",
19
+ "transformers",
20
+ "bitsandbytes",
21
+ "accelerate",
22
+ "peft",
23
+ "sentence-transformers",
24
+ "xgboost",
25
+ "joblib",
26
+ "chromadb",
27
+ "openai",
28
+ "numpy",
29
+ "pandas",
30
+ )
31
+ .env({"HF_HUB_CACHE": CACHE_PATH})
32
+ .add_local_file(local_path="pyproject.toml", remote_path="/root/pyproject.toml")
33
+ )
34
+
35
+ # Modal setup
36
+ app = App(APP_NAME, image=image)
37
+
38
+ cache_vol = Volume.from_name("hf-hub-cache", create_if_missing=True)
39
+ secrets = [Secret.from_name("HF_TOKEN"), Secret.from_name("OPENAI_API_KEY")]
40
+
41
+ # Shared Modal class config for all model agents
42
+ modal_class_kwargs = dict(
43
+ image=image,
44
+ secrets=secrets,
45
+ volumes={CACHE_PATH: cache_vol}, # Mount volume into /cache
46
+ gpu=GPU,
47
+ timeout=1800, # 30-minute max runtime
48
+ min_containers=0, # 1 = always-on, uses credits
49
+ scaledown_window=180, # Shuts down the container
50
+ )
src/modal_services/e5_model_base.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Base class for E5 model handling.
2
+
3
+ Downloads, caches, and loads the model for reuse.
4
+ """
5
+
6
+ import logging
7
+ import os
8
+
9
+ # Import CACHE_PATH from your config
10
+ from src.modal_services.app_config import CACHE_PATH
11
+
12
+ # Define the model directory using the imported CACHE_PATH
13
+ E5_MODEL_DIR = f"{CACHE_PATH}/e5_model"
14
+
15
+
16
+ class E5ModelBase:
17
+ """Base class for downloading and loading the E5 model."""
18
+
19
+ def setup_e5_model(self) -> None:
20
+ """Downloads and loads the E5 embedding model."""
21
+ try:
22
+ # Lazy imports to avoid issues in Docker
23
+ from huggingface_hub import snapshot_download
24
+ from sentence_transformers import SentenceTransformer
25
+
26
+ # Cache E5 embedding model into /cache/e5_model
27
+ os.makedirs(E5_MODEL_DIR, exist_ok=True)
28
+ if not os.listdir(E5_MODEL_DIR):
29
+ snapshot_download("intfloat/e5-small-v2", local_dir=E5_MODEL_DIR)
30
+ logging.info("E5 model downloaded.")
31
+
32
+ self.vectorizer = SentenceTransformer(E5_MODEL_DIR, device="cuda")
33
+ logging.info("E5 model loaded on GPU.")
34
+
35
+ except Exception as e:
36
+ logging.error(f"[E5ModelBase] Failed to setup E5 model: {e}")
37
+ raise RuntimeError("[E5ModelBase] E5 model setup failed.") from e
src/modal_services/ensemble_pricer.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Remote Modal service for price prediction.
2
+
3
+ Combines outputs from multiple agents using an ensemble model.
4
+ """
5
+
6
+ # Standard library imports
7
+ import logging
8
+
9
+ import modal
10
+
11
+ # Third-party imports
12
+ # Local imports
13
+ from src.modal_services.app_config import (
14
+ CACHE_PATH,
15
+ app,
16
+ modal_class_kwargs,
17
+ )
18
+
19
+ # Configure logging after all imports
20
+ logging.basicConfig(level=logging.INFO)
21
+
22
+ REPO_ID = "lisekarimi/smart-deal-finder-models"
23
+
24
+ # Local paths inside Modal volume
25
+ ENSEMBLE_MODEL_DIR = f"{CACHE_PATH}/ensemble_model"
26
+ ENSEMBLE_MODEL_FILENAME = "ensemble_model.pkl"
27
+
28
+
29
+ @app.cls(**modal_class_kwargs)
30
+ class EnsemblePricer:
31
+ """Modal class for ensemble price prediction from agent outputs."""
32
+
33
+ @modal.enter()
34
+ def setup(self) -> None:
35
+ """Loads ensemble model from Hugging Face into Modal cache."""
36
+ try:
37
+ # Lazy load hf_hub_download and joblib
38
+ import joblib
39
+ from huggingface_hub import hf_hub_download
40
+
41
+ logging.info("Downloading Ensemble model...")
42
+ model_path = hf_hub_download(
43
+ repo_id=REPO_ID,
44
+ filename=ENSEMBLE_MODEL_FILENAME,
45
+ cache_dir=ENSEMBLE_MODEL_DIR,
46
+ )
47
+
48
+ logging.info("Ensemble model downloaded.")
49
+ self.model = joblib.load(model_path)
50
+ logging.info("Ensemble model loaded successfully.")
51
+
52
+ except Exception as e:
53
+ logging.error(f"[EnsemblePricer] Failed during setup: {e}")
54
+ raise RuntimeError("[EnsemblePricer] Setup failed.") from e
55
+
56
+ @modal.method()
57
+ def price(self, ft: float, rag: float, xgb: float) -> float:
58
+ """Predicts final price using ensemble of 3 models."""
59
+ try:
60
+ # Lazy load pandas and numpy for feature creation
61
+ import numpy as np
62
+ import pandas as pd
63
+
64
+ features = pd.DataFrame(
65
+ {
66
+ "FT_LLaMA": [ft],
67
+ "GPT4oMini": [rag],
68
+ "XGBoost": [xgb],
69
+ "Max": [max(ft, rag, xgb)],
70
+ "Mean": [np.mean([ft, rag, xgb])],
71
+ }
72
+ )
73
+ prediction = self.model.predict(features)[0]
74
+ return round(float(prediction), 2)
75
+ except Exception as e:
76
+ logging.error(f"[EnsemblePricer] Prediction failed: {e}")
77
+ return 0.0
src/modal_services/entry.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Entry point for creating Modal classes."""
2
+
3
+ import os
4
+
5
+ import modal
6
+ from dotenv import load_dotenv
7
+
8
+ from src.modal_services.app_config import app
9
+ from src.modal_services.ensemble_pricer import EnsemblePricer
10
+ from src.modal_services.ft_pricer import FTPricer
11
+ from src.modal_services.rag_pricer import RAGPricer
12
+ from src.modal_services.xgb_pricer import XGBPricer
13
+
14
+ # Load environment variables after imports
15
+ load_dotenv()
16
+
17
+ MODAL_TOKEN_ID = os.getenv("MODAL_TOKEN_ID")
18
+ MODAL_TOKEN_SECRET = os.getenv("MODAL_TOKEN_SECRET")
19
+
20
+ if not MODAL_TOKEN_ID or not MODAL_TOKEN_SECRET:
21
+ raise ValueError("❌ Missing Modal tokens!")
22
+
23
+ # These imports are required for Modal class registration
24
+ __all__ = ["FTPricer", "XGBPricer", "RAGPricer", "EnsemblePricer", "app", "modal"]
src/modal_services/ft_pricer.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Defines Pricer agent using fine-tuned LLaMA on Modal."""
2
+
3
+ import logging
4
+ import os
5
+ from typing import Any
6
+
7
+ import modal
8
+
9
+ from src.modal_services.app_config import CACHE_PATH, app, modal_class_kwargs
10
+ from src.utils.text_utils import extract_tagged_price
11
+
12
+ logging.basicConfig(level=logging.INFO)
13
+
14
+ # Model identifiers
15
+ BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
16
+ FINETUNED_MODEL = "ed-donner/pricer-2024-09-13_13.04.39"
17
+ REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
18
+
19
+ # Local model paths in volume
20
+ BASE_MODEL_DIR = f"{CACHE_PATH}/llama_base_model"
21
+ FINETUNED_MODEL_DIR = f"{CACHE_PATH}/llama_finetuned_model"
22
+
23
+ QUESTION = "How much does this cost to the nearest dollar?"
24
+ PREFIX = "Price is $"
25
+
26
+
27
+ @app.cls(**modal_class_kwargs)
28
+ class FTPricer:
29
+ """Remote pricing with LLaMA, PEFT, and 4-bit quantization."""
30
+
31
+ @staticmethod
32
+ def _build_prompt(description: str) -> str:
33
+ return f"{QUESTION}\n\n{description}\n\n{PREFIX}"
34
+
35
+ @staticmethod
36
+ def _generate_output(
37
+ model: Any, # noqa: ANN401
38
+ inputs: dict,
39
+ tokenizer: Any, # noqa: ANN401
40
+ ) -> str:
41
+ """Generate output from model."""
42
+ import torch
43
+
44
+ with torch.no_grad():
45
+ outputs = model.generate(**inputs, max_new_tokens=5, num_return_sequences=1)
46
+ return tokenizer.decode(outputs[0])
47
+
48
+ @staticmethod
49
+ def _download_models() -> None:
50
+ from huggingface_hub import snapshot_download
51
+
52
+ snapshot_download(BASE_MODEL, local_dir=BASE_MODEL_DIR)
53
+ snapshot_download(
54
+ FINETUNED_MODEL, revision=REVISION, local_dir=FINETUNED_MODEL_DIR
55
+ )
56
+
57
+ def _load_tokenizer(self) -> None:
58
+ from transformers import AutoTokenizer
59
+
60
+ self.tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_DIR)
61
+ self.tokenizer.pad_token = self.tokenizer.eos_token
62
+ self.tokenizer.padding_side = "right"
63
+ logging.info("Tokenizer loaded.")
64
+
65
+ def _load_models(self) -> None:
66
+ import torch
67
+ from peft import PeftModel
68
+ from transformers import AutoModelForCausalLM, BitsAndBytesConfig
69
+
70
+ base_model = AutoModelForCausalLM.from_pretrained(
71
+ BASE_MODEL_DIR,
72
+ quantization_config=BitsAndBytesConfig(
73
+ load_in_4bit=True,
74
+ bnb_4bit_use_double_quant=True,
75
+ bnb_4bit_compute_dtype=torch.bfloat16,
76
+ bnb_4bit_quant_type="nf4",
77
+ ),
78
+ device_map="auto",
79
+ )
80
+
81
+ self.fine_tuned_model = PeftModel.from_pretrained(
82
+ base_model, FINETUNED_MODEL_DIR, revision=REVISION
83
+ )
84
+ self.fine_tuned_model.eval()
85
+ gen_config = self.fine_tuned_model.generation_config
86
+ gen_config.pad_token_id = self.tokenizer.pad_token_id
87
+ gen_config.eos_token_id = self.tokenizer.eos_token_id
88
+ logging.info("Models loaded.")
89
+
90
+ @modal.enter()
91
+ def setup(self) -> None:
92
+ """Load base and fine-tuned models with tokenizer and quantization."""
93
+ try:
94
+ os.makedirs(CACHE_PATH, exist_ok=True)
95
+ self._download_models()
96
+ logging.info("Base and fine-tuned models downloaded.")
97
+ self._load_tokenizer()
98
+ self._load_models()
99
+ except Exception as e:
100
+ logging.error(f"[FTPricer] Setup failed: {e}")
101
+ raise RuntimeError("[FTPricer] Model setup failed") from e
102
+
103
+ @modal.method()
104
+ def price(self, description: str) -> float:
105
+ """Generate a price estimate based on a product description."""
106
+ from transformers import set_seed
107
+
108
+ try:
109
+ set_seed(42)
110
+ logging.info("[FTPricer] Generating price...")
111
+
112
+ prompt = self._build_prompt(description)
113
+ inputs = self.tokenizer(prompt, return_tensors="pt", padding=True).to(
114
+ "cuda"
115
+ )
116
+ result = self._generate_output(
117
+ self.fine_tuned_model, inputs, self.tokenizer
118
+ )
119
+ price = extract_tagged_price(result)
120
+
121
+ logging.info(f"[FTPricer] Predicted price: {price}")
122
+ return price
123
+
124
+ except Exception as e:
125
+ logging.error(f"[FTPricer] Prediction failed: {e}")
126
+ return 0.0
src/modal_services/rag_pricer.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Predicts item prices using RAG (Retrieval Augmented Generation).
2
+
3
+ With ChromaDB, E5 embeddings, and GPT-4o-mini.
4
+ """
5
+
6
+ # Standard library imports
7
+ import logging
8
+ import os
9
+ import zipfile
10
+
11
+ import modal
12
+
13
+ # Third-party imports
14
+ import numpy as np
15
+ import requests
16
+
17
+ # Local imports
18
+ from src.modal_services.app_config import CACHE_PATH, app, modal_class_kwargs
19
+ from src.modal_services.e5_model_base import E5ModelBase
20
+ from src.models.frontier_model import OPENAI_MODEL
21
+ from src.utils.text_utils import extract_price
22
+
23
+ # Configure logging after all imports
24
+ logging.basicConfig(level=logging.INFO)
25
+
26
+ # Paths
27
+ E5_MODEL_DIR = f"{CACHE_PATH}/e5_model"
28
+ CHROMA_DIR = f"{CACHE_PATH}/chroma"
29
+ CHROMA_ZIP_URL = "https://aiprojects-lise-karimi.s3.eu-west-3.amazonaws.com/smart-deal-finder/chroma.zip"
30
+ COLLECTION_NAME = "price_items"
31
+
32
+
33
+ @app.cls(**modal_class_kwargs)
34
+ class RAGPricer(E5ModelBase):
35
+ """Remote class for pricing products using RAG pipeline."""
36
+
37
+ @modal.enter()
38
+ def setup(self) -> None:
39
+ """Load E5 embedding model, ChromaDB and OpenAI client."""
40
+ try:
41
+ # Lazy load the required modules
42
+ import chromadb
43
+
44
+ # Setup E5 model using the base class method
45
+ self.setup_e5_model()
46
+
47
+ # ChromaDB setup remains the same
48
+ if not os.path.exists(CHROMA_DIR):
49
+ os.makedirs(CHROMA_DIR, exist_ok=True)
50
+ r = requests.get(CHROMA_ZIP_URL)
51
+ with open("/tmp/chroma.zip", "wb") as f:
52
+ f.write(r.content)
53
+ with zipfile.ZipFile("/tmp/chroma.zip", "r") as zip_ref:
54
+ zip_ref.extractall(CHROMA_DIR)
55
+ logging.info("ChromaDB ready.")
56
+
57
+ self.chroma_client = chromadb.PersistentClient(path=CHROMA_DIR)
58
+ self.collection = self.chroma_client.get_collection(name=COLLECTION_NAME)
59
+ logging.info("ChromaDB client ready.")
60
+
61
+ except Exception as e:
62
+ logging.error(f"[RAGPricer] Failed during setup: {e}")
63
+ raise RuntimeError("[RAGPricer] Setup failed.") from e
64
+
65
+ def _get_embedding(self, item: str) -> np.ndarray:
66
+ """Encodes the item description into embeddings using the E5 model."""
67
+ return self.vectorizer.encode(["passage: " + item], normalize_embeddings=True)
68
+
69
+ def _find_similar_items(self, item: str) -> tuple[list[str], list[float]]:
70
+ """Finds similar items from ChromaDB based on embeddings."""
71
+ query_emb = self._get_embedding(item).astype(float).tolist()
72
+ results = self.collection.query(query_embeddings=query_emb, n_results=5)
73
+ documents = results["documents"][0][:]
74
+ prices = [m["price"] for m in results["metadatas"][0][:]]
75
+
76
+ # Log similar items and their prices
77
+ for doc, price in zip(documents, prices):
78
+ logging.info(f"[RAGPricer] Similar item: '{doc}' | Price: ${price:.2f}")
79
+
80
+ return documents, prices
81
+
82
+ def _format_context(self, similars: list[str], prices: list[float]) -> str:
83
+ """Formats the context for the RAG pipeline."""
84
+ message = "To provide some context, here are some other items "
85
+ message += "that might be similar to the item you need to estimate.\n\n"
86
+
87
+ for similar, price in zip(similars, prices):
88
+ message += (
89
+ f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
90
+ )
91
+
92
+ return message
93
+
94
+ def _build_messages(
95
+ self, item: dict, similars: list[str], prices: list[float]
96
+ ) -> list[dict[str, str]]:
97
+ """Builds messages for the GPT-4o-mini model to predict the price."""
98
+ system_message = (
99
+ "You are a pricing expert. "
100
+ "Given a product description and a few similar products with their prices, "
101
+ "you must estimate the most likely price for the given product. "
102
+ "Always respond ONLY with a number, no words or explanation."
103
+ )
104
+ context = self._format_context(similars, prices)
105
+ user_prompt = (
106
+ "Estimate the price for the following product:\n\n"
107
+ + item["description"]
108
+ + "\n\n"
109
+ + context
110
+ )
111
+
112
+ return [
113
+ {"role": "system", "content": system_message},
114
+ {"role": "user", "content": user_prompt},
115
+ {"role": "assistant", "content": "Price is $"},
116
+ ]
117
+
118
+ @modal.method()
119
+ def price(self, description: str) -> float:
120
+ """Predicts price from description using RAG and Frontier."""
121
+ try:
122
+ logging.info("[RAGPricer] Searching similar items...")
123
+ documents, prices = self._find_similar_items(description)
124
+ messages = self._build_messages(
125
+ {"description": description}, documents, prices
126
+ )
127
+
128
+ # Lazy import OpenAI API
129
+ import openai
130
+
131
+ response = openai.chat.completions.create(
132
+ model=OPENAI_MODEL, messages=messages, seed=42, max_tokens=5
133
+ )
134
+ reply = response.choices[0].message.content
135
+ price = extract_price(reply)
136
+
137
+ logging.info(f"[RAGPricer] Predicted price: {price}")
138
+ return price
139
+ except Exception as e:
140
+ logging.error(f"[RAGPricer] Failed to predict price: {e}")
141
+ return 0.0
src/modal_services/xgb_pricer.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Predicts prices using XGBoost and E5 embeddings."""
2
+
3
+ import logging
4
+
5
+ import modal
6
+
7
+ from src.modal_services.app_config import (
8
+ CACHE_PATH,
9
+ app,
10
+ modal_class_kwargs,
11
+ )
12
+ from src.modal_services.e5_model_base import E5ModelBase
13
+
14
+ REPO_ID = "lisekarimi/smart-deal-finder-models"
15
+
16
+ # Local paths inside Modal volume
17
+ E5_MODEL_DIR = f"{CACHE_PATH}/e5_model"
18
+ XGB_MODEL_DIR = f"{CACHE_PATH}/xgb_model"
19
+ XGB_MODEL_FILENAME = "xgboost_model.pkl"
20
+
21
+
22
+ @app.cls(**modal_class_kwargs)
23
+ class XGBPricer(E5ModelBase):
24
+ """Remote pricing via E5 and XGBoost."""
25
+
26
+ @modal.enter()
27
+ def setup(self) -> None:
28
+ """Loads E5 and XGBoost into Modal cache."""
29
+ try:
30
+ # Setup E5 model using the base class method
31
+ self.setup_e5_model()
32
+
33
+ # Lazy load XGBoost model and download it inside the setup method
34
+ import joblib
35
+ from huggingface_hub import hf_hub_download
36
+
37
+ logging.info("Downloading XGBoost model...")
38
+ model_path = hf_hub_download(
39
+ repo_id=REPO_ID, filename=XGB_MODEL_FILENAME, cache_dir=XGB_MODEL_DIR
40
+ )
41
+
42
+ logging.info("XGBoost model downloaded.")
43
+ self.model = joblib.load(model_path)
44
+ logging.info("XGBoost model loaded.")
45
+
46
+ except Exception as e:
47
+ logging.error(f"[XGBPricer] Failed during setup: {e}")
48
+ raise RuntimeError("[XGBPricer] Setup failed.") from e
49
+
50
+ @modal.method()
51
+ def price(self, description: str) -> float:
52
+ """Predict price from product description using E5 + XGBoost."""
53
+ try:
54
+ logging.info("[XGBPricer] Encoding description...")
55
+ vector = self.vectorizer.encode(["passage: " + description])
56
+ pred = self.model.predict(vector)[0]
57
+ logging.info(f"[XGBPricer] Predicted price: {pred}")
58
+ return round(float(max(0, pred)), 2)
59
+ except Exception as e:
60
+ logging.error(f"[XGBPricer] Failed to predict price: {e}")
61
+ return 0.0
src/models/__init__.py ADDED
File without changes
src/models/frontier_model.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Initializes access to frontier AI models using environment variables."""
2
+
3
+ import os
4
+
5
+ from dotenv import load_dotenv
6
+ from openai import OpenAI
7
+
8
+ load_dotenv(override=True)
9
+
10
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
11
+
12
+ if not OPENAI_API_KEY:
13
+ raise ValueError("❌ OpenAI API Key is missing!")
14
+
15
+ openai = OpenAI(api_key=OPENAI_API_KEY)
16
+ OPENAI_MODEL = "gpt-4o-mini"
src/ui/assets/styles.css ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ==== Global Reset & Layout ==== */
2
+ html, body, #app, body > div, .gradio-container {
3
+ background-color: #1f2937 !important;
4
+ margin: 0;
5
+ padding: 0;
6
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
7
+ display: flex;
8
+ justify-content: center;
9
+ }
10
+
11
+ /* ==== Main Container ==== */
12
+ #app-container {
13
+ background-color: #0e2d53 !important;
14
+ margin: 0 auto;
15
+ padding: 20px;
16
+ border-radius: 50px;
17
+ box-shadow: 0 4px 30px 5px rgba(0, 0, 0, 0.7);
18
+ border: 1px solid #1f2f46;
19
+ max-width: 1000px;
20
+ width: 100%;
21
+ color: white;
22
+ margin-bottom: 10px !important;
23
+ }
24
+
25
+ #app-container > *:last-child {
26
+ margin-bottom: 0 !important;
27
+ }
28
+
29
+ #app-container h4,
30
+ #app-container p,
31
+ #app-container ol,
32
+ #app-container li,
33
+ #app-container strong {
34
+ font-size: 16px;
35
+ line-height: 1.6;
36
+ color: white !important;
37
+ }
38
+
39
+ .gradio-container {
40
+ margin-bottom: 0 !important;
41
+ padding-bottom: 0 !important;
42
+ }
43
+
44
+ .version-banner {
45
+ margin-bottom: 0 !important;
46
+ padding-bottom: 0 !important;
47
+ margin-top: 10px;
48
+ }
49
+
50
+ /* ==== Titles ==== */
51
+ #app-title {
52
+ font-size: 40px;
53
+ font-weight: 800;
54
+ text-align: center;
55
+ margin-bottom: 6px;
56
+ background: linear-gradient(135deg, #FFA500, #FF4500);
57
+ -webkit-background-clip: text;
58
+ background-clip: text;
59
+ color: transparent;
60
+ }
61
+
62
+ #app-title::before {
63
+ filter: brightness(0.95);
64
+ }
65
+
66
+ #app-subtitle {
67
+ font-size: 24px;
68
+ font-weight: 600;
69
+ text-align: center;
70
+ margin-top: 0;
71
+ background: linear-gradient(135deg, #FFA500, #FF4500);
72
+ -webkit-background-clip: text;
73
+ background-clip: text;
74
+ color: transparent;
75
+ }
76
+
77
+ /* ==== Intro Text ==== */
78
+ #intro-text {
79
+ font-size: 16px;
80
+ color: white !important;
81
+ margin-top: 20px;
82
+ line-height: 1.6;
83
+ }
84
+
85
+ h4 ~ p {
86
+ padding-left: 20px;
87
+ }
88
+
89
+ .custom-links a {
90
+ color: #ffa500;
91
+ text-decoration: none;
92
+ }
93
+
94
+ .custom-links a:hover {
95
+ text-decoration: underline;
96
+ }
97
+
98
+ /* ==== Category Selector ==== */
99
+ #category-selector span {
100
+ font-size: 16px;
101
+ color: #ffa500;
102
+ display: flex;
103
+ align-items: center;
104
+ gap: 6px;
105
+ }
106
+
107
+ #category-selector {
108
+ background-color: #111827;
109
+ border-radius: 8px;
110
+ padding: 10px;
111
+ }
112
+ #category-selector .wrap {
113
+ background-color: #111827;
114
+ border: 1px solid #26313f;
115
+ }
116
+
117
+ .token {
118
+ background-color: #3f3f46 !important;
119
+ }
120
+
121
+ .wrap .options li.item {
122
+ background-color: #1f2937;
123
+ color: white;
124
+ padding: 8px;
125
+ }
126
+
127
+ .wrap .options li.item:hover {
128
+ background-color: #4b5563;
129
+ }
130
+
131
+ /* ==== Buttons ==== */
132
+ #run-btn {
133
+ background: linear-gradient(135deg, #FFA500, #FF4500);
134
+ color: #0a0f1a !important;
135
+ font-weight: bold;
136
+ border: none;
137
+ padding: 10px 20px;
138
+ border-radius: 8px;
139
+ cursor: pointer;
140
+ transition: filter 0.3s ease;
141
+ }
142
+
143
+ #run-btn:hover {
144
+ filter: brightness(1.1);
145
+ }
146
+
147
+ .btn-disabled {
148
+ opacity: 0.5;
149
+ pointer-events: none;
150
+ filter: grayscale(1);
151
+ }
152
+
153
+ /* ==== Status Message / Demo Notice ==== */
154
+ .html-container .prose {
155
+ font-size: 16px !important;
156
+ color: #e2e8f0;
157
+ font-weight: 500;
158
+ }
159
+
160
+ /* ==== Logs ==== */
161
+ #logs-label {
162
+ font-weight: bold;
163
+ color: #ffa500;
164
+ font-size: 16px;
165
+ }
166
+
167
+ #scrollContent {
168
+ height: 400px;
169
+ overflow-y: auto;
170
+ display: flex;
171
+ flex-direction: column-reverse;
172
+ border: 1px solid #444;
173
+ background-color: #111827;
174
+ font-family: monospace;
175
+ font-size: 16px;
176
+ color: #fefefe;
177
+ padding: 10px;
178
+ scroll-behavior: smooth;
179
+ }
180
+
181
+ .log-entry {
182
+ line-height: 1.5;
183
+ margin-bottom: 2px;
184
+ white-space: pre-wrap;
185
+ }
186
+
187
+
188
+ /* ==== Table ==== */
189
+ #deals-label {
190
+ font-weight: bold;
191
+ color: #ffa500;
192
+ margin-bottom: 6px;
193
+ font-size: 16px;
194
+ }
195
+
196
+ #deal-table {
197
+ overflow-x: auto;
198
+ }
199
+
200
+ #deal-table table {
201
+ width: 100% !important;
202
+ box-sizing: border-box;
203
+ border-collapse: collapse;
204
+ table-layout: fixed;
205
+ min-width: 700px;
206
+ }
207
+
208
+ #deal-table table th,
209
+ #deal-table table td {
210
+ padding: 10px;
211
+ text-align: left;
212
+ overflow-wrap: break-word;
213
+ word-wrap: break-word;
214
+ word-break: break-word;
215
+ }
216
+
217
+ #deal-table table th {
218
+ background-color: #1f2937 !important;
219
+ color: #f4a261 !important;
220
+ font-weight: bold;
221
+ border: 1px solid #334155;
222
+ }
223
+
224
+ #deal-table table td {
225
+ background-color: #1f2937 !important;
226
+ color: white !important;
227
+ border: 1px solid #334155;
228
+ }
229
+
230
+ #deal-table table th:first-child,
231
+ #deal-table table td:first-child {
232
+ width: 40% !important;
233
+ }
234
+
235
+ /* ==== Version ==== */
236
+ .version-banner {
237
+ text-align: center;
238
+ font-size: 0.9em;
239
+ }
240
+
241
+
242
+
src/ui/formatting.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Formatting utilities for the Gradio UI."""
2
+
3
+ from typing import List
4
+
5
+ from src.config.constants import MAX_LOG_LINES
6
+
7
+
8
+ def html_for(log_data: list[str]) -> str:
9
+ """Generate HTML for displaying the log lines in a scrollable container."""
10
+ logs = reversed(log_data[-MAX_LOG_LINES:])
11
+ output = "".join(f"<div class='log-entry'>{line}</div>" for line in logs)
12
+ return (
13
+ "<div id='logs-label'>📜 Live Agent Logs</div>"
14
+ "<div id='scrollContent'>" + output + "</div>"
15
+ )
16
+
17
+
18
+ def format_deals_table(deals: List[List[str]]) -> str:
19
+ """Formats accepted deals as an HTML table with styled links."""
20
+ html = """
21
+ <div id="deal-table">
22
+ <div id="deals-label">🛍️ Best Deals Found</div>
23
+ <table>
24
+ <thead>
25
+ <tr>
26
+ <th>Description</th>
27
+ <th>Price</th>
28
+ <th>AI Estimate</th>
29
+ <th>Discount</th>
30
+ <th>URL</th>
31
+ </tr>
32
+ </thead>
33
+ <tbody>
34
+ """
35
+ for desc, price, estimate, discount, url in deals:
36
+ html += f"""
37
+ <tr>
38
+ <td>{desc}</td>
39
+ <td>{price}</td>
40
+ <td>{estimate}</td>
41
+ <td>{discount}</td>
42
+ <td><a href="{url}" target="_blank">Link</a></td>
43
+ </tr>
44
+ """
45
+ html += """
46
+ </tbody>
47
+ </table>
48
+ </div>
49
+ """
50
+ return html
51
+
52
+
53
+ def get_server_timezone() -> str:
54
+ """Get the server's timezone information."""
55
+ import datetime
56
+
57
+ current_time = datetime.datetime.now()
58
+ try:
59
+ timezone_name = current_time.astimezone().tzinfo.tzname(current_time)
60
+ except AttributeError:
61
+ timezone_name = "Unknown"
62
+
63
+ return (
64
+ f"Server Time: {current_time.strftime('%Y-%m-%d %H:%M:%S')} "
65
+ f"(Timezone: {timezone_name})"
66
+ )
src/ui/gradio_app.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Launches a Gradio app that runs an AI planning agent to find smart online deals.
2
+
3
+ Uses Python's built-in logging system for consistent logs.
4
+ Includes demo mode restrictions: MAX_DEMO_RUNS_PER_DAY runs per day.
5
+ """
6
+
7
+ import os
8
+ from typing import Tuple
9
+
10
+ import gradio as gr
11
+
12
+ from src.agents.pipeline import run_and_stream_logs
13
+ from src.config.constants import (
14
+ IS_DEMO_VERSION,
15
+ MAX_CATEGORY_SELECTION,
16
+ MAX_DEMO_RUNS_PER_DAY,
17
+ PROJECT_NAME,
18
+ VERSION,
19
+ )
20
+ from src.config.feeds import CATEGORY_FEEDS
21
+ from src.ui.formatting import format_deals_table
22
+ from src.utils.logger import console
23
+ from src.utils.state_manager import can_run_app
24
+
25
+ PROJECT_NAME_CAP = PROJECT_NAME.capitalize()
26
+ REPO_URL = f"https://github.com/lisekarimi/{PROJECT_NAME}"
27
+ DOC_URL = f"https://lisekarimi.github.io/{PROJECT_NAME}"
28
+
29
+
30
+ def build_ui() -> gr.Blocks:
31
+ """Constructs and returns the Gradio UI interface, with error handling."""
32
+ try:
33
+ with open(
34
+ os.path.join(os.path.dirname(__file__), "assets", "styles.css"), "r"
35
+ ) as f:
36
+ css = f.read()
37
+ except Exception as e:
38
+ css = ""
39
+ console.print(f"[bold yellow]⚠️ Failed to load CSS:[/] {e}")
40
+
41
+ # Set initial UI state here
42
+ disable_btn = False
43
+ initial_status = "Loading demo status..."
44
+
45
+ # Building the UI
46
+ try:
47
+ with gr.Blocks(css=css, title=f"🏷️{PROJECT_NAME_CAP}") as ui:
48
+ with gr.Column(elem_id="app-container"):
49
+ # 🔝 Top content
50
+ gr.Markdown(f"<h1 id='app-title'>🏷️ {PROJECT_NAME_CAP} </h1>")
51
+ gr.Markdown(
52
+ "<h2 id='app-subtitle'>Autonomous AI Agents Snapping the Best "
53
+ "Deals Online</h2>"
54
+ )
55
+ gr.HTML(
56
+ f"""
57
+ <div id="intro-text">
58
+ <p>🏷️ <strong>Snapr</strong>,
59
+ <strong>an Agentic AI System</strong>,
60
+ discovers the best online deals for you—smart,
61
+ simple, and automatic.
62
+ Let <strong>AI</strong> do the work, so you can save money
63
+ without any effort.</p>
64
+
65
+
66
+ <h4>🤖 How It Works:</h4>
67
+ <p>1️⃣ Choose up to {MAX_CATEGORY_SELECTION}
68
+ categories to search.</p>
69
+ <p>2️⃣ Click "Find Smart Deals" — AI scans, estimates prices,
70
+ and filters top discounts.</p>
71
+ <p>3️⃣ See the best deals in a table with prices, discounts,
72
+ and direct links.</p>
73
+ </div>
74
+ """
75
+ )
76
+ if IS_DEMO_VERSION:
77
+ gr.Markdown(
78
+ f"""
79
+ <p>⚠️ This is a demo version — limited to
80
+ {MAX_DEMO_RUNS_PER_DAY} global runs per day for all users.</p>
81
+ """
82
+ )
83
+
84
+ gr.Markdown(
85
+ f"""
86
+ <p class="custom-links">📦 Want more?
87
+ You can <a href="{REPO_URL}"
88
+ target="_blank">run it locally</a>
89
+ with full <a href="{DOC_URL}/technical/localdev/"
90
+ target="_blank">instructions</a> and
91
+ <a href="{DOC_URL}"
92
+ target="_blank">documentation</a>.</p>
93
+ """
94
+ )
95
+
96
+ with gr.Column(elem_id="left-col"):
97
+ category_selector = gr.Dropdown(
98
+ choices=list(CATEGORY_FEEDS.keys()),
99
+ value="Electronics",
100
+ multiselect=True,
101
+ label=(
102
+ f"🧭 Select up to {MAX_CATEGORY_SELECTION} Deal Categories"
103
+ ),
104
+ elem_id="category-selector",
105
+ elem_classes="custom-dropdown",
106
+ )
107
+ run_btn = gr.Button(
108
+ "🔍 Find Smart Deals",
109
+ elem_id="run-btn",
110
+ variant="primary",
111
+ elem_classes="run-button",
112
+ interactive=not disable_btn,
113
+ )
114
+
115
+ status_msg = gr.HTML(value=initial_status, elem_id="status-message")
116
+
117
+ # Logs + deals
118
+ logs_output = gr.HTML(
119
+ value="""
120
+ <div id="logs-label">📜 Live Agent Logs</div>
121
+ <div id="scrollContent">
122
+ 🕵️‍♀️ Click "🔍 Find Smart Deals" to wake the agents and
123
+ stream logs here!
124
+ </div>
125
+ """
126
+ )
127
+
128
+ deals_output = gr.HTML(
129
+ value=format_deals_table([]), elem_id="deal-table"
130
+ )
131
+
132
+ # Connect button
133
+ run_btn.click(
134
+ fn=run_and_stream_logs,
135
+ inputs=[category_selector],
136
+ outputs=[logs_output, deals_output, run_btn, status_msg],
137
+ )
138
+
139
+ # Status update on load
140
+ @ui.load(outputs=[status_msg, run_btn])
141
+ def update_status_on_load() -> Tuple[str, gr.update]:
142
+ """Sets demo status and button state on UI load."""
143
+ can_run, status = can_run_app()
144
+ btn_state = gr.update(
145
+ interactive=can_run,
146
+ elem_classes=(
147
+ ["run-button", "btn-disabled"]
148
+ if not can_run
149
+ else ["run-button"]
150
+ ),
151
+ )
152
+ return status, btn_state
153
+
154
+ # Bottom: version info
155
+ gr.Markdown(
156
+ f"""
157
+ <p class="version-banner">
158
+ 🔖 <strong>
159
+ <a href="{DOC_URL}/changelog"
160
+ target="_blank">Version {VERSION}</a>
161
+ </strong>
162
+ </p>
163
+ """
164
+ )
165
+
166
+ return ui
167
+
168
+ except Exception as e:
169
+ console.print(f"[bold red]❌ Failed to build UI:[/] {e}")
170
+ raise
src/utils/cleanup.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Deletes a file if it's older than a specified number of days."""
2
+
3
+ import os
4
+ import time
5
+
6
+ from src.config.constants import MEMORY_EXPIRATION_DAYS
7
+
8
+
9
+ def delete_if_old(path: str, max_age_days: int = MEMORY_EXPIRATION_DAYS) -> None:
10
+ """Deletes file if older than max_age_days."""
11
+ if os.path.exists(path):
12
+ age = time.time() - os.path.getmtime(path)
13
+ if age > max_age_days * 86400:
14
+ os.remove(path)
src/utils/file_io.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions for safely handling JSON file operations."""
2
+
3
+ import json
4
+ import os
5
+ from typing import Any, Dict
6
+
7
+ from src.utils.logger import console
8
+
9
+
10
+ def write_json(path: str, data: Dict[str, Any]) -> None:
11
+ """Writes JSON to file, ensuring parent folder exists..
12
+
13
+ Args:
14
+ path (str): Full file path to write to.
15
+ data (Dict[str, Any]): The data to write as JSON.
16
+ """
17
+ try:
18
+ os.makedirs(os.path.dirname(path), exist_ok=True)
19
+ with open(path, "w") as f:
20
+ json.dump(data, f, indent=2)
21
+ except Exception as e:
22
+ console.print(f"Error writing to {path}: {e}", style="red")
23
+
24
+
25
+ def load_json(path: str) -> Dict[str, Any]:
26
+ """Safely loads and returns JSON data from the given file path.
27
+
28
+ Args:
29
+ path (str): Full file path to read from.
30
+
31
+ Returns:
32
+ Dict[str, Any]: Parsed JSON content, or an empty dict if loading fails.
33
+ """
34
+ try:
35
+ os.makedirs(os.path.dirname(path), exist_ok=True)
36
+
37
+ if not os.path.exists(path):
38
+ console.print(
39
+ f"[yellow]File not found at {path}. Assuming first run.[/yellow]"
40
+ )
41
+ return {}
42
+
43
+ with open(path, "r") as f:
44
+ return json.load(f)
45
+
46
+ except Exception as e:
47
+ console.print(f"[red]Error reading from {path}: {e}[/red]")
48
+ return {}
src/utils/logger.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Provides a shared Rich console instance for styled logging."""
2
+
3
+ from rich.console import Console
4
+
5
+ console = Console()
src/utils/memory_utils.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utilities for handling deal memory and persistence."""
2
+
3
+ from datetime import datetime, timezone
4
+ from typing import Dict, List
5
+
6
+ from src.config.constants import DEALS_FILE
7
+ from src.utils.file_io import load_json, write_json
8
+
9
+
10
+ def save_opportunities_to_memory(
11
+ new_opportunities: List[Dict], memory_path: str = DEALS_FILE
12
+ ) -> None:
13
+ """Updates opportunities with deduplication and saves to memory."""
14
+ existing_data = load_json(memory_path) or {"opportunities": []}
15
+
16
+ all_opportunities = existing_data.get("opportunities", []) + new_opportunities
17
+ unique_by_url = {op["url"]: op for op in all_opportunities}
18
+ final_list = list(unique_by_url.values())
19
+
20
+ write_json(
21
+ memory_path,
22
+ {
23
+ "opportunities": final_list,
24
+ "last_updated": datetime.now(timezone.utc).isoformat(),
25
+ },
26
+ )
src/utils/state_manager.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Handles persistent state management and access control for the application."""
2
+
3
+ import datetime
4
+ import os
5
+ import threading
6
+ from typing import Any, Dict, Tuple
7
+
8
+ from src.config.constants import IS_DEMO_VERSION, MAX_DEMO_RUNS_PER_DAY, STATE_FILE
9
+ from src.ui.formatting import get_server_timezone
10
+ from src.utils.file_io import load_json, write_json
11
+ from src.utils.logger import console
12
+
13
+ state_lock = threading.Lock()
14
+
15
+
16
+ def get_default_state() -> Dict[str, Any]:
17
+ """Returns the default initial state dictionary."""
18
+ return {"date": datetime.datetime.now().strftime("%Y-%m-%d"), "run_count": 0}
19
+
20
+
21
+ def get_state() -> Dict[str, Any]:
22
+ """Get the current state from the JSON file, initializing if needed."""
23
+ if not os.path.exists(STATE_FILE):
24
+ default_state = get_default_state()
25
+ write_json(STATE_FILE, default_state)
26
+ return default_state
27
+
28
+ try:
29
+ state = load_json(STATE_FILE)
30
+
31
+ # Reset counter if it's a new day
32
+ current_date = datetime.datetime.now().strftime("%Y-%m-%d")
33
+ if state["date"] != current_date:
34
+ state = get_default_state()
35
+ write_json(STATE_FILE, state)
36
+
37
+ return state
38
+
39
+ except Exception as e:
40
+ console.print(f"Error reading state file: {e}", style="red")
41
+ return get_default_state()
42
+
43
+
44
+ def update_state(state_updates: Dict[str, Any]) -> Dict[str, Any]:
45
+ """Update the state file with new values."""
46
+ current_state = get_state()
47
+ current_state.update(state_updates)
48
+ write_json(STATE_FILE, current_state)
49
+ return current_state
50
+
51
+
52
+ def can_run_app() -> Tuple[bool, str]:
53
+ """Check if the app can be run based on demo restrictions."""
54
+ # No restrictions if not running in demo mode
55
+ if not IS_DEMO_VERSION:
56
+ return True, ""
57
+
58
+ # Get server timezone for both cases
59
+ server_time = get_server_timezone()
60
+ server_time = f"🌐 {server_time}"
61
+
62
+ # Load current run state
63
+ state = get_state()
64
+ runs_left = MAX_DEMO_RUNS_PER_DAY - state["run_count"]
65
+
66
+ # Block if daily limit reached
67
+ if state["run_count"] >= MAX_DEMO_RUNS_PER_DAY:
68
+ return (
69
+ False,
70
+ f"⛔ Daily limit reached ({MAX_DEMO_RUNS_PER_DAY} runs per day "
71
+ f"in demo mode). Please try again tomorrow!"
72
+ f"<br>{server_time}",
73
+ )
74
+
75
+ # Otherwise, allow and return runs remaining
76
+ return (
77
+ True,
78
+ f"🕒 Demo mode: {runs_left} run"
79
+ f"{'s' if runs_left != 1 else ''} left today."
80
+ f"<br>{server_time}",
81
+ )
src/utils/text_utils.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions for text processing."""
2
+
3
+ import re
4
+
5
+
6
+ def extract_tagged_price(output: str) -> float:
7
+ """Extracts a float price from a string based on 'Price is $' keyword."""
8
+ try:
9
+ contents = output.split("Price is $")[1].replace(",", "")
10
+ match = re.search(r"[-+]?\d*\.\d+|\d+", contents)
11
+ return float(match.group()) if match else 0.0
12
+ except Exception:
13
+ return 0.0
14
+
15
+
16
+ def extract_price(output: str) -> float:
17
+ """Extracts a float price from a string.
18
+
19
+ If no number is found, returns 0.0.
20
+ """
21
+ try:
22
+ cleaned_text = output.replace("$", "").replace(",", "")
23
+ match = re.search(r"[-+]?\d*\.\d+|\d+", cleaned_text)
24
+ if match:
25
+ return round(float(match.group()), 2)
26
+ return 0.0
27
+ except Exception:
28
+ # Optionally log the exception or handle differently
29
+ return 0.0
uv.lock ADDED
The diff for this file is too large to render. See raw diff