cache utils
Browse files
- app.py +2 -10
- modular_graph_and_candidates.py +4 -0
app.py
CHANGED
|
@@ -52,16 +52,8 @@ def _escape_srcdoc(text: str) -> str:
|
|
| 52 |
|
| 53 |
|
| 54 |
def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
|
| 55 |
-
#
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
if sim_method == "embedding" and embeddings_cache.exists():
|
| 59 |
-
print("🚀 Using cached embeddings - skipping repo download")
|
| 60 |
-
# Use a dummy path since we won't need the actual repo
|
| 61 |
-
repo_path = Path("/tmp/dummy")
|
| 62 |
-
else:
|
| 63 |
-
print("📥 Downloading/updating repository")
|
| 64 |
-
repo_path = clone_or_cache(repo_url)
|
| 65 |
|
| 66 |
graph = build_graph_json(
|
| 67 |
transformers_dir=repo_path,
|
|
|
|
| 52 |
|
| 53 |
|
| 54 |
def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
|
| 55 |
+
# Always download repo for now - let build_graph_json decide if it needs it
|
| 56 |
+
repo_path = clone_or_cache(repo_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
graph = build_graph_json(
|
| 59 |
transformers_dir=repo_path,
|
modular_graph_and_candidates.py
CHANGED
|
@@ -317,10 +317,14 @@ def build_graph_json(
|
|
| 317 |
|
| 318 |
# Check if we can use cached embeddings only
|
| 319 |
embeddings_cache = Path("embeddings_cache.npz")
|
|
|
|
|
|
|
| 320 |
if sim_method == "embedding" and embeddings_cache.exists():
|
| 321 |
try:
|
| 322 |
# Try to compute from cache without accessing repo
|
| 323 |
cached_sims = compute_similarities_from_cache(threshold)
|
|
|
|
|
|
|
| 324 |
if cached_sims:
|
| 325 |
# Create minimal graph with cached data
|
| 326 |
cached_data = np.load(embeddings_cache, allow_pickle=True)
|
|
|
|
| 317 |
|
| 318 |
# Check if we can use cached embeddings only
|
| 319 |
embeddings_cache = Path("embeddings_cache.npz")
|
| 320 |
+
print(f"🔍 Cache file exists: {embeddings_cache.exists()}, sim_method: {sim_method}")
|
| 321 |
+
|
| 322 |
if sim_method == "embedding" and embeddings_cache.exists():
|
| 323 |
try:
|
| 324 |
# Try to compute from cache without accessing repo
|
| 325 |
cached_sims = compute_similarities_from_cache(threshold)
|
| 326 |
+
print(f"🔍 Got {len(cached_sims)} cached similarities")
|
| 327 |
+
|
| 328 |
if cached_sims:
|
| 329 |
# Create minimal graph with cached data
|
| 330 |
cached_data = np.load(embeddings_cache, allow_pickle=True)
|