RohanVashisht committed on
Commit
2f9d235
·
verified ·
1 Parent(s): d185a2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -14
app.py CHANGED
@@ -8,10 +8,20 @@ from sentence_transformers import SentenceTransformer
8
  app = FastAPI()
9
 
10
  FIELDS = (
11
- "full_name", "description", "watchers_count", "forks_count", "license",
12
- "default_branch", "has_build_zig", "has_build_zig_zon", "fork",
13
- "open_issues", "stargazers_count", "updated_at", "created_at",
14
- "size"
 
 
 
 
 
 
 
 
 
 
15
  )
16
 
17
  model = SentenceTransformer("all-MiniLM-L6-v2")
@@ -45,6 +55,23 @@ scroll_data = {
45
  "infiniteScrollPrograms": load_dataset_with_fields("zigistry/programs", include_readme=False)[0],
46
  }
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  @app.get("/infiniteScrollPackages/")
49
  def infinite_scroll_packages(q: int = Query(0, ge=0)):
50
  start = q * 10
@@ -64,21 +91,18 @@ def search_packages(q: str):
64
  key = "packages"
65
  index, dataset = indices[key]
66
  query_embedding = model.encode([q])
67
- distances, indices_ = index.search(np.array(query_embedding), len(dataset))
68
- sorted_results = sorted(zip(distances[0], indices_[0]), key=lambda x: x[0])
69
- results = [dataset[int(i)] for d, i in sorted_results]
70
- content = results[:280] if len(results) > 280 else results
71
  headers = {"Access-Control-Allow-Origin": "*", "Content-Type": "application/json"}
72
- return JSONResponse(content=content, headers=headers)
73
 
74
  @app.get("/searchPrograms/")
75
  def search_programs(q: str):
76
  key = "programs"
77
  index, dataset = indices[key]
78
  query_embedding = model.encode([q])
79
- distances, indices_ = index.search(np.array(query_embedding), len(dataset))
80
- sorted_results = sorted(zip(distances[0], indices_[0]), key=lambda x: x[0])
81
- results = [dataset[int(i)] for d, i in sorted_results]
82
- content = results[:280] if len(results) > 280 else results
83
  headers = {"Access-Control-Allow-Origin": "*", "Content-Type": "application/json"}
84
- return JSONResponse(content=content, headers=headers)
 
8
  app = FastAPI()
9
 
10
  FIELDS = (
11
+ "full_name",
12
+ "description",
13
+ "default_branch",
14
+ "open_issues",
15
+ "stargazers_count",
16
+ "forks_count",
17
+ "watchers_count",
18
+ "license",
19
+ "size",
20
+ "fork",
21
+ "updated_at",
22
+ "has_build_zig",
23
+ "has_build_zig_zon",
24
+ "created_at",
25
  )
26
 
27
  model = SentenceTransformer("all-MiniLM-L6-v2")
 
55
  "infiniteScrollPrograms": load_dataset_with_fields("zigistry/programs", include_readme=False)[0],
56
  }
57
 
58
def filter_results_by_distance(distances, idxs, dataset, max_results=50, threshold=0.6):
    """Return the dataset rows whose distance falls within a relative cutoff.

    Lower distance means more similar. The cutoff is placed ``threshold`` of
    the way from the smallest to the largest distance::

        cutoff = min_dist + (max_dist - min_dist) * threshold

    so ``threshold=0.0`` keeps only the closest hit(s) and ``threshold=1.0``
    keeps every valid hit.

    Args:
        distances: Distances for one query, aligned element-wise with ``idxs``.
        idxs: Row positions into ``dataset``; negative positions are ignored.
        dataset: Indexable collection the positions refer to.
        max_results: Upper bound on the number of rows returned.
        threshold: Fraction of the min-max distance range used for the cutoff.

    Returns:
        A list of at most ``max_results`` rows, in the order the hits were
        given. NOTE(review): no sorting happens here, so callers are expected
        to pass hits ordered best-first -- confirm against the index in use.
    """
    # Negative positions are "no result" padding in FAISS-style search output
    # (assumption: `index` is such an index -- TODO confirm). Left in place
    # they would make `dataset[-1]` silently return the last row, and their
    # placeholder distances would distort the cutoff computed below.
    candidates = [
        (dist, row) for dist, row in zip(distances, map(int, idxs)) if row >= 0
    ]
    if not candidates:
        return []

    valid_distances = [dist for dist, _ in candidates]
    min_dist = np.min(valid_distances)
    max_dist = np.max(valid_distances)
    cutoff = min_dist + (max_dist - min_dist) * threshold

    # Truncate the row positions *before* touching the dataset so that only
    # the rows actually returned are fetched.
    selected = [row for dist, row in candidates if dist <= cutoff][:max_results]
    return [dataset[row] for row in selected]
74
+
75
  @app.get("/infiniteScrollPackages/")
76
  def infinite_scroll_packages(q: int = Query(0, ge=0)):
77
  start = q * 10
 
91
  key = "packages"
92
  index, dataset = indices[key]
93
  query_embedding = model.encode([q])
94
+ distances, idxs = index.search(np.array(query_embedding), len(dataset))
95
+ # Only keep results that are likely relevant
96
+ results = filter_results_by_distance(distances[0], idxs[0], dataset)
 
97
  headers = {"Access-Control-Allow-Origin": "*", "Content-Type": "application/json"}
98
+ return JSONResponse(content=results, headers=headers)
99
 
100
  @app.get("/searchPrograms/")
101
  def search_programs(q: str):
102
  key = "programs"
103
  index, dataset = indices[key]
104
  query_embedding = model.encode([q])
105
+ distances, idxs = index.search(np.array(query_embedding), len(dataset))
106
+ results = filter_results_by_distance(distances[0], idxs[0], dataset)
 
 
107
  headers = {"Access-Control-Allow-Origin": "*", "Content-Type": "application/json"}
108
+ return JSONResponse(content=results, headers=headers)