Omar ID EL MOUMEN committed on
Commit
33aecf5
·
1 Parent(s): 3ae60ae

Add feature: Keyword search (FINALLY) + debug (frontend only)

Browse files
Files changed (4) hide show
  1. app.py +80 -5
  2. static/script.js +110 -1
  3. static/style.css +8 -0
  4. templates/index.html +18 -0
app.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  import requests
2
  from bs4 import BeautifulSoup
3
  import json
@@ -12,7 +15,7 @@ from fastapi.middleware.cors import CORSMiddleware
12
  from fastapi.responses import FileResponse
13
  from fastapi.staticfiles import StaticFiles
14
  from pydantic import BaseModel
15
- from typing import Dict, List, Optional
16
 
17
  load_dotenv()
18
 
@@ -53,6 +56,18 @@ class BatchDocResponse(BaseModel):
53
  missing: List[str]
54
  search_time: float
55
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  class TsgDocFinder:
57
  def __init__(self):
58
  self.main_ftp_url = "https://www.3gpp.org/ftp"
@@ -166,7 +181,7 @@ class SpecDocFinder:
166
  def __init__(self):
167
  self.chars = "0123456789abcdefghijklmnopqrstuvwxyz"
168
 
169
- def search_document(self, doc_id, release):
170
  series = doc_id.split(".")[0]
171
  while len(series) < 2:
172
  series = "0" + series
@@ -181,7 +196,6 @@ class SpecDocFinder:
181
  try:
182
  item = items[-1].find("a")
183
  except Exception as e:
184
- traceback.print_exc(e)
185
  return f"Unable to find specification {doc_id} : {e}"
186
  a, b, c = [_ for _ in item.get_text().split("-")[1].replace(".zip", "")]
187
  version = f"{self.chars.index(a)}.{self.chars.index(b)}.{self.chars.index(c)}"
@@ -205,6 +219,67 @@ finder_spec = SpecDocFinder()
205
  async def main_menu():
206
  return FileResponse(os.path.join("templates", "index.html"))
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  @app.post("/find", response_model=DocResponse)
209
  def find_document(request: DocRequest):
210
  start_time = time.time()
@@ -214,7 +289,7 @@ def find_document(request: DocRequest):
214
  result = finder.search_document(request.doc_id, request.release)
215
  print(result)
216
 
217
- if "not found" not in result and "Could not" not in result:
218
  return DocResponse(
219
  doc_id=request.doc_id,
220
  url=result,
@@ -231,7 +306,7 @@ def find_documents_batch(request: BatchDocRequest):
231
  missing = []
232
 
233
  for doc_id in request.doc_ids:
234
- finder = finder_tsg if request.doc_id[0].isalpha() else finder_spec
235
  result = finder.search_document(doc_id)
236
  if "not found" not in result and "Could not" not in result and "Unable" not in result:
237
  results[doc_id] = result
 
1
+ from io import StringIO
2
+ import numpy as np
3
+ import pandas as pd
4
  import requests
5
  from bs4 import BeautifulSoup
6
  import json
 
15
  from fastapi.responses import FileResponse
16
  from fastapi.staticfiles import StaticFiles
17
  from pydantic import BaseModel
18
+ from typing import Any, Dict, List, Literal, Optional
19
 
20
  load_dotenv()
21
 
 
56
  missing: List[str]
57
  search_time: float
58
 
59
class KeywordRequest(BaseModel):
    """Request body for the keyword search endpoint (POST /search-spec)."""

    # Space-separated keywords, matched case-insensitively against spec titles.
    keywords: str
    # Optional filter: compared with the part of the spec version before the first ".".
    release: Optional[str] = None
    # Optional filter: must equal the spec version string exactly.
    version: Optional[str] = None
    # Optional filter: must equal the spec's working group ("WG") value exactly.
    wg: Optional[str] = None
    # Optional filter on the spec type column.
    spec_type: Optional[Literal["TS", "TR"]] = None
    # "and": every keyword must appear in the title; "or": at least one must.
    mode: Optional[Literal["and", "or"]] = "and"
66
+
67
class KeywordResponse(BaseModel):
    """Response body of the keyword search endpoint (POST /search-spec)."""

    # One dict per matching spec, with the string keys
    # "id", "title", "type", "release", "version" and "working_group".
    results: List[Dict[str, str]]
    # Wall-clock time spent handling the request, in seconds.
    search_time: float
70
+
71
  class TsgDocFinder:
72
  def __init__(self):
73
  self.main_ftp_url = "https://www.3gpp.org/ftp"
 
181
  def __init__(self):
182
  self.chars = "0123456789abcdefghijklmnopqrstuvwxyz"
183
 
184
+ def search_document(self, doc_id, release = None):
185
  series = doc_id.split(".")[0]
186
  while len(series) < 2:
187
  series = "0" + series
 
196
  try:
197
  item = items[-1].find("a")
198
  except Exception as e:
 
199
  return f"Unable to find specification {doc_id} : {e}"
200
  a, b, c = [_ for _ in item.get_text().split("-")[1].replace(".zip", "")]
201
  version = f"{self.chars.index(a)}.{self.chars.index(b)}.{self.chars.index(c)}"
 
219
  async def main_menu():
220
  return FileResponse(os.path.join("templates", "index.html"))
221
 
222
@app.post("/search-spec", response_model=KeywordResponse)
def search_spec(request: KeywordRequest):
    """Search the 3GPP status report for specifications by title keywords.

    Downloads the 3GPP status-report page, reads the first five columns of
    every HTML table on it, and keeps the rows whose title contains the
    requested keywords (all of them in "and" mode, any of them in "or" mode;
    matching is case-insensitive). The optional ``release``, ``version``,
    ``wg`` and ``spec_type`` fields of the request narrow the result further.
    Rows without a title, version or working group are skipped.

    Args:
        request: Keywords, match mode and optional filters.

    Returns:
        A ``KeywordResponse`` with the matching specifications and the time
        spent serving the request, in seconds.

    Raises:
        HTTPException: 502 if the status report cannot be downloaded,
            404 if no specification matches.
    """
    start_time = time.time()

    try:
        # NOTE(review): verify=False disables TLS certificate validation; it is
        # kept because the original code relies on it, but it should be removed
        # once the certificate chain of www.3gpp.org validates in deployment.
        response = requests.get(
            "https://www.3gpp.org/dynareport?code=status-report.htm",
            headers={"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'},
            verify=False,
            timeout=30,  # never let a stalled upstream hang the worker forever
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        raise HTTPException(
            status_code=502,
            detail=f"Unable to fetch the 3GPP status report: {exc}",
        ) from exc

    # The page is already in memory, so no storage_options are passed: they
    # only apply when pandas fetches a URL itself and may be rejected for
    # in-memory buffers.
    dfs = pd.read_html(StringIO(response.text), encoding="utf-8")

    columns_needed = [0, 1, 2, 3, 4]
    # Missing cells become None so they can be detected with plain `is None`.
    extracted_dfs: List[pd.DataFrame] = [
        df.replace({np.nan: None}).iloc[:, columns_needed] for df in dfs
    ]
    # Header names contain non-breaking spaces (e.g. "spec\xa0num").
    columns = [x.replace("\xa0", "_") for x in extracted_dfs[0].columns]

    # split() without argument drops empty tokens; an empty keyword would be
    # "contained" in every title and make "or" mode match everything.
    kws = request.keywords.lower().split()
    if request.mode == "and":
        keyword_test = all
    elif request.mode == "or":
        keyword_test = any
    else:
        keyword_test = None  # mode explicitly null: no keyword filtering

    release = request.release
    version = request.version
    working_group = request.wg
    spec_type = request.spec_type

    results = []
    for df in extracted_dfs:
        for _, row in df.iterrows():
            spec = dict(zip(columns, row.to_list()))

            title = spec["title"]
            if not isinstance(title, str):
                # Rows without a title cannot be matched or returned.
                continue
            if keyword_test is not None:
                lowered_title = title.lower()
                if not keyword_test(kw in lowered_title for kw in kws):
                    continue

            vers = spec.get("vers")
            if vers is None:
                continue
            if release is not None and vers.split(".")[0] != str(release):
                continue
            if version is not None and vers != version:
                continue

            wg = spec.get("WG")
            if wg is None or (working_group is not None and wg != working_group):
                continue
            if spec_type is not None and spec["type"] != spec_type:
                continue

            results.append({
                "id": str(spec["spec_num"]),
                "title": title,
                "type": "Technical Specification" if spec["type"] == "TS" else "Technical Report",
                "release": str(vers.split(".")[0]),
                "version": str(vers),
                "working_group": wg,
            })

    if not results:
        raise HTTPException(status_code=404, detail="Specification not found")

    return KeywordResponse(
        results=results,
        search_time=time.time() - start_time,
    )
283
  @app.post("/find", response_model=DocResponse)
284
  def find_document(request: DocRequest):
285
  start_time = time.time()
 
289
  result = finder.search_document(request.doc_id, request.release)
290
  print(result)
291
 
292
+ if "not found" not in result and "Could not" not in result and "Unable" not in result:
293
  return DocResponse(
294
  doc_id=request.doc_id,
295
  url=result,
 
306
  missing = []
307
 
308
  for doc_id in request.doc_ids:
309
+ finder = finder_tsg if doc_id[0].isalpha() else finder_spec
310
  result = finder.search_document(doc_id)
311
  if "not found" not in result and "Could not" not in result and "Unable" not in result:
312
  results[doc_id] = result
static/script.js CHANGED
@@ -1,12 +1,22 @@
1
  // DOM elements
2
  const singleModeBtn = document.getElementById('single-mode-btn');
3
  const batchModeBtn = document.getElementById('batch-mode-btn');
 
 
 
4
  const singleInput = document.querySelector('.single-input');
5
  const batchInput = document.querySelector('.batch-input');
 
 
 
6
  const docIdInput = document.getElementById('doc-id');
7
  const batchIdsInput = document.getElementById('batch-ids');
 
 
8
  const searchBtn = document.getElementById('search-btn');
9
  const batchSearchBtn = document.getElementById('batch-search-btn');
 
 
10
  const loader = document.getElementById('loader');
11
  const resultsContainer = document.getElementById('results-container');
12
  const resultsList = document.getElementById('results-list');
@@ -16,18 +26,86 @@ const errorMessage = document.getElementById('error-message');
16
  // Search mode toggle
17
  singleModeBtn.addEventListener('click', () => {
18
  singleModeBtn.classList.add('active');
 
19
  batchModeBtn.classList.remove('active');
 
 
20
  singleInput.style.display = 'block';
21
  batchInput.style.display = 'none';
 
 
22
  });
23
 
24
  batchModeBtn.addEventListener('click', () => {
25
  batchModeBtn.classList.add('active');
 
26
  singleModeBtn.classList.remove('active');
 
 
27
  batchInput.style.display = 'block';
 
 
28
  singleInput.style.display = 'none';
29
  });
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  // Single document search
32
  searchBtn.addEventListener('click', async () => {
33
  const docId = docIdInput.value.trim();
@@ -147,6 +225,31 @@ function displaySingleNotFound(docId, message) {
147
  resultsContainer.style.display = 'block';
148
  }
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  // Display batch results
151
  function displayBatchResults(data) {
152
  resultsList.innerHTML = '';
@@ -213,4 +316,10 @@ docIdInput.addEventListener('keypress', (e) => {
213
  if (e.key === 'Enter') {
214
  searchBtn.click();
215
  }
216
- });
 
 
 
 
 
 
 
1
  // DOM elements
2
  const singleModeBtn = document.getElementById('single-mode-btn');
3
  const batchModeBtn = document.getElementById('batch-mode-btn');
4
+ const keywordModeBtn = document.getElementById("keyword-mode-btn");
5
+ const indexerModeBtn = document.getElementById("indexer-mode-btn")
6
+
7
  const singleInput = document.querySelector('.single-input');
8
  const batchInput = document.querySelector('.batch-input');
9
+ const keywordSearchInput = document.querySelector(".keyword-input");
10
+ const indexerButtons = document.querySelector(".indexer-buttons")
11
+
12
  const docIdInput = document.getElementById('doc-id');
13
  const batchIdsInput = document.getElementById('batch-ids');
14
+ const keywordInput = document.getElementById("keywords");
15
+
16
  const searchBtn = document.getElementById('search-btn');
17
  const batchSearchBtn = document.getElementById('batch-search-btn');
18
+ const keywordSearchBtn = document.getElementById("keyword-search-btn");
19
+
20
  const loader = document.getElementById('loader');
21
  const resultsContainer = document.getElementById('results-container');
22
  const resultsList = document.getElementById('results-list');
 
26
  // Search mode toggle
27
  singleModeBtn.addEventListener('click', () => {
28
  singleModeBtn.classList.add('active');
29
+ keywordModeBtn.classList.remove("active");
30
  batchModeBtn.classList.remove('active');
31
+ indexerModeBtn.classList.remove("active");
32
+
33
  singleInput.style.display = 'block';
34
  batchInput.style.display = 'none';
35
+ keywordSearchInput.style.display = "none";
36
+ indexerButtons.style.display = "none";
37
  });
38
 
39
  batchModeBtn.addEventListener('click', () => {
40
  batchModeBtn.classList.add('active');
41
+ keywordModeBtn.classList.remove("active");
42
  singleModeBtn.classList.remove('active');
43
+ indexerModeBtn.classList.remove("active");
44
+
45
  batchInput.style.display = 'block';
46
+ keywordSearchInput.style.display = "none";
47
+ indexerButtons.style.display = "none";
48
  singleInput.style.display = 'none';
49
  });
50
 
51
+ keywordModeBtn.addEventListener('click', () => {
52
+ keywordModeBtn.classList.add("active");
53
+ singleModeBtn.classList.remove('active');
54
+ batchModeBtn.classList.remove("active");
55
+ indexerModeBtn.classList.remove("active");
56
+
57
+ singleInput.style.display = "none";
58
+ batchInput.style.display = "none";
59
+ indexerButtons.style.display = "none";
60
+ keywordSearchInput.style.display = "block";
61
+ })
62
+
63
+ indexerModeBtn.addEventListener('click', ()=>{
64
+ keywordModeBtn.classList.remove("active");
65
+ singleModeBtn.classList.remove('active');
66
+ batchModeBtn.classList.remove("active");
67
+ indexerModeBtn.classList.add("active");
68
+
69
+ singleInput.style.display = "none";
70
+ batchInput.style.display = "none";
71
+ indexerButtons.style.display = "block";
72
+ keywordSearchInput.style.display = "none";
73
+ })
74
+
75
// Keyword search: POST the keywords to /search-spec and render the matches.
keywordSearchBtn.addEventListener("click", async () => {
    const keywords = keywordInput.value.trim();
    if (!keywords) {
        showError("Please enter at least one keyword");
        return;
    }

    showLoader();
    hideError();

    try {
        const response = await fetch("/search-spec", {
            method: "POST",
            headers: {
                "Content-Type": "application/json"
            },
            body: JSON.stringify({ keywords })
        });

        // An error response may not carry a JSON body; do not let that be
        // reported as a connection failure by the catch block below.
        const data = await response.json().catch(() => null);

        if (response.ok && data) {
            displayKeywordResults(data);
        } else {
            // The API reports failures as {"detail": "..."}; show that text
            // when it is a plain string (validation errors use an array).
            const detail = data && typeof data.detail === "string" ? data.detail : null;
            showError(detail || 'Error processing keyword search request');
        }
    } catch (error) {
        showError('Error connecting to the server. Please check if the API is running.');
        console.error('Error:', error);
    } finally {
        hideLoader();
    }
});
107
+
108
+
109
  // Single document search
110
  searchBtn.addEventListener('click', async () => {
111
  const docId = docIdInput.value.trim();
 
225
  resultsContainer.style.display = 'block';
226
  }
227
 
228
// Display keyword search results.
// `data` is the /search-spec response: { results: [...], search_time: number }.
// Result fields come from a scraped remote page, so they are inserted with
// textContent (never innerHTML) to keep any markup in them inert.
function displayKeywordResults(data) {
    resultsList.innerHTML = '';

    data.results.forEach(spec => {
        const resultItem = document.createElement("div");
        resultItem.className = "result-item";

        const header = document.createElement("div");
        header.className = "result-header";

        const idElement = document.createElement("div");
        idElement.className = "result-id";
        idElement.textContent = spec.id;

        const statusElement = document.createElement("div");
        statusElement.className = "result-status status-found";
        statusElement.textContent = "Found";

        header.appendChild(idElement);
        header.appendChild(statusElement);

        const details = document.createElement("div");
        details.className = "result-url";

        const fields = [
            ["Title", spec.title],
            ["Type", spec.type],
            ["Release", spec.release],
            ["Version", spec.version],
            ["WG", spec.working_group]
        ];
        fields.forEach(([label, value]) => {
            const line = document.createElement("p");
            line.textContent = `${label}: ${value}`;
            details.appendChild(line);
        });

        resultItem.appendChild(header);
        resultItem.appendChild(details);
        resultsList.appendChild(resultItem);
    });

    resultsStats.textContent = `Found in ${data.search_time.toFixed(2)} seconds`;
    resultsContainer.style.display = 'block';
}
252
+
253
  // Display batch results
254
  function displayBatchResults(data) {
255
  resultsList.innerHTML = '';
 
316
  if (e.key === 'Enter') {
317
  searchBtn.click();
318
  }
319
+ });
320
+
321
+ keywordInput.addEventListener('keypress', (event)=>{
322
+ if (event.key === "Enter"){
323
+ keywordSearchBtn.click();
324
+ }
325
+ })
static/style.css CHANGED
@@ -167,6 +167,14 @@ header {
167
  display: none;
168
  }
169
 
 
 
 
 
 
 
 
 
170
  .batch-input textarea {
171
  width: 100%;
172
  height: 120px;
 
167
  display: none;
168
  }
169
 
170
+ .keyword-input {
171
+ display: none;
172
+ }
173
+
174
+ .indexer-buttons {
175
+ display: none;
176
+ }
177
+
178
  .batch-input textarea {
179
  width: 100%;
180
  height: 120px;
templates/index.html CHANGED
@@ -27,6 +27,8 @@
27
  <div class="search-mode">
28
  <button id="single-mode-btn" class="active">Single Document</button>
29
  <button id="batch-mode-btn">Batch Search</button>
 
 
30
  </div>
31
 
32
  <div class="search-form">
@@ -44,6 +46,22 @@
44
  <div class="hint">Enter one document ID per line</div>
45
  <button id="batch-search-btn" class="btn" style="margin-top: 10px;">Search All</button>
46
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  </div>
48
 
49
  <div class="error-message" id="error-message"></div>
 
27
  <div class="search-mode">
28
  <button id="single-mode-btn" class="active">Single Document</button>
29
  <button id="batch-mode-btn">Batch Search</button>
30
+ <button id="keyword-mode-btn">Keyword Search</button>
31
+ <button id="indexer-mode-btn">Indexer Options</button>
32
  </div>
33
 
34
  <div class="search-form">
 
46
  <div class="hint">Enter one document ID per line</div>
47
  <button id="batch-search-btn" class="btn" style="margin-top: 10px;">Search All</button>
48
  </div>
49
+
50
+ <div class="input-group keyword-input">
51
+ <label for="keywords">Keywords</label>
52
+ <div class="input-field">
53
+ <input type="text" id="keywords" placeholder="Enter your keywords separated by space">
54
+ <button id="keyword-search-btn" class="btn">Search</button>
55
+ </div>
56
+ </div>
57
+
58
+ <div class="input-group indexer-buttons">
59
+ <label for="indexerBtns">Actions</label>
60
+ <div class="input-field">
61
+ <button id="indexing-btn" class="btn">Index all files</button>
62
+ <button id="testing-btn" class="btn">Test theory</button>
63
+ </div>
64
+ </div>
65
  </div>
66
 
67
  <div class="error-message" id="error-message"></div>