Omar ID EL MOUMEN
committed on
Commit
·
33aecf5
1
Parent(s):
3ae60ae
Add feature: Keyword search (FINALLY) + debug (frontend only)
Browse files- app.py +80 -5
- static/script.js +110 -1
- static/style.css +8 -0
- templates/index.html +18 -0
app.py
CHANGED
@@ -1,3 +1,6 @@
|
|
|
|
|
|
|
|
1 |
import requests
|
2 |
from bs4 import BeautifulSoup
|
3 |
import json
|
@@ -12,7 +15,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
12 |
from fastapi.responses import FileResponse
|
13 |
from fastapi.staticfiles import StaticFiles
|
14 |
from pydantic import BaseModel
|
15 |
-
from typing import Dict, List, Optional
|
16 |
|
17 |
load_dotenv()
|
18 |
|
@@ -53,6 +56,18 @@ class BatchDocResponse(BaseModel):
|
|
53 |
missing: List[str]
|
54 |
search_time: float
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
class TsgDocFinder:
|
57 |
def __init__(self):
|
58 |
self.main_ftp_url = "https://www.3gpp.org/ftp"
|
@@ -166,7 +181,7 @@ class SpecDocFinder:
|
|
166 |
def __init__(self):
|
167 |
self.chars = "0123456789abcdefghijklmnopqrstuvwxyz"
|
168 |
|
169 |
-
def search_document(self, doc_id, release):
|
170 |
series = doc_id.split(".")[0]
|
171 |
while len(series) < 2:
|
172 |
series = "0" + series
|
@@ -181,7 +196,6 @@ class SpecDocFinder:
|
|
181 |
try:
|
182 |
item = items[-1].find("a")
|
183 |
except Exception as e:
|
184 |
-
traceback.print_exc(e)
|
185 |
return f"Unable to find specification {doc_id} : {e}"
|
186 |
a, b, c = [_ for _ in item.get_text().split("-")[1].replace(".zip", "")]
|
187 |
version = f"{self.chars.index(a)}.{self.chars.index(b)}.{self.chars.index(c)}"
|
@@ -205,6 +219,67 @@ finder_spec = SpecDocFinder()
|
|
205 |
async def main_menu():
|
206 |
return FileResponse(os.path.join("templates", "index.html"))
|
207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
@app.post("/find", response_model=DocResponse)
|
209 |
def find_document(request: DocRequest):
|
210 |
start_time = time.time()
|
@@ -214,7 +289,7 @@ def find_document(request: DocRequest):
|
|
214 |
result = finder.search_document(request.doc_id, request.release)
|
215 |
print(result)
|
216 |
|
217 |
-
if "not found" not in result and "Could not" not in result:
|
218 |
return DocResponse(
|
219 |
doc_id=request.doc_id,
|
220 |
url=result,
|
@@ -231,7 +306,7 @@ def find_documents_batch(request: BatchDocRequest):
|
|
231 |
missing = []
|
232 |
|
233 |
for doc_id in request.doc_ids:
|
234 |
-
finder = finder_tsg if
|
235 |
result = finder.search_document(doc_id)
|
236 |
if "not found" not in result and "Could not" not in result and "Unable" not in result:
|
237 |
results[doc_id] = result
|
|
|
1 |
+
from io import StringIO
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
import requests
|
5 |
from bs4 import BeautifulSoup
|
6 |
import json
|
|
|
15 |
from fastapi.responses import FileResponse
|
16 |
from fastapi.staticfiles import StaticFiles
|
17 |
from pydantic import BaseModel
|
18 |
+
from typing import Any, Dict, List, Literal, Optional
|
19 |
|
20 |
load_dotenv()
|
21 |
|
|
|
56 |
missing: List[str]
|
57 |
search_time: float
|
58 |
|
59 |
+
class KeywordRequest(BaseModel):
    # Request body accepted by POST /search-spec.

    # Space-separated keywords looked up in specification titles.
    keywords: str

    # Optional filters; a filter left at None is not applied.
    release: Optional[str] = None  # compared with the first component of "vers"
    version: Optional[str] = None  # compared with the full "vers" string
    wg: Optional[str] = None  # compared with the "WG" column
    spec_type: Optional[Literal["TS", "TR"]] = None  # compared with "type"

    # "and": every keyword must appear in the title; "or": at least one.
    mode: Optional[Literal["and", "or"]] = "and"
|
66 |
+
|
67 |
+
class KeywordResponse(BaseModel):
    # Response body returned by POST /search-spec.

    # One string-to-string mapping per matching specification.
    results: List[Dict[str, str]]

    # Wall-clock duration of the search, in seconds.
    search_time: float
|
70 |
+
|
71 |
class TsgDocFinder:
|
72 |
def __init__(self):
|
73 |
self.main_ftp_url = "https://www.3gpp.org/ftp"
|
|
|
181 |
def __init__(self):
|
182 |
self.chars = "0123456789abcdefghijklmnopqrstuvwxyz"
|
183 |
|
184 |
+
def search_document(self, doc_id, release = None):
|
185 |
series = doc_id.split(".")[0]
|
186 |
while len(series) < 2:
|
187 |
series = "0" + series
|
|
|
196 |
try:
|
197 |
item = items[-1].find("a")
|
198 |
except Exception as e:
|
|
|
199 |
return f"Unable to find specification {doc_id} : {e}"
|
200 |
a, b, c = [_ for _ in item.get_text().split("-")[1].replace(".zip", "")]
|
201 |
version = f"{self.chars.index(a)}.{self.chars.index(b)}.{self.chars.index(c)}"
|
|
|
219 |
async def main_menu():
|
220 |
return FileResponse(os.path.join("templates", "index.html"))
|
221 |
|
222 |
+
_STATUS_REPORT_URL = "https://www.3gpp.org/dynareport?code=status-report.htm"
_STATUS_REPORT_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
# Only the first five columns of each status-report table are used.
_STATUS_REPORT_COLUMN_COUNT = 5


def _load_status_report_rows():
    """Download the 3GPP status report and return its rows as dicts.

    Each dict maps a column name of the first table (non-breaking spaces
    replaced by underscores) to the cell value; empty cells are ``None``.

    Raises:
        HTTPException: 502 when the report cannot be downloaded or contains
            no usable table.
    """
    try:
        # NOTE(review): certificate verification is disabled, as in the
        # original code - confirm this is still required for www.3gpp.org.
        response = requests.get(
            _STATUS_REPORT_URL,
            headers={"User-Agent": _STATUS_REPORT_USER_AGENT},
            verify=False,
            timeout=30,  # never hang a worker on an unresponsive upstream
        )
        response.raise_for_status()
        # The HTML is already in memory, so no storage_options (HTTP
        # headers for remote reads) are passed to read_html.
        tables = pd.read_html(StringIO(response.text), encoding="utf-8")
    except (requests.RequestException, ValueError) as exc:
        raise HTTPException(
            status_code=502,
            detail=f"Unable to load the 3GPP status report: {exc}",
        ) from exc

    frames = [
        table.iloc[:, :_STATUS_REPORT_COLUMN_COUNT].replace({np.nan: None})
        for table in tables
        if table.shape[1] >= _STATUS_REPORT_COLUMN_COUNT
    ]
    if not frames:
        raise HTTPException(
            status_code=502,
            detail="Unable to load the 3GPP status report: no table found",
        )

    # Every table is read with the header names of the first one.
    columns = [str(name).replace("\xa0", "_") for name in frames[0].columns]

    rows = []
    for frame in frames:
        rows.extend(
            dict(zip(columns, values))
            for values in frame.itertuples(index=False, name=None)
        )
    return rows


def _spec_matches(spec, keywords, mode, release, version, working_group, spec_type):
    """Return True when a status-report row satisfies every requested filter."""
    if keywords:
        # A missing title is treated as empty text instead of crashing.
        title = str(spec.get("title") or "").lower()
        # Anything other than an explicit "or" (including None) means "and".
        combine = any if mode == "or" else all
        if not combine(keyword in title for keyword in keywords):
            return False

    vers = spec.get("vers")
    wg = spec.get("WG")
    # Rows without a version or a working group are never returned.
    if vers is None or wg is None:
        return False

    vers = str(vers)
    if release is not None and vers.split(".")[0] != str(release):
        return False
    if version is not None and vers != version:
        return False
    if working_group is not None and wg != working_group:
        return False
    if spec_type is not None and spec.get("type") != spec_type:
        return False
    return True


@app.post("/search-spec", response_model=KeywordResponse)
def search_spec(request: KeywordRequest):
    """Search the 3GPP status report for specifications by title keywords.

    Keywords are whitespace-separated and matched case-insensitively as
    substrings of the title, combined according to ``request.mode``. The
    optional release, version, working-group and type filters are applied
    on top of the keyword match.

    Returns:
        KeywordResponse with one entry per matching specification and the
        elapsed search time in seconds.

    Raises:
        HTTPException: 404 when nothing matches, 502 when the status report
            cannot be retrieved.
    """
    start_time = time.time()
    specifications = _load_status_report_rows()

    # split() without arguments drops the empty strings that repeated
    # spaces would produce; an empty keyword is a substring of every title.
    keywords = request.keywords.lower().split()

    results = []
    for spec in specifications:
        if not _spec_matches(
            spec,
            keywords,
            request.mode,
            request.release,
            request.version,
            request.wg,
            request.spec_type,
        ):
            continue

        vers = str(spec["vers"])
        results.append({
            "id": str(spec["spec_num"]),
            "title": str(spec.get("title") or ""),
            "type": "Technical Specification" if spec.get("type") == "TS" else "Technical Report",
            "release": vers.split(".")[0],
            "version": vers,
            "working_group": str(spec["WG"]),
        })

    if not results:
        raise HTTPException(status_code=404, detail="Specification not found")

    return KeywordResponse(
        results=results,
        search_time=time.time() - start_time,
    )
|
283 |
@app.post("/find", response_model=DocResponse)
|
284 |
def find_document(request: DocRequest):
|
285 |
start_time = time.time()
|
|
|
289 |
result = finder.search_document(request.doc_id, request.release)
|
290 |
print(result)
|
291 |
|
292 |
+
if "not found" not in result and "Could not" not in result and "Unable" not in result:
|
293 |
return DocResponse(
|
294 |
doc_id=request.doc_id,
|
295 |
url=result,
|
|
|
306 |
missing = []
|
307 |
|
308 |
for doc_id in request.doc_ids:
|
309 |
+
finder = finder_tsg if doc_id[0].isalpha() else finder_spec
|
310 |
result = finder.search_document(doc_id)
|
311 |
if "not found" not in result and "Could not" not in result and "Unable" not in result:
|
312 |
results[doc_id] = result
|
static/script.js
CHANGED
@@ -1,12 +1,22 @@
|
|
1 |
// DOM elements
|
2 |
const singleModeBtn = document.getElementById('single-mode-btn');
|
3 |
const batchModeBtn = document.getElementById('batch-mode-btn');
|
|
|
|
|
|
|
4 |
const singleInput = document.querySelector('.single-input');
|
5 |
const batchInput = document.querySelector('.batch-input');
|
|
|
|
|
|
|
6 |
const docIdInput = document.getElementById('doc-id');
|
7 |
const batchIdsInput = document.getElementById('batch-ids');
|
|
|
|
|
8 |
const searchBtn = document.getElementById('search-btn');
|
9 |
const batchSearchBtn = document.getElementById('batch-search-btn');
|
|
|
|
|
10 |
const loader = document.getElementById('loader');
|
11 |
const resultsContainer = document.getElementById('results-container');
|
12 |
const resultsList = document.getElementById('results-list');
|
@@ -16,18 +26,86 @@ const errorMessage = document.getElementById('error-message');
|
|
16 |
// Search mode toggle
|
17 |
singleModeBtn.addEventListener('click', () => {
|
18 |
singleModeBtn.classList.add('active');
|
|
|
19 |
batchModeBtn.classList.remove('active');
|
|
|
|
|
20 |
singleInput.style.display = 'block';
|
21 |
batchInput.style.display = 'none';
|
|
|
|
|
22 |
});
|
23 |
|
24 |
batchModeBtn.addEventListener('click', () => {
|
25 |
batchModeBtn.classList.add('active');
|
|
|
26 |
singleModeBtn.classList.remove('active');
|
|
|
|
|
27 |
batchInput.style.display = 'block';
|
|
|
|
|
28 |
singleInput.style.display = 'none';
|
29 |
});
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
// Single document search
|
32 |
searchBtn.addEventListener('click', async () => {
|
33 |
const docId = docIdInput.value.trim();
|
@@ -147,6 +225,31 @@ function displaySingleNotFound(docId, message) {
|
|
147 |
resultsContainer.style.display = 'block';
|
148 |
}
|
149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
// Display batch results
|
151 |
function displayBatchResults(data) {
|
152 |
resultsList.innerHTML = '';
|
@@ -213,4 +316,10 @@ docIdInput.addEventListener('keypress', (e) => {
|
|
213 |
if (e.key === 'Enter') {
|
214 |
searchBtn.click();
|
215 |
}
|
216 |
-
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
// DOM elements
|
2 |
const singleModeBtn = document.getElementById('single-mode-btn');
|
3 |
const batchModeBtn = document.getElementById('batch-mode-btn');
|
4 |
+
const keywordModeBtn = document.getElementById("keyword-mode-btn");
|
5 |
+
const indexerModeBtn = document.getElementById("indexer-mode-btn")
|
6 |
+
|
7 |
const singleInput = document.querySelector('.single-input');
|
8 |
const batchInput = document.querySelector('.batch-input');
|
9 |
+
const keywordSearchInput = document.querySelector(".keyword-input");
|
10 |
+
const indexerButtons = document.querySelector(".indexer-buttons")
|
11 |
+
|
12 |
const docIdInput = document.getElementById('doc-id');
|
13 |
const batchIdsInput = document.getElementById('batch-ids');
|
14 |
+
const keywordInput = document.getElementById("keywords");
|
15 |
+
|
16 |
const searchBtn = document.getElementById('search-btn');
|
17 |
const batchSearchBtn = document.getElementById('batch-search-btn');
|
18 |
+
const keywordSearchBtn = document.getElementById("keyword-search-btn");
|
19 |
+
|
20 |
const loader = document.getElementById('loader');
|
21 |
const resultsContainer = document.getElementById('results-container');
|
22 |
const resultsList = document.getElementById('results-list');
|
|
|
26 |
// Search mode toggle
|
27 |
// Single-document mode: highlight its button and show only its input panel.
singleModeBtn.addEventListener('click', () => {
    singleModeBtn.classList.add('active');
    for (const otherBtn of [keywordModeBtn, batchModeBtn, indexerModeBtn]) {
        otherBtn.classList.remove('active');
    }

    singleInput.style.display = 'block';
    for (const hiddenPanel of [batchInput, keywordSearchInput, indexerButtons]) {
        hiddenPanel.style.display = 'none';
    }
});
|
38 |
|
39 |
// Batch mode: highlight its button and show only the batch input panel.
batchModeBtn.addEventListener('click', () => {
    batchModeBtn.classList.add('active');
    for (const otherBtn of [keywordModeBtn, singleModeBtn, indexerModeBtn]) {
        otherBtn.classList.remove('active');
    }

    batchInput.style.display = 'block';
    for (const hiddenPanel of [keywordSearchInput, indexerButtons, singleInput]) {
        hiddenPanel.style.display = 'none';
    }
});
|
50 |
|
51 |
+
// Keyword mode: highlight its button and show only the keyword input panel.
keywordModeBtn.addEventListener('click', () => {
    keywordModeBtn.classList.add('active');
    for (const otherBtn of [singleModeBtn, batchModeBtn, indexerModeBtn]) {
        otherBtn.classList.remove('active');
    }

    for (const hiddenPanel of [singleInput, batchInput, indexerButtons]) {
        hiddenPanel.style.display = 'none';
    }
    keywordSearchInput.style.display = 'block';
});
|
62 |
+
|
63 |
+
// Indexer mode: highlight its button and show only the indexer action buttons.
indexerModeBtn.addEventListener('click', () => {
    for (const otherBtn of [keywordModeBtn, singleModeBtn, batchModeBtn]) {
        otherBtn.classList.remove('active');
    }
    indexerModeBtn.classList.add('active');

    // Panel/display pairs, applied in order.
    const panelStates = [
        [singleInput, 'none'],
        [batchInput, 'none'],
        [indexerButtons, 'block'],
        [keywordSearchInput, 'none'],
    ];
    for (const [panel, display] of panelStates) {
        panel.style.display = display;
    }
});
|
74 |
+
|
75 |
+
// Keyword search: POST the keywords to /search-spec and render the matches.
keywordSearchBtn.addEventListener("click", async () => {
    const keywords = keywordInput.value.trim();
    if (!keywords) {
        showError("Please enter at least one keyword");
        return;
    }

    showLoader();
    hideError();

    try {
        const response = await fetch("/search-spec", {
            method: "POST",
            headers: {
                "Content-Type": "application/json"
            },
            body: JSON.stringify({ keywords })
        });

        // An error response may not carry a JSON body; a parse failure must
        // not be reported as a connection problem by the outer catch.
        let data = null;
        try {
            data = await response.json();
        } catch (parseError) {
            data = null;
        }

        if (response.ok && data) {
            displayKeywordResults(data);
        } else {
            // Prefer the server's own explanation (e.g. "Specification not
            // found") over a generic message when it is a plain string.
            const detail = data && typeof data.detail === "string"
                ? data.detail
                : "Error processing keyword search request";
            showError(detail);
        }
    } catch (error) {
        showError('Error connecting to the server. Please check if the API is running.');
        console.error('Error:', error);
    } finally {
        hideLoader();
    }
});
|
107 |
+
|
108 |
+
|
109 |
// Single document search
|
110 |
searchBtn.addEventListener('click', async () => {
|
111 |
const docId = docIdInput.value.trim();
|
|
|
225 |
resultsContainer.style.display = 'block';
|
226 |
}
|
227 |
|
228 |
+
/**
 * Render the keyword-search matches in the results list.
 *
 * Every value is inserted through textContent rather than innerHTML, so
 * markup contained in a title or any other field is shown as text and is
 * never interpreted as HTML.
 *
 * @param {{results: Array<Object>, search_time: number}} data
 *        Response body of POST /search-spec.
 */
function displayKeywordResults(data) {
    resultsList.innerHTML = '';

    // Create <tag class=className>text</tag>, append it to parent, return it.
    const appendElement = (parent, tag, className, text) => {
        const element = document.createElement(tag);
        if (className) {
            element.className = className;
        }
        if (text !== undefined) {
            element.textContent = text;
        }
        parent.appendChild(element);
        return element;
    };

    const specs = Array.isArray(data.results) ? data.results : [];
    specs.forEach(spec => {
        const resultItem = document.createElement("div");
        resultItem.className = "result-item";

        const header = appendElement(resultItem, "div", "result-header");
        appendElement(header, "div", "result-id", `${spec.id}`);
        appendElement(header, "div", "result-status status-found", "Found");

        const details = appendElement(resultItem, "div", "result-url");
        appendElement(details, "p", "", `Title: ${spec.title}`);
        appendElement(details, "p", "", `Type: ${spec.type}`);
        appendElement(details, "p", "", `Release: ${spec.release}`);
        appendElement(details, "p", "", `Version: ${spec.version}`);
        appendElement(details, "p", "", `WG: ${spec.working_group}`);

        resultsList.appendChild(resultItem);
    });

    resultsStats.textContent = `Found in ${data.search_time.toFixed(2)} seconds`;
    resultsContainer.style.display = 'block';
}
|
252 |
+
|
253 |
// Display batch results
|
254 |
function displayBatchResults(data) {
|
255 |
resultsList.innerHTML = '';
|
|
|
316 |
if (e.key === 'Enter') {
|
317 |
searchBtn.click();
|
318 |
}
|
319 |
+
});
|
320 |
+
|
321 |
+
// Pressing Enter inside the keyword field triggers the keyword search button.
keywordInput.addEventListener('keypress', ({ key }) => {
    if (key !== "Enter") {
        return;
    }
    keywordSearchBtn.click();
});
|
static/style.css
CHANGED
@@ -167,6 +167,14 @@ header {
|
|
167 |
display: none;
|
168 |
}
|
169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
.batch-input textarea {
|
171 |
width: 100%;
|
172 |
height: 120px;
|
|
|
167 |
display: none;
|
168 |
}
|
169 |
|
170 |
+
.keyword-input {
|
171 |
+
display: none;
|
172 |
+
}
|
173 |
+
|
174 |
+
.indexer-buttons {
|
175 |
+
display: none;
|
176 |
+
}
|
177 |
+
|
178 |
.batch-input textarea {
|
179 |
width: 100%;
|
180 |
height: 120px;
|
templates/index.html
CHANGED
@@ -27,6 +27,8 @@
|
|
27 |
<div class="search-mode">
|
28 |
<button id="single-mode-btn" class="active">Single Document</button>
|
29 |
<button id="batch-mode-btn">Batch Search</button>
|
|
|
|
|
30 |
</div>
|
31 |
|
32 |
<div class="search-form">
|
@@ -44,6 +46,22 @@
|
|
44 |
<div class="hint">Enter one document ID per line</div>
|
45 |
<button id="batch-search-btn" class="btn" style="margin-top: 10px;">Search All</button>
|
46 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
</div>
|
48 |
|
49 |
<div class="error-message" id="error-message"></div>
|
|
|
27 |
<div class="search-mode">
|
28 |
<button id="single-mode-btn" class="active">Single Document</button>
|
29 |
<button id="batch-mode-btn">Batch Search</button>
|
30 |
+
<button id="keyword-mode-btn">Keyword Search</button>
|
31 |
+
<button id="indexer-mode-btn">Indexer Options</button>
|
32 |
</div>
|
33 |
|
34 |
<div class="search-form">
|
|
|
46 |
<div class="hint">Enter one document ID per line</div>
|
47 |
<button id="batch-search-btn" class="btn" style="margin-top: 10px;">Search All</button>
|
48 |
</div>
|
49 |
+
|
50 |
+
<div class="input-group keyword-input">
|
51 |
+
<label for="keywords">Keywords</label>
|
52 |
+
<div class="input-field">
|
53 |
+
<input type="text" id="keywords" placeholder="Enter your keywords separated by space">
|
54 |
+
<button id="keyword-search-btn" class="btn">Search</button>
|
55 |
+
</div>
|
56 |
+
</div>
|
57 |
+
|
58 |
+
<!-- Indexer action buttons. The label names the button group through
     aria-labelledby, because a `for` attribute must reference the id of an
     existing form control and none is called "indexerBtns". -->
<div class="input-group indexer-buttons">
    <label id="indexer-actions-label">Actions</label>
    <div class="input-field" role="group" aria-labelledby="indexer-actions-label">
        <button id="indexing-btn" class="btn">Index all files</button>
        <button id="testing-btn" class="btn">Test theory</button>
    </div>
</div>
|
65 |
</div>
|
66 |
|
67 |
<div class="error-message" id="error-message"></div>
|