Spaces:

OrganizedProgrammers
/

3GPPDocFinder

Running

App Files Files Community

Omar ID EL MOUMEN committed 6 days ago

Commit

acaccf0

1 Parent(s): 8840360

Add specification search

Browse files

Files changed (2) hide show

app.py +60 -22
static/script.js +3 -3

app.py CHANGED Viewed

@@ -32,15 +32,17 @@ app.add_middleware(
 )
 class DocRequest(BaseModel):
-    tsg_doc_id: str
 class DocResponse(BaseModel):
-    tsg_doc_id: str
     url: str
     search_time: float
 class BatchDocRequest(BaseModel):
-    tsg_doc_ids: List[str]
 class BatchDocResponse(BaseModel):
     results: Dict[str, str]
@@ -65,12 +67,12 @@ class TsgDocFinder:
         with open(self.indexer_file, "w", encoding="utf-8") as f:
             json.dump(self.indexer, f, indent=4, ensure_ascii=False)
-    def get_workgroup(self, tsg_doc):
-        main_tsg = "tsg_ct" if tsg_doc[0] == "C" else "tsg_sa" if tsg_doc[0] == "S" else None
         if main_tsg is None:
             return None, None, None
-        workgroup = f"WG{int(tsg_doc[1])}" if tsg_doc[1].isnumeric() else main_tsg.upper()
-        return main_tsg, workgroup, tsg_doc
     def find_workgroup_url(self, main_tsg, workgroup):
         """Find the URL for the specific workgroup"""
@@ -94,25 +96,25 @@ class TsgDocFinder:
             print(f"Error accessing {url}: {e}")
             return []
-    def search_document(self, tsg_doc_id):
         """Search for a specific document by its ID"""
-        original_id = tsg_doc_id
         # Check if already indexed
         if original_id in self.indexer:
             return self.indexer[original_id]
         # Parse the document ID
-        main_tsg, workgroup, tsg_doc = self.get_workgroup(tsg_doc_id)
         if not main_tsg:
-            return f"Could not parse document ID: {tsg_doc_id}"
-        print(f"Searching for {original_id} (parsed as {tsg_doc}) in {main_tsg}/{workgroup}...")
         # Find the workgroup URL
         wg_url = self.find_workgroup_url(main_tsg, workgroup)
         if not wg_url:
-            return f"Could not find workgroup for {tsg_doc_id}"
         # Search in the workgroup directories
         meeting_folders = self.get_docs_from_url(wg_url)
@@ -128,7 +130,7 @@ class TsgDocFinder:
                 # Check for the document in the main Docs folder
                 for file in files:
-                    if tsg_doc in file.lower() or original_id in file:
                         doc_url = f"{docs_url}/{file}"
                         self.indexer[original_id] = doc_url
                         self.save_indexer()
@@ -141,16 +143,47 @@ class TsgDocFinder:
                     zip_files = self.get_docs_from_url(zip_url)
                     for file in zip_files:
-                        if tsg_doc in file.lower() or original_id in file:
                             doc_url = f"{zip_url}/{file}"
                             self.indexer[original_id] = doc_url
                             self.save_indexer()
                             return doc_url
-        return f"Document {tsg_doc_id} not found"
-# Create a global instance of the finder
-finder = TsgDocFinder()
 @app.get("/")
 async def main_menu():
@@ -159,12 +192,15 @@ async def main_menu():
 @app.post("/find", response_model=DocResponse)
 def find_document(request: DocRequest):
     start_time = time.time()
-    result = finder.search_document(request.tsg_doc_id)
     if "not found" not in result and "Could not" not in result:
         return DocResponse(
-            tsg_doc_id=request.tsg_doc_id,
             url=result,
             search_time=time.time() - start_time
         )
@@ -178,7 +214,8 @@ def find_documents_batch(request: BatchDocRequest):
     results = {}
     missing = []
-    for doc_id in request.tsg_doc_ids:
         result = finder.search_document(doc_id)
         if "not found" not in result and "Could not" not in result:
             results[doc_id] = result
@@ -193,4 +230,5 @@ def find_documents_batch(request: BatchDocRequest):
 @app.get("/indexed", response_model=List[str])
 def get_indexed_documents():
     return list(finder.indexer.keys())

 )
 class DocRequest(BaseModel):
+    doc_id: str
+    release: Optional[int] = None
 class DocResponse(BaseModel):
+    doc_id: str
     url: str
     search_time: float
 class BatchDocRequest(BaseModel):
+    doc_ids: List[str]
+    release: Optional[int] = None
 class BatchDocResponse(BaseModel):
     results: Dict[str, str]
         with open(self.indexer_file, "w", encoding="utf-8") as f:
             json.dump(self.indexer, f, indent=4, ensure_ascii=False)
+    def get_workgroup(self, doc):
+        main_tsg = "tsg_ct" if doc[0] == "C" else "tsg_sa" if doc[0] == "S" else None
         if main_tsg is None:
             return None, None, None
+        workgroup = f"WG{int(doc[1])}" if doc[1].isnumeric() else main_tsg.upper()
+        return main_tsg, workgroup, doc
     def find_workgroup_url(self, main_tsg, workgroup):
         """Find the URL for the specific workgroup"""
             print(f"Error accessing {url}: {e}")
             return []
+    def search_document(self, doc_id, release = None):
         """Search for a specific document by its ID"""
+        original_id = doc_id
         # Check if already indexed
         if original_id in self.indexer:
             return self.indexer[original_id]
         # Parse the document ID
+        main_tsg, workgroup, doc = self.get_workgroup(doc_id)
         if not main_tsg:
+            return f"Could not parse document ID: {doc_id}"
+        print(f"Searching for {original_id} (parsed as {doc}) in {main_tsg}/{workgroup}...")
         # Find the workgroup URL
         wg_url = self.find_workgroup_url(main_tsg, workgroup)
         if not wg_url:
+            return f"Could not find workgroup for {doc_id}"
         # Search in the workgroup directories
         meeting_folders = self.get_docs_from_url(wg_url)
                 # Check for the document in the main Docs folder
                 for file in files:
+                    if doc in file.lower() or original_id in file:
                         doc_url = f"{docs_url}/{file}"
                         self.indexer[original_id] = doc_url
                         self.save_indexer()
                     zip_files = self.get_docs_from_url(zip_url)
                     for file in zip_files:
+                        if doc in file.lower() or original_id in file:
                             doc_url = f"{zip_url}/{file}"
                             self.indexer[original_id] = doc_url
                             self.save_indexer()
                             return doc_url
+        return f"Document {doc_id} not found"
+class SpecDocFinder:
+    def __init__(self):
+        self.chars = "0123456789abcdefghijklmnopqrstuvwxyz"
+    def search_document(self, doc_id, release):
+        series = doc_id.split(".")[0]
+        while len(series) < 2:
+            series = "0" + series
+        url = f"https://www.3gpp.org/ftp/Specs/archive/{series}_series/{doc_id}"
+        response = requests.get(url, verify=False)
+        soup = BeautifulSoup(response.text, 'html.parser')
+        items = soup.find_all("tr")[1:]
+        version_found = None
+        if release is None:
+            item = items[-1].find("a")
+            a, b, c = [_ for _ in item.get_text().split("-")[1].replace(".zip", "")]
+            version = f"{self.chars.index(a)}.{self.chars.index(b)}.{self.chars.index(c)}"
+            version_found = (version, item.get("href"))
+            _, spec_url = version_found
+            return spec_url if version_found is not None else f"Specification {doc_id} not found"
+        else:
+            for item in items:
+                x = item.find("a")
+                if f"{doc_id.replace('.', '')}-{self.chars[int(release)]}" in x.get_text():
+                    a, b, c = [_ for _ in x.get_text().split("-")[1].replace(".zip", "")]
+                    version = f"{self.chars.index(a)}.{self.chars.index(b)}.{self.chars.index(c)}"
+                    version_found = (version, x.get("href"))
+            _, spec_url = version_found
+            return spec_url if version_found is not None else f"Specification {doc_id} not found"
 @app.get("/")
 async def main_menu():
 @app.post("/find", response_model=DocResponse)
 def find_document(request: DocRequest):
     start_time = time.time()
+    finder = TsgDocFinder() if request.doc_id[0].isalpha() else SpecDocFinder()
+    print(finder)
+    result = finder.search_document(request.doc_id, request.release)
+    print(result)
     if "not found" not in result and "Could not" not in result:
         return DocResponse(
+            doc_id=request.doc_id,
             url=result,
             search_time=time.time() - start_time
         )
     results = {}
     missing = []
+    for doc_id in request.doc_ids:
+        finder = TsgDocFinder() if request.doc_id[0].isalpha() else SpecDocFinder()
         result = finder.search_document(doc_id)
         if "not found" not in result and "Could not" not in result:
             results[doc_id] = result
 @app.get("/indexed", response_model=List[str])
 def get_indexed_documents():
+    finder = TsgDocFinder()
     return list(finder.indexer.keys())

static/script.js CHANGED Viewed

@@ -48,7 +48,7 @@ searchBtn.addEventListener('click', async () => {
             headers: {
                 'Content-Type': 'application/json'
             },
-            body: JSON.stringify({ tsg_doc_id: docId })
         });
         const data = await response.json();
@@ -93,7 +93,7 @@ batchSearchBtn.addEventListener('click', async () => {
             headers: {
                 'Content-Type': 'application/json'
             },
-            body: JSON.stringify({ tsg_doc_ids: docIds })
         });
         const data = await response.json();
@@ -120,7 +120,7 @@ function displaySingleResult(data) {
     resultItem.className = 'result-item';
     resultItem.innerHTML = `
         <div class="result-header">
-            <div class="result-id">${data.tsg_doc_id}</div>
             <div class="result-status status-found">Found</div>
         </div>
         <div class="result-url">

             headers: {
                 'Content-Type': 'application/json'
             },
+            body: JSON.stringify({ doc_id: docId, release: null })
         });
         const data = await response.json();
             headers: {
                 'Content-Type': 'application/json'
             },
+            body: JSON.stringify({ doc_ids: docIds })
         });
         const data = await response.json();
     resultItem.className = 'result-item';
     resultItem.innerHTML = `
         <div class="result-header">
+            <div class="result-id">${data.doc_id}</div>
             <div class="result-status status-found">Found</div>
         </div>
         <div class="result-url">