Omar ID EL MOUMEN committed on
Commit 3ae60ae · 1 Parent(s): 5f91632

Reuse of standard IO method of indexing + update indexer

Files changed (3)
  1. .gitattributes +1 -0
  2. app.py +18 -40
  3. indexed_docs.json +3 -8
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -3,8 +3,8 @@ from bs4 import BeautifulSoup
 import json
 import os
 import time
+from datetime import datetime
 import traceback
-import psycopg2
 from dotenv import load_dotenv
 import warnings
 from fastapi import FastAPI, HTTPException
@@ -56,46 +56,23 @@ class BatchDocResponse(BaseModel):
 class TsgDocFinder:
     def __init__(self):
         self.main_ftp_url = "https://www.3gpp.org/ftp"
-        self.host = os.environ.get("PGSQL_HOST")
-        self.port = os.environ.get("PGSQL_PORT")
-        self.user = os.environ.get("PGSQL_USER")
-        self.password = os.environ.get("PGSQL_PASSWORD")
-        self.database = os.environ.get("PGSQL_DATABASE")
-        self.conn = self.connect()
-        self.indexer = self.load_indexer()
+        self.indexer_file = "indexed_docs.json"
+        self.indexer, self.last_indexer_date = self.load_indexer()
 
-    def connect(self):
-        """Établit une connexion à la base de données PostgreSQL"""
-        try:
-            self.conn = psycopg2.connect(
-                host=self.host,
-                port=self.port,
-                user=self.user,
-                password=self.password,
-                dbname=self.database
-            )
-            return self.conn
-        except Exception as e:
-            print(f"Erreur de connexion à la base de données: {e}")
-            return None
-
     def load_indexer(self):
         """Load existing index if available"""
-        if not self.conn:
-            self.conn = self.connect()
-            if self.conn is None:
-                raise HTTPException(status_code=500, detail="Connexion à la base de donnée impossible")
-
-        cursor = self.conn.cursor()
-        try:
-            cursor.execute("SELECT doc_id, url FROM document_position")
-            rows = cursor.fetchall()
-            doc = {doc_id: url for doc_id, url in rows}
-        except:
-            raise HTTPException(status_code=500, detail="Erreur lors de la récupération")
-        finally:
-            cursor.close()
-        return dict(sorted(doc.items()))
+        if os.path.exists(self.indexer_file):
+            with open(self.indexer_file, "r", encoding="utf-8") as f:
+                x = json.load(f)
+            return x["docs"], x["last_indexed_date"]
+        return {}, None
+
+    def save_indexer(self):
+        """Save the updated index"""
+        with open(self.indexer_file, "w", encoding="utf-8") as f:
+            today = datetime.today()
+            output = {"docs": self.indexer, "last_indexed_date": today.strftime("%d/%m/%Y-%H:%M:%S")}
+            json.dump(output, f, indent=4, ensure_ascii=False)
 
     def get_workgroup(self, doc):
         main_tsg = "tsg_ct" if doc[0] == "C" else "tsg_sa" if doc[0] == "S" else None
@@ -179,6 +156,7 @@ class TsgDocFinder:
             if doc in file.lower() or original_id in file:
                 doc_url = f"{zip_url}/{file}"
                 self.indexer[original_id] = doc_url
+                self.save_indexer()
                 return doc_url
 
         return f"Document {doc_id} not found"
@@ -253,9 +231,9 @@ def find_documents_batch(request: BatchDocRequest):
     missing = []
 
     for doc_id in request.doc_ids:
-        finder = TsgDocFinder() if doc_id[0].isalpha() else SpecDocFinder()
+        finder = finder_tsg if request.doc_id[0].isalpha() else finder_spec
         result = finder.search_document(doc_id)
-        if "not found" not in result and "Could not" not in result:
+        if "not found" not in result and "Could not" not in result and "Unable" not in result:
             results[doc_id] = result
         else:
             missing.append(doc_id)
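
The app.py change above swaps the PostgreSQL-backed index for plain JSON file IO. As a rough, self-contained sketch of that round trip (not repo code: load_index/save_index stand in for the TsgDocFinder methods, and the example entry is taken from the old indexed_docs.json), assuming the file layout written by save_indexer():

# Minimal sketch of the JSON-based indexer IO introduced above (illustrative only).
import json
import os
from datetime import datetime

INDEXER_FILE = "indexed_docs.json"

def load_index():
    """Return (docs, last_indexed_date), or ({}, None) if the file is missing."""
    if os.path.exists(INDEXER_FILE):
        with open(INDEXER_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
        return data["docs"], data["last_indexed_date"]
    return {}, None

def save_index(docs):
    """Write the mapping plus a 'dd/mm/YYYY-HH:MM:SS' timestamp, as save_indexer() does."""
    output = {"docs": docs, "last_indexed_date": datetime.today().strftime("%d/%m/%Y-%H:%M:%S")}
    with open(INDEXER_FILE, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=4, ensure_ascii=False)

# Example round trip using an entry from the old index file:
docs, last_date = load_index()
docs["SP-000182"] = "https://www.3gpp.org/ftp/tsg_sa/TSG_SA/TSGS_08/Docs/ZIP/SP-000182.zip"
save_index(docs)

Note that search_document() now calls save_indexer() whenever it caches a new URL, so each newly resolved document rewrites the whole JSON file.
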
indexed_docs.json CHANGED
@@ -1,8 +1,3 @@
-{
-    "S4-110084": "https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_62/Docs/S4-110084.zip",
-    "SP-000182": "https://www.3gpp.org/ftp/tsg_sa/TSG_SA/TSGS_08/Docs/ZIP/SP-000182.zip",
-    "SP-000183": "https://www.3gpp.org/ftp/tsg_sa/TSG_SA/TSGS_08/Docs/ZIP/SP-000183.zip",
-    "SP-000184": "https://www.3gpp.org/ftp/tsg_sa/TSG_SA/TSGS_08/Docs/ZIP/SP-000184.zip",
-    "SP-000185": "https://www.3gpp.org/ftp/tsg_sa/TSG_SA/TSGS_08/Docs/ZIP/SP-000185.zip",
-    "SP-090017": "https://www.3gpp.org/ftp/tsg_sa/TSG_SA/TSGS_43/Docs/SP-090017.zip"
-}
+version https://git-lfs.github.com/spec/v1
+oid sha256:27de1ffcbc0301e7eab274481928eff7dcf327b2943cbd47b37b531785c433b1
+size 68386245
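
With the new *.json rule in .gitattributes, indexed_docs.json is stored through Git LFS, so what Git tracks is the 3-line pointer above while the real index weighs in around 68 MB. A checkout made without git-lfs leaves that pointer on disk, and json.load() inside load_indexer() would then fail on it. The guard below is a hypothetical addition, not part of this commit, sketching how a caller could detect that situation:

# Hypothetical helper (not in the repo): detect a Git LFS pointer file before json.load().
def is_lfs_pointer(path="indexed_docs.json"):
    try:
        with open(path, "r", encoding="utf-8") as f:
            first_line = f.readline()
        return first_line.startswith("version https://git-lfs.github.com/spec/v1")
    except (OSError, UnicodeDecodeError):
        return False

if is_lfs_pointer():
    raise RuntimeError("indexed_docs.json is an LFS pointer; run 'git lfs pull' to fetch the real index")
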