Spaces:

OrganizedProgrammers
/

3GPPSpecSplitter

Sleeping

om4r932 committed on May 30

Commit

a7f45db

1 Parent(s): 00d0e4e

Update regex

Files changed (1) hide show

app.py CHANGED Viewed

@@ -34,6 +34,8 @@ app.add_middleware(
     allow_headers=["*"],
 )
 def get_text(specification: str, version: str):
     """Récupère les bytes du PDF à partir d'une spécification et d'une version."""
     doc_id = specification
@@ -264,7 +266,7 @@ def get_file_from_spec_id_version(req: SpecRequest) -> Dict[str, str]:
     chapters = []
     for line in toc_brut:
         x = line.split("\t")
-        m = re.search(r"^(\d+(?:\.\d+)*)\t[\ \S]+$", line)
         if m and any(line in c for c in text[forewords[0]:forewords[1]]):
             chapters.append(line)
             print(line)
@@ -306,7 +308,7 @@ def get_file_from_spec_id_version(req: SpecRequest) -> Dict:
     chapters = []
     for line in toc_brut:
         x = line.split("\t")
-        m = re.search(r"^(\d+(?:\.\d+)*)\t[\ \S]+$", line)
         if m and any(line in c for c in text[forewords[0]:forewords[1]]):
             chapters.append(line)
             print(line)

     allow_headers=["*"],
 )
+regex = r"^(\d+[a-z]?(?:\.\d+)*)\t[\ \S]+$"
 def get_text(specification: str, version: str):
     """Récupère les bytes du PDF à partir d'une spécification et d'une version."""
     doc_id = specification
     chapters = []
     for line in toc_brut:
         x = line.split("\t")
+        m = re.search(regex, line)
         if m and any(line in c for c in text[forewords[0]:forewords[1]]):
             chapters.append(line)
             print(line)
     chapters = []
     for line in toc_brut:
         x = line.split("\t")
+        m = re.search(regex, line)
         if m and any(line in c for c in text[forewords[0]:forewords[1]]):
             chapters.append(line)
             print(line)