document-summarization

Runtime error

App Files Community

MeetJivani committed on Sep 25, 2023

Commit

a5ee254

1 Parent(s): 970a7e9

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -31

app.py CHANGED Viewed

@@ -350,47 +350,90 @@ def load_single_example_text(
     return text
-def load_uploaded_file(file_obj, max_pages: int = 20, lower: bool = False) -> str:
     """
-    load_uploaded_file - loads a file uploaded by the user
-    :param file_obj (POTENTIALLY list): Gradio file object inside a list
     :param int max_pages: the maximum number of pages to load from a PDF
     :param bool lower: whether to lowercase the text
-    :return str: the text of the file
     """
     global ocr_model
     logger = logging.getLogger(__name__)
-    # check if mysterious file object is a list
-    if isinstance(file_obj, list):
-        file_obj = file_obj[0]
-    file_path = Path(file_obj.name)
-    try:
-        logger.info(f"Loading file:\t{file_path}")
-        if file_path.suffix in [".txt", ".md"]:
-            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
-                raw_text = f.read()
-            text = clean(raw_text, lower=lower)
-        elif file_path.suffix == ".pdf":
-            logger.info(f"loading a PDF file: {file_path.name}")
-            max_pages = int(os.environ.get("APP_OCR_MAX_PAGES", max_pages))
-            logger.info(f"max_pages is: {max_pages}. Starting conversion...")
-            conversion_stats = convert_PDF_to_Text(
-                file_path,
-                ocr_model=ocr_model,
-                max_pages=max_pages,
-            )
-            text = conversion_stats["converted_text"]
-        else:
-            logger.error(f"Unknown file type:\t{file_path.suffix}")
-            text = "ERROR - check file - unknown file type. PDF, TXT, and MD are supported."
-        return text
-    except Exception as e:
-        logger.error(f"Trying to load file:\t{file_path},\nerror:\t{e}")
-        return f"Error: Could not read file {file_path.name}. Make sure it is a PDF, TXT, or MD file."
 def parse_args():
     """arguments for the command line interface"""
     parser = argparse.ArgumentParser(

     return text
+# def load_uploaded_file(file_obj, max_pages: int = 20, lower: bool = False) -> str:
+#     """
+#     load_uploaded_file - loads a file uploaded by the user
+#     :param file_obj (POTENTIALLY list): Gradio file object inside a list
+#     :param int max_pages: the maximum number of pages to load from a PDF
+#     :param bool lower: whether to lowercase the text
+#     :return str: the text of the file
+#     """
+#     global ocr_model
+#     logger = logging.getLogger(__name__)
+#     # check if mysterious file object is a list
+#     if isinstance(file_obj, list):
+#         file_obj = file_obj[0]
+#     file_path = Path(file_obj.name)
+#     try:
+#         logger.info(f"Loading file:\t{file_path}")
+#         if file_path.suffix in [".txt", ".md"]:
+#             with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
+#                 raw_text = f.read()
+#             text = clean(raw_text, lower=lower)
+#         elif file_path.suffix == ".pdf":
+#             logger.info(f"loading a PDF file: {file_path.name}")
+#             max_pages = int(os.environ.get("APP_OCR_MAX_PAGES", max_pages))
+#             logger.info(f"max_pages is: {max_pages}. Starting conversion...")
+#             conversion_stats = convert_PDF_to_Text(
+#                 file_path,
+#                 ocr_model=ocr_model,
+#                 max_pages=max_pages,
+#             )
+#             text = conversion_stats["converted_text"]
+#         else:
+#             logger.error(f"Unknown file type:\t{file_path.suffix}")
+#             text = "ERROR - check file - unknown file type. PDF, TXT, and MD are supported."
+#         return text
+#     except Exception as e:
+#         logger.error(f"Trying to load file:\t{file_path},\nerror:\t{e}")
+#         return f"Error: Could not read file {file_path.name}. Make sure it is a PDF, TXT, or MD file."
+def load_uploaded_files(file_objs, max_pages: int = 20, lower: bool = False) -> str:
     """
+    load_uploaded_files - loads multiple files uploaded by the user and concatenates their contents
+    :param file_objs (list): List of Gradio file objects
     :param int max_pages: the maximum number of pages to load from a PDF
     :param bool lower: whether to lowercase the text
+    :return str: the concatenated text of all the files
     """
     global ocr_model
     logger = logging.getLogger(__name__)
+    concatenated_text = ""  # Initialize an empty string to concatenate text
+    try:
+        for file_obj in file_objs:
+            file_path = Path(file_obj.name)
+            logger.info(f"Loading file:\t{file_path}")
+            if file_path.suffix in [".txt", ".md"]:
+                with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
+                    raw_text = f.read()
+                text = clean(raw_text, lower=lower)
+            elif file_path.suffix == ".pdf":
+                logger.info(f"loading a PDF file: {file_path.name}")
+                max_pages = int(os.environ.get("APP_OCR_MAX_PAGES", max_pages))
+                logger.info(f"max_pages is: {max_pages}. Starting conversion...")
+                conversion_stats = convert_PDF_to_Text(
+                    file_path,
+                    ocr_model=ocr_model,
+                    max_pages=max_pages,
+                )
+                text = conversion_stats["converted_text"]
+            else:
+                logger.error(f"Unknown file type:\t{file_path.suffix}")
+                text = f"ERROR - check file - unknown file type. PDF, TXT, and MD are supported."
+            concatenated_text += text  # Concatenate text from each file
+        return concatenated_text
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return f"Error: Could not read one or more files. Make sure they are PDF, TXT, or MD files."
 def parse_args():
     """arguments for the command line interface"""
     parser = argparse.ArgumentParser(