Spaces:

MsChabane
/

SmartDoc

Running

App Files Community

MsChabane committed on Apr 9

Commit

b16dcae

verified ·

1 Parent(s): 0b9c1c4

update main add max size

Browse files

Files changed (1) hide show

main.py +19 -13

main.py CHANGED Viewed

@@ -71,6 +71,7 @@ app.add_middleware(
     allow_methods=["*"],
     allow_headers=["*"],
 )
 app.mount("/static",StaticFiles(directory='static'),'static')
 templates = Jinja2Templates(directory='templates')
@@ -108,19 +109,24 @@ def interpret(file_img:UploadFile=File(...)):
     return  JSONResponse(content={"caption": caption[0]['generated_text']},status_code=200)
 @app.post("/summerize")
-def summerzation(file:UploadFile=File(...)):
     try:
         extension = file.filename.split(".")[-1]
         if extension == "pdf":
-            text = get_text_from_PDF(file.file)
         elif extension == "docx":
-            text = get_text_from_DOC(file.file)
         elif extension == "pptx":
-            text = get_text_from_PPT(file.file)
         elif extension == "xlsx":
-            text = get_text_from_EXCEL(file.file)
-        else:
-            return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
         if not text.strip():
             return JSONResponse(content={'error':'File is emplty'},status_code=400)
@@ -146,7 +152,7 @@ def plot(user_need:str,file:UploadFile=File(...)):
         if extension not in Supported_extensions:
             return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
-        df = pd.read_excel(io=file.file)
         message = f"""
 You are a helpful assistant that helps users write Python code.
@@ -215,8 +221,8 @@ def get_text_from_PDF(file_content):
       text += page.get_text()
   return text
-def get_text_from_PPT(file):
-  prs = Presentation(file)
   text = ""
   for slide in prs.slides:
       for shape in slide.shapes:
@@ -225,14 +231,14 @@ def get_text_from_PPT(file):
   return text
-def get_text_from_DOC(file):
-  doc = Document(file)
   text = ""
   for paragraph in doc.paragraphs:
       text += paragraph.text
   return text
 def get_text_from_EXCEL(file):
-  df = pd.read_excel(io=file)
   text = df.to_string()
   return text

     allow_methods=["*"],
     allow_headers=["*"],
 )
+MAX_SIZE= 1 * 1024 *1024
 app.mount("/static",StaticFiles(directory='static'),'static')
 templates = Jinja2Templates(directory='templates')
     return  JSONResponse(content={"caption": caption[0]['generated_text']},status_code=200)
 @app.post("/summerize")
+async def summerzation(file:UploadFile=File(...)):
     try:
         extension = file.filename.split(".")[-1]
+        supported_ext=["pdf","xlxs","docx","ppt"]
+        if extension not in supported_ext :
+            return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
+        file_bytes = await file.file.read()
+        if len(file_bytes) >  MAX_SIZE :
+            return JSONResponse(content={"error": "too large file "},status_code=400)
         if extension == "pdf":
+            text = get_text_from_PDF(file_bytes)
         elif extension == "docx":
+            text = get_text_from_DOC(file_bytes)
         elif extension == "pptx":
+            text = get_text_from_PPT(file_bytes)
         elif extension == "xlsx":
+            text = get_text_from_EXCEL(file_bytes)
         if not text.strip():
             return JSONResponse(content={'error':'File is emplty'},status_code=400)
         if extension not in Supported_extensions:
             return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
+        df = pd.read_excel(io= file.file)
         message = f"""
 You are a helpful assistant that helps users write Python code.
       text += page.get_text()
   return text
+def get_text_from_PPT(file_content):
+  prs = Presentation(io.BytesIO(file_content))
   text = ""
   for slide in prs.slides:
       for shape in slide.shapes:
   return text
+def get_text_from_DOC(file_content):
+  doc = Document(io.BytesIO(file_content))
   text = ""
   for paragraph in doc.paragraphs:
       text += paragraph.text
   return text
 def get_text_from_EXCEL(file):
+  df = pd.read_excel(io=io.BytesIO(file))
   text = df.to_string()
   return text