MsChabane committed on
Commit
b16dcae
·
verified ·
1 Parent(s): 0b9c1c4

Update main.py: add a maximum upload size (MAX_SIZE) check

Browse files
Files changed (1) hide show
  1. main.py +19 -13
main.py CHANGED
@@ -71,6 +71,7 @@ app.add_middleware(
71
  allow_methods=["*"],
72
  allow_headers=["*"],
73
  )
 
74
 
75
  app.mount("/static",StaticFiles(directory='static'),'static')
76
  templates = Jinja2Templates(directory='templates')
@@ -108,19 +109,24 @@ def interpret(file_img:UploadFile=File(...)):
108
  return JSONResponse(content={"caption": caption[0]['generated_text']},status_code=200)
109
 
110
  @app.post("/summerize")
111
- def summerzation(file:UploadFile=File(...)):
112
  try:
113
  extension = file.filename.split(".")[-1]
 
 
 
 
 
 
114
  if extension == "pdf":
115
- text = get_text_from_PDF(file.file)
116
  elif extension == "docx":
117
- text = get_text_from_DOC(file.file)
118
  elif extension == "pptx":
119
- text = get_text_from_PPT(file.file)
120
  elif extension == "xlsx":
121
- text = get_text_from_EXCEL(file.file)
122
- else:
123
- return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
124
 
125
  if not text.strip():
126
  return JSONResponse(content={'error':'File is emplty'},status_code=400)
@@ -146,7 +152,7 @@ def plot(user_need:str,file:UploadFile=File(...)):
146
  if extension not in Supported_extensions:
147
  return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
148
 
149
- df = pd.read_excel(io=file.file)
150
 
151
  message = f"""
152
  You are a helpful assistant that helps users write Python code.
@@ -215,8 +221,8 @@ def get_text_from_PDF(file_content):
215
  text += page.get_text()
216
  return text
217
 
218
- def get_text_from_PPT(file):
219
- prs = Presentation(file)
220
  text = ""
221
  for slide in prs.slides:
222
  for shape in slide.shapes:
@@ -225,14 +231,14 @@ def get_text_from_PPT(file):
225
  return text
226
 
227
 
228
- def get_text_from_DOC(file):
229
- doc = Document(file)
230
  text = ""
231
  for paragraph in doc.paragraphs:
232
  text += paragraph.text
233
  return text
234
 
235
  def get_text_from_EXCEL(file):
236
- df = pd.read_excel(io=file)
237
  text = df.to_string()
238
  return text
 
71
  allow_methods=["*"],
72
  allow_headers=["*"],
73
  )
74
+ MAX_SIZE= 1 * 1024 *1024
75
 
76
  app.mount("/static",StaticFiles(directory='static'),'static')
77
  templates = Jinja2Templates(directory='templates')
 
109
  return JSONResponse(content={"caption": caption[0]['generated_text']},status_code=200)
110
 
111
  @app.post("/summerize")
112
+ async def summerzation(file:UploadFile=File(...)):
113
  try:
114
  extension = file.filename.split(".")[-1]
115
+ supported_ext=["pdf","xlxs","docx","ppt"]
116
+ if extension not in supported_ext :
117
+ return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
118
+ file_bytes = await file.file.read()
119
+ if len(file_bytes) > MAX_SIZE :
120
+ return JSONResponse(content={"error": "too large file "},status_code=400)
121
  if extension == "pdf":
122
+ text = get_text_from_PDF(file_bytes)
123
  elif extension == "docx":
124
+ text = get_text_from_DOC(file_bytes)
125
  elif extension == "pptx":
126
+ text = get_text_from_PPT(file_bytes)
127
  elif extension == "xlsx":
128
+ text = get_text_from_EXCEL(file_bytes)
129
+
 
130
 
131
  if not text.strip():
132
  return JSONResponse(content={'error':'File is emplty'},status_code=400)
 
152
  if extension not in Supported_extensions:
153
  return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
154
 
155
+ df = pd.read_excel(io= file.file)
156
 
157
  message = f"""
158
  You are a helpful assistant that helps users write Python code.
 
221
  text += page.get_text()
222
  return text
223
 
224
+ def get_text_from_PPT(file_content):
225
+ prs = Presentation(io.BytesIO(file_content))
226
  text = ""
227
  for slide in prs.slides:
228
  for shape in slide.shapes:
 
231
  return text
232
 
233
 
234
+ def get_text_from_DOC(file_content):
235
+ doc = Document(io.BytesIO(file_content))
236
  text = ""
237
  for paragraph in doc.paragraphs:
238
  text += paragraph.text
239
  return text
240
 
241
  def get_text_from_EXCEL(file):
242
+ df = pd.read_excel(io=io.BytesIO(file))
243
  text = df.to_string()
244
  return text