Spaces:
Running
Running
update main add max size
Browse files
main.py
CHANGED
@@ -71,6 +71,7 @@ app.add_middleware(
|
|
71 |
allow_methods=["*"],
|
72 |
allow_headers=["*"],
|
73 |
)
|
|
|
74 |
|
75 |
app.mount("/static",StaticFiles(directory='static'),'static')
|
76 |
templates = Jinja2Templates(directory='templates')
|
@@ -108,19 +109,24 @@ def interpret(file_img:UploadFile=File(...)):
|
|
108 |
return JSONResponse(content={"caption": caption[0]['generated_text']},status_code=200)
|
109 |
|
110 |
@app.post("/summerize")
|
111 |
-
def summerzation(file:UploadFile=File(...)):
|
112 |
try:
|
113 |
extension = file.filename.split(".")[-1]
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
if extension == "pdf":
|
115 |
-
text = get_text_from_PDF(
|
116 |
elif extension == "docx":
|
117 |
-
text = get_text_from_DOC(
|
118 |
elif extension == "pptx":
|
119 |
-
text = get_text_from_PPT(
|
120 |
elif extension == "xlsx":
|
121 |
-
text = get_text_from_EXCEL(
|
122 |
-
|
123 |
-
return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
|
124 |
|
125 |
if not text.strip():
|
126 |
return JSONResponse(content={'error':'File is emplty'},status_code=400)
|
@@ -146,7 +152,7 @@ def plot(user_need:str,file:UploadFile=File(...)):
|
|
146 |
if extension not in Supported_extensions:
|
147 |
return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
|
148 |
|
149 |
-
df = pd.read_excel(io=file.file)
|
150 |
|
151 |
message = f"""
|
152 |
You are a helpful assistant that helps users write Python code.
|
@@ -215,8 +221,8 @@ def get_text_from_PDF(file_content):
|
|
215 |
text += page.get_text()
|
216 |
return text
|
217 |
|
218 |
-
def get_text_from_PPT(
|
219 |
-
prs = Presentation(
|
220 |
text = ""
|
221 |
for slide in prs.slides:
|
222 |
for shape in slide.shapes:
|
@@ -225,14 +231,14 @@ def get_text_from_PPT(file):
|
|
225 |
return text
|
226 |
|
227 |
|
228 |
-
def get_text_from_DOC(
|
229 |
-
doc = Document(
|
230 |
text = ""
|
231 |
for paragraph in doc.paragraphs:
|
232 |
text += paragraph.text
|
233 |
return text
|
234 |
|
235 |
def get_text_from_EXCEL(file):
|
236 |
-
df = pd.read_excel(io=file)
|
237 |
text = df.to_string()
|
238 |
return text
|
|
|
71 |
allow_methods=["*"],
|
72 |
allow_headers=["*"],
|
73 |
)
|
74 |
+
MAX_SIZE= 1 * 1024 *1024
|
75 |
|
76 |
app.mount("/static",StaticFiles(directory='static'),'static')
|
77 |
templates = Jinja2Templates(directory='templates')
|
|
|
109 |
return JSONResponse(content={"caption": caption[0]['generated_text']},status_code=200)
|
110 |
|
111 |
@app.post("/summerize")
|
112 |
+
async def summerzation(file:UploadFile=File(...)):
|
113 |
try:
|
114 |
extension = file.filename.split(".")[-1]
|
115 |
+
supported_ext=["pdf","xlxs","docx","ppt"]
|
116 |
+
if extension not in supported_ext :
|
117 |
+
return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
|
118 |
+
file_bytes = await file.file.read()
|
119 |
+
if len(file_bytes) > MAX_SIZE :
|
120 |
+
return JSONResponse(content={"error": "too large file "},status_code=400)
|
121 |
if extension == "pdf":
|
122 |
+
text = get_text_from_PDF(file_bytes)
|
123 |
elif extension == "docx":
|
124 |
+
text = get_text_from_DOC(file_bytes)
|
125 |
elif extension == "pptx":
|
126 |
+
text = get_text_from_PPT(file_bytes)
|
127 |
elif extension == "xlsx":
|
128 |
+
text = get_text_from_EXCEL(file_bytes)
|
129 |
+
|
|
|
130 |
|
131 |
if not text.strip():
|
132 |
return JSONResponse(content={'error':'File is emplty'},status_code=400)
|
|
|
152 |
if extension not in Supported_extensions:
|
153 |
return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
|
154 |
|
155 |
+
df = pd.read_excel(io= file.file)
|
156 |
|
157 |
message = f"""
|
158 |
You are a helpful assistant that helps users write Python code.
|
|
|
221 |
text += page.get_text()
|
222 |
return text
|
223 |
|
224 |
+
def get_text_from_PPT(file_content):
|
225 |
+
prs = Presentation(io.BytesIO(file_content))
|
226 |
text = ""
|
227 |
for slide in prs.slides:
|
228 |
for shape in slide.shapes:
|
|
|
231 |
return text
|
232 |
|
233 |
|
234 |
+
def get_text_from_DOC(file_content):
    """Extract the paragraph text of a .docx document held in memory.

    Args:
        file_content: Raw bytes of the uploaded document. They are wrapped
            in ``io.BytesIO`` before being handed to ``Document``.

    Returns:
        The text of every paragraph in ``doc.paragraphs``, in order, joined
        with newlines. An empty string when there are no paragraphs.
    """
    doc = Document(io.BytesIO(file_content))
    # Join with "\n" instead of concatenating directly: without a separator
    # the last word of one paragraph fuses with the first word of the next.
    return "\n".join(paragraph.text for paragraph in doc.paragraphs)
|
240 |
|
241 |
def get_text_from_EXCEL(file):
    """Render a spreadsheet as plain text.

    Args:
        file: The workbook, either as raw ``bytes``/``bytearray`` (wrapped
            in ``io.BytesIO`` here) or as anything ``pd.read_excel`` accepts
            directly, such as an open binary file object.

    Returns:
        ``DataFrame.to_string()`` of whatever ``pd.read_excel`` loads when
        called with its default arguments.
    """
    # Only raw bytes need wrapping; file objects are passed to pandas
    # unchanged so a caller holding an upload stream need not read it first.
    source = io.BytesIO(file) if isinstance(file, (bytes, bytearray)) else file
    df = pd.read_excel(io=source)
    return df.to_string()
|