Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -254,9 +254,7 @@ async def process_document(
|
|
254 |
table_enable=table_enable,
|
255 |
language=language
|
256 |
)
|
257 |
-
source_1 = txt_content
|
258 |
|
259 |
-
# Source 4: PyMuPDF для таблиц
|
260 |
def extract_text_pymupdf(pdf_path):
|
261 |
try:
|
262 |
doc = fitz.open(pdf_path)
|
@@ -323,21 +321,22 @@ async def process_document(
|
|
323 |
|
324 |
source_4_text, source_4_tables = extract_text_pymupdf(temp_path)
|
325 |
|
326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
"sources": {
|
328 |
-
"
|
329 |
-
"text":
|
330 |
-
}
|
331 |
-
"pymupdf": {
|
332 |
-
"text": source_4_text,
|
333 |
-
"tables": source_4_tables
|
334 |
-
},
|
335 |
-
"combined": f"{source_1}\n\n### MAGIC_PDF_DATA ###\n{source_1}\n\n### PYMUPDF_DATA ###\n{source_4_tables}"
|
336 |
}
|
337 |
-
}
|
338 |
-
|
339 |
-
return JSONResponse({
|
340 |
-
"sources": validated_sources
|
341 |
})
|
342 |
|
343 |
except Exception as e:
|
|
|
254 |
table_enable=table_enable,
|
255 |
language=language
|
256 |
)
|
|
|
257 |
|
|
|
258 |
def extract_text_pymupdf(pdf_path):
|
259 |
try:
|
260 |
doc = fitz.open(pdf_path)
|
|
|
321 |
|
322 |
source_4_text, source_4_tables = extract_text_pymupdf(temp_path)
|
323 |
|
324 |
+
# Форматируем в том виде, в котором ожидает DeepSeek
|
325 |
+
combined_text = f"""
|
326 |
+
### MAGIC_PDF_DATA ###
|
327 |
+
{txt_content}
|
328 |
+
|
329 |
+
### PYMUPDF_DATA ###
|
330 |
+
{source_4_tables}
|
331 |
+
"""
|
332 |
+
|
333 |
+
# Возвращаем в формате, который ожидает DeepSeek
|
334 |
+
return JSONResponse({
|
335 |
"sources": {
|
336 |
+
"combined": {
|
337 |
+
"text": combined_text
|
338 |
+
}
|
|
|
|
|
|
|
|
|
|
|
339 |
}
|
|
|
|
|
|
|
|
|
340 |
})
|
341 |
|
342 |
except Exception as e:
|