dmitrynovikov7211 committed on
Commit
9478da6
·
verified ·
1 Parent(s): 5832897

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -15
app.py CHANGED
@@ -254,9 +254,7 @@ async def process_document(
254
  table_enable=table_enable,
255
  language=language
256
  )
257
- source_1 = txt_content
258
 
259
- # Source 4: PyMuPDF для таблиц
260
  def extract_text_pymupdf(pdf_path):
261
  try:
262
  doc = fitz.open(pdf_path)
@@ -323,21 +321,22 @@ async def process_document(
323
 
324
  source_4_text, source_4_tables = extract_text_pymupdf(temp_path)
325
 
326
- validated_sources = {
 
 
 
 
 
 
 
 
 
 
327
  "sources": {
328
- "magic_pdf": {
329
- "text": source_1
330
- },
331
- "pymupdf": {
332
- "text": source_4_text,
333
- "tables": source_4_tables
334
- },
335
- "combined": f"{source_1}\n\n### MAGIC_PDF_DATA ###\n{source_1}\n\n### PYMUPDF_DATA ###\n{source_4_tables}"
336
  }
337
- }
338
-
339
- return JSONResponse({
340
- "sources": validated_sources
341
  })
342
 
343
  except Exception as e:
 
254
  table_enable=table_enable,
255
  language=language
256
  )
 
257
 
 
258
  def extract_text_pymupdf(pdf_path):
259
  try:
260
  doc = fitz.open(pdf_path)
 
321
 
322
  source_4_text, source_4_tables = extract_text_pymupdf(temp_path)
323
 
324
+ # Форматируем в том виде, в котором ожидает DeepSeek
325
+ combined_text = f"""
326
+ ### MAGIC_PDF_DATA ###
327
+ {txt_content}
328
+
329
+ ### PYMUPDF_DATA ###
330
+ {source_4_tables}
331
+ """
332
+
333
+ # Возвращаем в формате, который ожидает DeepSeek
334
+ return JSONResponse({
335
  "sources": {
336
+ "combined": {
337
+ "text": combined_text
338
+ }
 
 
 
 
 
339
  }
 
 
 
 
340
  })
341
 
342
  except Exception as e: