WenqingZhang committed on
Commit
b80b2b5
·
verified ·
1 Parent(s): fcbbfa2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -9
app.py CHANGED
@@ -42,19 +42,44 @@ print("Loading the transformer model...")
42
  transformer_vectorizer = TransformerVectorizer()
43
  vectorizer = TfidfVectorizer()
44
  def process_input(input_type, user_input, uploaded_file):
45
- print('ooooocr')
46
  if input_type == "File Upload" and uploaded_file is not None:
47
-
48
- with open(uploaded_file.name, "rb") as f:
49
- image = f.read()
50
- results = reader.readtext(image)
51
-
52
- extracted_text = ' '.join([text[1] for text in results])
53
- print("text:")
54
- print(extracted_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  return extracted_text
 
56
  elif input_type == "Text Input":
57
  return user_input
 
 
 
58
  def toggle_visibility(input_type):
59
  user_input_visible = input_type == "Text Input"
60
  file_upload_visible = input_type == "File Upload"
 
42
  transformer_vectorizer = TransformerVectorizer()
43
  vectorizer = TfidfVectorizer()
44
  def process_input(input_type, user_input, uploaded_file):
 
45
  if input_type == "File Upload" and uploaded_file is not None:
46
+ file_ext = os.path.splitext(uploaded_file.name)[1].lower()
47
+ extracted_text = ""
48
+
49
+ if file_ext in ['.jpg', '.jpeg', '.png']:
50
+ # 处理图片文件
51
+ results = reader.readtext(uploaded_file.name)
52
+ extracted_text = ' '.join([text[1] for text in results])
53
+ print("从图片提取的文本:")
54
+ print(extracted_text)
55
+
56
+ elif file_ext == '.txt':
57
+ # 处理TXT文件
58
+ with open(uploaded_file.name, 'r', encoding='utf-8') as f:
59
+ extracted_text = f.read()
60
+ print("从TXT文件提取的文本:")
61
+ print(extracted_text)
62
+
63
+ elif file_ext == '.pdf':
64
+ # 处理PDF文件
65
+ with open(uploaded_file.name, 'rb') as f:
66
+ reader_pdf = PyPDF2.PdfReader(f)
67
+ for page_num in range(len(reader_pdf.pages)):
68
+ page = reader_pdf.pages[page_num]
69
+ extracted_text += page.extract_text() + "\n"
70
+ print("从PDF文件提取的文本:")
71
+ print(extracted_text)
72
+
73
+ else:
74
+ return "不支持的文件类型。请上传 .jpg, .jpeg, .png, .txt 或 .pdf 文件。"
75
+
76
  return extracted_text
77
+
78
  elif input_type == "Text Input":
79
  return user_input
80
+ else:
81
+ return "无效的输入类型或未上传文件。"
82
+
83
  def toggle_visibility(input_type):
84
  user_input_visible = input_type == "Text Input"
85
  file_upload_visible = input_type == "File Upload"