Djacon committed on
Commit
4a383f5
·
1 Parent(s): 0d4ebee

Add File support (.pdf | .txt)

Browse files
inference.py CHANGED
@@ -6,8 +6,8 @@ path = './model/'
6
  model = BertForSequenceClassification.from_pretrained(path)
7
  tokenizer = AutoTokenizer.from_pretrained(path)
8
 
9
- LABELS = ['радость', 'интерес', 'удивление', 'печаль', 'гнев', 'отвращение',
10
- 'страх', 'вина', 'нейтрально']
11
 
12
 
13
  # Probabilistic prediction of emotion in a text
 
6
  model = BertForSequenceClassification.from_pretrained(path)
7
  tokenizer = AutoTokenizer.from_pretrained(path)
8
 
9
+ LABELS = ['Joy', 'Interest', 'Surprise', 'Sadness', 'Anger', 'Disgust', 'Fear',
10
+ 'Guilt', 'Neutral']
11
 
12
 
13
  # Probabilistic prediction of emotion in a text
static/emotion_detection.html CHANGED
@@ -223,7 +223,7 @@
223
  <select id="sum-type"
224
  class="block w-full px-4 py-2.5 mt-2 text-gray-600 placeholder-gray-400 bg-white border border-gray-200 rounded-md focus:border-indigo-400 focus:outline-none focus:ring focus:ring-indigo-300 focus:ring-opacity-40">
225
  <option value="sum-text">Identify Text</option>
226
- <option value="sum-file">Identify PDF</option>
227
  </select>
228
  </div>
229
 
@@ -244,8 +244,8 @@
244
  d="M16.88 9.1A4 4 0 0 1 16 17H5a5 5 0 0 1-1-9.9V7a3 3 0 0 1 4.52-2.59A4.98 4.98 0 0 1 17 8c0 .38-.04.74-.12 1.1zM11 11h3l-4-4-4 4h3v3h2v-3z">
245
  </path>
246
  </svg>
247
- <span class="mt-4">Select PDF File</span>
248
- <input id="sum-file-input" type="file" class="hidden">
249
  </label>
250
  </div>
251
 
@@ -298,6 +298,7 @@
298
  </div>
299
  </div>
300
 
 
301
  <script src="js/detection.js"></script>
302
  </body>
303
  </html>
 
223
  <select id="sum-type"
224
  class="block w-full px-4 py-2.5 mt-2 text-gray-600 placeholder-gray-400 bg-white border border-gray-200 rounded-md focus:border-indigo-400 focus:outline-none focus:ring focus:ring-indigo-300 focus:ring-opacity-40">
225
  <option value="sum-text">Identify Text</option>
226
+ <option value="sum-file">Identify File</option>
227
  </select>
228
  </div>
229
 
 
244
  d="M16.88 9.1A4 4 0 0 1 16 17H5a5 5 0 0 1-1-9.9V7a3 3 0 0 1 4.52-2.59A4.98 4.98 0 0 1 17 8c0 .38-.04.74-.12 1.1zM11 11h3l-4-4-4 4h3v3h2v-3z">
245
  </path>
246
  </svg>
247
+ <span class="mt-4">Select File</span>
248
+ <input id="sum-file-input" type="file" accept=".pdf, .txt" class="hidden">
249
  </label>
250
  </div>
251
 
 
298
  </div>
299
  </div>
300
 
301
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.0.943/pdf.min.js"></script>
302
  <script src="js/detection.js"></script>
303
  </body>
304
  </html>
static/js/detection.js CHANGED
@@ -7,6 +7,7 @@ const sumVideo = document.getElementById("sum-video-div");
7
  // Form Data
8
  const selectOption = document.getElementById("sum-type");
9
  const sumTextInput = document.getElementById("sum-text-input");
 
10
  const sumVideoInput = document.getElementById("sum-video-input");
11
 
12
  // Error Output Section
@@ -16,8 +17,9 @@ const sumError = document.getElementById("sum-err");
16
  const extractText = document.getElementById("extracted-text");
17
  const summaryText = document.getElementById("summarized-text");
18
 
 
 
19
 
20
- // In progress...
21
  function _summarize(text) {
22
  var xhr = new XMLHttpRequest();
23
  xhr.open("POST", "/predict_emotion", true);
@@ -37,9 +39,40 @@ function _summarize(text) {
37
  return;
38
  }
39
 
40
- // In progress...
41
- function _extractPDF(file) {
42
- return file;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  }
44
 
45
  // In progress...
@@ -67,29 +100,37 @@ function summarize(event) {
67
  }
68
  break;
69
  case 'sum-file':
70
- sumError.innerText = 'This option is not supported, yet';
71
- sumError.classList.remove('hidden');
72
- return;
 
 
 
 
 
 
 
 
 
 
 
73
  }
74
 
75
  sumError.classList.add('hidden');
76
 
77
  // Here we can finally summarize data
78
- let extractedText = '';
79
  switch (selectOption.value) {
80
  case 'sum-text':
81
- extractedText = sumTextInput.value;
82
  break;
83
  case 'sum-file':
84
- extractedText = _extractPDF(sumFileInput);
85
  break;
86
  case 'sum-video':
87
- extractedText = _getCaptions(sumVideoInput.value);
88
  break;
89
  }
90
-
91
- extractText.value = extractedText;
92
- _summarize(extractedText);
93
  }
94
 
95
 
 
7
  // Form Data
8
  const selectOption = document.getElementById("sum-type");
9
  const sumTextInput = document.getElementById("sum-text-input");
10
+ const sumFileInput = document.getElementById("sum-file-input");
11
  const sumVideoInput = document.getElementById("sum-video-input");
12
 
13
  // Error Output Section
 
17
  const extractText = document.getElementById("extracted-text");
18
  const summaryText = document.getElementById("summarized-text");
19
 
20
+ const MAX_SIZE = 5000;
21
+
22
 
 
23
  function _summarize(text) {
24
  var xhr = new XMLHttpRequest();
25
  xhr.open("POST", "/predict_emotion", true);
 
39
  return;
40
  }
41
 
42
/**
 * Extract text from the file selected in `sumFileInput` and place at most
 * `MAX_SIZE` characters of it into `extractText`.
 *
 * Supported MIME types are `text/plain` and `application/pdf`; any other type
 * leaves `extractText` untouched. Reading is asynchronous: the text is not yet
 * in `extractText` when this function returns, so wait on the returned promise
 * before using it.
 *
 * @returns {Promise<string>} Resolves with the text written to `extractText`,
 *   or with '' when nothing was extracted (no file, unsupported type, or a
 *   read/parse failure, which is also shown through `sumError`).
 */
function _extractFile() {
  const file = sumFileInput.files[0];

  // Show a failure in the page's error element and yield an empty result.
  const fail = function (message, err) {
    if (err) {
      console.error(err);
    }
    sumError.innerText = message;
    sumError.classList.remove('hidden');
    return '';
  };

  // Adapt the callback-based FileReader to a promise of the raw file bytes.
  const readBytes = function () {
    return new Promise(function (resolve, reject) {
      const reader = new FileReader();
      reader.onload = function () {
        resolve(reader.result);
      };
      reader.onerror = function () {
        reject(reader.error);
      };
      reader.readAsArrayBuffer(file);
    });
  };

  if (!file) {
    return Promise.resolve(fail('There is no File'));
  }

  if (file.type === 'text/plain') {
    return readBytes()
      .then(function (buffer) {
        let text;
        try {
          // Strict UTF-8 first: invalid byte sequences throw instead of
          // silently producing replacement characters.
          text = new TextDecoder('utf-8', { fatal: true }).decode(buffer);
        } catch (e) {
          // Not valid UTF-8: fall back to the legacy Cyrillic code page that
          // was previously the only supported encoding.
          text = new TextDecoder('windows-1251').decode(buffer);
        }
        const limited = text.slice(0, MAX_SIZE);
        extractText.value = limited;
        return limited;
      })
      .catch(function (err) {
        return fail('Could not read the file', err);
      });
  }

  if (file.type === 'application/pdf') {
    extractText.value = '';
    return readBytes()
      .then(function (buffer) {
        return pdfjsLib.getDocument({ data: new Uint8Array(buffer) }).promise;
      })
      .then(function (pdfDocument) {
        const pieces = [];
        let joinedLength = 0;

        // Pages are read one after another so the text keeps document order
        // and no further page is requested once the limit is reached.
        const readPage = function (pageNum) {
          if (pageNum > pdfDocument.numPages || joinedLength >= MAX_SIZE) {
            return pieces;
          }
          return pdfDocument.getPage(pageNum)
            .then(function (pdfPage) {
              return pdfPage.getTextContent();
            })
            .then(function (textContent) {
              for (const textItem of textContent.items) {
                // Count the separating space that join(' ') will insert.
                joinedLength += (pieces.length > 0 ? 1 : 0) + textItem.str.length;
                pieces.push(textItem.str);
                if (joinedLength >= MAX_SIZE) {
                  break;
                }
              }
              return readPage(pageNum + 1);
            });
        };

        return readPage(1);
      })
      .then(function (pieces) {
        // The last item may overshoot the limit, so trim to exactly MAX_SIZE.
        const limited = pieces.join(' ').slice(0, MAX_SIZE);
        extractText.value = limited;
        return limited;
      })
      .catch(function (err) {
        return fail('Could not read the file', err);
      });
  }

  // Unsupported type: nothing to extract.
  return Promise.resolve('');
}
77
 
78
  // In progress...
 
100
  }
101
  break;
102
  case 'sum-file':
103
+ const allowedTypes = ['application/pdf', 'text/plain'];
104
+ const file = sumFileInput.files[0];
105
+
106
+ if (!file) {
107
+ sumError.innerText = 'There is no File';
108
+ sumError.classList.remove('hidden');
109
+ return;
110
+ }
111
+
112
+ if (!allowedTypes.includes(file.type)) {
113
+ sumError.innerText = 'Not supported type (Only `.pdf` or `.txt`)';
114
+ sumError.classList.remove('hidden');
115
+ return;
116
+ }
117
  }
118
 
119
  sumError.classList.add('hidden');
120
 
121
  // Here we can finally summarize data
 
122
  switch (selectOption.value) {
123
  case 'sum-text':
124
+ extractText.value = sumTextInput.value.slice(0, MAX_SIZE);
125
  break;
126
  case 'sum-file':
127
+ _extractFile();
128
  break;
129
  case 'sum-video':
130
+ _getCaptions(sumVideoInput.value);
131
  break;
132
  }
133
+ _summarize(extractText.value);
 
 
134
  }
135
 
136
 
static/js/summarizer.js CHANGED
@@ -7,6 +7,7 @@ const sumVideo = document.getElementById("sum-video-div");
7
  // Form Data
8
  const selectOption = document.getElementById("sum-type");
9
  const sumTextInput = document.getElementById("sum-text-input");
 
10
  const sumVideoInput = document.getElementById("sum-video-input");
11
 
12
  // Error Output Section
@@ -16,6 +17,8 @@ const sumError = document.getElementById("sum-err");
16
  const extractText = document.getElementById("extracted-text");
17
  const summaryText = document.getElementById("summarized-text");
18
 
 
 
19
 
20
  // In progress...
21
  function _summarize(text) {
@@ -35,9 +38,40 @@ function _summarize(text) {
35
  return;
36
  }
37
 
38
- // In progress...
39
- function _extractPDF(file) {
40
- return file;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  }
42
 
43
  // In progress...
@@ -65,29 +99,37 @@ function summarize(event) {
65
  }
66
  break;
67
  case 'sum-file':
68
- sumError.innerText = 'This option is not supported, yet';
69
- sumError.classList.remove('hidden');
70
- return;
 
 
 
 
 
 
 
 
 
 
 
71
  }
72
 
73
  sumError.classList.add('hidden');
74
 
75
  // Here we can finally summarize data
76
- let extractedText = '';
77
  switch (selectOption.value) {
78
  case 'sum-text':
79
- extractedText = sumTextInput.value;
80
  break;
81
  case 'sum-file':
82
- extractedText = _extractPDF(sumFileInput);
83
  break;
84
  case 'sum-video':
85
- extractedText = _getCaptions(sumVideoInput.value);
86
  break;
87
  }
88
-
89
- extractText.value = extractedText;
90
- _summarize(extractedText);
91
  }
92
 
93
 
 
7
  // Form Data
8
  const selectOption = document.getElementById("sum-type");
9
  const sumTextInput = document.getElementById("sum-text-input");
10
+ const sumFileInput = document.getElementById("sum-file-input");
11
  const sumVideoInput = document.getElementById("sum-video-input");
12
 
13
  // Error Output Section
 
17
  const extractText = document.getElementById("extracted-text");
18
  const summaryText = document.getElementById("summarized-text");
19
 
20
+ const MAX_SIZE = 5000;
21
+
22
 
23
  // In progress...
24
  function _summarize(text) {
 
38
  return;
39
  }
40
 
41
/**
 * Extract text from the file selected in `sumFileInput` and place at most
 * `MAX_SIZE` characters of it into `extractText`.
 *
 * Supported MIME types are `text/plain` and `application/pdf`; any other type
 * leaves `extractText` untouched. Reading is asynchronous: the text is not yet
 * in `extractText` when this function returns, so wait on the returned promise
 * before using it.
 *
 * @returns {Promise<string>} Resolves with the text written to `extractText`,
 *   or with '' when nothing was extracted (no file, unsupported type, or a
 *   read/parse failure, which is also shown through `sumError`).
 */
function _extractFile() {
  const file = sumFileInput.files[0];

  // Show a failure in the page's error element and yield an empty result.
  const fail = function (message, err) {
    if (err) {
      console.error(err);
    }
    sumError.innerText = message;
    sumError.classList.remove('hidden');
    return '';
  };

  // Adapt the callback-based FileReader to a promise of the raw file bytes.
  const readBytes = function () {
    return new Promise(function (resolve, reject) {
      const reader = new FileReader();
      reader.onload = function () {
        resolve(reader.result);
      };
      reader.onerror = function () {
        reject(reader.error);
      };
      reader.readAsArrayBuffer(file);
    });
  };

  if (!file) {
    return Promise.resolve(fail('There is no File'));
  }

  if (file.type === 'text/plain') {
    return readBytes()
      .then(function (buffer) {
        let text;
        try {
          // Strict UTF-8 first: invalid byte sequences throw instead of
          // silently producing replacement characters.
          text = new TextDecoder('utf-8', { fatal: true }).decode(buffer);
        } catch (e) {
          // Not valid UTF-8: fall back to the legacy Cyrillic code page that
          // was previously the only supported encoding.
          text = new TextDecoder('windows-1251').decode(buffer);
        }
        const limited = text.slice(0, MAX_SIZE);
        extractText.value = limited;
        return limited;
      })
      .catch(function (err) {
        return fail('Could not read the file', err);
      });
  }

  if (file.type === 'application/pdf') {
    extractText.value = '';
    return readBytes()
      .then(function (buffer) {
        return pdfjsLib.getDocument({ data: new Uint8Array(buffer) }).promise;
      })
      .then(function (pdfDocument) {
        const pieces = [];
        let joinedLength = 0;

        // Pages are read one after another so the text keeps document order
        // and no further page is requested once the limit is reached.
        const readPage = function (pageNum) {
          if (pageNum > pdfDocument.numPages || joinedLength >= MAX_SIZE) {
            return pieces;
          }
          return pdfDocument.getPage(pageNum)
            .then(function (pdfPage) {
              return pdfPage.getTextContent();
            })
            .then(function (textContent) {
              for (const textItem of textContent.items) {
                // Count the separating space that join(' ') will insert.
                joinedLength += (pieces.length > 0 ? 1 : 0) + textItem.str.length;
                pieces.push(textItem.str);
                if (joinedLength >= MAX_SIZE) {
                  break;
                }
              }
              return readPage(pageNum + 1);
            });
        };

        return readPage(1);
      })
      .then(function (pieces) {
        // The last item may overshoot the limit, so trim to exactly MAX_SIZE.
        const limited = pieces.join(' ').slice(0, MAX_SIZE);
        extractText.value = limited;
        return limited;
      })
      .catch(function (err) {
        return fail('Could not read the file', err);
      });
  }

  // Unsupported type: nothing to extract.
  return Promise.resolve('');
}
76
 
77
  // In progress...
 
99
  }
100
  break;
101
  case 'sum-file':
102
+ const allowedTypes = ['application/pdf', 'text/plain'];
103
+ const file = sumFileInput.files[0];
104
+
105
+ if (!file) {
106
+ sumError.innerText = 'There is no File';
107
+ sumError.classList.remove('hidden');
108
+ return;
109
+ }
110
+
111
+ if (!allowedTypes.includes(file.type)) {
112
+ sumError.innerText = 'Not supported type (Only `.pdf` or `.txt`)';
113
+ sumError.classList.remove('hidden');
114
+ return;
115
+ }
116
  }
117
 
118
  sumError.classList.add('hidden');
119
 
120
  // Here we can finally summarize data
 
121
  switch (selectOption.value) {
122
  case 'sum-text':
123
+ extractText.value = sumTextInput.value.slice(0, MAX_SIZE);
124
  break;
125
  case 'sum-file':
126
+ _extractFile();
127
  break;
128
  case 'sum-video':
129
+ _getCaptions(sumVideoInput.value);
130
  break;
131
  }
132
+ _summarize(extractText.value);
 
 
133
  }
134
 
135
 
static/text_summarizer.html CHANGED
@@ -223,7 +223,7 @@
223
  <select id="sum-type"
224
  class="block w-full px-4 py-2.5 mt-2 text-gray-600 placeholder-gray-400 bg-white border border-gray-200 rounded-md focus:border-indigo-400 focus:outline-none focus:ring focus:ring-indigo-300 focus:ring-opacity-40">
225
  <option value="sum-text">Summarize Text</option>
226
- <option value="sum-file">Summarize PDF</option>
227
  <!-- In progress... -->
228
  <!-- <option value="sum-video">Summarize Youtube Video</option> -->
229
  </select>
@@ -247,7 +247,7 @@
247
  </path>
248
  </svg>
249
  <span class="mt-4">Select PDF File</span>
250
- <input id="sum-file-input" type="file" class="hidden">
251
  </label>
252
  </div>
253
 
@@ -300,6 +300,7 @@
300
  </div>
301
  </div>
302
 
 
303
  <script src="js/summarizer.js"></script>
304
  </body>
305
  </html>
 
223
  <select id="sum-type"
224
  class="block w-full px-4 py-2.5 mt-2 text-gray-600 placeholder-gray-400 bg-white border border-gray-200 rounded-md focus:border-indigo-400 focus:outline-none focus:ring focus:ring-indigo-300 focus:ring-opacity-40">
225
  <option value="sum-text">Summarize Text</option>
226
+ <option value="sum-file">Summarize File</option>
227
  <!-- In progress... -->
228
  <!-- <option value="sum-video">Summarize Youtube Video</option> -->
229
  </select>
 
247
  </path>
248
  </svg>
249
  <span class="mt-4">Select PDF File</span>
250
+ <input id="sum-file-input" type="file" accept=".pdf, .txt" class="hidden">
251
  </label>
252
  </div>
253
 
 
300
  </div>
301
  </div>
302
 
303
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.0.943/pdf.min.js"></script>
304
  <script src="js/summarizer.js"></script>
305
  </body>
306
  </html>