Spaces:
Running
Running
Add File support (.pdf | .txt)
Browse files- inference.py +2 -2
- static/emotion_detection.html +4 -3
- static/js/detection.js +55 -14
- static/js/summarizer.js +55 -13
- static/text_summarizer.html +3 -2
inference.py
CHANGED
@@ -6,8 +6,8 @@ path = './model/'
|
|
6 |
model = BertForSequenceClassification.from_pretrained(path)
|
7 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
8 |
|
9 |
-
LABELS = ['
|
10 |
-
'
|
11 |
|
12 |
|
13 |
# Probabilistic prediction of emotion in a text
|
|
|
6 |
model = BertForSequenceClassification.from_pretrained(path)
|
7 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
8 |
|
9 |
+
LABELS = ['Joy', 'Interest', 'Surprise', 'Sadness', 'Anger', 'Disgust', 'Fear',
|
10 |
+
'Guilt', 'Neutral']
|
11 |
|
12 |
|
13 |
# Probabilistic prediction of emotion in a text
|
static/emotion_detection.html
CHANGED
@@ -223,7 +223,7 @@
|
|
223 |
<select id="sum-type"
|
224 |
class="block w-full px-4 py-2.5 mt-2 text-gray-600 placeholder-gray-400 bg-white border border-gray-200 rounded-md focus:border-indigo-400 focus:outline-none focus:ring focus:ring-indigo-300 focus:ring-opacity-40">
|
225 |
<option value="sum-text">Identify Text</option>
|
226 |
-
<option value="sum-file">Identify
|
227 |
</select>
|
228 |
</div>
|
229 |
|
@@ -244,8 +244,8 @@
|
|
244 |
d="M16.88 9.1A4 4 0 0 1 16 17H5a5 5 0 0 1-1-9.9V7a3 3 0 0 1 4.52-2.59A4.98 4.98 0 0 1 17 8c0 .38-.04.74-.12 1.1zM11 11h3l-4-4-4 4h3v3h2v-3z">
|
245 |
</path>
|
246 |
</svg>
|
247 |
-
<span class="mt-4">Select
|
248 |
-
<input id="sum-file-input" type="file" class="hidden">
|
249 |
</label>
|
250 |
</div>
|
251 |
|
@@ -298,6 +298,7 @@
|
|
298 |
</div>
|
299 |
</div>
|
300 |
|
|
|
301 |
<script src="js/detection.js"></script>
|
302 |
</body>
|
303 |
</html>
|
|
|
223 |
<select id="sum-type"
|
224 |
class="block w-full px-4 py-2.5 mt-2 text-gray-600 placeholder-gray-400 bg-white border border-gray-200 rounded-md focus:border-indigo-400 focus:outline-none focus:ring focus:ring-indigo-300 focus:ring-opacity-40">
|
225 |
<option value="sum-text">Identify Text</option>
|
226 |
+
<option value="sum-file">Identify File</option>
|
227 |
</select>
|
228 |
</div>
|
229 |
|
|
|
244 |
d="M16.88 9.1A4 4 0 0 1 16 17H5a5 5 0 0 1-1-9.9V7a3 3 0 0 1 4.52-2.59A4.98 4.98 0 0 1 17 8c0 .38-.04.74-.12 1.1zM11 11h3l-4-4-4 4h3v3h2v-3z">
|
245 |
</path>
|
246 |
</svg>
|
247 |
+
<span class="mt-4">Select File</span>
|
248 |
+
<input id="sum-file-input" type="file" accept=".pdf, .txt" class="hidden">
|
249 |
</label>
|
250 |
</div>
|
251 |
|
|
|
298 |
</div>
|
299 |
</div>
|
300 |
|
301 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.0.943/pdf.min.js"></script>
|
302 |
<script src="js/detection.js"></script>
|
303 |
</body>
|
304 |
</html>
|
static/js/detection.js
CHANGED
@@ -7,6 +7,7 @@ const sumVideo = document.getElementById("sum-video-div");
|
|
7 |
// Form Data
|
8 |
const selectOption = document.getElementById("sum-type");
|
9 |
const sumTextInput = document.getElementById("sum-text-input");
|
|
|
10 |
const sumVideoInput = document.getElementById("sum-video-input");
|
11 |
|
12 |
// Error Output Section
|
@@ -16,8 +17,9 @@ const sumError = document.getElementById("sum-err");
|
|
16 |
const extractText = document.getElementById("extracted-text");
|
17 |
const summaryText = document.getElementById("summarized-text");
|
18 |
|
|
|
|
|
19 |
|
20 |
-
// In progress...
|
21 |
function _summarize(text) {
|
22 |
var xhr = new XMLHttpRequest();
|
23 |
xhr.open("POST", "/predict_emotion", true);
|
@@ -37,9 +39,40 @@ function _summarize(text) {
|
|
37 |
return;
|
38 |
}
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
}
|
44 |
|
45 |
// In progress...
|
@@ -67,29 +100,37 @@ function summarize(event) {
|
|
67 |
}
|
68 |
break;
|
69 |
case 'sum-file':
|
70 |
-
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
}
|
74 |
|
75 |
sumError.classList.add('hidden');
|
76 |
|
77 |
// Here we can finally summarize data
|
78 |
-
let extractedText = '';
|
79 |
switch (selectOption.value) {
|
80 |
case 'sum-text':
|
81 |
-
|
82 |
break;
|
83 |
case 'sum-file':
|
84 |
-
|
85 |
break;
|
86 |
case 'sum-video':
|
87 |
-
|
88 |
break;
|
89 |
}
|
90 |
-
|
91 |
-
extractText.value = extractedText;
|
92 |
-
_summarize(extractedText);
|
93 |
}
|
94 |
|
95 |
|
|
|
7 |
// Form Data
|
8 |
const selectOption = document.getElementById("sum-type");
|
9 |
const sumTextInput = document.getElementById("sum-text-input");
|
10 |
+
const sumFileInput = document.getElementById("sum-file-input");
|
11 |
const sumVideoInput = document.getElementById("sum-video-input");
|
12 |
|
13 |
// Error Output Section
|
|
|
17 |
const extractText = document.getElementById("extracted-text");
|
18 |
const summaryText = document.getElementById("summarized-text");
|
19 |
|
20 |
+
const MAX_SIZE = 5000;
|
21 |
+
|
22 |
|
|
|
23 |
function _summarize(text) {
|
24 |
var xhr = new XMLHttpRequest();
|
25 |
xhr.open("POST", "/predict_emotion", true);
|
|
|
39 |
return;
|
40 |
}
|
41 |
|
42 |
+
/**
 * Extracts text from the file selected in `sumFileInput` and stores at most
 * MAX_SIZE characters of it in `extractText.value`.
 *
 * Supported MIME types are `text/plain` and `application/pdf`; any other type
 * leaves `extractText.value` untouched.
 *
 * Extraction is asynchronous (FileReader and pdf.js are both callback/promise
 * based), so `extractText.value` is NOT populated when this function returns.
 * Callers that need the text must wait for the returned promise.
 *
 * @returns {Promise<string>} Resolves with the text written to
 *     `extractText.value`, or with `''` when nothing was extracted (no file,
 *     unsupported type, or a read/parse failure). Never rejects: failures are
 *     logged and shown in `sumError`.
 */
function _extractFile() {
    const file = sumFileInput.files[0];

    // Callers validate the selection first, but do not crash if they did not.
    if (!file) {
        return Promise.resolve('');
    }

    // Adapts one FileReader read to a Promise; `startRead` kicks off the read.
    const readWith = function (startRead) {
        return new Promise(function (resolve, reject) {
            const reader = new FileReader();
            reader.onload = function () {
                resolve(reader.result);
            };
            reader.onerror = function () {
                reject(reader.error);
            };
            startRead(reader);
        });
    };

    // Walks the pages strictly in order (page N+1 is requested only after
    // page N was consumed) so the text keeps document order, and stops
    // requesting pages as soon as MAX_SIZE characters were collected.
    const collectPdfText = function (pdfDocument) {
        const parts = [];
        let size = 0; // length of parts.join(' ') so far

        const readPage = function (pageNum) {
            if (pageNum > pdfDocument.numPages || size >= MAX_SIZE) {
                return Promise.resolve(parts.join(' ').slice(0, MAX_SIZE));
            }
            return pdfDocument.getPage(pageNum)
                .then(function (pdfPage) {
                    return pdfPage.getTextContent();
                })
                .then(function (textContent) {
                    for (const textItem of textContent.items) {
                        // +1 accounts for the joining space before every item but the first.
                        size += textItem.str.length + (parts.length > 0 ? 1 : 0);
                        parts.push(textItem.str);
                        if (size >= MAX_SIZE) break;
                    }
                    return readPage(pageNum + 1);
                });
        };

        return readPage(1);
    };

    let extraction;
    if (file.type === 'text/plain') {
        // NOTE(review): the encoding is hard-coded to CP1251, so UTF-8 files with
        // non-ASCII characters will be decoded incorrectly - confirm this is intended.
        extraction = readWith(function (reader) {
            reader.readAsText(file, 'CP1251');
        }).then(function (text) {
            return text.slice(0, MAX_SIZE);
        });
    } else if (file.type === 'application/pdf') {
        // Clear stale text right away so an old result is not mistaken for this file's.
        extractText.value = '';
        extraction = readWith(function (reader) {
            reader.readAsDataURL(file);
        }).then(function (pdfData) {
            return pdfjsLib.getDocument(pdfData).promise;
        }).then(collectPdfText);
    } else {
        return Promise.resolve('');
    }

    return extraction
        .then(function (text) {
            extractText.value = text;
            return text;
        })
        .catch(function (err) {
            // Surface the failure instead of silently leaving the field empty.
            console.error('File extraction failed:', err);
            sumError.innerText = 'Could not read the file';
            sumError.classList.remove('hidden');
            return '';
        });
}
|
77 |
|
78 |
// In progress...
|
|
|
100 |
}
|
101 |
break;
|
102 |
case 'sum-file':
|
103 |
+
const allowedTypes = ['application/pdf', 'text/plain'];
|
104 |
+
const file = sumFileInput.files[0];
|
105 |
+
|
106 |
+
if (!file) {
|
107 |
+
sumError.innerText = 'There is no File';
|
108 |
+
sumError.classList.remove('hidden');
|
109 |
+
return;
|
110 |
+
}
|
111 |
+
|
112 |
+
if (!allowedTypes.includes(file.type)) {
|
113 |
+
sumError.innerText = 'Not supported type (Only `.pdf` or `.txt`)';
|
114 |
+
sumError.classList.remove('hidden');
|
115 |
+
return;
|
116 |
+
}
|
117 |
}
|
118 |
|
119 |
sumError.classList.add('hidden');
|
120 |
|
121 |
// Here we can finally summarize data
|
|
|
122 |
switch (selectOption.value) {
|
123 |
case 'sum-text':
|
124 |
+
extractText.value = sumTextInput.value.slice(0, MAX_SIZE);
|
125 |
break;
|
126 |
case 'sum-file':
|
127 |
+
_extractFile();
|
128 |
break;
|
129 |
case 'sum-video':
|
130 |
+
_getCaptions(sumVideoInput.value);
|
131 |
break;
|
132 |
}
|
133 |
+
_summarize(extractText.value);
|
|
|
|
|
134 |
}
|
135 |
|
136 |
|
static/js/summarizer.js
CHANGED
@@ -7,6 +7,7 @@ const sumVideo = document.getElementById("sum-video-div");
|
|
7 |
// Form Data
|
8 |
const selectOption = document.getElementById("sum-type");
|
9 |
const sumTextInput = document.getElementById("sum-text-input");
|
|
|
10 |
const sumVideoInput = document.getElementById("sum-video-input");
|
11 |
|
12 |
// Error Output Section
|
@@ -16,6 +17,8 @@ const sumError = document.getElementById("sum-err");
|
|
16 |
const extractText = document.getElementById("extracted-text");
|
17 |
const summaryText = document.getElementById("summarized-text");
|
18 |
|
|
|
|
|
19 |
|
20 |
// In progress...
|
21 |
function _summarize(text) {
|
@@ -35,9 +38,40 @@ function _summarize(text) {
|
|
35 |
return;
|
36 |
}
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
}
|
42 |
|
43 |
// In progress...
|
@@ -65,29 +99,37 @@ function summarize(event) {
|
|
65 |
}
|
66 |
break;
|
67 |
case 'sum-file':
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
}
|
72 |
|
73 |
sumError.classList.add('hidden');
|
74 |
|
75 |
// Here we can finally summarize data
|
76 |
-
let extractedText = '';
|
77 |
switch (selectOption.value) {
|
78 |
case 'sum-text':
|
79 |
-
|
80 |
break;
|
81 |
case 'sum-file':
|
82 |
-
|
83 |
break;
|
84 |
case 'sum-video':
|
85 |
-
|
86 |
break;
|
87 |
}
|
88 |
-
|
89 |
-
extractText.value = extractedText;
|
90 |
-
_summarize(extractedText);
|
91 |
}
|
92 |
|
93 |
|
|
|
7 |
// Form Data
|
8 |
const selectOption = document.getElementById("sum-type");
|
9 |
const sumTextInput = document.getElementById("sum-text-input");
|
10 |
+
const sumFileInput = document.getElementById("sum-file-input");
|
11 |
const sumVideoInput = document.getElementById("sum-video-input");
|
12 |
|
13 |
// Error Output Section
|
|
|
17 |
const extractText = document.getElementById("extracted-text");
|
18 |
const summaryText = document.getElementById("summarized-text");
|
19 |
|
20 |
+
const MAX_SIZE = 5000;
|
21 |
+
|
22 |
|
23 |
// In progress...
|
24 |
function _summarize(text) {
|
|
|
38 |
return;
|
39 |
}
|
40 |
|
41 |
+
/**
 * Extracts text from the file selected in `sumFileInput` and stores at most
 * MAX_SIZE characters of it in `extractText.value`.
 *
 * Supported MIME types are `text/plain` and `application/pdf`; any other type
 * leaves `extractText.value` untouched.
 *
 * Extraction is asynchronous (FileReader and pdf.js are both callback/promise
 * based), so `extractText.value` is NOT populated when this function returns.
 * Callers that need the text must wait for the returned promise.
 *
 * @returns {Promise<string>} Resolves with the text written to
 *     `extractText.value`, or with `''` when nothing was extracted (no file,
 *     unsupported type, or a read/parse failure). Never rejects: failures are
 *     logged and shown in `sumError`.
 */
function _extractFile() {
    const file = sumFileInput.files[0];

    // Callers validate the selection first, but do not crash if they did not.
    if (!file) {
        return Promise.resolve('');
    }

    // Adapts one FileReader read to a Promise; `startRead` kicks off the read.
    const readWith = function (startRead) {
        return new Promise(function (resolve, reject) {
            const reader = new FileReader();
            reader.onload = function () {
                resolve(reader.result);
            };
            reader.onerror = function () {
                reject(reader.error);
            };
            startRead(reader);
        });
    };

    // Walks the pages strictly in order (page N+1 is requested only after
    // page N was consumed) so the text keeps document order, and stops
    // requesting pages as soon as MAX_SIZE characters were collected.
    const collectPdfText = function (pdfDocument) {
        const parts = [];
        let size = 0; // length of parts.join(' ') so far

        const readPage = function (pageNum) {
            if (pageNum > pdfDocument.numPages || size >= MAX_SIZE) {
                return Promise.resolve(parts.join(' ').slice(0, MAX_SIZE));
            }
            return pdfDocument.getPage(pageNum)
                .then(function (pdfPage) {
                    return pdfPage.getTextContent();
                })
                .then(function (textContent) {
                    for (const textItem of textContent.items) {
                        // +1 accounts for the joining space before every item but the first.
                        size += textItem.str.length + (parts.length > 0 ? 1 : 0);
                        parts.push(textItem.str);
                        if (size >= MAX_SIZE) break;
                    }
                    return readPage(pageNum + 1);
                });
        };

        return readPage(1);
    };

    let extraction;
    if (file.type === 'text/plain') {
        // NOTE(review): the encoding is hard-coded to CP1251, so UTF-8 files with
        // non-ASCII characters will be decoded incorrectly - confirm this is intended.
        extraction = readWith(function (reader) {
            reader.readAsText(file, 'CP1251');
        }).then(function (text) {
            return text.slice(0, MAX_SIZE);
        });
    } else if (file.type === 'application/pdf') {
        // Clear stale text right away so an old result is not mistaken for this file's.
        extractText.value = '';
        extraction = readWith(function (reader) {
            reader.readAsDataURL(file);
        }).then(function (pdfData) {
            return pdfjsLib.getDocument(pdfData).promise;
        }).then(collectPdfText);
    } else {
        return Promise.resolve('');
    }

    return extraction
        .then(function (text) {
            extractText.value = text;
            return text;
        })
        .catch(function (err) {
            // Surface the failure instead of silently leaving the field empty.
            console.error('File extraction failed:', err);
            sumError.innerText = 'Could not read the file';
            sumError.classList.remove('hidden');
            return '';
        });
}
|
76 |
|
77 |
// In progress...
|
|
|
99 |
}
|
100 |
break;
|
101 |
case 'sum-file':
|
102 |
+
const allowedTypes = ['application/pdf', 'text/plain'];
|
103 |
+
const file = sumFileInput.files[0];
|
104 |
+
|
105 |
+
if (!file) {
|
106 |
+
sumError.innerText = 'There is no File';
|
107 |
+
sumError.classList.remove('hidden');
|
108 |
+
return;
|
109 |
+
}
|
110 |
+
|
111 |
+
if (!allowedTypes.includes(file.type)) {
|
112 |
+
sumError.innerText = 'Not supported type (Only `.pdf` or `.txt`)';
|
113 |
+
sumError.classList.remove('hidden');
|
114 |
+
return;
|
115 |
+
}
|
116 |
}
|
117 |
|
118 |
sumError.classList.add('hidden');
|
119 |
|
120 |
// Here we can finally summarize data
|
|
|
121 |
switch (selectOption.value) {
|
122 |
case 'sum-text':
|
123 |
+
extractText.value = sumTextInput.value.slice(0, MAX_SIZE);
|
124 |
break;
|
125 |
case 'sum-file':
|
126 |
+
_extractFile();
|
127 |
break;
|
128 |
case 'sum-video':
|
129 |
+
_getCaptions(sumVideoInput.value);
|
130 |
break;
|
131 |
}
|
132 |
+
_summarize(extractText.value);
|
|
|
|
|
133 |
}
|
134 |
|
135 |
|
static/text_summarizer.html
CHANGED
@@ -223,7 +223,7 @@
|
|
223 |
<select id="sum-type"
|
224 |
class="block w-full px-4 py-2.5 mt-2 text-gray-600 placeholder-gray-400 bg-white border border-gray-200 rounded-md focus:border-indigo-400 focus:outline-none focus:ring focus:ring-indigo-300 focus:ring-opacity-40">
|
225 |
<option value="sum-text">Summarize Text</option>
|
226 |
-
<option value="sum-file">Summarize
|
227 |
<!-- In progress... -->
|
228 |
<!-- <option value="sum-video">Summarize Youtube Video</option> -->
|
229 |
</select>
|
@@ -247,7 +247,7 @@
|
|
247 |
</path>
|
248 |
</svg>
|
249 |
<span class="mt-4">Select PDF File</span>
|
250 |
-
<input id="sum-file-input" type="file" class="hidden">
|
251 |
</label>
|
252 |
</div>
|
253 |
|
@@ -300,6 +300,7 @@
|
|
300 |
</div>
|
301 |
</div>
|
302 |
|
|
|
303 |
<script src="js/summarizer.js"></script>
|
304 |
</body>
|
305 |
</html>
|
|
|
223 |
<select id="sum-type"
|
224 |
class="block w-full px-4 py-2.5 mt-2 text-gray-600 placeholder-gray-400 bg-white border border-gray-200 rounded-md focus:border-indigo-400 focus:outline-none focus:ring focus:ring-indigo-300 focus:ring-opacity-40">
|
225 |
<option value="sum-text">Summarize Text</option>
|
226 |
+
<option value="sum-file">Summarize File</option>
|
227 |
<!-- In progress... -->
|
228 |
<!-- <option value="sum-video">Summarize Youtube Video</option> -->
|
229 |
</select>
|
|
|
247 |
</path>
|
248 |
</svg>
|
249 |
<span class="mt-4">Select PDF File</span>
|
250 |
+
<input id="sum-file-input" type="file" accept=".pdf, .txt" class="hidden">
|
251 |
</label>
|
252 |
</div>
|
253 |
|
|
|
300 |
</div>
|
301 |
</div>
|
302 |
|
303 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.0.943/pdf.min.js"></script>
|
304 |
<script src="js/summarizer.js"></script>
|
305 |
</body>
|
306 |
</html>
|