Spaces:
Runtime error
Runtime error
Commit
·
592d17f
1
Parent(s):
0c98223
Updates to the finbert function
Browse files
app.py
CHANGED
@@ -107,7 +107,7 @@ def finbert(word):
|
|
107 |
data['Cluster'] = pd.Series(assigned_clusters, index = data.index)
|
108 |
data['Centroid'] = data['Cluster'].apply(lambda x: Kclusterer.means()[x])
|
109 |
|
110 |
-
# return the text if
|
111 |
except ValueError:
|
112 |
return text
|
113 |
|
@@ -122,10 +122,17 @@ def finbert(word):
|
|
122 |
summary = " ".join(data.sort_values(
|
123 |
'Distance_From_Centroid',
|
124 |
ascending = True).groupby('Cluster').head(1).sort_index()['Sentences'].tolist())
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
|
128 |
-
|
129 |
def pegasus(text):
|
130 |
'''A function to obtain summaries for each tokenized sentence.
|
131 |
It returns a summarized document as output'''
|
|
|
107 |
data['Cluster'] = pd.Series(assigned_clusters, index = data.index)
|
108 |
data['Centroid'] = data['Cluster'].apply(lambda x: Kclusterer.means()[x])
|
109 |
|
110 |
+
# return the text unchanged if the clustering algorithm raises an exception, and move on to the next text file
|
111 |
except ValueError:
|
112 |
return text
|
113 |
|
|
|
122 |
summary = " ".join(data.sort_values(
|
123 |
'Distance_From_Centroid',
|
124 |
ascending = True).groupby('Cluster').head(1).sort_index()['Sentences'].tolist())
|
125 |
+
import re
|
126 |
+
words = list()
|
127 |
+
for text in summary.split():
|
128 |
+
text = re.sub(r'\n','',text)
|
129 |
+
text = re.sub(r'\s$','',text)
|
130 |
+
words.append(text)
|
131 |
+
summary = " ".join(words)
|
132 |
+
|
133 |
+
return (summary,len(word),len(summary))
|
134 |
|
135 |
|
|
|
136 |
def pegasus(text):
|
137 |
'''A function to obtain summaries for each tokenized sentence.
|
138 |
It returns a summarized document as output'''
|