Pclanglais committed on
Commit
efdd1b6
·
verified ·
1 Parent(s): 832ed77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -8
app.py CHANGED
@@ -152,8 +152,12 @@ def save_bibtex(bibtex_content):
152
 
153
  class CombinedProcessor:
154
  def process(self, user_message):
 
 
 
 
155
  editorial_text = re.sub("\n", " ¶ ", user_message)
156
- #editorial_text = re.sub(r'\s*([;:,])\s*', r' \1 ', editorial_text)
157
  print(editorial_text)
158
  num_tokens = len(tokenizer.tokenize(editorial_text))
159
 
@@ -168,14 +172,8 @@ class CombinedProcessor:
168
  bibliography_entries = editorial_df[editorial_df['entity_group'] == 'bibliography']['word'].tolist()
169
 
170
  bibtex_entries = []
171
-
172
- corrected_bibliography_entries = []
173
- for entry in bibliography_entries:
174
- entry = re.sub(r' +?¶ +?', r'¶', entry)
175
- entry = entry.split("¶¶")
176
- corrected_bibliography_entries.extend(entry)
177
 
178
- for entry in corrected_bibliography_entries:
179
  print(entry)
180
  entry = re.sub(r'\s*([;:,\.])\s*', r' \1 ', entry)
181
  entry = re.sub(r'- ?[\n¶] ?', r'', entry)
 
152
 
153
  class CombinedProcessor:
154
  def process(self, user_message):
155
+ #Precaution to reinforce bibliography detection.
156
+ editorial_text = "Bibliography\n" + user_message
157
+
158
+ #Our fix for the lack of newline in deberta
159
  editorial_text = re.sub("\n", " ¶ ", user_message)
160
+
161
  print(editorial_text)
162
  num_tokens = len(tokenizer.tokenize(editorial_text))
163
 
 
172
  bibliography_entries = editorial_df[editorial_df['entity_group'] == 'bibliography']['word'].tolist()
173
 
174
  bibtex_entries = []
 
 
 
 
 
 
175
 
176
+ for entry in bibliography_entries:
177
  print(entry)
178
  entry = re.sub(r'\s*([;:,\.])\s*', r' \1 ', entry)
179
  entry = re.sub(r'- ?[\n¶] ?', r'', entry)