Pclanglais committed on
Commit
445c0cd
·
verified ·
1 Parent(s): 959ba25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -1
app.py CHANGED
@@ -14,6 +14,9 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
14
  # Load models
15
  editorial_model = "PleIAs/Estienne"
16
  bibliography_model = "PleIAs/Bibliography-Formatter"
 
 
 
17
 
18
  editorial_classifier = pipeline(
19
  "token-classification", model=editorial_model, aggregation_strategy="simple", device=device
@@ -22,7 +25,7 @@ bibliography_classifier = pipeline(
22
  "token-classification", model=bibliography_model, aggregation_strategy="simple", device=device
23
  )
24
 
25
- tokenizer = AutoTokenizer.from_pretrained(editorial_model, model_max_length=512)
26
 
27
  # Helper functions
28
  def preprocess_text(text):
@@ -172,6 +175,10 @@ class CombinedProcessor:
172
  bibliography_entries = editorial_df[editorial_df['entity_group'] == 'bibliography']['word'].tolist()
173
 
174
  bibtex_entries = []
 
 
 
 
175
 
176
  for entry in bibliography_entries:
177
  print(entry)
 
14
  # Load models
15
  editorial_model = "PleIAs/Estienne"
16
  bibliography_model = "PleIAs/Bibliography-Formatter"
17
+ bibliography_style = "PleIAs/Bibliography-Classifier"
18
+
19
+ tokenizer = AutoTokenizer.from_pretrained(editorial_model, model_max_length=512)
20
 
21
  editorial_classifier = pipeline(
22
  "token-classification", model=editorial_model, aggregation_strategy="simple", device=device
 
25
  "token-classification", model=bibliography_model, aggregation_strategy="simple", device=device
26
  )
27
 
28
+ style_classifier = pipeline("text-classification", model=bibliography_style, tokenizer=tokenizer, device=device)
29
 
30
  # Helper functions
31
  def preprocess_text(text):
 
175
  bibliography_entries = editorial_df[editorial_df['entity_group'] == 'bibliography']['word'].tolist()
176
 
177
  bibtex_entries = []
178
+
179
+ for entry in bibliography_entries:
180
+ style = style_classifier(entry, truncation=True, padding=True, top_k=None)
181
+ print(style)
182
 
183
  for entry in bibliography_entries:
184
  print(entry)