Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,9 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
14 |
# Load models
|
15 |
editorial_model = "PleIAs/Estienne"
|
16 |
bibliography_model = "PleIAs/Bibliography-Formatter"
|
|
|
|
|
|
|
17 |
|
18 |
editorial_classifier = pipeline(
|
19 |
"token-classification", model=editorial_model, aggregation_strategy="simple", device=device
|
@@ -22,7 +25,7 @@ bibliography_classifier = pipeline(
|
|
22 |
"token-classification", model=bibliography_model, aggregation_strategy="simple", device=device
|
23 |
)
|
24 |
|
25 |
-
|
26 |
|
27 |
# Helper functions
|
28 |
def preprocess_text(text):
|
@@ -172,6 +175,10 @@ class CombinedProcessor:
|
|
172 |
bibliography_entries = editorial_df[editorial_df['entity_group'] == 'bibliography']['word'].tolist()
|
173 |
|
174 |
bibtex_entries = []
|
|
|
|
|
|
|
|
|
175 |
|
176 |
for entry in bibliography_entries:
|
177 |
print(entry)
|
|
|
14 |
# Load models
|
15 |
editorial_model = "PleIAs/Estienne"
|
16 |
bibliography_model = "PleIAs/Bibliography-Formatter"
|
17 |
+
bibliography_style = "PleIAs/Bibliography-Classifier"
|
18 |
+
|
19 |
+
tokenizer = AutoTokenizer.from_pretrained(editorial_model, model_max_length=512)
|
20 |
|
21 |
editorial_classifier = pipeline(
|
22 |
"token-classification", model=editorial_model, aggregation_strategy="simple", device=device
|
|
|
25 |
"token-classification", model=bibliography_model, aggregation_strategy="simple", device=device
|
26 |
)
|
27 |
|
28 |
+
style_classifier = pipeline("text-classification", model=bibliography_style, tokenizer=tokenizer, device=device)
|
29 |
|
30 |
# Helper functions
|
31 |
def preprocess_text(text):
|
|
|
175 |
bibliography_entries = editorial_df[editorial_df['entity_group'] == 'bibliography']['word'].tolist()
|
176 |
|
177 |
bibtex_entries = []
|
178 |
+
|
179 |
+
for entry in bibliography_entries:
|
180 |
+
style = style_classifier(entry, truncation=True, padding=True, top_k=None)
|
181 |
+
print(style)
|
182 |
|
183 |
for entry in bibliography_entries:
|
184 |
print(entry)
|