Spaces:
Build error
Build error
updated to singletons
Browse files
- .vscode/launch.json +0 -16
- .vscode/settings.json +0 -7
- rebel.py +17 -2
- utils.py +3 -1
.vscode/launch.json
DELETED
|
@@ -1,16 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
// Use IntelliSense to learn about possible attributes.
|
| 3 |
-
// Hover to view descriptions of existing attributes.
|
| 4 |
-
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
| 5 |
-
"version": "0.2.0",
|
| 6 |
-
"configurations": [
|
| 7 |
-
{
|
| 8 |
-
"name": "Python: Current File",
|
| 9 |
-
"type": "python",
|
| 10 |
-
"request": "launch",
|
| 11 |
-
"program": "${file}",
|
| 12 |
-
"console": "integratedTerminal",
|
| 13 |
-
"justMyCode": false
|
| 14 |
-
}
|
| 15 |
-
]
|
| 16 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.vscode/settings.json
DELETED
|
@@ -1,7 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"workbench.colorCustomizations": {
|
| 3 |
-
"activityBar.background": "#09323E",
|
| 4 |
-
"titleBar.activeBackground": "#0C4656",
|
| 5 |
-
"titleBar.activeForeground": "#F6FCFE"
|
| 6 |
-
}
|
| 7 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rebel.py
CHANGED
|
@@ -27,8 +27,23 @@ DEFAULT_LABEL_COLORS = {
|
|
| 27 |
"PERCENT": "#e4e7d2",
|
| 28 |
}
|
| 29 |
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
nlp = spacy.load("en_core_web_sm")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
doc = nlp("\n".join(texts).lower())
|
| 33 |
NERs = [ent.text for ent in doc.ents]
|
| 34 |
NER_types = [ent.label_ for ent in doc.ents]
|
|
@@ -78,7 +93,7 @@ def generate_knowledge_graph(texts: List[str], filename: str):
|
|
| 78 |
|
| 79 |
@lru_cache(maxsize=16)
|
| 80 |
def generate_partial_graph(text: str):
|
| 81 |
-
triplet_extractor =
|
| 82 |
a = triplet_extractor(text, return_tensors=True, return_text=False)[0]["generated_token_ids"]["output_ids"]
|
| 83 |
extracted_text = triplet_extractor.tokenizer.batch_decode(a)
|
| 84 |
extracted_triplets = extract_triplets(extracted_text[0])
|
|
|
|
| 27 |
"PERCENT": "#e4e7d2",
|
| 28 |
}
|
| 29 |
|
| 30 |
+
|
| 31 |
+
@st.experimental_singleton(max_entries=1)
|
| 32 |
+
def get_pipeline():
|
| 33 |
+
triplet_extractor = pipeline('text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
|
| 34 |
+
return triplet_extractor
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@st.experimental_singleton(max_entries=1)
|
| 39 |
+
def load_spacy():
|
| 40 |
nlp = spacy.load("en_core_web_sm")
|
| 41 |
+
return nlp
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def generate_knowledge_graph(texts: List[str], filename: str):
|
| 45 |
+
nlp = load_spacy()
|
| 46 |
+
|
| 47 |
doc = nlp("\n".join(texts).lower())
|
| 48 |
NERs = [ent.text for ent in doc.ents]
|
| 49 |
NER_types = [ent.label_ for ent in doc.ents]
|
|
|
|
| 93 |
|
| 94 |
@lru_cache(maxsize=16)
|
| 95 |
def generate_partial_graph(text: str):
|
| 96 |
+
triplet_extractor = get_pipeline()
|
| 97 |
a = triplet_extractor(text, return_tensors=True, return_text=False)[0]["generated_token_ids"]["output_ids"]
|
| 98 |
extracted_text = triplet_extractor.tokenizer.batch_decode(a)
|
| 99 |
extracted_triplets = extract_triplets(extracted_text[0])
|
utils.py
CHANGED
|
@@ -3,4 +3,6 @@ def clip_text(t, lenght = 4):
|
|
| 3 |
t_sub = t.replace("...", "dotdotdot")
|
| 4 |
t_clipped = ".".join(t_sub.split(".")[:lenght]) + "."
|
| 5 |
t_reverted = t_clipped.replace("dotdotdot", "...")
|
| 6 |
-
return t_reverted
|
|
|
|
|
|
|
|
|
| 3 |
t_sub = t.replace("...", "dotdotdot")
|
| 4 |
t_clipped = ".".join(t_sub.split(".")[:lenght]) + "."
|
| 5 |
t_reverted = t_clipped.replace("dotdotdot", "...")
|
| 6 |
+
return t_reverted
|
| 7 |
+
|
| 8 |
+
|