Spaces:
Running
Running
change from year to publication year
Browse files
document_qa/document_qa_engine.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import copy
|
| 2 |
-
import json
|
| 3 |
import os
|
| 4 |
from pathlib import Path
|
| 5 |
from typing import Union, Any
|
| 6 |
|
|
|
|
| 7 |
from grobid_client.grobid_client import GrobidClient
|
| 8 |
from langchain.chains import create_extraction_chain
|
| 9 |
from langchain.chains.question_answering import load_qa_chain
|
|
@@ -13,8 +13,6 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
| 13 |
from langchain.vectorstores import Chroma
|
| 14 |
from tqdm import tqdm
|
| 15 |
|
| 16 |
-
from document_qa.grobid_processors import GrobidProcessor
|
| 17 |
-
|
| 18 |
|
| 19 |
class DocumentQAEngine:
|
| 20 |
llm = None
|
|
@@ -220,7 +218,7 @@ class DocumentQAEngine:
|
|
| 220 |
biblio_metadata = copy.copy(biblio)
|
| 221 |
biblio_metadata['type'] = "biblio"
|
| 222 |
biblio_metadata['section'] = "header"
|
| 223 |
-
for key in ['title', 'authors', 'year']:
|
| 224 |
if key in biblio_metadata:
|
| 225 |
texts.append("{}: {}".format(key, biblio_metadata[key]))
|
| 226 |
metadatas.append(biblio_metadata)
|
|
|
|
| 1 |
import copy
|
|
|
|
| 2 |
import os
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import Union, Any
|
| 5 |
|
| 6 |
+
from document_qa.grobid_processors import GrobidProcessor
|
| 7 |
from grobid_client.grobid_client import GrobidClient
|
| 8 |
from langchain.chains import create_extraction_chain
|
| 9 |
from langchain.chains.question_answering import load_qa_chain
|
|
|
|
| 13 |
from langchain.vectorstores import Chroma
|
| 14 |
from tqdm import tqdm
|
| 15 |
|
|
|
|
|
|
|
| 16 |
|
| 17 |
class DocumentQAEngine:
|
| 18 |
llm = None
|
|
|
|
| 218 |
biblio_metadata = copy.copy(biblio)
|
| 219 |
biblio_metadata['type'] = "biblio"
|
| 220 |
biblio_metadata['section'] = "header"
|
| 221 |
+
for key in ['title', 'authors', 'publication_year']:
|
| 222 |
if key in biblio_metadata:
|
| 223 |
texts.append("{}: {}".format(key, biblio_metadata[key]))
|
| 224 |
metadatas.append(biblio_metadata)
|
document_qa/grobid_processors.py
CHANGED
|
@@ -171,7 +171,7 @@ class GrobidProcessor(BaseProcessor):
|
|
| 171 |
}
|
| 172 |
try:
|
| 173 |
year = dateparser.parse(doc_biblio.header.date).year
|
| 174 |
-
biblio["year"] = year
|
| 175 |
except:
|
| 176 |
pass
|
| 177 |
|
|
|
|
| 171 |
}
|
| 172 |
try:
|
| 173 |
year = dateparser.parse(doc_biblio.header.date).year
|
| 174 |
+
biblio["publication_year"] = year
|
| 175 |
except:
|
| 176 |
pass
|
| 177 |
|