Spaces:
Sleeping
Sleeping
import logging | |
import os | |
from pathlib import Path | |
import pandas as pd | |
from common.common import configure_logging | |
from common.configuration import Configuration | |
from components.elastic import create_index_elastic_chunks, create_index_elastic_people | |
from components.embedding_extraction import EmbeddingExtractor | |
from controlpanel.components.datasets.dispatcher import Dispatcher | |
from components.nmd.services.acronym import AcronymService | |
from components.nmd.services.dataset import DatasetService | |
from components.nmd.services.document import DocumentService | |
from components.sqlite.create_database import create_database | |
# Start-up script: reads the configuration, constructs the embedding model and
# the services that depend on it, calls create_database, loads the current
# dataset's pickled DataFrame and, when enabled in the config, calls the
# Elastic index builders.

# Config file location; the CONFIG_PATH environment variable overrides the
# development default.
CONFIG_PATH = os.environ.get('CONFIG_PATH', './config_dev.yaml')
config = Configuration(CONFIG_PATH)

logger = logging.getLogger(__name__)
configure_logging(config_file_path=config.common_config.log_file_path)
logger.info('Start work...')
# Lazy %-style arguments: the message is only formatted if the record is emitted.
logger.info('Use config: %s', os.path.abspath(CONFIG_PATH))

# One embedding model instance is shared by the dispatcher and the dataset service.
model = EmbeddingExtractor(
    config.db_config.faiss.model_embedding_path,
    config.db_config.faiss.device,
)
dispatcher = Dispatcher(model, config, logger)

acronym_service = AcronymService()
dataset_service = DatasetService(model, dispatcher, config)
document_service = DocumentService(dataset_service, config)

create_database(dataset_service, config)

# Point the dispatcher at whichever dataset the service reports as current.
current_dataset = dataset_service.get_current_dataset()
dispatcher.reset_dataset(current_dataset.dataset_id)

# The dataset pickle is resolved relative to the current working directory:
# <cwd>/<regulations_path>/<dataset_id>/dataset.pkl
dataset_pickle_path = (
    Path.cwd()
    / config.db_config.files.regulations_path
    / str(current_dataset.dataset_id)
    / 'dataset.pkl'
)
# NOTE(security): unpickling can execute arbitrary code embedded in the file.
# Only load dataset.pkl from a location that is trusted and not user-writable.
df = pd.read_pickle(dataset_pickle_path)

if config.db_config.elastic.use_elastic:
    create_index_elastic_chunks(df, logger)
    create_index_elastic_people(config.db_config.elastic.people_path, logger)

# NOTE(review): this message is emitted at the very end of start-up, well after
# the model was constructed above -- confirm the wording/placement is intended.
logger.info('Loaded embedding model')