import json
import logging
import sys
import time
from pathlib import Path

from elasticsearch import Elasticsearch
from tqdm import tqdm

# Project root, three directory levels above this file; appended to sys.path
# so that imports resolved relative to the root work when run as a script.
ROOT_DIR = Path(__file__).resolve().parent.parent.parent
# sys.path holds strings, so the membership test must compare the string form;
# comparing the Path object itself never matches and appends a duplicate entry
# every time the module is imported or reloaded.
if str(ROOT_DIR) not in sys.path:
    sys.path.append(str(ROOT_DIR))


def _build_people_index_body() -> dict:
    """Return the settings and mappings body used to create the people index."""

    def text_field() -> dict:
        # Every text field is analyzed the same way at index and search time.
        return {
            "type": "text",
            "analyzer": "custom_analyzer",
            "search_analyzer": "custom_analyzer",
        }

    return {
        "settings": {
            "analysis": {
                "char_filter": {
                    # Strip straight and angle quotes before tokenizing.
                    "quote_removal": {
                        "type": "pattern_replace",
                        "pattern": "[\"«»]",
                        "replacement": "",
                    }
                },
                "filter": {
                    "custom_stopwords": {
                        "type": "stop",
                        # The filter runs after "lowercase", so without
                        # ignore_case the upper-case entries below ("ООО",
                        # "ПАО", ...) could never match a token.
                        "ignore_case": True,
                        # NOTE(review): multi-word entries ("за что",
                        # "Норильский никель", "Рабочей группы") are compared
                        # against single tokens from the standard tokenizer and
                        # therefore have no effect; split them if they should.
                        "stopwords": [
                            "кто",
                            "является",
                            "куратором",
                            "руководит",
                            "отвечает",
                            "бизнес",
                            "за что",
                            "ООО",
                            "ОАО",
                            "НН",
                            "персональный",
                            "состав",
                            "Комитета",
                            "ПАО",
                            "ГМК",
                            "Норильский никель",
                            "Рабочей группы",
                            "что",
                            "как",
                            "почему",
                            "зачем",
                            "где",
                            "когда",
                        ],
                    }
                },
                "analyzer": {
                    # Only lowercases and drops stop words; no stemmer is
                    # configured.
                    "custom_analyzer": {
                        "type": "custom",
                        "char_filter": ["quote_removal"],
                        "tokenizer": "standard",
                        "filter": [
                            "lowercase",
                            "custom_stopwords",
                        ],
                    }
                },
            }
        },
        "mappings": {
            "properties": {
                "business_processes": {
                    "type": "nested",
                    "properties": {
                        "production_activities_section": text_field(),
                        "processes_name": text_field(),
                        "level_process": text_field(),
                    },
                },
                "organizatinal_structure": {
                    "type": "nested",
                    "properties": {
                        "position": text_field(),
                        "leads": {
                            "type": "nested",
                            "properties": {
                                "0": text_field(),
                                "1": text_field(),
                            },
                        },
                        "subordinate": {
                            "type": "object",
                            "properties": {
                                "person_name": text_field(),
                                "position": text_field(),
                            },
                        },
                    },
                },
                "business_curator": {
                    "type": "nested",
                    "properties": {
                        "division": text_field(),
                        "company_name": text_field(),
                    },
                },
                "groups": {
                    "type": "nested",
                    "properties": {
                        "group_name": text_field(),
                        "position_in_group": text_field(),
                        "block": {"type": "keyword", "null_value": "unknown"},
                    },
                },
                "person_name": text_field(),
            }
        },
    }


def _build_people_query(query: str) -> dict:
    """Return the search body that matches *query* against the people index."""
    lowered = query.lower()
    has_business_curator = "бизнес куратор" in lowered or "бизнес-куратор" in lowered
    # The company-name field is boosted harder when the question explicitly
    # mentions a business curator.
    business_curator_boost = 20 if has_business_curator else 15

    def fuzzy_match(fields: list[str]) -> dict:
        return {
            "multi_match": {
                "query": query,
                "fields": fields,
                "fuzziness": "AUTO",
                "analyzer": "custom_analyzer",
            }
        }

    def nested_match(nested_path: str, fields: list[str]) -> dict:
        return {"nested": {"path": nested_path, "query": fuzzy_match(fields)}}

    return {
        "query": {
            "function_score": {
                "query": {
                    "bool": {
                        "should": [
                            fuzzy_match(["person_name^3"]),
                            nested_match(
                                "business_processes",
                                [
                                    "business_processes.production_activities_section",
                                    "business_processes.processes_name",
                                ],
                            ),
                            nested_match(
                                "organizatinal_structure",
                                ["organizatinal_structure.position^2"],
                            ),
                            nested_match(
                                "business_curator",
                                [
                                    f"business_curator.company_name^{business_curator_boost}"
                                ],
                            ),
                        ]
                    }
                }
            }
        }
    }


def _index_person_cards(
    es: "Elasticsearch",
    index_name: str,
    card_paths: list[Path],
    logger: logging.Logger,
) -> None:
    """Index each JSON file in *card_paths*, logging and skipping failures."""
    for doc_id, card_path in enumerate(tqdm(card_paths), start=1):
        try:
            with open(card_path, 'r', encoding='utf-8') as file:
                data = json.load(file)

            es.index(index=index_name, id=doc_id, body=data)
            # Pause kept from the original implementation; presumably it
            # throttles indexing requests - TODO confirm it is still needed.
            time.sleep(0.5)
        except Exception:
            # Best effort: one unreadable or rejected file must not abort the
            # whole run, but the traceback is kept for diagnosis.
            logger.exception(
                "Ошибка при чтении или добавлении файла %s в индекс",
                card_path.name,
            )


def create_index_elastic_people(
    path: str,
    logger: logging.Logger | None = None,
):
    """Rebuild the ``people_search`` index from a directory of JSON files.

    The existing index (if any) is deleted and recreated with the custom
    analyzer and nested mappings, every entry of *path* is loaded as JSON and
    indexed with a sequential id starting at 1, the document count is printed,
    and a sample query is executed with its hits logged at INFO level.

    Args:
        path: Directory whose entries are JSON documents to index.
        logger: Logger for indexing errors and search results; defaults to
            this module's logger.

    Raises:
        OSError: If *path* cannot be listed (raised before the index is
            touched).
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    # List the input first so a bad path fails before the old index is
    # deleted. Sorting makes the id assignment deterministic between runs.
    card_paths = sorted(Path(path).iterdir())

    # Connect to Elasticsearch. The explicit scheme is accepted by both the
    # 7.x and 8.x clients; 8.x rejects a bare "host:port" string.
    es = Elasticsearch(hosts='http://localhost:9200')
    index_name = 'people_search'

    # Drop the old index, if it exists, and create it with the mapping.
    if es.indices.exists(index=index_name):
        es.indices.delete(index=index_name)
    es.indices.create(index=index_name, body=_build_people_index_body())

    _index_person_cards(es, index_name, card_paths, logger)

    if es.indices.exists(index=index_name):
        print(f"Index '{index_name}' exists.")

    # Newly indexed documents only become visible to count/search after a
    # refresh, so force one before reading back.
    es.indices.refresh(index=index_name)

    # Count the documents in the index.
    count_response = es.count(index=index_name)
    print(f"Total documents in '{index_name}': {count_response['count']}")

    # Run a sample search against the fresh index and log the top hits.
    query = 'кто бизнес куратор ООО Медвежий ручей?'
    response = es.search(index=index_name, body=_build_people_query(query), size=2)
    logger.info("Number of hits: %s", response['hits']['total']['value'])

    for hit in response['hits']['hits']:
        logger.info(hit['_source'])


if __name__ == '__main__':
    # create_index_elastic_people reports its search results at INFO level;
    # without a configured handler those records are dropped, because the
    # default last-resort handler only emits WARNING and above.
    logging.basicConfig(level=logging.INFO)
    # The data directory may be given as the first command-line argument;
    # otherwise the original hard-coded location is used.
    path = (
        sys.argv[1]
        if len(sys.argv) > 1
        else '/mnt/ntr_work/data/фывфыаыфвфы/person_card'
    )
    create_index_elastic_people(path)