Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	| import json | |
| import logging | |
| import time | |
| from pathlib import Path | |
| from elasticsearch import Elasticsearch | |
| from tqdm import tqdm | |
def create_index_elastic_segmentation(
    path: str,
    logger: logging.Logger | None = None,
) -> None:
    """Rebuild the segmentation search index and run a sample query on it.

    Drops and recreates the ``segmentation_search_elastic`` index on the
    Elasticsearch node at ``localhost:9200``, indexes every regular file
    found directly inside ``path`` as one JSON document (ids start at 1, in
    sorted file order), logs the resulting document count and finally logs
    the top hit of a hard-coded smoke-test query.

    Args:
        path: Directory whose files each contain a single JSON document.
        logger: Logger for progress messages; defaults to this module's
            logger when omitted.

    Raises:
        OSError: If ``path`` cannot be listed or a file cannot be read.
        json.JSONDecodeError: If a file does not contain valid JSON.

    Errors raised by the Elasticsearch client propagate unchanged.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    index_name = 'segmentation_search_elastic'

    # Connect to Elasticsearch. The URL carries an explicit scheme because
    # 8.x clients reject a bare "host:port" string, while 7.x clients accept
    # the full URL as well.
    es = Elasticsearch(hosts='http://localhost:9200')

    # Start from a clean index so documents from earlier runs never linger.
    if es.indices.exists(index=index_name):
        es.indices.delete(index=index_name)

    mapping = {
        "mappings": {
            "properties": {
                "segmentation_model": {"type": "text", "analyzer": "standard"},
                "segmentation_model2": {"type": "text", "analyzer": "standard"},
                "company_name": {"type": "text", "analyzer": "standard"},
            }
        }
    }
    # Create the index with the explicit mapping above.
    es.indices.create(index=index_name, body=mapping)

    # Sub-directories cannot be opened as JSON, so only regular files are
    # kept; sorting makes the file -> document id assignment reproducible.
    source_files = sorted(
        entry for entry in Path(path).iterdir() if entry.is_file()
    )
    for doc_id, source_file in tqdm(
        enumerate(source_files, start=1), total=len(source_files)
    ):
        # Read the file and parse its content as one JSON document.
        with source_file.open('r', encoding='utf-8') as file:
            data = json.load(file)
        # Index the document in Elasticsearch.
        es.index(index=index_name, id=doc_id, body=data)

    # Freshly indexed documents only become visible to count/search after a
    # refresh; requesting one explicitly is deterministic, unlike sleeping
    # for roughly one refresh interval.
    es.indices.refresh(index=index_name)

    # Count the documents now visible in the index. The leading number is how
    # many files were sent for indexing, so a mismatch is easy to spot.
    total_documents = es.count(index=index_name)['count']
    logger.info(
        "%s, Total documents in '%s': %s",
        len(source_files),
        index_name,
        total_documents,
    )

    if es.indices.exists(index=index_name):
        logger.info("Index '%s' exists.", index_name)
        logger.info("Total documents in '%s': %s", index_name, total_documents)

    # Smoke-test query: a fuzzy match over all three fields, plus a low-boost
    # clause matching the generic "segmentation model" wording.
    query = "К какой модели сегментации относится ООО ГРК Быстринское?"
    query_ = {
        "query": {
            "bool": {
                "should": [
                    {
                        "multi_match": {
                            "query": query,
                            "fields": [
                                "segmentation_model",
                                "segmentation_model2",
                                "company_name",
                            ],
                            "fuzziness": "AUTO",
                            "analyzer": "standard",
                        }
                    },
                    {
                        "multi_match": {
                            "query": "модели сегментации модель сегментации",
                            "fields": ["segmentation_model", "segmentation_model2"],
                            "operator": "or",
                            "boost": 0.1,
                        }
                    },
                ]
            }
        }
    }

    # Run the search, asking only for the best hit.
    response = es.search(index=index_name, body=query_, size=1)
    logger.info("Number of hits: %s", response['hits']['total']['value'])

    # Log the source document of every returned hit.
    for hit in response['hits']['hits']:
        logger.info("%s", hit['_source'])
if __name__ == '__main__':
    # Script entry point: rebuild the index from the segmentation card
    # directory on the project data mount.
    segmentation_cards_dir = '/mnt/ntr_work/project/nmd800/data/segmentation_card'
    create_index_elastic_segmentation(segmentation_cards_dir)