import os

from bs4 import BeautifulSoup
from document_qa.grobid_processors import get_xml_nodes_body, get_xml_nodes_figures, get_xml_nodes_header
from tests.resources import TEST_DATA_PATH


def test_get_xml_nodes_body_paragraphs():
    """Check the number of body nodes extracted in paragraph mode.

    Parses the ``2312.07559.paragraphs.tei.xml`` fixture and expects
    ``get_xml_nodes_body(..., use_paragraphs=True)`` to return 70 nodes.
    """
    fixture_path = os.path.join(TEST_DATA_PATH, "2312.07559.paragraphs.tei.xml")
    # Decode explicitly instead of relying on the platform's locale encoding,
    # which is not UTF-8 everywhere (assumes the fixture is UTF-8 -- confirm
    # against its XML declaration).
    with open(fixture_path, 'r', encoding='utf-8') as fo:
        soup = BeautifulSoup(fo, 'xml')

    nodes = get_xml_nodes_body(soup, use_paragraphs=True)

    assert len(nodes) == 70


def test_get_xml_nodes_body_sentences():
    """Check the number of body nodes extracted in sentence mode.

    Parses the ``2312.07559.sentences.tei.xml`` fixture and expects
    ``get_xml_nodes_body(..., use_paragraphs=False)`` to return 327 nodes.
    """
    fixture_path = os.path.join(TEST_DATA_PATH, "2312.07559.sentences.tei.xml")
    # Decode explicitly instead of relying on the platform's locale encoding,
    # which is not UTF-8 everywhere (assumes the fixture is UTF-8 -- confirm
    # against its XML declaration).
    with open(fixture_path, 'r', encoding='utf-8') as fo:
        soup = BeautifulSoup(fo, 'xml')

    children = get_xml_nodes_body(soup, use_paragraphs=False)

    assert len(children) == 327


def test_get_xml_nodes_figures():
    """Check the number of figure nodes extracted from the TEI fixture.

    Parses the ``2312.07559.paragraphs.tei.xml`` fixture and expects
    ``get_xml_nodes_figures`` to return 13 nodes.
    """
    fixture_path = os.path.join(TEST_DATA_PATH, "2312.07559.paragraphs.tei.xml")
    # Decode explicitly instead of relying on the platform's locale encoding,
    # which is not UTF-8 everywhere (assumes the fixture is UTF-8 -- confirm
    # against its XML declaration).
    with open(fixture_path, 'r', encoding='utf-8') as fo:
        soup = BeautifulSoup(fo, 'xml')

    children = get_xml_nodes_figures(soup)

    assert len(children) == 13


def test_get_xml_nodes_header_paragraphs():
    """Check the total number of header nodes with default arguments.

    Parses the ``2312.07559.paragraphs.tei.xml`` fixture, calls
    ``get_xml_nodes_header`` without overriding its defaults, and expects
    the lengths of all values in the returned mapping to add up to 8.
    """
    fixture_path = os.path.join(TEST_DATA_PATH, "2312.07559.paragraphs.tei.xml")
    # Decode explicitly instead of relying on the platform's locale encoding,
    # which is not UTF-8 everywhere (assumes the fixture is UTF-8 -- confirm
    # against its XML declaration).
    with open(fixture_path, 'r', encoding='utf-8') as fo:
        soup = BeautifulSoup(fo, 'xml')

    children = get_xml_nodes_header(soup)

    # Only the values are counted, so iterate them directly rather than
    # unpacking (and discarding) the keys.
    assert sum(len(child) for child in children.values()) == 8


def test_get_xml_nodes_header_sentences():
    """Check the total number of header nodes extracted in sentence mode.

    Parses the ``2312.07559.sentences.tei.xml`` fixture, calls
    ``get_xml_nodes_header(..., use_paragraphs=False)``, and expects the
    lengths of all values in the returned mapping to add up to 15.
    """
    fixture_path = os.path.join(TEST_DATA_PATH, "2312.07559.sentences.tei.xml")
    # Decode explicitly instead of relying on the platform's locale encoding,
    # which is not UTF-8 everywhere (assumes the fixture is UTF-8 -- confirm
    # against its XML declaration).
    with open(fixture_path, 'r', encoding='utf-8') as fo:
        soup = BeautifulSoup(fo, 'xml')

    children = get_xml_nodes_header(soup, use_paragraphs=False)

    # Only the values are counted, so iterate them directly rather than
    # unpacking (and discarding) the keys.
    assert sum(len(child) for child in children.values()) == 15