File size: 687 Bytes
cfd3735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from langchain.retrievers.tfidf import TFIDFRetriever


def test_from_texts() -> None:
    input_texts = ["I have a pen.", "Do you have a pen?", "I have a bag."]
    tfidf_retriever = TFIDFRetriever.from_texts(texts=input_texts)
    assert len(tfidf_retriever.docs) == 3
    assert tfidf_retriever.tfidf_array.toarray().shape == (3, 5)


def test_from_texts_with_tfidf_params() -> None:
    input_texts = ["I have a pen.", "Do you have a pen?", "I have a bag."]
    tfidf_retriever = TFIDFRetriever.from_texts(
        texts=input_texts, tfidf_params={"min_df": 2}
    )
    # should count only multiple words (have, pan)
    assert tfidf_retriever.tfidf_array.toarray().shape == (3, 2)