Spaces:

lingyit1108
/

ragtest-sakimilo

Sleeping

File size: 2,210 Bytes

db694c4
 
 
 
 
 
 
 
 
 
 
b2b3b83
db694c4
b2b3b83
db694c4
 
 
b2b3b83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db694c4
b2b3b83
 
 
 
db694c4
b2b3b83
 
 
 
db694c4
b2b3b83
 
db694c4
b2b3b83
 
db694c4
b2b3b83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db694c4
b2b3b83

import utils
import os

import openai
from llama_index import SimpleDirectoryReader
from llama_index import Document
from llama_index import VectorStoreIndex
from llama_index import ServiceContext
from llama_index.llms import OpenAI

from llama_index.embeddings import HuggingFaceEmbedding
from trulens_eval import Tru

from utils import get_prebuilt_trulens_recorder

# Set the OpenAI API key at import time, using the value returned by the
# project's utils.get_openai_api_key() helper.
openai.api_key = utils.get_openai_api_key()

def _load_eval_questions(path, separator="\n\n"):
    """Return the non-empty evaluation questions stored in *path*.

    The file is split on *separator*; each chunk is stripped of surrounding
    whitespace, and chunks that end up empty (trailing separator, extra blank
    lines) are dropped so they are never sent to the query engine.
    """
    with open(path, "r", encoding="utf-8") as file:
        content = file.read()

    stripped = (chunk.strip() for chunk in content.split(separator))
    return [question for question in stripped if question]


def main():
    """Build a query engine over the knowledge base and record an evaluation run.

    Does nothing when ``./default.sqlite`` already exists (presumably the
    database created by a previous ``Tru()`` run -- TODO confirm). Otherwise:

    1. loads the evaluation questions,
    2. indexes ``./raw_documents/HI_knowledge_base.pdf`` as a single document,
    3. prints the answer to the first question,
    4. runs every question under the TruLens recorder,
    5. writes the recorded runs to ``results/records.csv`` and prints the
       database URL.

    Raises:
        ValueError: if the questions file contains no non-blank question.
    """
    if os.path.exists("./default.sqlite"):
        return

    # Read the questions first: it is cheap, and a missing or empty file then
    # fails before the embedding model is loaded and the index is built.
    separator = "\n\n"
    questions_path = 'raw_documents/eval_questions.txt'
    eval_questions = _load_eval_questions(questions_path, separator)
    if not eval_questions:
        raise ValueError(f"No evaluation questions found in {questions_path}")

    for question in eval_questions:
        print(question)
        print(separator)

    documents = SimpleDirectoryReader(
        input_files=["./raw_documents/HI_knowledge_base.pdf"]
    ).load_data()

    # Merge all loaded pieces into one Document before indexing.
    document = Document(text="\n\n".join([doc.text for doc in documents]))

    ### gpt-4-1106-preview
    ### gpt-3.5-turbo-1106 / gpt-3.5-turbo
    llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.1)
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
    index = VectorStoreIndex.from_documents([document], service_context=service_context)

    query_engine = index.as_query_engine()

    # Query once outside the recorder and show the answer.
    response = query_engine.query(eval_questions[0])
    print(str(response))

    tru = Tru()
    # tru.reset_database()

    tru_recorder = get_prebuilt_trulens_recorder(
        query_engine, app_id="Direct Query Engine"
    )
    # Queries issued inside this context are the ones the recorder sees;
    # the returned responses themselves are not needed here.
    with tru_recorder:
        for question in eval_questions:
            query_engine.query(question)

    records, _ = tru.get_records_and_feedback(app_ids=[])

    os.makedirs("results", exist_ok=True)
    records.to_csv("results/records.csv", index=False)

    print(tru.db.engine.url.render_as_string(hide_password=False))
    # tru.run_dashboard()

# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()