import utils
import os
import numpy as np
import nest_asyncio
import openai
import chromadb
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    Document,
    Settings
)
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from trulens_eval import Tru
from utils import get_prebuilt_trulens_recorder
import time
nest_asyncio.apply()
openai.api_key = utils.get_openai_api_key()
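
# Runs once (when ./default.sqlite is absent): query the Chroma-backed index with
# the evaluation questions, record each call with TruLens and write the records
# and timing to ./results.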
def main():
    if not os.path.exists("./default.sqlite"):
        start_time = time.time()

        llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
        fine_tuned_path = "local:./models/fine-tuned-embeddings"
        Settings.llm = llm
        Settings.embed_model = fine_tuned_path
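
        # Open the persisted Chroma database and get (or create) the collection
        # that already holds the document embeddings.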
        db = chromadb.PersistentClient(path="./models/chroma_db")
        chroma_collection = db.get_or_create_collection("quickstart")

        # assign chroma as the vector_store to the context
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)

        # create your index
        index = VectorStoreIndex.from_vector_store(
            vector_store=vector_store,
            storage_context=storage_context
        )
        query_engine = index.as_query_engine()
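
        # Load the evaluation questions; entries in the file are separated by
        # blank lines.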
        separator = "\n\n"
        eval_questions = []
        with open('./raw_documents/eval_questions.txt', 'r') as file:
            content = file.read()

        for question in content.split(separator):
            print(question)
            print(separator)
            eval_questions.append(question.strip())

        response = query_engine.query(eval_questions[0])
        print(str(response))
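
        # Wrap the query engine in a TruLens recorder (helper from utils) so each
        # query below is logged together with its feedback scores.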
        tru = Tru(database_file="./models/trulens_eval.sqlite")
        tru_recorder = get_prebuilt_trulens_recorder(query_engine,
                                                     app_id="Direct Query Engine")

        print("Sending each question to the LLM ...")
        with tru_recorder as recording:
            for question in eval_questions:
                response = query_engine.query(question)
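
        # Export all recorded prompts, responses and feedback to CSV and report
        # where the TruLens database lives.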
        records, feedback = tru.get_records_and_feedback(app_ids=[])

        os.makedirs("./results", exist_ok=True)
        records.to_csv("./results/records.csv", index=False)
        print(tru.db.engine.url.render_as_string(hide_password=False))

        end_time = time.time()
        time_spent_mins = (end_time - start_time) / 60
        with open("./results/time_cost.txt", "w") as fp:
            fp.write(f"Takes {int(time_spent_mins)} mins to create the LLM evaluation.")
if __name__ == "__main__":
    # main()

    if False:
        start_time = time.time()

        llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
        fine_tuned_path = "local:./models/fine-tuned-embeddings"
        Settings.llm = llm
        Settings.embed_model = fine_tuned_path

        db = chromadb.PersistentClient(path="./models/chroma_db")
        chroma_collection = db.get_or_create_collection("quickstart")

        # assign chroma as the vector_store to the context
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)

        # create your index
        index = VectorStoreIndex.from_vector_store(
            vector_store=vector_store,
            storage_context=storage_context
        )
        query_engine = index.as_query_engine()

        separator = "\n\n"
        eval_questions = []
        with open('./raw_documents/eval_questions.txt', 'r') as file:
            content = file.read()

        for question in content.split(separator):
            print(question)
            print(separator)
            eval_questions.append(question.strip())

        response = query_engine.query(eval_questions[0])
        print(str(response))
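
    # Quickstart-style TruLens evaluation: build a fresh index over the Q&A
    # document and attach groundedness / relevance feedback functions.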
    from trulens_eval import Tru
    tru = Tru()

    documents = SimpleDirectoryReader(
        input_files=["./raw_documents/qna.txt"]
    ).load_data()
    index = VectorStoreIndex.from_documents(documents)
    query_engine = index.as_query_engine()

    response = query_engine.query("Which is not a government healthcare philosophy?")
    print(response)
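
    # Use the TruLens OpenAI provider for feedback; note this rebinds the name
    # `openai` from the OpenAI SDK module to the provider instance.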
    from trulens_eval.feedback.provider.openai import OpenAI
    openai = OpenAI()

    # select context to be used in feedback. the location of context is app specific.
    from trulens_eval.app import App
    context = App.select_context(query_engine)
    from trulens_eval import Feedback

    # Define a groundedness feedback function
    from trulens_eval.feedback import Groundedness
    grounded = Groundedness(groundedness_provider=OpenAI())
    f_groundedness = (
        Feedback(grounded.groundedness_measure_with_cot_reasons)
        .on(context.collect())  # collect context chunks into a list
        .on_output()
        .aggregate(grounded.grounded_statements_aggregator)
    )
    # Question/answer relevance between overall question and answer.
    f_qa_relevance = Feedback(openai.relevance).on_input_output()

    # Question/statement relevance between question and each context chunk.
    f_qs_relevance = (
        Feedback(openai.qs_relevance)
        .on_input()
        .on(context)
        .aggregate(np.mean)
    )
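
    # Record the LlamaIndex app with TruLlama, attaching the three feedback
    # functions defined above.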
    from trulens_eval import TruLlama
    tru_query_engine_recorder = TruLlama(query_engine,
                                         app_id='LlamaIndex_App1',
                                         feedbacks=[f_groundedness, f_qa_relevance, f_qs_relevance])

    if False:
        # or as context manager
        with tru_query_engine_recorder as recording:
            query_engine.query("Which of the following is TRUE on the similarity of Means Testing and Casemix?")