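# NOTE: the lines below are hosting-page residue captured with this file, not source code.
# kateforsberg's picture
# first commit
# d477d5c
# raw
# history blame
# 8.41 kB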
# TODO: create a csv parser
from __future__ import annotations
from ast import Lambda
import contextlib
import csv
from pathlib import Path
from typing import TYPE_CHECKING, Callable
import yaml
if TYPE_CHECKING:
from io import TextIOWrapper
class CsvParser:
    """Translate a directory of section CSV files into sections of ``config.yaml``.

    The parser expects ``<directory>/config.yaml`` and ``<directory>/csv_files/``
    (both resolved against the current working directory). Each CSV file holds
    one section: the first cell of its first row names the section, and every
    following row is ``key, cell, cell, ...``.
    """

    def __init__(self, directory: str) -> None:
        """Resolve the YAML path and collect the CSV files found in *directory*."""
        base = Path.cwd() / directory
        self.yaml_path = base / "config.yaml"
        self.csv_directory = base / "csv_files"
        # Sorted so the processing order does not depend on the filesystem.
        self.csv_file_paths = sorted(
            path for path in self.csv_directory.glob("*") if path.is_file()
        )

    @staticmethod
    def _unpack(row: dict) -> tuple:
        """Return the ``(key, cells)`` pair of a single-entry row without mutating it."""
        return next(iter(row.items()), ("", []))

    @staticmethod
    def _cell(cells: list, index: int) -> str:
        """Return ``cells[index]``, or ``""`` when the row is too short."""
        return cells[index] if index < len(cells) else ""

    @staticmethod
    def _split_ids(cell: str) -> list:
        """Split a comma-separated cell into stripped, non-empty ids."""
        return [part.strip() for part in cell.split(",") if part.strip()]

    def csv_parser(self) -> None:
        """Read every CSV file, rebuild the YAML sections, and save the config.

        Sections are looked up by their CSV header: ``Ruleset ID``, ``Agent ID``,
        ``State ID`` (optionally tailored by ``State ID to Tailor``),
        ``Prompt ID`` and ``Transition ID``. A missing section is reported on
        stdout and the corresponding YAML key is left untouched.
        """
        sections: dict = {}
        for csv_path in self.csv_file_paths:
            with csv_path.open("r", newline="") as handle:
                self.split_csv(handle, sections)

        # An empty config.yaml loads as None; start from an empty mapping then.
        yaml_data = yaml.safe_load(self.yaml_path.read_text()) or {}

        # Presence is tested explicitly (rather than catching KeyError around
        # the whole call) so errors inside a converter are not misreported as
        # a missing section.
        if "Ruleset ID" in sections:
            yaml_data["rulesets"] = self.csv_rulesets(sections["Ruleset ID"])
        else:
            print("No rulesets")

        if "Agent ID" in sections:
            yaml_data["structures"] = self.csv_agents(sections["Agent ID"])
        else:
            print("No structures")

        # Tailoring only affects the states section and is optional.
        if "State ID" in sections:
            yaml_data["states"] = self.csv_states(
                sections["State ID"],
                sections.get("State ID to Tailor", []),
            )
        else:
            print(" no states")

        if "Prompt ID" in sections:
            yaml_data["prompts"] = self.csv_prompts(sections["Prompt ID"])
        else:
            print("no prompts")

        if "Transition ID" in sections:
            yaml_data["events"] = self.csv_transition_id(sections["Transition ID"])
        else:
            print("No transitions")

        self.update_and_save(yaml_data)

    def split_csv(self, csv_file: TextIOWrapper, all_information: dict) -> None:
        """Parse one section file into ``all_information[<section header>]``.

        The section header is the first cell of the first row. Each later row
        becomes ``{first_cell: remaining_cells}``. Blank lines and rows whose
        first cell is empty are skipped; an empty file or a file without a
        header cell is ignored. A section already present under the same
        header is replaced.
        """
        reader = csv.reader(csv_file)
        header_row = next(reader, None)
        if not header_row or not header_row[0]:
            return
        # csv.reader yields [] for blank lines, hence the ``row and`` guard.
        all_information[header_row[0]] = [
            {row[0]: row[1:]} for row in reader if row and row[0]
        ]

    def csv_kbs(self, kb_info: list) -> dict:
        """Map each knowledge-base id to its ``file_path`` and ``file_type``.

        Rows missing the id, the path or the type are skipped.
        NOTE(review): ``csv_parser`` does not call this method in this file.
        """
        knowledge_bases = {}
        for row in kb_info:
            kb_id, cells = self._unpack(row)
            file_path = self._cell(cells, 0)
            file_type = self._cell(cells, 1)
            if kb_id and file_path and file_type:
                knowledge_bases[kb_id] = {
                    "file_path": file_path,
                    "file_type": file_type,
                }
        return knowledge_bases

    def csv_rulesets(self, ruleset_info: list) -> dict:
        """Map each ruleset id to its ``name`` and list of ``rules``.

        The rules cell holds one rule per line; surrounding whitespace, double
        quotes and leading ``-``/space characters are stripped from each line.
        Rows missing the id, the name or the rules cell are skipped.
        """
        rulesets = {}
        for row in ruleset_info:
            ruleset_id, cells = self._unpack(row)
            name = self._cell(cells, 0)
            raw_rules = self._cell(cells, 1)
            if not (ruleset_id and name and raw_rules):
                continue
            rulesets[ruleset_id] = {
                "name": name,
                "rules": [
                    line.strip().strip('"').lstrip("- ")
                    for line in raw_rules.split("\n")
                    if line.strip()
                ],
            }
        return rulesets

    def csv_prompts(self, prompt_info: list) -> dict:
        """Map each prompt id to its ``prompt`` text and optional ``author_intent``."""
        prompts = {}
        for row in prompt_info:
            prompt_id, cells = self._unpack(row)
            prompt = self._cell(cells, 0)
            if not (prompt_id and prompt):
                continue
            entry = {"prompt": prompt}
            author_intent = self._cell(cells, 1)
            if author_intent:
                entry["author_intent"] = author_intent
            prompts[prompt_id] = entry
        return prompts

    def csv_agents(self, agent_info: list) -> dict:
        """Map each agent id to its structure configuration.

        Cells after the id: 0 = comma-separated ruleset ids, 1 = global
        knowledge base, 2 = global prompt id (a state-specific prompt may
        override it), 4 = model override. Cell 3 is not read. The model
        defaults to ``"gpt-4o"``; missing trailing cells count as empty.
        """
        agents = {}
        for row in agent_info:
            agent_id, cells = self._unpack(row)
            if not agent_id:
                continue
            config = {
                "model": self._cell(cells, 4) or "gpt-4o",
                "ruleset_ids": self._split_ids(self._cell(cells, 0)),
            }
            knowledge_base = self._cell(cells, 1)
            if knowledge_base:
                config["vector_stores"] = [knowledge_base]
            prompt_id = self._cell(cells, 2)
            if prompt_id:
                config["prompt_id"] = prompt_id
            agents[agent_id] = config
        return agents

    def csv_states(self, state_info: list, tailor_info: list) -> dict:
        """Build the states section, then apply per-state structure tailoring.

        ``state_info`` rows are ``{state_id: [agents]}`` where ``agents`` is a
        comma-separated list of structure names (or ``"none"``). The ids
        ``start`` and ``end`` are flagged ``initial`` / ``final``.

        ``tailor_info`` rows are ``{state_id: [structure, ruleset_ids, prompt_id]}``.
        A non-empty ruleset list replaces that structure's entry; a prompt id
        is then added to the entry. Tailoring keeps the state's existing
        flags, and rows without a state id or structure name are skipped.
        """
        states = {}
        for row in state_info:
            state_id, cells = self._unpack(row)
            if not state_id:
                continue
            if state_id == "start":
                states[state_id] = {"initial": True}
            elif state_id == "end":
                states[state_id] = {"final": True}
            else:
                states[state_id] = {}
            agents = self._cell(cells, 0)
            if agents and agents != "none":
                names = self._split_ids(agents)
                if names:
                    states[state_id]["structures"] = {name: {} for name in names}

        for row in tailor_info:
            state_id, cells = self._unpack(row)
            structure_name = self._cell(cells, 0)
            if not state_id or not structure_name:
                continue
            # Update the state in place so "initial"/"final" survive tailoring.
            structures = states.setdefault(state_id, {}).setdefault("structures", {})
            ruleset_ids = self._split_ids(self._cell(cells, 1))
            if ruleset_ids:
                structures[structure_name] = {"ruleset_ids": ruleset_ids}
            else:
                # Make sure the entry exists so a prompt-only row is not lost.
                structures.setdefault(structure_name, {})
            prompt_id = self._cell(cells, 2)
            if prompt_id:
                structures[structure_name]["prompt_id"] = prompt_id
        return states

    def csv_transition_id(self, transition_info: list) -> dict:
        """Group ``from``/``to`` transitions under their event id.

        Rows are ``{event_id: [from_state, to_state]}``; rows missing any of
        the three values are skipped. Transitions keep their input order.
        """
        events = {}
        for row in transition_info:
            event_id, cells = self._unpack(row)
            source = self._cell(cells, 0)
            target = self._cell(cells, 1)
            if event_id and source and target:
                event = events.setdefault(event_id, {"transitions": []})
                event["transitions"].append({"from": source, "to": target})
        return events

    def update_and_save(self, config: dict) -> None:
        """Overwrite ``config.yaml`` with *config* in block (non-flow) style."""
        with self.yaml_path.open("w") as file:
            yaml.dump(config, file, default_flow_style=False, line_break="\n")
if __name__ == "__main__":
    # Script entry point: regenerate config.yaml for the "uw_programmatic"
    # directory under the current working directory.
    parser = CsvParser("uw_programmatic")
    parser.csv_parser()