# TODO: create a csv parser
"""Fill in the sections of a ``config.yaml`` from a directory of CSV files.

Each CSV file holds one section.  The first cell of its first row names the
section (for example ``Ruleset ID``); every following row is one entry whose
first column is the entry key and whose remaining columns are its values.
"""
from __future__ import annotations

from ast import Lambda
import contextlib
import csv
from pathlib import Path
from typing import TYPE_CHECKING, Callable

import yaml

if TYPE_CHECKING:
    from io import TextIOWrapper


def _unpack_row(row: dict[str, list[str]]) -> tuple[str, list[str]]:
    """Return the ``(key, values)`` pair of a parsed row without mutating it.

    Rows produced by ``CsvParser.split_csv`` are single-entry dicts.  Reading
    the entry instead of popping it keeps the parsed data reusable.
    """
    return next(iter(row.items()))


def _cell(values: list[str], index: int) -> str:
    """Return ``values[index]``, or ``""`` when the row has no such column."""
    return values[index] if index < len(values) else ""


def _split_ids(cell: str) -> list[str]:
    """Split a comma-separated cell into stripped, non-empty identifiers."""
    return [item.strip() for item in cell.split(",") if item.strip()]


class CsvParser:
    """Read section CSV files and write their content into ``config.yaml``.

    Attributes:
        yaml_path: ``<cwd>/<directory>/config.yaml``, read and then rewritten.
        csv_directory: ``<cwd>/<directory>/csv_files``.
        csv_file_paths: Regular files found directly in ``csv_directory``,
            sorted so that processing order is deterministic.
    """

    # Model written for an agent row that has no model override.
    DEFAULT_MODEL = "gpt-4o"

    def __init__(self, directory: str) -> None:
        """Resolve the YAML and CSV locations under ``directory``.

        Args:
            directory: Folder (relative to the current working directory, or
                absolute) containing ``config.yaml`` and ``csv_files/``.
        """
        base = Path.cwd() / directory
        self.yaml_path = base / "config.yaml"
        self.csv_directory = base / "csv_files"
        self.csv_file_paths = sorted(
            path for path in self.csv_directory.glob("*") if path.is_file()
        )

    def csv_parser(self) -> None:
        """Parse every CSV file and merge the sections into ``config.yaml``.

        Sections are looked up by the name in the first cell of each file:
        ``Ruleset ID``, ``Agent ID``, ``State ID`` (optionally refined by
        ``State ID to Tailor``), ``Prompt ID`` and ``Transition ID``.  A
        missing section is reported on stdout and leaves the corresponding
        YAML key untouched.
        """
        sections: dict[str, list] = {}
        for csv_path in self.csv_file_paths:
            with csv_path.open("r", newline="") as csvfile:
                self.split_csv(csvfile, sections)

        # An empty config.yaml loads as None; start from an empty mapping.
        yaml_data = yaml.safe_load(self.yaml_path.read_text()) or {}

        # Membership tests (rather than a broad ``except KeyError``) so that a
        # genuine error inside a builder is not misreported as a missing
        # section.
        if "Ruleset ID" in sections:
            yaml_data["rulesets"] = self.csv_rulesets(sections["Ruleset ID"])
        else:
            print("No rulesets")

        if "Agent ID" in sections:
            yaml_data["structures"] = self.csv_agents(sections["Agent ID"])
        else:
            print("No structures")

        # Tailoring only refines the states section and is optional.
        if "State ID" in sections:
            yaml_data["states"] = self.csv_states(
                sections["State ID"],
                sections.get("State ID to Tailor", []),
            )
        else:
            print(" no states")

        if "Prompt ID" in sections:
            yaml_data["prompts"] = self.csv_prompts(sections["Prompt ID"])
        else:
            print("no prompts")

        if "Transition ID" in sections:
            yaml_data["events"] = self.csv_transition_id(sections["Transition ID"])
        else:
            print("No transitions")

        self.update_and_save(yaml_data)

    def split_csv(self, csv_file: TextIOWrapper, all_information: dict) -> None:
        """Parse one section file into ``all_information``.

        The first non-blank row is the header; its first cell becomes the
        section name.  Each later row is stored as ``{first_cell: rest}``.
        Blank rows and rows with an empty first cell are skipped.  A file with
        no header (empty file, or empty first header cell) adds nothing.

        Args:
            csv_file: Open text handle of the CSV file.
            all_information: Mapping updated in place with
                ``section name -> list of row dicts``.
        """
        reader = csv.reader(csv_file)

        # csv.reader yields [] for blank lines, so look for the first real row.
        header_row = next((row for row in reader if row), None)
        if header_row is None or not header_row[0]:
            return

        # The reader has already split on commas, so an "empty" row shows up
        # as [] or as a row whose first cell is "", never as a ",," string.
        all_information[header_row[0]] = [
            {row[0]: row[1:]} for row in reader if row and row[0]
        ]

    def csv_kbs(self, kb_info: list) -> dict:
        """Build ``{kb id: {"file_path", "file_type"}}`` from parsed rows.

        Value column 0 is the file path and column 1 the file type; rows
        missing the key or either value are skipped.
        """
        knowledge_bases = {}
        for row in kb_info:
            key, value = _unpack_row(row)
            file_path, file_type = _cell(value, 0), _cell(value, 1)
            if key and file_path and file_type:
                knowledge_bases[key] = {
                    "file_path": file_path,
                    "file_type": file_type,
                }
        return knowledge_bases

    def csv_rulesets(self, ruleset_info: list) -> dict:
        """Build ``{ruleset id: {"name", "rules"}}`` from parsed rows.

        Value column 0 is the name; column 1 holds one rule per line.  Each
        line is stripped of surrounding whitespace and double quotes and of
        leading ``-``/space characters; lines that end up empty are dropped.
        Rows missing the key, name or rules cell are skipped.
        """
        rulesets = {}
        for row in ruleset_info:
            key, value = _unpack_row(row)
            name, raw_rules = _cell(value, 0), _cell(value, 1)
            if not (key and name and raw_rules):
                continue
            cleaned = (
                line.strip().strip('"').lstrip("- ")
                for line in raw_rules.split("\n")
            )
            rulesets[key] = {
                "name": name,
                "rules": [rule for rule in cleaned if rule],
            }
        return rulesets

    def csv_prompts(self, prompt_info: list) -> dict:
        """Build ``{prompt id: {"prompt"[, "author_intent"]}}`` from parsed rows.

        Value column 0 is the prompt text (required); column 1, when present
        and non-empty, is stored as ``author_intent``.
        """
        prompts = {}
        for row in prompt_info:
            key, value = _unpack_row(row)
            prompt = _cell(value, 0)
            if not (key and prompt):
                continue
            prompts[key] = {"prompt": prompt}
            author_intent = _cell(value, 1)
            if author_intent:
                prompts[key]["author_intent"] = author_intent
        return prompts

    def csv_agents(self, agent_info: list) -> dict:
        """Build ``{agent id: config}`` from parsed rows.

        Value columns read: 0 = comma-separated ruleset ids, 1 = vector store
        (wrapped in a one-element list), 2 = prompt id, 4 = model override.
        Column 3 is not read.  Missing or empty columns are simply omitted, and
        the model defaults to ``DEFAULT_MODEL``.
        """
        agents = {}
        for row in agent_info:
            key, value = _unpack_row(row)
            if not key:
                continue
            config = {
                "model": self.DEFAULT_MODEL if self is not None else "gpt-4o",
                "ruleset_ids": _split_ids(_cell(value, 0)),
            }
            vector_store = _cell(value, 1)
            if vector_store:
                config["vector_stores"] = [vector_store]
            # Agent-wide prompt; a state-specific prompt may be set by tailoring.
            prompt_id = _cell(value, 2)
            if prompt_id:
                config["prompt_id"] = prompt_id
            model_override = _cell(value, 4)
            if model_override:
                config["model"] = model_override
            agents[key] = config
        return agents

    def csv_states(self, state_info: list, tailor_info: list) -> dict:
        """Build the ``states`` mapping, then apply per-state tailoring.

        Args:
            state_info: Rows keyed by state id; value column 0 is a
                comma-separated list of structure names, or ``none``.
            tailor_info: Rows keyed by the state id to tailor; value column 0
                is the structure name, column 1 a comma-separated list of
                ruleset ids, column 2 a prompt id.

        Returns:
            ``{state id: {...}}`` where ``start`` carries ``initial: True``,
            ``end`` carries ``final: True`` and states with structures carry
            a ``structures`` mapping of ``name -> config``.
        """
        states: dict[str, dict] = {}
        for row in state_info:
            key, value = _unpack_row(row)
            if not key:
                continue
            if key == "start":
                states[key] = {"initial": True}
            elif key == "end":
                states[key] = {"final": True}
            else:
                states[key] = {}
            names_cell = _cell(value, 0)
            if names_cell and names_cell != "none":
                states[key]["structures"] = {
                    name: {} for name in _split_ids(names_cell)
                }

        for row in tailor_info:
            tailor, value = _unpack_row(row)
            if not tailor:
                continue
            # Strip like the state definitions do, so names match up.
            structure_name = _cell(value, 0).strip()
            if not structure_name:
                continue

            # Update the state in place so flags such as "initial"/"final"
            # survive tailoring.
            structures = states.setdefault(tailor, {}).setdefault("structures", {})

            ruleset_ids = _split_ids(_cell(value, 1))
            if ruleset_ids:
                # Tailored rulesets replace whatever config the structure had.
                structures[structure_name] = {"ruleset_ids": ruleset_ids}
            else:
                structures.setdefault(structure_name, {})

            prompt_id = _cell(value, 2)
            if prompt_id:
                structures[structure_name]["prompt_id"] = prompt_id
        return states

    def csv_transition_id(self, transition_info: list) -> dict:
        """Build ``{event id: {"transitions": [{"from", "to"}, ...]}}``.

        Value column 0 is the source state and column 1 the target state.
        Rows sharing an event id accumulate in row order; rows missing the
        key, source or target are skipped.
        """
        events: dict[str, dict] = {}
        for row in transition_info:
            key, value = _unpack_row(row)
            source, target = _cell(value, 0), _cell(value, 1)
            if not (key and source and target):
                continue
            event = events.setdefault(key, {"transitions": []})
            event["transitions"].append({"from": source, "to": target})
        return events

    def update_and_save(self, config: dict) -> None:
        """Overwrite ``self.yaml_path`` with ``config`` dumped as block-style YAML."""
        with self.yaml_path.open("w") as file:
            yaml.dump(config, file, default_flow_style=False, line_break="\n")


if __name__ == "__main__":
    CsvParser("uw_programmatic").csv_parser()