Spaces:
Sleeping
Sleeping
# CSV parser: builds config.yaml from the section CSV files in <directory>/csv_files.
from __future__ import annotations | |
from ast import Lambda | |
import contextlib | |
import csv | |
from pathlib import Path | |
from typing import TYPE_CHECKING, Callable | |
import yaml | |
if TYPE_CHECKING: | |
from io import TextIOWrapper | |
class CsvParser:
    """Rebuild sections of ``config.yaml`` from per-section CSV files.

    Each CSV file holds one section. The first cell of its first row names
    the section; every following row is ``<id>, <cell>, <cell>, ...``.
    ``csv_parser`` reads the sections headed ``Ruleset ID``, ``Agent ID``,
    ``State ID``, ``State ID to Tailor``, ``Prompt ID`` and ``Transition ID``
    and writes the resulting mappings back into the YAML file.
    """

    # Model assigned to an agent whose row carries no model override.
    DEFAULT_MODEL = "gpt-4o"

    def __init__(self, directory: str) -> None:
        """Locate the YAML config and the CSV files for ``directory``.

        Args:
            directory: Project directory, resolved against the current
                working directory. It is expected to contain ``config.yaml``
                and a ``csv_files`` sub-directory.
        """
        base_dir = Path.cwd() / directory
        self.yaml_path = base_dir / "config.yaml"
        self.csv_directory = base_dir / "csv_files"
        # Sorted so the processing order does not depend on the file system.
        self.csv_file_paths = sorted(
            path for path in self.csv_directory.glob("*") if path.is_file()
        )

    @staticmethod
    def _unpack(row: dict, width: int) -> tuple:
        """Return ``(key, cells)`` for a single-entry row mapping.

        The row is left untouched (no ``popitem``), and ``cells`` is a new
        list padded with ``""`` up to ``width`` entries so callers can index
        fixed columns even when the CSV row was short. An empty mapping
        yields ``("", ["", ...])``, which every caller treats as "skip".
        """
        key, cells = next(iter(row.items()), ("", ()))
        padded = list(cells)
        padded.extend([""] * (width - len(padded)))
        return key, padded

    @staticmethod
    def _split_ids(text: str) -> list:
        """Split a comma-separated cell into stripped, non-empty items."""
        return [item.strip() for item in text.split(",") if item.strip()]

    def csv_parser(self) -> None:
        """Read every CSV file, rebuild the config sections and save the YAML.

        A section whose CSV header was not found is reported on stdout and
        the corresponding part of the existing YAML is left as it was.
        """
        sections: dict = {}
        for csv_path in self.csv_file_paths:
            with csv_path.open("r", newline="") as csvfile:
                self.split_csv(csvfile, sections)

        # safe_load returns None for an empty document; start from {} then.
        yaml_data = yaml.safe_load(self.yaml_path.read_text()) or {}

        # (CSV section header, YAML key, builder, message when header missing)
        builders = (
            ("Ruleset ID", "rulesets", self.csv_rulesets, "No rulesets"),
            ("Agent ID", "structures", self.csv_agents, "No structures"),
            (
                "State ID",
                "states",
                # Tailoring is optional and only affects the states section.
                lambda rows: self.csv_states(
                    rows, sections.get("State ID to Tailor", [])
                ),
                " no states",
            ),
            ("Prompt ID", "prompts", self.csv_prompts, "no prompts"),
            ("Transition ID", "events", self.csv_transition_id, "No transitions"),
        )
        for header, config_key, build, missing_message in builders:
            if header in sections:
                yaml_data[config_key] = build(sections[header])
            else:
                print(missing_message)

        self.update_and_save(yaml_data)

    def split_csv(self, csv_file: TextIOWrapper, all_information: dict) -> None:
        """Parse one CSV section into ``all_information``.

        The first cell of the first row becomes the section key. Each later
        row is stored as ``{first_cell: remaining_cells}``. Blank lines and
        rows with no value in the first column are skipped. An empty file
        adds nothing.

        Args:
            csv_file: Open text stream (or any iterable of lines) to parse.
            all_information: Mapping updated in place; an existing entry for
                the same section header is replaced.
        """
        reader = csv.reader(csv_file)
        header_row = next(reader, None)
        if not header_row:
            return
        section_name = header_row[0]  # remaining header cells are descriptive
        all_information[section_name] = [
            {row[0]: row[1:]} for row in reader if row and row[0].strip()
        ]

    def csv_kbs(self, kb_info: list) -> dict:
        """Map each knowledge-base id to its ``file_path`` and ``file_type``.

        Rows missing the id, the path (cell 0) or the type (cell 1) are
        ignored.
        """
        knowledge_bases = {}
        for row in kb_info:
            key, cells = self._unpack(row, 2)
            file_path, file_type = cells[0], cells[1]
            if key and file_path and file_type:
                knowledge_bases[key] = {
                    "file_path": file_path,
                    "file_type": file_type,
                }
        return knowledge_bases

    def csv_rulesets(self, ruleset_info: list) -> dict:
        """Map each ruleset id to its ``name`` (cell 0) and ``rules`` (cell 1).

        The rules cell holds one rule per line. Surrounding whitespace and
        double quotes are removed, then any leading run of ``-`` and space
        characters. Rows missing the id, name or rules text are ignored.
        """
        rulesets = {}
        for row in ruleset_info:
            key, cells = self._unpack(row, 2)
            name, rules_text = cells[0], cells[1]
            if key and name and rules_text:
                rules = [
                    line.strip().strip('"').lstrip("- ")
                    for line in rules_text.split("\n")
                    if line.strip()
                ]
                rulesets[key] = {"name": name, "rules": rules}
        return rulesets

    def csv_prompts(self, prompt_info: list) -> dict:
        """Map each prompt id to its ``prompt`` text (cell 0).

        ``author_intent`` (cell 1) is added only when that cell is non-empty.
        Rows missing the id or the prompt text are ignored.
        """
        prompts = {}
        for row in prompt_info:
            key, cells = self._unpack(row, 2)
            prompt_text, author_intent = cells[0], cells[1]
            if key and prompt_text:
                prompts[key] = {"prompt": prompt_text}
                if author_intent:
                    prompts[key]["author_intent"] = author_intent
        return prompts

    def csv_agents(self, agent_info: list) -> dict:
        """Build the per-agent configuration mapping.

        Cells read per row: 0 = comma-separated ruleset ids, 1 = vector
        store, 2 = prompt id (per the original comment, may be overridden per
        state), 4 = model override. Cell 3 is not read. Missing cells are
        treated as empty.
        """
        agents = {}
        for row in agent_info:
            key, cells = self._unpack(row, 5)
            if not key:
                continue
            config = {
                "model": cells[4] or self.DEFAULT_MODEL,
                "ruleset_ids": self._split_ids(cells[0]),
            }
            if cells[1]:
                config["vector_stores"] = [cells[1]]
            if cells[2]:
                config["prompt_id"] = cells[2]
            agents[key] = config
        return agents

    def csv_states(self, state_info: list, tailor_info: list) -> dict:
        """Build the states mapping and apply per-state agent tailoring.

        Args:
            state_info: Rows ``{state_id: [agents, ...]}`` where ``agents``
                is a comma-separated list of structure names, or ``"none"``
                or empty for a state without structures. The ids ``start``
                and ``end`` are flagged ``initial`` / ``final``.
            tailor_info: Rows ``{state_id: [structure, rulesets, prompt]}``.
                A non-empty ruleset list replaces that structure's entry in
                the state; a non-empty prompt sets its ``prompt_id``. Rows
                without a state id or structure name are ignored.

        Returns:
            Mapping of state id to its configuration.
        """
        states = {}
        for row in state_info:
            state_id, cells = self._unpack(row, 1)
            if not state_id:
                continue
            if state_id == "start":
                states[state_id] = {"initial": True}
            elif state_id == "end":
                states[state_id] = {"final": True}
            else:
                states[state_id] = {}
            if cells[0] and cells[0] != "none":
                names = self._split_ids(cells[0])
                if names:
                    states[state_id]["structures"] = {name: {} for name in names}

        for row in tailor_info:
            state_id, cells = self._unpack(row, 3)
            structure_name, ruleset_text, prompt_id = cells[0], cells[1], cells[2]
            if not state_id or not structure_name:
                continue
            # Update the state in place so flags such as "initial"/"final"
            # survive tailoring instead of being overwritten.
            structures = states.setdefault(state_id, {}).setdefault("structures", {})
            ruleset_ids = self._split_ids(ruleset_text)
            if ruleset_ids:
                structures[structure_name] = {"ruleset_ids": ruleset_ids}
            if prompt_id:
                # setdefault keeps a prompt-only override even when the
                # structure was not listed for this state beforehand.
                structures.setdefault(structure_name, {})["prompt_id"] = prompt_id
        return states

    def csv_transition_id(self, transition_info: list) -> dict:
        """Group ``from`` (cell 0) / ``to`` (cell 1) transitions by event id.

        Rows missing the id or either endpoint are ignored. Transitions for
        a repeated event id are appended in row order.
        """
        events = {}
        for row in transition_info:
            event_id, cells = self._unpack(row, 2)
            source, target = cells[0], cells[1]
            if event_id and source and target:
                event = events.setdefault(event_id, {"transitions": []})
                event["transitions"].append({"from": source, "to": target})
        return events

    def update_and_save(self, config: dict) -> None:
        """Overwrite the YAML config file with ``config`` in block style."""
        with self.yaml_path.open("w") as file:
            yaml.dump(config, file, default_flow_style=False, line_break="\n")
if __name__ == "__main__":
    # Script entry point: regenerate config.yaml for the "uw_programmatic"
    # project directory (resolved against the current working directory).
    parser = CsvParser("uw_programmatic")
    parser.csv_parser()