# TODO: create a csv parser
from __future__ import annotations

import csv
from pathlib import Path
from typing import TYPE_CHECKING

import yaml

if TYPE_CHECKING:
    from io import TextIOWrapper
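
# Expected on-disk layout, inferred from __init__ below ("uw_programmatic" in
# __main__ is simply the directory this repo happens to use):
#   <directory>/config.yaml   <- YAML config that csv_parser() updates in place
#   <directory>/csv_files/    <- one CSV export per section to be parsed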


class CsvParser:
    def __init__(self, directory: str) -> None:
        # Resolve the config file and the folder of CSV exports relative to cwd.
        self.yaml_path = Path.cwd() / directory / "config.yaml"
        self.csv_directory = Path.cwd() / directory / "csv_files"
        csv_files = self.csv_directory.glob("*")
        self.csv_file_paths = [file for file in csv_files if file.is_file()]

    def csv_parser(self) -> None:
        """Parse every CSV in csv_files/, split each into sections, and merge the result into config.yaml."""
        # This parses multiple different csv files this time.
        split_csv = {}
        for csv_file in self.csv_file_paths:
            with csv_file.open("r", newline="") as csvfile:
                self.split_csv(csvfile, split_csv)
        # split_csv should now hold every section, keyed by its header.
        yaml_data = yaml.safe_load(self.yaml_path.read_text())
        # Rulesets CHANGE
        try:
            yaml_data["rulesets"] = self.csv_rulesets(
                split_csv["Ruleset ID"]
            )  # Rulesets
        except KeyError:
            print("No rulesets")
        # Agents DONE
        try:
            yaml_data["structures"] = self.csv_agents(
                split_csv["Agent ID"]
            )  # Agent Definitions
        except KeyError:
            print("No structures")
        # States
        # Tailoring (affects the states section only) CHANGE
        try:
            yaml_data["states"] = self.csv_states(
                split_csv["State ID"],  # State Definitions
                split_csv.get("State ID to Tailor", []),  # Agent Tailoring State ID
            )
        except KeyError:
            print("No states")
        try:
            yaml_data["prompts"] = self.csv_prompts(split_csv["Prompt ID"])
        except KeyError:
            print("No prompts")
        # Transitioning (affects event section) DONE
        try:
            yaml_data["events"] = self.csv_transition_id(
                split_csv["Transition ID"]
            )  # State Transitions
        except KeyError:
            print("No transitions")
        # That's all folks!
        self.update_and_save(yaml_data)
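
    # Illustrative shape of the config.yaml sections written above (the keys are
    # the real ones used in csv_parser; nested contents are sketched by the
    # helpers below):
    #   rulesets:   {ruleset_id: {name, rules}}
    #   structures: {agent_id: {model, ruleset_ids, ...}}
    #   states:     {state_id: {structures: {...}, initial/final flags}}
    #   prompts:    {prompt_id: {prompt, author_intent}}
    #   events:     {event_id: {transitions: [{from, to}, ...]}}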

    def split_csv(self, csv_file: TextIOWrapper, all_information: dict) -> None:
        """Split one CSV file into a dictionary entry keyed by its section header."""
        reader = csv.reader(csv_file)
        # Get the header of the section
        header = next(reader)
        header = header[0]  # Go to the meat of it (get rid of descriptive header)
        current_information = []
        for row in reader:
            key = row[0]
            # Skip rows that are empty or have no value in the first column.
            if not key or key == ",,":
                continue
            current_information.append({key: row[1:]})
        all_information[header] = current_information
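
    # Illustrative output of split_csv (section names come from the CSV headers
    # above; the row values here are invented examples, not real data):
    #   {
    #       "Ruleset ID": [{"ruleset_1": ["Ruleset name", "- rule one\n- rule two"]}],
    #       "Agent ID": [{"agent_1": ["ruleset_1, ruleset_2", "kb_1", "prompt_1", "", ""]}],
    #   }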

    def csv_kbs(self, kb_info: list) -> dict:
        """Build {kb_id: {file_path, file_type}} knowledge-base entries (not currently called by csv_parser)."""
        dictionary = {}
        for row in kb_info:
            key, value = row.popitem()
            if key and value[0] and value[1]:
                dictionary[key] = {"file_path": value[0], "file_type": value[1]}
        return dictionary

    def csv_rulesets(self, ruleset_info: list) -> dict:
        """Build {ruleset_id: {name, rules}} from the Ruleset ID section."""
        dictionary = {}
        for row in ruleset_info:
            key, value = row.popitem()
            if key and value[0] and value[1]:
                # The rules cell is a newline-separated, bullet-style list.
                rules = [
                    rule.strip().strip('"').lstrip("- ")
                    for rule in value[1].split("\n")
                    if rule.strip()
                ]
                dictionary[key] = {
                    "name": value[0],
                    "rules": rules,
                }  # Will have to check this.
        return dictionary
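
    # Example of the rules-cell handling above (the cell text is an invented
    # sample): a cell like '- be concise\n- cite sources' becomes
    # ["be concise", "cite sources"].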

    def csv_prompts(self, prompt_info: list) -> dict:
        """Build {prompt_id: {prompt, author_intent}} from the Prompt ID section."""
        dictionary = {}
        for row in prompt_info:
            key, value = row.popitem()
            if key and value[0]:
                dictionary[key] = {"prompt": value[0]}
                if value[1]:
                    dictionary[key]["author_intent"] = value[1]
        return dictionary

    def csv_agents(self, agent_info: list) -> dict:
        """Build {agent_id: config} agent definitions from the Agent ID section."""
        dictionary = {}
        for row in agent_info:
            key, value = row.popitem()
            if key:
                ruleset_ids = []
                if value[0]:
                    ruleset_ids = [rule_id.strip() for rule_id in value[0].split(",")]
                config = {
                    "model": "gpt-4o",
                    "ruleset_ids": ruleset_ids,
                }
                # If there is a global KB used
                if value[1]:
                    config["vector_stores"] = [value[1]]
                # If there is a global prompt used (can be overridden by state-specific)
                if value[2]:
                    config["prompt_id"] = value[2]
                # If there is a model override
                if value[4]:
                    config["model"] = value[4]
                dictionary[key] = config
        return dictionary
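
    # Illustrative entry produced above (identifiers are invented examples):
    #   {"agent_1": {"model": "gpt-4o", "ruleset_ids": ["ruleset_1"],
    #                "vector_stores": ["kb_1"], "prompt_id": "prompt_1"}}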

    def csv_states(self, state_info: list, tailor_info: list) -> dict:
        """Build the states section, then apply per-state agent tailoring."""
        states = {}
        for row in state_info:
            key, value = row.popitem()
            if not key:
                continue
            if key == "start":
                states[key] = {"initial": True}
            elif key == "end":
                states[key] = {"final": True}
            else:
                states[key] = {}
            if value[0] and value[0] != "none":
                agent_list = {name.strip(): {} for name in value[0].split(",")}
                states[key]["structures"] = agent_list
        for row in tailor_info:
            tailor, value = row.popitem()
            if not tailor:
                continue
            # Start from whatever structures the state definition already has.
            structures = states.get(tailor, {}).get("structures", {})
            structure = value
            structure_name = structure[0]
            # Make sure the tailored structure exists even if no override follows.
            structures.setdefault(structure_name, {})
            # Per-state ruleset override (the cell is a comma-separated list).
            try:
                structure_ruleset_list = [
                    item.strip()
                    for item in structure[1].split(",")
                    if item.strip() != ""
                ]
                if structure_ruleset_list:
                    structures[structure_name] = {
                        "ruleset_ids": structure_ruleset_list,
                    }
            except IndexError:
                pass
            # Per-state prompt override, if present.
            try:
                if structure[2]:
                    structures[structure_name]["prompt_id"] = structure[2]
            except IndexError:
                pass
            # Keep any initial/final flags already set on the state.
            states.setdefault(tailor, {})["structures"] = structures
        return states
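
    # Illustrative states entry after tailoring (state and agent names are
    # invented examples):
    #   {"start": {"initial": True},
    #    "ask_question": {"structures": {"agent_1": {"ruleset_ids": ["ruleset_2"],
    #                                                "prompt_id": "prompt_3"}}},
    #    "end": {"final": True}}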

    def csv_transition_id(self, transition_info: list) -> dict:
        """Build {event_id: {"transitions": [...]}} from the Transition ID section."""
        events = {}
        for row in transition_info:
            key, value = row.popitem()
            if key and value[0] and value[1]:
                if key in events:
                    # Add the transition if there already are transitions
                    events[key]["transitions"].append(
                        {"from": value[0], "to": value[1]}
                    )
                else:
                    # Create the first transition for this event
                    events[key] = {
                        "transitions": [
                            {"from": value[0], "to": value[1]},
                        ]
                    }
        return events
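
    # Roughly what one event looks like once dumped into config.yaml
    # (event and state names are invented examples):
    #   events:
    #     next_step:
    #       transitions:
    #       - from: ask_question
    #         to: end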

    def update_and_save(self, config: dict) -> None:
        """Write the updated config back out to config.yaml."""
        with self.yaml_path.open("w") as file:
            yaml.dump(config, file, default_flow_style=False, line_break="\n")


if __name__ == "__main__":
    CsvParser("uw_programmatic").csv_parser()