# Spaces: griptape / uw-teaching-effectiveness (status: Sleeping)
# File size: 8,414 Bytes
# Commit: d477d5c

# CSV parser: converts the section CSV files under <directory>/csv_files into config.yaml entries.
from __future__ import annotations

from ast import Lambda
import contextlib
import csv
from pathlib import Path
from typing import TYPE_CHECKING, Callable

import yaml

if TYPE_CHECKING:
    from io import TextIOWrapper


class CsvParser:
    """Build the sections of ``config.yaml`` from a directory of CSV files.

    Every file in ``<directory>/csv_files`` is read as one section. The first
    cell of the file's first non-empty row names the section (for example
    ``"Ruleset ID"``); every following row is stored as ``{first_cell:
    remaining_cells}``. The ``csv_*`` methods turn those row lists into the
    dictionaries written under the ``rulesets``, ``structures``, ``states``,
    ``prompts`` and ``events`` keys of the YAML file.

    The ``csv_*`` methods never modify the row lists they are given and treat
    missing trailing cells as empty strings.
    """

    # Model assigned to an agent whose row carries no model override.
    DEFAULT_MODEL = "gpt-4o"

    def __init__(self, directory: str) -> None:
        """Locate ``config.yaml`` and the CSV files below ``directory``.

        Args:
            directory: Directory (relative to the current working directory)
                holding ``config.yaml`` and a ``csv_files`` sub-directory.
        """
        base = Path.cwd() / directory
        self.yaml_path = base / "config.yaml"
        self.csv_directory = base / "csv_files"
        # Sorted so that the processing order does not depend on the
        # file-system listing order (a later file with the same section
        # header replaces an earlier one).
        self.csv_file_paths = sorted(
            path for path in self.csv_directory.glob("*") if path.is_file()
        )

    @staticmethod
    def _row_item(row: dict) -> tuple[str, list]:
        """Return the single ``(key, cells)`` pair of a row without mutating it.

        An empty row dictionary yields ``("", [])`` so callers skip it the same
        way they skip rows with an empty key.
        """
        return next(iter(row.items()), ("", []))

    @staticmethod
    def _cell(cells: list, index: int) -> str:
        """Return ``cells[index]``, or ``""`` when the row is too short."""
        return cells[index] if index < len(cells) else ""

    @staticmethod
    def _split_ids(text: str) -> list:
        """Split a comma-separated cell into stripped, non-empty ids."""
        return [part.strip() for part in text.split(",") if part.strip()]

    def _set_section(
        self,
        yaml_data: dict,
        yaml_key: str,
        sections: dict,
        header: str,
        builder,
        missing_message: str,
    ) -> None:
        """Store ``builder(sections[header])`` under ``yaml_key`` if present.

        When the section was not found in any CSV file, ``missing_message`` is
        printed and ``yaml_data`` is left untouched.
        """
        if header in sections:
            yaml_data[yaml_key] = builder(sections[header])
        else:
            print(missing_message)

    def csv_parser(self) -> None:
        """Read every CSV file and write the resulting sections to config.yaml.

        Sections that are absent from the CSV files keep whatever value the
        existing YAML file already has; a short notice is printed for each.
        """
        sections: dict = {}
        for csv_path in self.csv_file_paths:
            with csv_path.open("r", newline="") as handle:
                self.split_csv(handle, sections)

        # safe_load returns None for an empty document; fall back to a dict so
        # the section assignments below work on a fresh config file.
        yaml_data = yaml.safe_load(self.yaml_path.read_text()) or {}

        # Presence is checked explicitly (instead of catching KeyError around
        # the builder call) so that errors inside a builder are not mistaken
        # for a missing section.
        self._set_section(
            yaml_data, "rulesets", sections, "Ruleset ID",
            self.csv_rulesets, "No rulesets",
        )
        self._set_section(
            yaml_data, "structures", sections, "Agent ID",
            self.csv_agents, "No structures",
        )
        if "State ID" in sections:
            # Tailoring only affects the states section and is optional.
            yaml_data["states"] = self.csv_states(
                sections["State ID"],
                sections.get("State ID to Tailor", []),
            )
        else:
            print(" no states")
        self._set_section(
            yaml_data, "prompts", sections, "Prompt ID",
            self.csv_prompts, "no prompts",
        )
        self._set_section(
            yaml_data, "events", sections, "Transition ID",
            self.csv_transition_id, "No transitions",
        )
        self.update_and_save(yaml_data)

    def split_csv(self, csv_file: TextIOWrapper, all_information: dict) -> None:
        """Parse one CSV file and store its rows under its section header.

        The first cell of the first non-empty row is the section name. Each
        later row whose first cell is non-empty is stored as
        ``{first_cell: [remaining cells]}``. Blank lines and rows with an empty
        first cell are skipped. A file without any non-empty row adds nothing.

        Args:
            csv_file: Open text file (or any iterable of lines) to parse.
            all_information: Mapping updated in place; the section name maps to
                the list of parsed rows.
        """
        reader = csv.reader(csv_file)
        header_row = next((row for row in reader if row), None)
        if header_row is None:
            return
        # csv.reader yields [] for blank lines, so guard before indexing.
        all_information[header_row[0]] = [
            {row[0]: row[1:]} for row in reader if row and row[0]
        ]

    def csv_kbs(self, kb_info: list) -> dict:
        """Map each knowledge-base id to its ``file_path`` and ``file_type``.

        Rows lacking an id, a path (cell 0) or a type (cell 1) are skipped.
        """
        knowledge_bases = {}
        for row in kb_info:
            key, cells = self._row_item(row)
            file_path = self._cell(cells, 0)
            file_type = self._cell(cells, 1)
            if key and file_path and file_type:
                knowledge_bases[key] = {
                    "file_path": file_path,
                    "file_type": file_type,
                }
        return knowledge_bases

    def csv_rulesets(self, ruleset_info: list) -> dict:
        """Map each ruleset id to its ``name`` (cell 0) and ``rules`` (cell 1).

        The rules cell holds one rule per line; surrounding whitespace and
        double quotes and a leading ``- `` bullet are removed from each rule,
        and empty lines are dropped. Rows lacking an id, a name or a rules cell
        are skipped.
        """
        rulesets = {}
        for row in ruleset_info:
            key, cells = self._row_item(row)
            name = self._cell(cells, 0)
            raw_rules = self._cell(cells, 1)
            if not (key and name and raw_rules):
                continue
            rules = [
                line.strip().strip('"').lstrip("- ")
                for line in raw_rules.split("\n")
                if line.strip()
            ]
            rulesets[key] = {"name": name, "rules": rules}
        return rulesets

    def csv_prompts(self, prompt_info: list) -> dict:
        """Map each prompt id to its ``prompt`` text (cell 0).

        ``author_intent`` (cell 1) is included only when it is non-empty. Rows
        lacking an id or a prompt text are skipped.
        """
        prompts = {}
        for row in prompt_info:
            key, cells = self._row_item(row)
            prompt_text = self._cell(cells, 0)
            if not (key and prompt_text):
                continue
            entry = {"prompt": prompt_text}
            author_intent = self._cell(cells, 1)
            if author_intent:
                entry["author_intent"] = author_intent
            prompts[key] = entry
        return prompts

    def csv_agents(self, agent_info: list) -> dict:
        """Map each agent id to its structure configuration.

        Cells used (relative to the cells after the id): 0 = comma-separated
        ruleset ids, 1 = vector store, 2 = prompt id, 4 = model override.
        Cell 3 is not read by this method. Rows with an empty id are skipped.
        """
        agents = {}
        for row in agent_info:
            key, cells = self._row_item(row)
            if not key:
                continue
            config = {
                "model": self._cell(cells, 4) or self.DEFAULT_MODEL,
                "ruleset_ids": self._split_ids(self._cell(cells, 0)),
            }
            # Global knowledge base shared by every state the agent is in.
            vector_store = self._cell(cells, 1)
            if vector_store:
                config["vector_stores"] = [vector_store]
            # Global prompt; a state-specific prompt can override it.
            prompt_id = self._cell(cells, 2)
            if prompt_id:
                config["prompt_id"] = prompt_id
            agents[key] = config
        return agents

    def csv_states(self, state_info: list, tailor_info: list) -> dict:
        """Build the ``states`` section, then apply per-state agent tailoring.

        Args:
            state_info: Rows ``{state_id: [agents, ...]}`` where ``agents`` is a
                comma-separated list of structure names, empty, or ``"none"``.
                The ids ``"start"`` and ``"end"`` are flagged ``initial`` and
                ``final`` respectively.
            tailor_info: Rows ``{state_id: [structure_name, ruleset_ids,
                prompt_id]}``. A non-empty ruleset list replaces the
                structure's entry in that state; a non-empty prompt id is added
                to the entry, creating it if needed.

        Returns:
            Mapping of state id to its configuration. Tailoring keeps the
            ``initial``/``final`` flags of the state it modifies.
        """
        states = {}
        for row in state_info:
            state_id, cells = self._row_item(row)
            if not state_id:
                continue
            if state_id == "start":
                state = {"initial": True}
            elif state_id == "end":
                state = {"final": True}
            else:
                state = {}
            agents = self._cell(cells, 0)
            if agents and agents != "none":
                state["structures"] = {name: {} for name in self._split_ids(agents)}
            states[state_id] = state

        for row in tailor_info:
            state_id, cells = self._row_item(row)
            if not state_id:
                continue
            # Update the existing entry in place so flags such as "initial"
            # are not lost when a state is tailored.
            structures = states.setdefault(state_id, {}).setdefault("structures", {})
            # Stripped to match the names produced from the state rows above.
            structure_name = self._cell(cells, 0).strip()
            if not structure_name:
                continue
            ruleset_ids = self._split_ids(self._cell(cells, 1))
            if ruleset_ids:
                structures[structure_name] = {"ruleset_ids": ruleset_ids}
            prompt_id = self._cell(cells, 2)
            if prompt_id:
                structures.setdefault(structure_name, {})["prompt_id"] = prompt_id
        return states

    def csv_transition_id(self, transition_info: list) -> dict:
        """Group transitions by event id.

        Each row ``{event_id: [from_state, to_state]}`` adds one
        ``{"from": ..., "to": ...}`` entry to that event's ``transitions``
        list, in row order. Rows lacking an id, a source or a target are
        skipped.
        """
        events = {}
        for row in transition_info:
            key, cells = self._row_item(row)
            source = self._cell(cells, 0)
            target = self._cell(cells, 1)
            if key and source and target:
                event = events.setdefault(key, {"transitions": []})
                event["transitions"].append({"from": source, "to": target})
        return events

    def update_and_save(self, config: dict) -> None:
        """Overwrite ``config.yaml`` with ``config`` in block style."""
        with self.yaml_path.open("w") as file:
            yaml.dump(config, file, default_flow_style=False, line_break="\n")


if __name__ == "__main__":
    # Script entry point: regenerate config.yaml for the "uw_programmatic"
    # directory, resolved against the current working directory.
    parser = CsvParser("uw_programmatic")
    parser.csv_parser()