Commit d477d5c
Parent(s): 29e76cb

first commit

Files changed:
- .gitignore +69 -0
- LICENSE +201 -0
- app.py +99 -0
- griptape_statemachine/__init__.py +0 -0
- griptape_statemachine/parsers/__init__.py +5 -0
- griptape_statemachine/parsers/base_parser.py +11 -0
- griptape_statemachine/parsers/uw_config_parser.py +107 -0
- griptape_statemachine/parsers/uw_csv_parser.py +226 -0
- poetry.lock +0 -0
- pyproject.toml +75 -0
- requirements.txt +11 -0
- uw_programmatic/__init__.py +0 -0
- uw_programmatic/base_machine.py +496 -0
- uw_programmatic/config.yaml +119 -0
- uw_programmatic/question_pipeline.py +300 -0
- uw_programmatic/uw_machine.py +265 -0
.gitignore
ADDED
@@ -0,0 +1,69 @@
+.env
+.idea
+.DS_Store
+.huskyrc.json
+out
+log.log
+**/node_modules
+*.pyc
+*.vsix
+**/.vscode/.ropeproject/**
+**/testFiles/**/.cache/**
+*.noseids
+.nyc_output
+.vscode-test
+__pycache__
+npm-debug.log
+**/.mypy_cache/**
+!yarn.lock
+cucumber-report.json
+**/.vscode-test/**
+**/.vscode test/**
+**/.vscode-smoke/**
+**/.venv*/
+port.txt
+precommit.hook
+pythonFiles/lib/**
+debug_coverage*/**
+languageServer/**
+languageServer.*/**
+bin/**
+obj/**
+.pytest_cache
+tmp/**
+.python-version
+.vs/
+test-results*.xml
+xunit-test-results.xml
+build/ci/performance/performance-results.json
+!build/
+debug*.log
+debugpy*.log
+pydevd*.log
+nodeLanguageServer/**
+nodeLanguageServer.*/**
+dist/**
+*.egg-info
+
+# translation files
+*.xlf
+*.nls.*.json
+*.i18n.json
+
+# asdf
+.tool-versions
+
+# mkdocs build output
+site
+reference
+
+# coverage.py
+htmlcov/
+coverage.*
+
+# knowledge base material
+uw_machines/relevant_knowledge/
+
+# Outputs from the runs
+outputs/professor_guide.xlsx
+outputs/similarity_step.csv
LICENSE
ADDED
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
app.py
ADDED
@@ -0,0 +1,99 @@
+from __future__ import annotations
+from pathlib import Path
+import gradio as gr
+from typing import Any, Callable
+import contextvars
+from uw_programmatic.uw_machine import UWMachine
+
+
+def run_with_context(func: Callable) -> Callable:
+    ctx = contextvars.copy_context()
+
+    def wrapper(*args, **kwargs) -> Any:
+        return ctx.run(func, *args, **kwargs)
+
+    return wrapper
+
+
+def generate_questions(
+    page_lower, page_higher, question_number, taxonomy
+) -> tuple[str, dict[str, Any]]:
+    if machine.value and machine.value.current_state_value == "start":
+        machine.value.start_machine()  # Start the machine!
+    if not question_number or question_number <= 0:
+        msg = "Choose a valid question number."
+        raise gr.Error(msg)
+    if not page_lower or not page_higher or page_higher < page_lower:
+        msg = "Choose a valid page range."
+        raise gr.Error(msg)
+    if page_higher - page_lower <= 6:
+        msg = "Page range must be >6."
+        raise gr.Error(msg)
+    if not taxonomy or len(taxonomy) == 0:
+        msg = "Choose at least one taxonomy."
+        raise gr.Error(msg)
+    machine.value.send(
+        "process_event",
+        event_={
+            "type": "user_input",
+            "value": {
+                "page_range": (page_lower, page_higher),
+                "question_number": question_number,
+                "taxonomy": taxonomy,
+            },
+        },
+    )
+    return (
+        "## Questions Ready for Download Below",
+        gr.update(
+            visible=True, value=f"{Path.cwd().joinpath('outputs/professor_guide.xlsx')}"
+        ),
+    )
+
+
+def create_statemachine() -> None:
+    # Creates UWMachine from the config.yaml in the current directory
+    cwd_path = Path.cwd() / "uw_programmatic"
+    config_path = cwd_path.joinpath(Path("config.yaml"))
+    try:
+        machine.value = UWMachine.from_config_file(config_path)
+    except Exception as e:
+        raise gr.Error(str(e)) from e
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("# UW Quiz Generator")
+    machine = gr.State(value=None)
+    with gr.Row():
+        with gr.Column(scale=2):
+            taxonomy = gr.CheckboxGroup(
+                choices=["Knowledge", "Comprehension", "Application"],
+                label="Taxonomy",
+                value="Knowledge",
+            )
+            question_number = gr.Number(
+                minimum=1, maximum=15, label="Number of Questions", value=3
+            )
+            gr.Markdown("For Chapter 3 - Pages 88-309")
+            with gr.Row():
+                page_lower = gr.Number(
+                    label="First Page", minimum=88, value=88, maximum=309
+                )
+                page_higher = gr.Number(
+                    label="Last Page", minimum=88, value=309, maximum=309
+                )
+            start_button = gr.Button(value="Generate Questions")
+
+        with gr.Column(scale=1):
+            output = gr.Markdown("## Questions Not Ready for Download", visible=True)
+            download_professor = gr.DownloadButton(
+                label="Download Questions", visible=False
+            )
+    create_statemachine()
+    start_button.click(
+        fn=run_with_context(generate_questions),
+        inputs=[page_lower, page_higher, question_number, taxonomy],
+        outputs=[output, download_professor],
+    )
+# TODO: Add a username and password here.
+demo.launch()
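
Aside: run_with_context above matters because Gradio may invoke event handlers on a different thread than the one that built the app. The snippet below is a minimal, self-contained sketch of the same idea (the request_id variable is illustrative, not part of this commit): the wrapper snapshots the caller's contextvars context once at wrap time, and every later call runs inside that snapshot.

import contextvars
from typing import Any, Callable

request_id = contextvars.ContextVar("request_id", default="unset")


def run_with_context(func: Callable) -> Callable:
    # Snapshot the current context at wrap time, not at call time.
    ctx = contextvars.copy_context()

    def wrapper(*args, **kwargs) -> Any:
        return ctx.run(func, *args, **kwargs)

    return wrapper


request_id.set("abc-123")
wrapped = run_with_context(lambda: request_id.get())
request_id.set("changed-later")
print(wrapped())  # prints "abc-123": the call runs in the earlier snapshot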
griptape_statemachine/__init__.py
ADDED
File without changes
griptape_statemachine/parsers/__init__.py
ADDED
@@ -0,0 +1,5 @@
+from .base_parser import BaseParser
+from .uw_config_parser import UWConfigParser
+from .uw_csv_parser import CsvParser
+
+__all__ = ["UWConfigParser", "BaseParser", "CsvParser"]
griptape_statemachine/parsers/base_parser.py
ADDED
@@ -0,0 +1,11 @@
+from abc import ABC, abstractmethod
+from pathlib import Path
+from attrs import define, field
+
+
+@define()
+class BaseParser(ABC):
+    file_path: Path = field()
+
+    @abstractmethod
+    def parse(self) -> dict: ...
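
BaseParser above is just an attrs class plus an abstract parse(): subclasses inherit the file_path field and only implement parse(). A minimal sketch of the pattern (JsonParser is a hypothetical illustration, not a file in this commit):

import json
from pathlib import Path

from attrs import define

from griptape_statemachine.parsers import BaseParser


@define()
class JsonParser(BaseParser):
    def parse(self) -> dict:
        # file_path is inherited from the BaseParser attrs field
        return json.loads(self.file_path.read_text())


data = JsonParser(file_path=Path("settings.json")).parse()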
griptape_statemachine/parsers/uw_config_parser.py
ADDED
@@ -0,0 +1,107 @@
+from __future__ import annotations
+
+import schema
+import yaml
+from attrs import define
+from yaml.resolver import Resolver
+
+from griptape_statemachine.parsers.base_parser import BaseParser
+
+
+STRUCTURE_SCHEMA = schema.Schema(
+    {
+        schema.Optional("model"): str,
+        schema.Optional("ruleset_ids"): [str],
+        schema.Optional("vector_stores"): [str],
+        schema.Optional("prompt_id"): str,
+    }
+)
+
+CONFIG_SCHEMA = schema.Schema(
+    {
+        "rulesets": schema.Schema(
+            {
+                str: schema.Schema(
+                    {
+                        "name": str,
+                        "rules": [str],
+                    }
+                )
+            }
+        ),
+        # Added for vector stores
+        schema.Optional("vector_stores"): schema.Schema(
+            {
+                str: schema.Schema(
+                    {
+                        "file_path": str,
+                        "file_type": str,
+                        schema.Optional("max_tokens"): int,
+                    }
+                )
+            }
+        ),
+        "structures": schema.Schema({str: STRUCTURE_SCHEMA}),
+        "events": schema.Schema(
+            {
+                str: schema.Schema(
+                    {
+                        "transitions": [
+                            schema.Schema(
+                                {
+                                    "from": str,
+                                    "to": str,
+                                    schema.Optional("internal"): bool,
+                                    schema.Optional("on"): str,
+                                    schema.Optional("relevance"): str,
+                                }
+                            )
+                        ],
+                    }
+                )
+            }
+        ),
+        "states": schema.Schema(
+            {
+                str: schema.Schema(
+                    {
+                        schema.Optional(
+                            schema.Or("initial", "final")
+                        ): bool,  # pyright: ignore[reportArgumentType]
+                        schema.Optional("structures"): schema.Schema(
+                            {str: STRUCTURE_SCHEMA}
+                        ),
+                    }
+                )
+            }
+        ),
+        schema.Optional("prompts"): {
+            str: {schema.Optional("author_intent"): str, "prompt": str}
+        },
+    }
+)
+
+
+@define()
+class UWConfigParser(BaseParser):
+    def __attrs_post_init__(self) -> None:
+        # remove resolver entries for On/Off/Yes/No
+        for ch in "OoYyNn":
+            if ch in Resolver.yaml_implicit_resolvers:
+                if len(Resolver.yaml_implicit_resolvers[ch]) == 1:
+                    del Resolver.yaml_implicit_resolvers[ch]
+                else:
+                    Resolver.yaml_implicit_resolvers[ch] = [
+                        x
+                        for x in Resolver.yaml_implicit_resolvers[ch]
+                        if x[0] != "tag:yaml.org,2002:bool"
+                    ]
+
+    def parse(self) -> dict:
+        data = yaml.safe_load(self.file_path.read_text())
+        CONFIG_SCHEMA.validate(data)
+        return data
+
+    def update_and_save(self, config: dict) -> None:
+        with self.file_path.open("w") as file:
+            yaml.dump(config, file, default_flow_style=False, line_break="\n")
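
For reference, here is a minimal config that should satisfy CONFIG_SCHEMA above (names and values are illustrative, not taken from the repo's config.yaml): all four required top-level keys are present, and the nested shapes match the sub-schemas.

import yaml

from griptape_statemachine.parsers.uw_config_parser import CONFIG_SCHEMA

minimal_config = yaml.safe_load(
    """
rulesets:
  expert_rules:
    name: Expert
    rules:
      - Answer in JSON only.
structures:
  subject_matter_expert:
    model: gpt-4o
    ruleset_ids: [expert_rules]
events:
  next_state:
    transitions:
      - from: start
        to: end
states:
  start:
    initial: true
  end:
    final: true
"""
)

CONFIG_SCHEMA.validate(minimal_config)  # raises schema.SchemaError if malformed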
griptape_statemachine/parsers/uw_csv_parser.py
ADDED
@@ -0,0 +1,226 @@
+# TODO: create a csv parser
+from __future__ import annotations
+
+from ast import Lambda
+import contextlib
+import csv
+from pathlib import Path
+from typing import TYPE_CHECKING, Callable
+
+import yaml
+
+if TYPE_CHECKING:
+    from io import TextIOWrapper
+
+
+class CsvParser:
+
+    def __init__(self, directory: str) -> None:
+        self.yaml_path = Path.joinpath(Path.cwd(), Path(f"{directory}/config.yaml"))
+        self.csv_directory = Path.joinpath(Path.cwd(), Path(f"{directory}/csv_files"))
+        csv_files = Path(self.csv_directory).glob("*")
+        self.csv_file_paths = [file for file in csv_files if file.is_file()]
+
+    def csv_parser(self) -> None:
+        """This is going to take in a big csv, split it, and put it in config.yaml"""
+        # This is going to parse multiple different csv files this time.
+        split_csv = {}
+        for csv_file in self.csv_file_paths:
+            with Path.open(csv_file, "r", newline="") as csvfile:
+                self.split_csv(csvfile, split_csv)
+        # split_csv should have all the information
+        yaml_data = yaml.safe_load(self.yaml_path.read_text())
+        # Rulesets CHANGE
+        try:
+            yaml_data["rulesets"] = self.csv_rulesets(
+                split_csv["Ruleset ID"]
+            )  # Rulesets
+        except KeyError:
+            print("No rulesets")
+        # Agents DONE
+        try:
+            yaml_data["structures"] = self.csv_agents(
+                split_csv["Agent ID"]
+            )  # Agent Definitions
+        except KeyError:
+            print("No structures")
+        # States
+        # Tailoring (affects the states section only) CHANGE
+        if "State ID to Tailor" in split_csv:
+            try:
+                yaml_data["states"] = self.csv_states(
+                    split_csv["State ID"],  # State Definitions
+                    split_csv["State ID to Tailor"],  # Agent Tailoring State ID
+                )
+            except KeyError:
+                print(" no states")
+        else:
+            try:
+                yaml_data["states"] = self.csv_states(
+                    split_csv["State ID"],  # State Definitions
+                    [],  # Agent Tailoring State ID
+                )
+            except KeyError:
+                print(" no states")
+        try:
+            yaml_data["prompts"] = self.csv_prompts(split_csv["Prompt ID"])
+        except KeyError:
+            print("no prompts")
+        # # Transitioning (affects event section) DONE
+        try:
+            yaml_data["events"] = self.csv_transition_id(
+                split_csv["Transition ID"]
+            )  # State Transitions
+        except KeyError:
+            print("No transitions")
+        # That's all folks!
+        self.update_and_save(yaml_data)
+
+    def split_csv(self, csv_file: TextIOWrapper, all_information: dict) -> None:
+        """Takes in a csv_file, and splits it into a dictionary that is headed by each of the sections.
+        Hooray!
+        """
+        reader = csv.reader(csv_file)
+        # Get the header of the section
+        header = next(reader)
+        header = header[0]  # Go to the meat of it (get rid of descriptive header)
+        current_information = []
+        for row in reader:
+            key = row[0]
+            # If the row is empty and/or has no value in the first column.
+            if key == ",,":
+                continue
+            current_information.append({key: row[1:]})
+        all_information[header] = current_information
+
+    def csv_kbs(self, kb_info: list) -> dict:
+        dictionary = {}
+        for row in kb_info:
+            key, value = row.popitem()
+            if key and value[0] and value[1]:
+                dictionary[key] = {"file_path": value[0], "file_type": value[1]}
+        return dictionary
+
+    def csv_rulesets(self, ruleset_info: list) -> dict:
+        dictionary = {}
+        for row in ruleset_info:
+            key, value = row.popitem()
+            if key and value[0] and value[1]:
+                rules = [
+                    rule.strip().strip('"').lstrip("- ")
+                    for rule in value[1].split("\n")
+                    if rule.strip()
+                ]
+                dictionary[key] = {
+                    "name": value[0],
+                    "rules": rules,
+                }  # Will have to check this.
+        return dictionary
+
+    def csv_prompts(self, prompt_info: list) -> dict:
+        dictionary = {}
+        for row in prompt_info:
+            key, value = row.popitem()
+            if key and value[0]:
+                dictionary[key] = {"prompt": value[0]}
+                if value[1]:
+                    dictionary[key]["author_intent"] = value[1]
+        return dictionary
+
+    def csv_agents(self, agent_info: list) -> dict:
+        dictionary = {}
+        for row in agent_info:
+            key, value = row.popitem()
+            if key:
+                ruleset_ids = []
+                if value[0]:
+                    ruleset_ids = [rule_id.strip() for rule_id in value[0].split(",")]
+                config = {
+                    "model": "gpt-4o",
+                    "ruleset_ids": ruleset_ids,
+                }
+                # If there is a global KB used
+                if value[1]:
+                    config["vector_stores"] = [value[1]]
+                # If there is a global prompt used (can be overridden by state specific)
+                if value[2]:
+                    config["prompt_id"] = value[2]
+                # If there is a model override
+                if value[4]:
+                    config["model"] = value[4]
+                dictionary[key] = config
+        return dictionary
+
+    def csv_states(self, state_info: list, tailor_info: list) -> dict:
+        states = {}
+        for row in state_info:
+            key, value = row.popitem()
+            if not key:
+                continue
+            if key == "start":
+                states[key] = {"initial": True}
+            elif key == "end":
+                states[key] = {"final": True}
+            else:
+                states[key] = {}
+                if value[0] and value[0] != "none":
+                    agent_list = {name.strip(): {} for name in value[0].split(",")}
+                    states[key]["structures"] = agent_list
+        for row in tailor_info:
+            tailor, value = row.popitem()
+            if not tailor:
+                continue
+            structures = (
+                states[tailor]["structures"]
+                if tailor in states and "structures" in states[tailor]
+                else {}
+            )
+            structure = value
+            structure_name = structure[0]
+            # if ruleset
+            try:
+                structure_ruleset = structure[1]
+                structure_ruleset_list = []
+                for item in structure_ruleset.split(","):
+                    if item.strip() != "":
+                        structure_ruleset_list.append(item.strip())
+                if len(structure_ruleset_list):
+                    structures[structure_name] = {
+                        "ruleset_ids": structure_ruleset_list,
+                    }
+            except KeyError:
+                structures[structure_name] = {}
+            try:
+                if structure[2]:
+                    structures[structure_name]["prompt_id"] = structure[2]
+            except KeyError:
+                pass
+            states[tailor] = {"structures": structures}
+        return states
+
+    def csv_transition_id(self, transition_info: list) -> dict:
+        events = {}
+        for row in transition_info:
+            key, value = row.popitem()
+            if key and value[0] and value[1]:
+                if key in events:
+                    # Add the transition if there already are transitions
+                    events[key]["transitions"].append(
+                        {"from": value[0], "to": value[1]}
+                    )
+                else:
+                    # create the first transition
+                    events[key] = {
+                        "transitions": [
+                            {"from": value[0], "to": value[1]},
+                        ]
+                    }
+        return events
+
+    def update_and_save(self, config: dict) -> None:
+        with self.yaml_path.open("w") as file:
+            yaml.dump(config, file, default_flow_style=False, line_break="\n")
+
+
+if __name__ == "__main__":
+    CsvParser("uw_programmatic").csv_parser()
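
The CSV layout split_csv() expects is implicit in the code: the first cell of the first row names a section (e.g. "Ruleset ID"), and every later row becomes a {first_cell: rest_of_row} entry under that section. A sketch with fabricated sample rows (not from the commit); split_csv never touches instance state, so the sketch bypasses __init__, which would otherwise require a real csv_files directory:

import io

from griptape_statemachine.parsers import CsvParser

sample = io.StringIO(
    "Ruleset ID,Name,Rules\n"
    'expert_rules,Expert,"- Answer in JSON only."\n'
)

parser = CsvParser.__new__(CsvParser)  # skip __init__ for this demonstration
all_information: dict = {}
parser.split_csv(sample, all_information)
print(all_information)
# {'Ruleset ID': [{'expert_rules': ['Expert', '- Answer in JSON only.']}]}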
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
pyproject.toml
ADDED
@@ -0,0 +1,75 @@
+[tool.poetry]
+name = "griptape_statemachine"
+version = "0.1.0"
+description = ""
+authors = ["Collin Dutter <[email protected]>", "Kate Forsberg <[email protected]>"]
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.11"
+griptape = "1.0"
+python-statemachine = {extras = ["diagrams"], version = "^2.3.6"}
+pyyaml = "^6.0.2"
+schema = "^0.7.7"
+python-dotenv = "^1.0.1"
+graphviz = "^0.20.3"
+gradio = "^5.6.0"
+pydot = "^3.0.2"
+pypdf = "^5.1.0"
+PyPDF2 = "^2.2.0"
+xlsxwriter = "3.2.0"
+
+
+[tool.poetry.group.dev.dependencies]
+pyright = "^1.1.380"
+ruff = "^0.6.4"
+typos = "^1.25.0"
+pre-commit = "^3.8.0"
+
+
+[tool.poetry.group.test.dependencies]
+pytest = "^8.3.3"
+
+[tool.ruff]
+line-length = 120
+
+[tool.ruff.lint]
+select = [
+    "ALL"
+]
+ignore = [
+    "D",
+    "COM812", # missing-trailing-comma -- See https://github.com/astral-sh/ruff/issues/9216
+    "ANN003",
+    'T201',
+    "TD",
+    "FIX",
+    "E501"
+]
+[tool.ruff.lint.per-file-ignores]
+"tests/*.py" = ["S101"]
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+
+[tool.ruff.lint.flake8-pytest-style]
+fixture-parentheses = true
+
+[tool.ruff.lint.flake8-tidy-imports.banned-api]
+"attr".msg = "The attr module is deprecated, use attrs instead."
+
+[tool.pyright]
+venvPath = "."
+venv = ".venv"
+include = [
+    "griptape_statemachine"
+]
+exclude = [
+    "**/__pycache__",
+]
+pythonVersion = "3.11"
+enableExperimentalFeatures = true
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
requirements.txt
ADDED
@@ -0,0 +1,11 @@
+griptape==1.0
+python-statemachine[diagrams]==2.3.6
+pyyaml==6.0.2
+schema==0.7.7
+python-dotenv==1.0.1
+graphviz==0.20.3
+gradio==5.6.0
+pydot==3.0.2
+pypdf==5.1.0
+PyPDF2==2.2.0
+xlsxwriter==3.2.0
uw_programmatic/__init__.py
ADDED
File without changes
uw_programmatic/base_machine.py
ADDED
@@ -0,0 +1,496 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import json
|
4 |
+
import logging
|
5 |
+
import os
|
6 |
+
import random
|
7 |
+
from abc import abstractmethod
|
8 |
+
from pathlib import Path
|
9 |
+
from typing import TYPE_CHECKING, cast
|
10 |
+
|
11 |
+
import requests
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
from griptape.artifacts import ListArtifact, TextArtifact
|
14 |
+
from griptape.configs import Defaults
|
15 |
+
from griptape.configs.drivers import (
|
16 |
+
OpenAiDriversConfig,
|
17 |
+
)
|
18 |
+
from griptape.drivers import (
|
19 |
+
GriptapeCloudVectorStoreDriver,
|
20 |
+
LocalStructureRunDriver,
|
21 |
+
OpenAiChatPromptDriver,
|
22 |
+
)
|
23 |
+
from griptape.engines.rag import RagEngine
|
24 |
+
from griptape.engines.rag.modules import (
|
25 |
+
TextChunksResponseRagModule,
|
26 |
+
VectorStoreRetrievalRagModule,
|
27 |
+
)
|
28 |
+
from griptape.engines.rag.stages import ResponseRagStage, RetrievalRagStage
|
29 |
+
from griptape.events import (
|
30 |
+
BaseEvent,
|
31 |
+
EventBus,
|
32 |
+
EventListener,
|
33 |
+
FinishStructureRunEvent,
|
34 |
+
)
|
35 |
+
from griptape.memory.structure import ConversationMemory
|
36 |
+
from griptape.rules import Rule, Ruleset
|
37 |
+
from griptape.structures import Agent, Workflow
|
38 |
+
from griptape.tasks import CodeExecutionTask, StructureRunTask, ToolTask
|
39 |
+
from griptape.tools import RagTool
|
40 |
+
from statemachine import State, StateMachine
|
41 |
+
from statemachine.factory import StateMachineMetaclass
|
42 |
+
|
43 |
+
from griptape_statemachine.parsers.uw_config_parser import UWConfigParser
|
44 |
+
|
45 |
+
logger = logging.getLogger(__name__)
|
46 |
+
logging.getLogger("griptape").setLevel(logging.ERROR)
|
47 |
+
|
48 |
+
if TYPE_CHECKING:
|
49 |
+
from griptape.structures import Structure
|
50 |
+
from griptape.tools import BaseTool
|
51 |
+
from statemachine.event import Event
|
52 |
+
|
53 |
+
load_dotenv()
|
54 |
+
|
55 |
+
Defaults.drivers_config = OpenAiDriversConfig(
|
56 |
+
prompt_driver=OpenAiChatPromptDriver(model="gpt-4o", max_tokens=4096)
|
57 |
+
)
|
58 |
+
|
59 |
+
|
60 |
+
def custom_dict_merge(dict1: dict, dict2: dict) -> dict:
|
61 |
+
result = dict1.copy()
|
62 |
+
for key, value in dict2.items():
|
63 |
+
if key in result and isinstance(result[key], list) and isinstance(value, list):
|
64 |
+
result[key] = result[key] + value
|
65 |
+
else:
|
66 |
+
result[key] = value
|
67 |
+
return result
|
68 |
+
|
69 |
+
|
70 |
+
class UWBaseMachine(StateMachine):
|
71 |
+
"""Base class for a machine.
|
72 |
+
|
73 |
+
|
74 |
+
Attributes:
|
75 |
+
config_file (Path): The path to the configuration file.
|
76 |
+
config (dict): The configuration data.
|
77 |
+
outputs_to_user (list[str]): Outputs to return to the user.
|
78 |
+
"""
|
79 |
+
|
80 |
+
def __init__(self, config_file: Path, **kwargs) -> None:
|
81 |
+
self.config_parser = UWConfigParser(config_file)
|
82 |
+
self.config = self.config_parser.parse()
|
83 |
+
self._structures = {}
|
84 |
+
self.vector_stores = {} # Store here in case needs multiple uses
|
85 |
+
self.question_list: list = []
|
86 |
+
# For the parameters necessary from the user
|
87 |
+
self.page_range: tuple = ()
|
88 |
+
self.question_number: int = 0
|
89 |
+
self.taxonomy: list = []
|
90 |
+
|
91 |
+
self.state_status: dict[str, bool] = {}
|
92 |
+
|
93 |
+
for key in self.state_transitions:
|
94 |
+
self.state_status[key] = False
|
95 |
+
|
96 |
+
def on_event(event: BaseEvent) -> None:
|
97 |
+
"""Takes in griptape events from eventbus and fixes them."""
|
98 |
+
print(f"Received Griptape event: {json.dumps(event.to_dict(), indent=2)}")
|
99 |
+
try:
|
100 |
+
self.send(
|
101 |
+
"process_event",
|
102 |
+
event_={"type": "griptape_event", "value": event.to_dict()},
|
103 |
+
)
|
104 |
+
except Exception as e:
|
105 |
+
errormsg = f"Would not allow process_event to be sent. Check to see if it is defined in the config.yaml. Error:{e}"
|
106 |
+
raise ValueError(errormsg) from e
|
107 |
+
|
108 |
+
EventBus.clear_event_listeners()
|
109 |
+
EventBus.add_event_listener(
|
110 |
+
EventListener(on_event, event_types=[FinishStructureRunEvent]),
|
111 |
+
)
|
112 |
+
super().__init__()
|
113 |
+
|
114 |
+
@property
|
115 |
+
def available_events(self) -> list[str]:
|
116 |
+
return self.current_state.transitions.unique_events
|
117 |
+
|
118 |
+
@property
|
119 |
+
@abstractmethod
|
120 |
+
def tools(self) -> dict[str, BaseTool]:
|
121 |
+
"""Returns the Tools for the machine."""
|
122 |
+
...
|
123 |
+
|
124 |
+
@property
|
125 |
+
def _current_state_config(self) -> dict:
|
126 |
+
return self.config["states"][self.current_state_value]
|
127 |
+
|
128 |
+
@classmethod
|
129 |
+
def from_definition( # noqa: C901, PLR0912
|
130 |
+
cls, definition: dict, **extra_kwargs
|
131 |
+
) -> UWBaseMachine:
|
132 |
+
try:
|
133 |
+
states_instances = {}
|
134 |
+
for state_id, state_kwargs in definition["states"].items():
|
135 |
+
# These are the relevant states that need GOAP.
|
136 |
+
states_instances[state_id] = State(**state_kwargs, value=state_id)
|
137 |
+
except Exception as e:
|
138 |
+
errormsg = f"""Error in state definition: {e}.
|
139 |
+
"""
|
140 |
+
raise ValueError(errormsg) from e
|
141 |
+
|
142 |
+
events = {}
|
143 |
+
state_transitions = {}
|
144 |
+
for event_name, transitions in definition["events"].items():
|
145 |
+
for transition_data in transitions:
|
146 |
+
try:
|
147 |
+
source_name = transition_data["from"]
|
148 |
+
source = states_instances[source_name]
|
149 |
+
target = states_instances[transition_data["to"]]
|
150 |
+
relevance = ""
|
151 |
+
if "relevance" in transition_data:
|
152 |
+
relevance = transition_data["relevance"]
|
153 |
+
if source_name not in state_transitions:
|
154 |
+
state_transitions[source_name] = {event_name: relevance}
|
155 |
+
else:
|
156 |
+
state_transitions[source_name][event_name] = relevance
|
157 |
+
except Exception as e:
|
158 |
+
errormsg = f"Error:{e}. Please check your transitions to be sure each transition has a source and destination."
|
159 |
+
raise ValueError(errormsg) from e
|
160 |
+
|
161 |
+
transition = source.to(
|
162 |
+
target,
|
163 |
+
event=event_name,
|
164 |
+
cond=transition_data.get("cond"),
|
165 |
+
unless=transition_data.get("unless"),
|
166 |
+
on=transition_data.get("on"),
|
167 |
+
internal=transition_data.get("internal"),
|
168 |
+
)
|
169 |
+
|
170 |
+
if event_name in events:
|
171 |
+
events[event_name] |= transition
|
172 |
+
else:
|
173 |
+
events[event_name] = transition
|
174 |
+
for state_id, state in states_instances.items():
|
175 |
+
if state_id not in ("end", "start"):
|
176 |
+
transition = state.to(
|
177 |
+
state,
|
178 |
+
event="process_event",
|
179 |
+
on=f"on_event_{state_id}",
|
180 |
+
internal=True,
|
181 |
+
)
|
182 |
+
if "process_event" in events:
|
183 |
+
events["process_event"] |= transition
|
184 |
+
else:
|
185 |
+
events["process_event"] = transition
|
186 |
+
|
187 |
+
attrs_mapper = {
|
188 |
+
**extra_kwargs,
|
189 |
+
**states_instances,
|
190 |
+
**events,
|
191 |
+
"state_transitions": state_transitions,
|
192 |
+
}
|
193 |
+
|
194 |
+
return cast(
|
195 |
+
UWBaseMachine,
|
196 |
+
StateMachineMetaclass(cls.__name__, (cls,), attrs_mapper)(**extra_kwargs),
|
197 |
+
)
|
198 |
+
|
199 |
+
@classmethod
|
200 |
+
def from_config_file(
|
201 |
+
cls,
|
202 |
+
config_file: Path,
|
203 |
+
**extra_kwargs,
|
204 |
+
) -> UWBaseMachine:
|
205 |
+
"""Creates a StateMachine class from a configuration file"""
|
206 |
+
config_parser = UWConfigParser(config_file)
|
207 |
+
config = config_parser.parse()
|
208 |
+
extra_kwargs["config_file"] = config_file
|
209 |
+
|
210 |
+
definition_states = {
|
211 |
+
state_id: {
|
212 |
+
"initial": state_value.get("initial", False),
|
213 |
+
"final": state_value.get("final", False),
|
214 |
+
}
|
215 |
+
for state_id, state_value in config["states"].items()
|
216 |
+
}
|
217 |
+
definition_events = {
|
218 |
+
event_name: list(event_value["transitions"])
|
219 |
+
for event_name, event_value in config["events"].items()
|
220 |
+
}
|
221 |
+
definition = {"states": definition_states, "events": definition_events}
|
222 |
+
|
223 |
+
return cls.from_definition(definition, **extra_kwargs)
|
224 |
+
|
225 |
+
@abstractmethod
|
226 |
+
def start_machine(self) -> None:
|
227 |
+
"""Starts the machine."""
|
228 |
+
...
|
229 |
+
|
230 |
+
def reset_structures(self) -> None:
|
231 |
+
"""Resets the structures."""
|
232 |
+
self._structures = {}
|
233 |
+
|
234 |
+
def on_enter_state(self, source: State, state: State, event: Event) -> None:
|
235 |
+
print(f"Transitioning from {source} to {state} with event {event}")
|
236 |
+
|
237 |
+
def get_structure(self, structure_id: str) -> Structure:
|
238 |
+
global_structure_config = self.config["structures"][structure_id]
|
239 |
+
state_structure_config = self._current_state_config.get("structures", {}).get(
|
240 |
+
structure_id, {}
|
241 |
+
)
|
242 |
+
structure_config = custom_dict_merge(
|
243 |
+
global_structure_config, state_structure_config
|
244 |
+
)
|
245 |
+
if structure_id not in self._structures:
|
246 |
+
# Initialize Structure with all the expensive setup
|
247 |
+
structure = Agent(
|
248 |
+
id=structure_id,
|
249 |
+
conversation_memory=ConversationMemory(),
|
250 |
+
)
|
251 |
+
self._structures[structure_id] = structure
|
252 |
+
|
253 |
+
# Create a new clone with state-specific stuff
|
254 |
+
structure = self._structures[structure_id]
|
255 |
+
structure = Agent(
|
256 |
+
id=structure.id,
|
257 |
+
prompt_driver=structure.prompt_driver,
|
258 |
+
conversation_memory=structure.conversation_memory,
|
259 |
+
rulesets=[
|
260 |
+
*self._get_structure_rulesets(structure_config.get("ruleset_ids", [])),
|
261 |
+
],
|
262 |
+
)
|
263 |
+
print(f"Structure: {structure_id}")
|
264 |
+
for ruleset in structure.rulesets:
|
265 |
+
for rule in ruleset.rules:
|
266 |
+
print(f"Rule: {rule.value}")
|
267 |
+
return structure
|
268 |
+
|
269 |
+
def _get_structure_rulesets(self, ruleset_ids: list[str]) -> list[Ruleset]:
|
270 |
+
ruleset_configs = [
|
271 |
+
self.config["rulesets"][ruleset_id] for ruleset_id in ruleset_ids
|
272 |
+
]
|
273 |
+
|
274 |
+
# Convert ruleset configs to Rulesets
|
275 |
+
return [
|
276 |
+
Ruleset(
|
277 |
+
name=ruleset_config["name"],
|
278 |
+
rules=[Rule(rule) for rule in ruleset_config["rules"]],
|
279 |
+
)
|
280 |
+
for ruleset_config in ruleset_configs
|
281 |
+
]
|
282 |
+
|
283 |
+
def get_prompt_by_structure(self, structure_id: str) -> str | None:
|
284 |
+
try:
|
285 |
+
state_structure_config = self._current_state_config.get(
|
286 |
+
"structures", {}
|
287 |
+
).get(structure_id, {})
|
288 |
+
global_structure_config = self.config["structures"][structure_id]
|
289 |
+
except KeyError:
|
290 |
+
return None
|
291 |
+
prompt_id = None
|
292 |
+
if "prompt_id" in global_structure_config:
|
293 |
+
prompt_id = global_structure_config["prompt_id"]
|
294 |
+
elif "prompt_id" in state_structure_config:
|
295 |
+
prompt_id = state_structure_config["prompt_id"]
|
296 |
+
else:
|
297 |
+
return None
|
298 |
+
return self.config["prompts"][prompt_id]["prompt"]
|
299 |
+
|
300 |
+
def get_prompt_by_id(self, prompt_id: str) -> str | None:
|
301 |
+
prompt_config = self.config["prompts"]
|
302 |
+
if prompt_id in prompt_config:
|
303 |
+
return prompt_config[prompt_id]["prompt"]
|
304 |
+
return None
|
305 |
+
|
306 |
+
# ALL METHODS RELATING TO THE WORKFLOW AND PIPELINE
|
307 |
+
def end_workflow(self, task: CodeExecutionTask) -> ListArtifact:
|
308 |
+
parent_outputs = task.parent_outputs
|
309 |
+
questions = []
|
310 |
+
for outputs in parent_outputs.values():
|
311 |
+
if outputs.type == "InfoArtifact":
|
312 |
+
continue
|
313 |
+
questions.append(outputs)
|
314 |
+
return ListArtifact(questions)
|
315 |
+
|
316 |
+
def get_questions_workflow(self) -> Workflow:
|
317 |
+
workflow = Workflow(id="create_question_workflow")
|
318 |
+
# How many questions still need to be created
|
319 |
+
for _ in range(self.question_number - len(self.question_list)):
|
320 |
+
task = StructureRunTask(
|
321 |
+
structure_run_driver=LocalStructureRunDriver(
|
322 |
+
create_structure=self.get_single_question
|
323 |
+
),
|
324 |
+
child_ids=["end_task"],
|
325 |
+
)
|
326 |
+
workflow.add_task(task)
|
327 |
+
end_task = CodeExecutionTask(id="end_task", on_run=self.end_workflow)
|
328 |
+
workflow.add_task(end_task)
|
329 |
+
return workflow
|
330 |
+
|
331 |
+
def single_question_last_task(self, task: CodeExecutionTask) -> TextArtifact:
|
332 |
+
parent_outputs = task.parent_outputs
|
333 |
+
wrong_answers = parent_outputs["wrong_answers"].value # Output is a list
|
334 |
+
wrong_answers = wrong_answers.split("\n")
|
335 |
+
question_and_answer = parent_outputs["get_question"].value # Output is a json
|
336 |
+
try:
|
337 |
+
question_and_answer = json.loads(question_and_answer)
|
338 |
+
except:
|
339 |
+
question_and_answer = question_and_answer.split("\n")[1:]
|
340 |
+
question_and_answer = "".join(question_and_answer)
|
341 |
+
question_and_answer = json.loads(question_and_answer)
|
342 |
+
inputs = task.input.value.split(",")
|
343 |
+
question = {
|
344 |
+
"Question": question_and_answer["Question"],
|
345 |
+
"Answer": question_and_answer["Answer"],
|
346 |
+
"Wrong Answers": wrong_answers,
|
347 |
+
"Page": inputs[0],
|
348 |
+
"Taxonomy": inputs[1],
|
349 |
+
}
|
350 |
+
return TextArtifact(question)
|
351 |
+
|
352 |
+
def get_question_for_wrong_answers(self, task: CodeExecutionTask) -> TextArtifact:
|
353 |
+
parent_outputs = task.parent_outputs
|
354 |
+
question = parent_outputs["get_question"].value
|
355 |
+
question = json.loads(question)["Question"]
|
356 |
+
return TextArtifact(question)
|
357 |
+
|
358 |
+
def get_separated_answer_for_wrong_answers(
|
359 |
+
self, task: CodeExecutionTask
|
360 |
+
) -> TextArtifact:
|
361 |
+
parent_outputs = task.parent_outputs
|
362 |
+
answer = parent_outputs["get_question"].value
|
363 |
+
print(answer)
|
364 |
+
answer = json.loads(answer)["Answer"]
|
365 |
+
return TextArtifact(answer)
|
366 |
+
|
367 |
+
def make_rag_structure(
|
368 |
+
self, vector_store: GriptapeCloudVectorStoreDriver
|
369 |
+
) -> Structure:
|
370 |
+
if vector_store:
|
371 |
+
tool = self.build_rag_tool(self.build_rag_engine(vector_store))
|
372 |
+
use_rag_task = ToolTask(tool=tool)
|
373 |
+
return Agent(tasks=[use_rag_task])
|
374 |
+
errormsg = "No Vector Store"
|
375 |
+
raise ValueError(errormsg)
|
376 |
+
|
377 |
+
def get_single_question(self) -> Workflow:
|
378 |
+
question_generator = Workflow(id="single_question")
|
379 |
+
taxonomy = random.choice(self.taxonomy)
|
380 |
+
taxonomyprompt = {
|
381 |
+
"Knowledge": "Generate a quiz question based ONLY on this information: {{parent_outputs['information_task']}}, then write the answer to the question. The interrogative verb for the question should be one of 'define', 'list', 'state', 'identify', or 'label'.",
|
382 |
+
"Comprehension": "Generate a quiz question based ONLY on this information: {{parent_outputs['information_task']}}, then write the answer to the question. The interrogative verb for the question should be one of 'explain', 'predict', 'interpret', 'infer', 'summarize', 'convert', or 'give an example of x'.",
|
383 |
+
"Application": "Generate a quiz question based ONLY on this information: {{parent_outputs['information_task']}}, then write the answer to the question. The structure of the question should be one of 'How could x be used to y?' or 'How would you show/make use of/modify/demonstrate/solve/apply x to conditions y?'",
|
384 |
+
}
|
385 |
+
pages, driver = self.get_vector_store_id_from_page()
|
386 |
+
get_information = StructureRunTask(
|
387 |
+
id="information_task",
|
388 |
+
input="What is the information in KB?",
|
389 |
+
structure_run_driver=LocalStructureRunDriver(
|
390 |
+
create_structure=lambda: self.make_rag_structure(driver)
|
391 |
+
),
|
392 |
+
child_ids=["get_question"],
|
393 |
+
)
|
394 |
+
# Get KBs and select it, assign it to the structure or create the structure right here.
|
395 |
+
# Rules for subject matter expert: return only a json with question and answer as keys.
|
396 |
+
generate_q_task = StructureRunTask(
|
397 |
+
id="get_question",
|
398 |
+
input=taxonomyprompt[taxonomy],
|
399 |
+
structure_run_driver=LocalStructureRunDriver(
|
400 |
+
create_structure=lambda: self.get_structure("subject_matter_expert")
|
401 |
+
),
|
402 |
+
parent_ids=["information_task"],
|
403 |
+
)
|
404 |
+
get_question_code_task = CodeExecutionTask(
|
405 |
+
id="get_only_question",
|
406 |
+
on_run=self.get_question_for_wrong_answers,
|
407 |
+
parent_ids=["get_question"],
|
408 |
+
child_ids=["wrong_answers"],
|
409 |
+
)
|
410 |
+
get_separated_answer_code_task = CodeExecutionTask(
|
411 |
+
id="get_separated_answer",
|
412 |
+
on_run=self.get_separated_answer_for_wrong_answers,
|
413 |
+
parent_ids=["get_question"],
|
414 |
+
child_ids=["wrong_answers"],
|
415 |
+
)
|
416 |
+
generate_wrong_answers = StructureRunTask(
|
417 |
+
id="wrong_answers",
|
418 |
+
input="""Write and return three incorrect answers for this question: {{parent_outputs['get_separated_question']}}. The correct answer to the question is: {{parent_outputs['get_separated_answer']}}, and incorrect answers should have the same structure as this answer whilst still being incorrect. Use this information as context to write the incorrect answers: {{parent_outputs['information_task']}}""",
|
419 |
+
structure_run_driver=LocalStructureRunDriver(
|
420 |
+
create_structure=lambda: self.get_structure("wrong_answers_generator")
|
421 |
+
),
|
422 |
+
parent_ids=["get_only_question", "information_task"],
|
423 |
+
)
|
424 |
+
compile_task = CodeExecutionTask(
|
425 |
+
id="compile_task",
|
426 |
+
input=f"{pages}, {taxonomy}",
|
427 |
+
on_run=self.single_question_last_task,
|
428 |
+
parent_ids=["wrong_answers", "get_question"],
|
429 |
+
)
|
430 |
+
question_generator.add_tasks(
|
431 |
+
get_information,
|
432 |
+
generate_q_task,
|
433 |
+
get_question_code_task,
|
434 |
+
get_separated_answer_code_task,
|
435 |
+
generate_wrong_answers,
|
436 |
+
compile_task,
|
437 |
+
)
|
438 |
+
return question_generator
|
439 |
+
|
+    def get_vector_store_id_from_page(
+        self,
+    ) -> tuple[str, GriptapeCloudVectorStoreDriver]:
+        base_url = "https://cloud.griptape.ai/api"
+        kb_url = f"{base_url}/knowledge-bases"
+        headers = {"Authorization": f"Bearer {os.getenv('GT_CLOUD_API_KEY')}"}
+        # TODO: This needs to change when I have my own bucket. Right now, I'm using the 10 most recently made KBs.
+        response = requests.get(url=kb_url, headers=headers)
+        response.raise_for_status()
+        if response.status_code == 200:
+            data = response.json()
+            possible_kbs = {}
+            for kb in data["knowledge_bases"]:
+                name = kb["name"]
+                if "KB_section" not in name:
+                    continue
+                page_nums = name.split("p")[1:]
+                start_page = int(page_nums[0].split("-")[0])
+                end_page = int(page_nums[1])
+                if end_page <= self.page_range[1] and start_page >= self.page_range[0]:
+                    possible_kbs[kb["knowledge_base_id"]] = f"{start_page}-{end_page}"
+            kb_id = random.choice(list(possible_kbs.keys()))
+            page_value = possible_kbs[kb_id]  # TODO: This won't help at all actually
+            return page_value, GriptapeCloudVectorStoreDriver(
+                api_key=os.getenv("GT_CLOUD_API_KEY", ""),
+                knowledge_base_id=kb_id,
+            )
+        else:
+            raise ValueError(response.status_code)
+
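For reference, the split logic above implies knowledge-base names carry two 'p'-prefixed page markers. A worked example with a hypothetical name (the real naming convention lives in Griptape Cloud):

    name = "KB_section_p10-p20"                   # hypothetical KB name
    page_nums = name.split("p")[1:]               # ["10-", "20"]
    start_page = int(page_nums[0].split("-")[0])  # 10
    end_page = int(page_nums[1])                  # 20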
+    def get_taxonomy_vs(self) -> GriptapeCloudVectorStoreDriver:
+        return GriptapeCloudVectorStoreDriver(
+            api_key=os.getenv("GT_CLOUD_API_KEY", ""),
+            knowledge_base_id="2c3a6f19-51a8-43c3-8445-c7fbe06bf460",
+        )
+
+    def build_rag_engine(
+        self, vector_store_driver: GriptapeCloudVectorStoreDriver
+    ) -> RagEngine:
+        return RagEngine(
+            retrieval_stage=RetrievalRagStage(
+                retrieval_modules=[
+                    VectorStoreRetrievalRagModule(
+                        vector_store_driver=vector_store_driver,
+                    )
+                ],
+            ),
+            response_stage=ResponseRagStage(
+                response_modules=[TextChunksResponseRagModule()]
+            ),
+        )
+
+    def build_rag_tool(self, engine: RagEngine) -> RagTool:
+        return RagTool(
+            description="Contains information about the textbook. Use it ONLY for context.",
+            rag_engine=engine,
+        )
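A minimal sketch of how these three builders compose at a call site, written standalone rather than as the methods above, with a placeholder knowledge-base id:

    import os
    from griptape.drivers import GriptapeCloudVectorStoreDriver
    from griptape.structures import Agent

    driver = GriptapeCloudVectorStoreDriver(
        api_key=os.getenv("GT_CLOUD_API_KEY", ""),
        knowledge_base_id="<knowledge-base-id>",  # placeholder
    )
    # The engine retrieves text chunks; the tool exposes them to the agent.
    tool = build_rag_tool(build_rag_engine(driver))
    agent = Agent(tools=[tool])
    agent.run("What topics does the textbook cover?")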
uw_programmatic/config.yaml
ADDED
@@ -0,0 +1,119 @@
+events:
+  enter_first_state:
+    transitions:
+    - from: start
+      to: gather_parameters
+  finish_state:
+    transitions:
+    - from: evaluate_q_count
+      to: output_q
+  next_state:
+    transitions:
+    - from: gather_parameters
+      to: evaluate_q_count
+    - from: evaluate_q_count
+      to: need_more_q
+    - from: need_more_q
+      to: assess_generated_q
+    - from: assess_generated_q
+      to: evaluate_q_count
+    - from: output_q
+      to: gather_parameters
+  end_state:
+    transitions:
+    - from: output_q
+      to: end
+rulesets:
+  frame_question_best_practices:
+    name: Frame Question with KB
+    rules:
+    - '"Return a string with the reformatted question'
+    - '"No commentary, no code, no backticks'
+    - '"Use the information from your knowledge base'
+    - '"Do not change the content of the question'
+  incorrect_answers_creator:
+    name: Create Wrong Answers
+    rules:
+    - '"Return ONLY a list of 3 incorrect answers. No markdown, no commentary, no
+      backticks.'
+    - '"All incorrect answers should be different, but plausible answers to the question.'
+    - '"Incorrect answers may reference material from the knowledge base, but must
+      not be correct answers to the question'
+    - '"Length of incorrect answers should be 10 words max, 5 words minimum'
+  similarity_checker:
+    name: Check Similarity
+    rules:
+    - '''"you are adept at comparing questions to check whether they are similar'''
+    - '''"you will be given a list of questions. If two questions assess very similar
+      subjects in a very similar way, remove one of them from the list.'''
+    - '''"do not change anything else in the list.'''
+    - '''"output only the edited list.'''
+    - '''Return ONLY a json'''
+    - '''No markdown, no commentary, no code, no backticks.'''
+    - '"Use \" for quotes within the JSON'
+  specific_question_creator:
+    name: Create Question
+    rules:
+    - '"Return ONLY a json with ''Question'' and ''Answer'' as keys.'
+    - " No markdown, no commentary, no code, no backticks."
+    - '"Query to knowledge base should always be ''find information for quiz question'''
+    - '"Question should be a multiple choice quiz style question that assesses a student''s
+      knowledge of the information in the knowledge base (which should be referred
+      to as ''the textbook''). Answer should be a correct answer to the question that
+      uses information from the knowledge base. Do not return incorrect answers.'
+    - '"The length of the question should be 30 words at most.'
+    - '"Question should never reference or ask about an entire section, never reference
+      or ask about a quote in the knowledge base, never ask for the page number of
+      some information, and never ask for information about the file, document, or
+      knowledge base.'
+    - '"The answer to the question should be short, but should not omit important
+      information.'
+  taxonomy_prompter:
+    name: Decide Taxonomy
+    rules:
+    - '"behave as if you were a user asking an AI chatbot to generate a question for
+      you'
+states:
+  assess_generated_q:
+    structures:
+      similarity_auditor: {}
+  end:
+    final: true
+  evaluate_q_count: {}
+  gather_parameters: {}
+  need_more_q:
+    structures:
+      best_practices_expert: {}
+      subject_matter_expert: {}
+      taxonomy_expert: {}
+  output_q: {}
+  start:
+    initial: true
+structures:
+  best_practices_expert:
+    model: gpt-4o
+    prompt_id: best_practices_question
+    ruleset_ids:
+    - frame_question_best_practices
+    vector_stores:
+    - best_practices
+  similarity_auditor:
+    model: gpt-4o
+    prompt_id: similarity_auditor_prompt
+    ruleset_ids:
+    - similarity_checker
+  subject_matter_expert:
+    model: gpt-4o
+    prompt_id: scope_question_subject_expert
+    ruleset_ids:
+    - specific_question_creator
+  taxonomy_expert:
+    model: gpt-4o
+    prompt_id: scope_question_taxonomy
+    ruleset_ids:
+    - taxonomy_prompter
+  wrong_answers_generator:
+    model: gpt-4o
+    prompt_id: write_incorrect_answers
+    ruleset_ids:
+    - incorrect_answers_creator
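As a sanity check, the event/transition section above can be loaded and inverted into a (state, event) -> next-state lookup. A short sketch, assuming PyYAML is available:

    import yaml

    with open("uw_programmatic/config.yaml") as f:
        config = yaml.safe_load(f)

    # {(current_state, event): next_state} built from the transitions above.
    transitions = {
        (t["from"], event): t["to"]
        for event, spec in config["events"].items()
        for t in spec["transitions"]
    }
    assert transitions[("start", "enter_first_state")] == "gather_parameters"
    assert transitions[("output_q", "end_state")] == "end"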
uw_programmatic/question_pipeline.py
ADDED
@@ -0,0 +1,300 @@
+from __future__ import annotations
+
+import ast
+import json
+import logging
+import os
+import random
+
+import requests
+from dotenv import load_dotenv
+from griptape.artifacts import ListArtifact, TextArtifact
+from griptape.configs import Defaults
+from griptape.configs.drivers import OpenAiDriversConfig
+from griptape.configs.logging import TruncateLoggingFilter
+from griptape.drivers import (
+    GriptapeCloudVectorStoreDriver,
+    LocalStructureRunDriver,
+    OpenAiChatPromptDriver,
+)
+from griptape.engines.rag import RagEngine
+from griptape.engines.rag.modules import (
+    TextChunksResponseRagModule,
+    VectorStoreRetrievalRagModule,
+)
+from griptape.engines.rag.stages import ResponseRagStage, RetrievalRagStage
+from griptape.rules import Rule, Ruleset
+from griptape.structures import Agent, Structure, Workflow
+from griptape.tasks import CodeExecutionTask, StructureRunTask
+from griptape.tools import RagTool
+
+from griptape_statemachine.parsers.uw_csv_parser import CsvParser
+
+load_dotenv()
+
+# OpenAI default config: pass in a new OpenAI driver.
+Defaults.drivers_config = OpenAiDriversConfig(
+    prompt_driver=OpenAiChatPromptDriver(model="gpt-4o", max_tokens=4096)
+)
+# logger = logging.getLogger(Defaults.logging_config.logger_name)
+# logger.setLevel(logging.ERROR)
+# logger.addFilter(TruncateLoggingFilter(max_log_length=5000))
+
+
+# ALL METHODS RELATING TO THE WORKFLOW AND PIPELINE
+def end_workflow(task: CodeExecutionTask) -> ListArtifact:
+    parent_outputs = task.parent_outputs
+    questions = []
+    for output in parent_outputs.values():
+        output = output.value
+        try:
+            output = ast.literal_eval(output)
+            question = {output["Question"]: output}
+            questions.append(TextArtifact(question))
+        except (SyntaxError, ValueError):
+            pass
+    return ListArtifact(questions)
+
+
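Each parent artifact reaching end_workflow is the str() of a question dict, so literal_eval round-trips it. A worked example with a made-up question:

    raw = "{'Question': 'What is Python?', 'Answer': 'A programming language'}"
    parsed = ast.literal_eval(raw)        # back to a dict
    entry = {parsed["Question"]: parsed}  # keyed by question text, as above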
+def get_questions_workflow() -> Workflow:
+    workflow = Workflow(id="create_question_workflow")
+    # One StructureRunTask per question still to be created.
+    for _ in range(10):
+        task = StructureRunTask(
+            structure_run_driver=LocalStructureRunDriver(
+                create_structure=get_single_question
+            ),
+            child_ids=["end_task"],
+        )
+        workflow.add_task(task)
+    end_task = CodeExecutionTask(id="end_task", on_run=end_workflow)
+    workflow.add_task(end_task)
+    return workflow
+
+
+def single_question_last_task(task: CodeExecutionTask) -> TextArtifact:
+    parent_outputs = task.parent_outputs
+    print(f"PARENT OUTPUTS ARE: {parent_outputs}")
+    wrong_answers = parent_outputs["wrong_answers"].value  # Output is a newline-separated list
+    wrong_answers = wrong_answers.split("\n")
+    question_and_answer = parent_outputs["get_question"].value  # Output is a JSON string
+    question_and_answer = json.loads(question_and_answer)
+    inputs = task.input.value.split(",")
+    question = {
+        "Question": question_and_answer["Question"],
+        "Answer": question_and_answer["Answer"],
+        "Wrong Answers": wrong_answers,
+        "Page": int(inputs[0]),
+        "Taxonomy": inputs[1].strip(),
+    }
+    return TextArtifact(question)
+
+
+def get_question_for_wrong_answers(task: CodeExecutionTask) -> TextArtifact:
+    parent_outputs = task.parent_outputs
+    question = parent_outputs["get_question"].value
+    print(question)
+    question = json.loads(question)["Question"]
+    return TextArtifact(question)
+
+
+def get_single_question() -> Workflow:
+    question_generator = Workflow()
+    page_number = random.choice(list(range(1, 9)))
+    taxonomy = random.choice(["Knowledge", "Comprehension", "Application"])
+    taxonomyprompt = {
+        "Knowledge": "Generate a quiz question based ONLY on this information: {{parent_outputs['information_task']}}, then write the answer to the question. The interrogative verb for the question should be one of 'define', 'list', 'state', 'identify', or 'label'.",
+        "Comprehension": "Generate a quiz question based ONLY on this information: {{parent_outputs['information_task']}}, then write the answer to the question. The interrogative verb for the question should be one of 'explain', 'predict', 'interpret', 'infer', 'summarize', 'convert', or 'give an example of x'.",
+        "Application": "Generate a quiz question based ONLY on this information: {{parent_outputs['information_task']}}, then write the answer to the question. The structure of the question should be one of 'How could x be used to y?' or 'How would you show/make use of/modify/demonstrate/solve/apply x to conditions y?'",
+    }
+    # Get KBs and select one, assign it to the structure or create the structure right here.
+    # Rules for subject matter expert: return only a JSON with question and answer as keys.
+    generate_q_task = StructureRunTask(
+        id="get_question",
+        input=taxonomyprompt[taxonomy],
+        structure_run_driver=LocalStructureRunDriver(
+            create_structure=lambda: get_structure("subject_matter_expert", page_number)
+        ),
+    )
+
+    get_question_code_task = CodeExecutionTask(
+        id="get_only_question",
+        on_run=get_question_for_wrong_answers,
+        parent_ids=["get_question"],
+        child_ids=["wrong_answers"],
+    )
+    # This will use the same KB as the previous task
+    generate_wrong_answers = StructureRunTask(
+        id="wrong_answers",
+        input="""Write and return three incorrect answers for this question: {{parent_outputs['get_only_question']}} with this context: {{parent_outputs['information_task']}}""",
+        structure_run_driver=LocalStructureRunDriver(
+            create_structure=lambda: get_structure("wrong_answers_generator")
+        ),
+        parent_ids=["get_only_question"],
+    )
+    compile_task = CodeExecutionTask(
+        id="compile_task",
+        input=f"{page_number}, {taxonomy}",
+        on_run=single_question_last_task,
+        parent_ids=["wrong_answers", "get_question"],
+    )
+    question_generator.add_tasks(
+        generate_q_task,
+        get_question_code_task,
+        generate_wrong_answers,
+        compile_task,
+    )
+    return question_generator
+
+
+def get_structure(structure_id: str, page_number: int = 0) -> Structure:
+    match structure_id:
+        case "subject_matter_expert":
+            rulesets = Ruleset(
+                name="specific_question_creator",
+                rules=[
+                    Rule(
+                        "Return ONLY a json with 'Question' and 'Answer' as keys. No markdown, no commentary, no code, no backticks."
+                    ),
+                    Rule(
+                        "Query to knowledge base should always be 'find information for quiz question'"
+                    ),
+                    Rule("Use ONLY information from your knowledge base"),
+                    Rule(
+                        "Question should be a question based on the knowledge base. Answer should be from knowledge base."
+                    ),
+                    Rule(
+                        "The answer to the question should be short, but should not omit important information."
+                    ),
+                    Rule("Answer length should be 10 words maximum, 5 words minimum"),
+                ],
+            )
+            _, kb_driver = get_vector_store_id_from_page(page_number)
+            tool = build_rag_tool(build_rag_engine(kb_driver))
+            structure = Agent(
+                id="subject_matter_expert",
+                prompt_driver=OpenAiChatPromptDriver(model="gpt-4o"),
+                rulesets=[rulesets],
+                tools=[tool],
+            )
+        case "taxonomy_expert":
+            rulesets = Ruleset(
+                name="KB Rules",
+                rules=[
+                    Rule(
+                        "Use only your knowledge base. Do not make up any additional information."
+                    ),
+                    Rule("Maximum 10 words."),
+                    Rule(
+                        "Return information an AI chatbot could use to write a question on a subject."
+                    ),
+                ],
+            )
+            kb_driver = get_taxonomy_vs()
+            tool = build_rag_tool(build_rag_engine(kb_driver))
+            structure = Agent(
+                id="taxonomy_expert",
+                prompt_driver=OpenAiChatPromptDriver(model="gpt-4o"),
+                tools=[tool],
+            )
+        case "wrong_answers_generator":
+            rulesets = Ruleset(
+                name="incorrect_answers_creator",
+                rules=[
+                    Rule(
+                        "Return ONLY a list of 3 incorrect answers. No markdown, no commentary, no backticks."
+                    ),
+                    Rule(
+                        "All incorrect answers should be different, but plausible answers to the question."
+                    ),
+                    Rule(
+                        "Incorrect answers may reference material from the knowledge base, but must not be correct answers to the question"
+                    ),
+                    Rule(
+                        "Length of incorrect answers should be 10 words max, 5 words minimum"
+                    ),
+                ],
+            )
+            _, kb_driver = get_vector_store_id_from_page(page_number)
+            tool = build_rag_tool(build_rag_engine(kb_driver))
+            structure = Agent(
+                id="wrong_answers_generator",
+                prompt_driver=OpenAiChatPromptDriver(model="gpt-4o"),
+                rulesets=[rulesets],
+                tools=[tool],
+            )
+        case _:
+            structure = Agent(prompt_driver=OpenAiChatPromptDriver(model="gpt-4o"))
+    return structure
+
+
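A sketch of driving get_structure directly; the page number and prompt are made up, and the output shape follows the specific_question_creator rules:

    expert = get_structure("subject_matter_expert", page_number=3)
    expert.run("Generate a quiz question, then write its answer.")
    print(expert.output.value)  # expected: {"Question": "...", "Answer": "..."}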
+def get_vector_store_id_from_page(
+    page: int,
+) -> tuple[str, GriptapeCloudVectorStoreDriver]:
+    base_url = "https://cloud.griptape.ai/api"
+    kb_url = f"{base_url}/knowledge-bases"
+    headers = {"Authorization": f"Bearer {os.getenv('GT_CLOUD_API_KEY')}"}
+    # TODO: This needs to change when I have my own bucket. Right now, I'm using the 10 most recently made KBs.
+    response = requests.get(url=kb_url, headers=headers)
+    response.raise_for_status()
+    if response.status_code == 200:
+        data = response.json()
+        possible_kbs = {}
+        for kb in data["knowledge_bases"]:
+            name = kb["name"]
+            if "KB_section" not in name:
+                continue
+            page_nums = name.split("pg")[1].split("-")
+            start_page = int(page_nums[0])
+            end_page = int(page_nums[1])
+            if end_page <= 40 and start_page >= 1:  # NOTE: window hardcoded; `page` is currently unused
+                possible_kbs[kb["knowledge_base_id"]] = f"{start_page}-{end_page}"
+        kb_id = random.choice(list(possible_kbs.keys()))
+        page_value = possible_kbs[kb_id]
+        return page_value, GriptapeCloudVectorStoreDriver(
+            api_key=os.getenv("GT_CLOUD_API_KEY", ""),
+            knowledge_base_id=kb_id,
+        )
+    raise ValueError(response.status_code)
+
+
+def get_taxonomy_vs() -> GriptapeCloudVectorStoreDriver:
+    return GriptapeCloudVectorStoreDriver(
+        api_key=os.getenv("GT_CLOUD_API_KEY", ""),
+        knowledge_base_id="2c3a6f19-51a8-43c3-8445-c7fbe06bf460",
+    )
+
+
+def build_rag_engine(vector_store_driver: GriptapeCloudVectorStoreDriver) -> RagEngine:
+    return RagEngine(
+        retrieval_stage=RetrievalRagStage(
+            retrieval_modules=[
+                VectorStoreRetrievalRagModule(
+                    vector_store_driver=vector_store_driver,
+                    query_params={
+                        "count": 100,
+                    },
+                )
+            ],
+        ),
+        response_stage=ResponseRagStage(
+            response_modules=[TextChunksResponseRagModule()]
+        ),
+    )
+
+
+def build_rag_tool(engine: RagEngine) -> RagTool:
+    return RagTool(
+        description="Contains information about the textbook. Use it to answer any related questions.",
+        rag_engine=engine,
+    )
+
+
+if __name__ == "__main__":
+    # workflow = get_questions_workflow()
+    # workflow.run()
+    CsvParser("uw_programmatic").csv_parser()
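For an end-to-end smoke test of this module, the commented-out workflow in the __main__ block can be run and its fan-in output inspected. A minimal sketch:

    workflow = get_questions_workflow()
    workflow.run()
    # end_task returns a ListArtifact of {question_text: question_dict} entries.
    for artifact in workflow.output_task.output.value:
        print(artifact.value)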
uw_programmatic/uw_machine.py
ADDED
@@ -0,0 +1,265 @@
+from __future__ import annotations
+
+import ast
+import csv
+import json
+import random
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import pandas as pd
+import xlsxwriter  # noqa: F401  (used by pandas as the ExcelWriter engine)
+
+from base_machine import UWBaseMachine
+
+if TYPE_CHECKING:
+    from griptape.tools import BaseTool
+
+
+class UWMachine(UWBaseMachine):
+    """State machine with GOAP."""
+
+    @property
+    def tools(self) -> dict[str, BaseTool]:
+        return {}
+
+    def start_machine(self) -> None:
+        """Starts the machine."""
+        # Clear input history and the similarity-step CSV from any previous run.
+        with (Path.cwd() / "outputs/similarity_step.csv").open("w") as file:
+            file.write("")
+        self.send("enter_first_state")
+
+    def on_event_gather_parameters(self, event_: dict) -> None:
+        event_source = event_["type"]
+        event_value = event_["value"]
+        match event_source:
+            case "user_input":
+                parameters = event_value
+                self.page_range = parameters["page_range"]
+                self.question_number = parameters["question_number"]
+                self.taxonomy = parameters["taxonomy"]
+                self.send("next_state")
+            case _:
+                err_msg = f"Unexpected Transition Event ID: {event_value}."
+                raise ValueError(err_msg)
+
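The handler above expects event payloads shaped like the following; the parameter values are made up, and `machine` stands for an instantiated UWMachine:

    machine.on_event_gather_parameters(
        {
            "type": "user_input",
            "value": {
                "page_range": (1, 40),
                "question_number": 5,
                "taxonomy": "Knowledge",
            },
        }
    )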
+    def on_enter_evaluate_q_count(self) -> None:
+        if len(self.question_list) >= self.question_number:
+            self.send("finish_state")  # go to output questions
+        else:
+            self.send("next_state")  # go to need more questions
+
+    def on_event_evaluate_q_count(self, event_: dict) -> None:
+        pass
+
+    def on_enter_need_more_q(self) -> None:
+        # Build and run the entire workflow to create another batch of questions.
+        self.get_questions_workflow().run()
+
+    def on_event_need_more_q(self, event_: dict) -> None:
+        event_source = event_["type"]
+        event_value = event_["value"]
+        match event_source:
+            case "griptape_event":
+                event_type = event_value["type"]
+                match event_type:
+                    case "FinishStructureRunEvent":
+                        structure_id = event_value["structure_id"]
+                        match structure_id:
+                            case "create_question_workflow":
+                                values = event_value["output_task_output"]["value"]
+                                questions = [
+                                    ast.literal_eval(question["value"])
+                                    for question in values
+                                ]
+                                # The workflow's output is a ListArtifact of question dicts.
+                                self.most_recent_questions = questions
+                                self.send("next_state")
+                            case _:
+                                print(f"Error: {event_}")
+            case _:
+                print(f"Unexpected: {event_}")
+
+    def on_enter_assess_generated_q(self) -> None:
+        # TODO: Should it append to the list already and remove duplicates, or not?
+        # TODO: Merge incoming lists
+        with (Path.cwd() / "outputs/similarity_step.csv").open("a", newline="") as file:
+            writer = csv.DictWriter(
+                file,
+                fieldnames=[
+                    "Question",
+                    "Answer",
+                    "Wrong Answers",
+                    "Page",
+                    "Taxonomy",
+                ],
+            )
+            writer.writerow({"Question": "LIST OF QUESTIONS GENERATED THIS ROUND"})
+            writer.writerows(self.most_recent_questions)
+        merged_list = [*self.question_list, *self.most_recent_questions]
+        prompt = f"{merged_list}"
+        self.get_structure("similarity_auditor").run(prompt)
+
+    def on_event_assess_generated_q(self, event_: dict) -> None:
+        event_source = event_["type"]
+        event_value = event_["value"]
+        match event_source:
+            case "griptape_event":
+                event_type = event_value["type"]
+                match event_type:
+                    case "FinishStructureRunEvent":
+                        structure_id = event_value["structure_id"]
+                        match structure_id:
+                            case "similarity_auditor":
+                                new_question_list = event_value["output_task_output"]["value"]
+                                try:
+                                    # The auditor must return the list in JSON format.
+                                    new_question_list = json.loads(new_question_list)
+                                except json.JSONDecodeError:
+                                    new_question_list = self.question_list
+                                merged_list = [
+                                    *self.question_list,
+                                    *self.most_recent_questions,
+                                ]
+                                deleted_q = [
+                                    question1
+                                    for question1 in merged_list
+                                    if not any(
+                                        question2["Question"] == question1["Question"]
+                                        for question2 in new_question_list
+                                    )
+                                ]
+                                with (Path.cwd() / "outputs/similarity_step.csv").open(
+                                    "a", newline=""
+                                ) as file:
+                                    writer = csv.DictWriter(
+                                        file,
+                                        fieldnames=[
+                                            "Question",
+                                            "Answer",
+                                            "Wrong Answers",
+                                            "Page",
+                                            "Taxonomy",
+                                        ],
+                                    )
+                                    writer.writerow(
+                                        {"Question": "QUESTIONS REMOVED THIS ROUND!"}
+                                    )
+                                    if deleted_q:
+                                        writer.writerows(deleted_q)
+                                    else:
+                                        writer.writerow({"Question": "No questions removed"})
+                                self.question_list = new_question_list
+                                self.send("next_state")  # move on
+
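The removal bookkeeping in this handler reduces to a membership check on question text. A tiny worked example with invented questions:

    before = [{"Question": "What is Python?"}, {"Question": "Define Python."}]
    after = [{"Question": "What is Python?"}]  # auditor dropped the near-duplicate
    deleted = [
        q for q in before
        if not any(n["Question"] == q["Question"] for n in after)
    ]
    assert deleted == [{"Question": "Define Python."}]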
+    def on_enter_output_q(self) -> None:
+        columns = pd.MultiIndex.from_tuples(
+            [
+                ("Professor", "Page Range"),
+                ("Professor", "Taxonomy"),
+                ("Professor", "Question"),
+                ("Professor", "Answer"),
+                ("Professor", "Wrong Answers"),
+                ("Student", "Question"),
+                ("Student", "Answers"),
+            ]
+        )
+        data = pd.DataFrame(columns=columns)
+        for idx, question in enumerate(self.question_list):
+            shuffled_answers = [
+                question["Answer"],
+                *question["Wrong Answers"],
+            ]
+            random.shuffle(shuffled_answers)
+            shuffled_answers = "\n".join(shuffled_answers)
+            new_row = [
+                question["Page"],
+                question["Taxonomy"],
+                question["Question"],
+                question["Answer"],
+                question["Wrong Answers"],
+                question["Question"],
+                shuffled_answers,
+            ]
+            data.loc[idx] = new_row
+        data.columns = ["_".join(col).strip() for col in data.columns.values]
+        with pd.ExcelWriter("outputs/professor_guide.xlsx", engine="xlsxwriter") as writer:
+            data.to_excel(writer, sheet_name="Quiz Questions", index=False)
+        self.send("next_state")
+
+    def on_event_output_q(self, event_: dict) -> None:
+        pass
+
+    def on_exit_output_q(self) -> None:
+        # Reset the state machine values.
+        self.question_list = []
+        self.most_recent_questions = []
+
+if __name__ == "__main__":
+    question_list = [
+        {
+            "Page": "1-2",
+            "Taxonomy": "Knowledge",
+            "Question": "What is Python?",
+            "Answer": "A programming language",
+            "Wrong Answers": ["A snake", "A car brand", "A fruit"],
+        },
+        {
+            "Page": "3-4",
+            "Taxonomy": "Comprehension",
+            "Question": "What does HTML stand for?",
+            "Answer": "HyperText Markup Language",
+            "Wrong Answers": [
+                "High Text Machine Language",
+                "Hyperlink Text Mode Language",
+                "None of the above",
+            ],
+        },
+    ]
+
+    columns = pd.MultiIndex.from_tuples(
+        [
+            ("Professor", "Page Range"),
+            ("Professor", "Taxonomy"),
+            ("Professor", "Question"),
+            ("Professor", "Answer"),
+            ("Professor", "Wrong Answers"),
+            ("Student", "Question"),
+            ("Student", "Answers"),
+        ]
+    )
+    data = pd.DataFrame(columns=columns)
+    for idx, question in enumerate(question_list):
+        shuffled_answers = [
+            question["Answer"],
+            *question["Wrong Answers"],
+        ]
+        random.shuffle(shuffled_answers)
+        shuffled_answers = "\n".join(shuffled_answers)
+        new_row = [
+            question["Page"],
+            question["Taxonomy"],
+            question["Question"],
+            question["Answer"],
+            question["Wrong Answers"],
+            question["Question"],
+            shuffled_answers,
+        ]
+        data.loc[idx] = new_row
+    data.columns = ["_".join(col).strip() for col in data.columns.values]
+    with pd.ExcelWriter("outputs/professor_guide.xlsx", engine="xlsxwriter") as writer:
+        data.to_excel(writer, sheet_name="Quiz Questions", index=False)
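To spot-check the workbook this test harness writes, read it back; this assumes pandas' optional Excel dependencies are installed:

    import pandas as pd
    print(pd.read_excel("outputs/professor_guide.xlsx").head())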