Commit d477d5c
Parent(s): 29e76cb

first commit

Files changed:
- .gitignore +69 -0
- LICENSE +201 -0
- app.py +99 -0
- griptape_statemachine/__init__.py +0 -0
- griptape_statemachine/parsers/__init__.py +5 -0
- griptape_statemachine/parsers/base_parser.py +11 -0
- griptape_statemachine/parsers/uw_config_parser.py +107 -0
- griptape_statemachine/parsers/uw_csv_parser.py +226 -0
- poetry.lock +0 -0
- pyproject.toml +75 -0
- requirements.txt +11 -0
- uw_programmatic/__init__.py +0 -0
- uw_programmatic/base_machine.py +496 -0
- uw_programmatic/config.yaml +119 -0
- uw_programmatic/question_pipeline.py +300 -0
- uw_programmatic/uw_machine.py +265 -0
.gitignore
ADDED
@@ -0,0 +1,69 @@
+.env
+.idea
+.DS_Store
+.huskyrc.json
+out
+log.log
+**/node_modules
+*.pyc
+*.vsix
+**/.vscode/.ropeproject/**
+**/testFiles/**/.cache/**
+*.noseids
+.nyc_output
+.vscode-test
+__pycache__
+npm-debug.log
+**/.mypy_cache/**
+!yarn.lock
+cucumber-report.json
+**/.vscode-test/**
+**/.vscode test/**
+**/.vscode-smoke/**
+**/.venv*/
+port.txt
+precommit.hook
+pythonFiles/lib/**
+debug_coverage*/**
+languageServer/**
+languageServer.*/**
+bin/**
+obj/**
+.pytest_cache
+tmp/**
+.python-version
+.vs/
+test-results*.xml
+xunit-test-results.xml
+build/ci/performance/performance-results.json
+!build/
+debug*.log
+debugpy*.log
+pydevd*.log
+nodeLanguageServer/**
+nodeLanguageServer.*/**
+dist/**
+*.egg-info
+
+# translation files
+*.xlf
+*.nls.*.json
+*.i18n.json
+
+# asdf
+.tool-versions
+
+# mkdocs build output
+site
+reference
+
+# coverage.py
+htmlcov/
+coverage.*
+
+# knowledge base material
+uw_machines/relevant_knowledge/
+
+# Outputs from the runs
+outputs/professor_guide.xlsx
+outputs/similarity_step.csv
LICENSE
ADDED
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
app.py
ADDED
@@ -0,0 +1,99 @@
+from __future__ import annotations
+from pathlib import Path
+import gradio as gr
+from typing import Any, Callable
+import contextvars
+from uw_programmatic.uw_machine import UWMachine
+
+
+def run_with_context(func: Callable) -> Callable:
+    ctx = contextvars.copy_context()
+
+    def wrapper(*args, **kwargs) -> Any:
+        return ctx.run(func, *args, **kwargs)
+
+    return wrapper
+
+
+def generate_questions(
+    page_lower, page_higher, question_number, taxonomy
+) -> tuple[str, dict[str, Any]]:
+    if machine.value and machine.value.current_state_value == "start":
+        machine.value.start_machine()  # Start the machine!
+    if not question_number or question_number <= 0:
+        msg = "Choose a valid question number."
+        raise gr.Error(msg)
+    if not page_lower or not page_higher or page_higher < page_lower:
+        msg = "Choose a valid page range."
+        raise gr.Error(msg)
+    if page_higher - page_lower <= 6:
+        msg = "Page range must be >6."
+        raise gr.Error(msg)
+    if not taxonomy or len(taxonomy) == 0:
+        msg = "Choose at least one taxonomy."
+        raise gr.Error(msg)
+    machine.value.send(
+        "process_event",
+        event_={
+            "type": "user_input",
+            "value": {
+                "page_range": (page_lower, page_higher),
+                "question_number": question_number,
+                "taxonomy": taxonomy,
+            },
+        },
+    )
+    return (
+        "## Questions Ready for Download Below",
+        gr.update(
+            visible=True, value=f"{Path.cwd().joinpath('outputs/professor_guide.xlsx')}"
+        ),
+    )
+
+
+def create_statemachine() -> None:
+    # Creates UWMachine from the config.yaml in the current directory
+    cwd_path = Path.cwd() / "uw_programmatic"
+    config_path = cwd_path.joinpath(Path("config.yaml"))
+    try:
+        machine.value = UWMachine.from_config_file(config_path)
+    except Exception as e:
+        raise gr.Error(str(e)) from e
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("# UW Quiz Generator")
+    machine = gr.State(value=None)
+    with gr.Row():
+        with gr.Column(scale=2):
+            taxonomy = gr.CheckboxGroup(
+                choices=["Knowledge", "Comprehension", "Application"],
+                label="Taxonomy",
+                value="Knowledge",
+            )
+            question_number = gr.Number(
+                minimum=1, maximum=15, label="Number of Questions", value=3
+            )
+            gr.Markdown("For Chapter 3 - Pages 88-309")
+            with gr.Row():
+                page_lower = gr.Number(
+                    label="First Page", minimum=88, value=88, maximum=309
+                )
+                page_higher = gr.Number(
+                    label="Last Page", minimum=88, value=309, maximum=309
+                )
+            start_button = gr.Button(value="Generate Questions")
+
+        with gr.Column(scale=1):
+            output = gr.Markdown("## Questions Not Ready for Download", visible=True)
+            download_professor = gr.DownloadButton(
+                label="Download Questions", visible=False
+            )
+    create_statemachine()
+    start_button.click(
+        fn=run_with_context(generate_questions),
+        inputs=[page_lower, page_higher, question_number, taxonomy],
+        outputs=[output, download_professor],
+    )
+# TODO: Add a username and password here.
+demo.launch()
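
Aside: run_with_context above matters because Gradio may invoke event handlers on a different thread than the one that built the app. The snippet below is a minimal, self-contained sketch of the same idea (the request_id variable is illustrative, not part of this commit): the wrapper snapshots the caller's contextvars context once at wrap time, and every later call runs inside that snapshot.

import contextvars
from typing import Any, Callable

request_id = contextvars.ContextVar("request_id", default="unset")


def run_with_context(func: Callable) -> Callable:
    # Snapshot the current context at wrap time, not at call time.
    ctx = contextvars.copy_context()

    def wrapper(*args, **kwargs) -> Any:
        return ctx.run(func, *args, **kwargs)

    return wrapper


request_id.set("abc-123")
wrapped = run_with_context(lambda: request_id.get())
request_id.set("changed-later")
print(wrapped())  # prints "abc-123": the call runs in the earlier snapshot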
griptape_statemachine/__init__.py
ADDED
File without changes
griptape_statemachine/parsers/__init__.py
ADDED
@@ -0,0 +1,5 @@
+from .base_parser import BaseParser
+from .uw_config_parser import UWConfigParser
+from .uw_csv_parser import CsvParser
+
+__all__ = ["UWConfigParser", "BaseParser", "CsvParser"]
griptape_statemachine/parsers/base_parser.py
ADDED
@@ -0,0 +1,11 @@
+from abc import ABC, abstractmethod
+from pathlib import Path
+from attrs import define, field
+
+
+@define()
+class BaseParser(ABC):
+    file_path: Path = field()
+
+    @abstractmethod
+    def parse(self) -> dict: ...
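
BaseParser above is just an attrs class plus an abstract parse(): subclasses inherit the file_path field and only implement parse(). A minimal sketch of the pattern (JsonParser is a hypothetical illustration, not a file in this commit):

import json
from pathlib import Path

from attrs import define

from griptape_statemachine.parsers import BaseParser


@define()
class JsonParser(BaseParser):
    def parse(self) -> dict:
        # file_path is inherited from the BaseParser attrs field
        return json.loads(self.file_path.read_text())


data = JsonParser(file_path=Path("settings.json")).parse()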
griptape_statemachine/parsers/uw_config_parser.py
ADDED
@@ -0,0 +1,107 @@
+from __future__ import annotations
+
+import schema
+import yaml
+from attrs import define
+from yaml.resolver import Resolver
+
+from griptape_statemachine.parsers.base_parser import BaseParser
+
+
+STRUCTURE_SCHEMA = schema.Schema(
+    {
+        schema.Optional("model"): str,
+        schema.Optional("ruleset_ids"): [str],
+        schema.Optional("vector_stores"): [str],
+        schema.Optional("prompt_id"): str,
+    }
+)
+
+CONFIG_SCHEMA = schema.Schema(
+    {
+        "rulesets": schema.Schema(
+            {
+                str: schema.Schema(
+                    {
+                        "name": str,
+                        "rules": [str],
+                    }
+                )
+            }
+        ),
+        # Added for vector stores
+        schema.Optional("vector_stores"): schema.Schema(
+            {
+                str: schema.Schema(
+                    {
+                        "file_path": str,
+                        "file_type": str,
+                        schema.Optional("max_tokens"): int,
+                    }
+                )
+            }
+        ),
+        "structures": schema.Schema({str: STRUCTURE_SCHEMA}),
+        "events": schema.Schema(
+            {
+                str: schema.Schema(
+                    {
+                        "transitions": [
+                            schema.Schema(
+                                {
+                                    "from": str,
+                                    "to": str,
+                                    schema.Optional("internal"): bool,
+                                    schema.Optional("on"): str,
+                                    schema.Optional("relevance"): str,
+                                }
+                            )
+                        ],
+                    }
+                )
+            }
+        ),
+        "states": schema.Schema(
+            {
+                str: schema.Schema(
+                    {
+                        schema.Optional(
+                            schema.Or("initial", "final")
+                        ): bool,  # pyright: ignore[reportArgumentType]
+                        schema.Optional("structures"): schema.Schema(
+                            {str: STRUCTURE_SCHEMA}
+                        ),
+                    }
+                )
+            }
+        ),
+        schema.Optional("prompts"): {
+            str: {schema.Optional("author_intent"): str, "prompt": str}
+        },
+    }
+)
+
+
+@define()
+class UWConfigParser(BaseParser):
+    def __attrs_post_init__(self) -> None:
+        # remove resolver entries for On/Off/Yes/No
+        for ch in "OoYyNn":
+            if ch in Resolver.yaml_implicit_resolvers:
+                if len(Resolver.yaml_implicit_resolvers[ch]) == 1:
+                    del Resolver.yaml_implicit_resolvers[ch]
+                else:
+                    Resolver.yaml_implicit_resolvers[ch] = [
+                        x
+                        for x in Resolver.yaml_implicit_resolvers[ch]
+                        if x[0] != "tag:yaml.org,2002:bool"
+                    ]
+
+    def parse(self) -> dict:
+        data = yaml.safe_load(self.file_path.read_text())
+        CONFIG_SCHEMA.validate(data)
+        return data
+
+    def update_and_save(self, config: dict) -> None:
+        with self.file_path.open("w") as file:
+            yaml.dump(config, file, default_flow_style=False, line_break="\n")
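
For reference, here is a minimal config that should satisfy CONFIG_SCHEMA above (names and values are illustrative, not taken from the repo's config.yaml): all four required top-level keys are present, and the nested shapes match the sub-schemas.

import yaml

from griptape_statemachine.parsers.uw_config_parser import CONFIG_SCHEMA

minimal_config = yaml.safe_load(
    """
rulesets:
  expert_rules:
    name: Expert
    rules:
      - Answer in JSON only.
structures:
  subject_matter_expert:
    model: gpt-4o
    ruleset_ids: [expert_rules]
events:
  next_state:
    transitions:
      - from: start
        to: end
states:
  start:
    initial: true
  end:
    final: true
"""
)

CONFIG_SCHEMA.validate(minimal_config)  # raises schema.SchemaError if malformed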
griptape_statemachine/parsers/uw_csv_parser.py
ADDED
@@ -0,0 +1,226 @@
+# TODO: create a csv parser
+from __future__ import annotations
+
+from ast import Lambda
+import contextlib
+import csv
+from pathlib import Path
+from typing import TYPE_CHECKING, Callable
+
+import yaml
+
+if TYPE_CHECKING:
+    from io import TextIOWrapper
+
+
+class CsvParser:
+
+    def __init__(self, directory: str) -> None:
+        self.yaml_path = Path.joinpath(Path.cwd(), Path(f"{directory}/config.yaml"))
+        self.csv_directory = Path.joinpath(Path.cwd(), Path(f"{directory}/csv_files"))
+        csv_files = Path(self.csv_directory).glob("*")
+        self.csv_file_paths = [file for file in csv_files if file.is_file()]
+
+    def csv_parser(self) -> None:
+        """This is going to take in a big csv, split it, and put it in config.yaml"""
+        # This is going to parse multiple different csv files this time.
+        split_csv = {}
+        for csv_file in self.csv_file_paths:
+            with Path.open(csv_file, "r", newline="") as csvfile:
+                self.split_csv(csvfile, split_csv)
+        # split_csv should have all the information
+        yaml_data = yaml.safe_load(self.yaml_path.read_text())
+        # Rulesets CHANGE
+        try:
+            yaml_data["rulesets"] = self.csv_rulesets(
+                split_csv["Ruleset ID"]
+            )  # Rulesets
+        except KeyError:
+            print("No rulesets")
+        # Agents DONE
+        try:
+            yaml_data["structures"] = self.csv_agents(
+                split_csv["Agent ID"]
+            )  # Agent Definitions
+        except KeyError:
+            print("No structures")
+        # States
+        # Tailoring (affects the states section only) CHANGE
+        if "State ID to Tailor" in split_csv:
+            try:
+                yaml_data["states"] = self.csv_states(
+                    split_csv["State ID"],  # State Definitions
+                    split_csv["State ID to Tailor"],  # Agent Tailoring State ID
+                )
+            except KeyError:
+                print(" no states")
+        else:
+            try:
+                yaml_data["states"] = self.csv_states(
+                    split_csv["State ID"],  # State Definitions
+                    [],  # Agent Tailoring State ID
+                )
+            except KeyError:
+                print(" no states")
+        try:
+            yaml_data["prompts"] = self.csv_prompts(split_csv["Prompt ID"])
+        except KeyError:
+            print("no prompts")
+        # # Transitioning (affects event section) DONE
+        try:
+            yaml_data["events"] = self.csv_transition_id(
+                split_csv["Transition ID"]
+            )  # State Transitions
+        except KeyError:
+            print("No transitions")
+        # That's all folks!
+        self.update_and_save(yaml_data)
+
+    def split_csv(self, csv_file: TextIOWrapper, all_information: dict) -> None:
+        """Takes in a csv_file, and splits it into a dictionary that is headed by each of the sections.
+        Hooray!
+        """
+        reader = csv.reader(csv_file)
+        # Get the header of the section
+        header = next(reader)
+        header = header[0]  # Go to the meat of it (get rid of descriptive header)
+        current_information = []
+        for row in reader:
+            key = row[0]
+            # If the row is empty and/or has no value in the first column.
+            if key == ",,":
+                continue
+            current_information.append({key: row[1:]})
+        all_information[header] = current_information
+
+    def csv_kbs(self, kb_info: list) -> dict:
+        dictionary = {}
+        for row in kb_info:
+            key, value = row.popitem()
+            if key and value[0] and value[1]:
+                dictionary[key] = {"file_path": value[0], "file_type": value[1]}
+        return dictionary
+
+    def csv_rulesets(self, ruleset_info: list) -> dict:
+        dictionary = {}
+        for row in ruleset_info:
+            key, value = row.popitem()
+            if key and value[0] and value[1]:
+                rules = [
+                    rule.strip().strip('"').lstrip("- ")
+                    for rule in value[1].split("\n")
+                    if rule.strip()
+                ]
+                dictionary[key] = {
+                    "name": value[0],
+                    "rules": rules,
+                }  # Will have to check this.
+        return dictionary
+
+    def csv_prompts(self, prompt_info: list) -> dict:
+        dictionary = {}
+        for row in prompt_info:
+            key, value = row.popitem()
+            if key and value[0]:
+                dictionary[key] = {"prompt": value[0]}
+                if value[1]:
+                    dictionary[key]["author_intent"] = value[1]
+        return dictionary
+
+    def csv_agents(self, agent_info: list) -> dict:
+        dictionary = {}
+        for row in agent_info:
+            key, value = row.popitem()
+            if key:
+                ruleset_ids = []
+                if value[0]:
+                    ruleset_ids = [rule_id.strip() for rule_id in value[0].split(",")]
+                config = {
+                    "model": "gpt-4o",
+                    "ruleset_ids": ruleset_ids,
+                }
+                # If there is a global KB used
+                if value[1]:
+                    config["vector_stores"] = [value[1]]
+                # If there is a global prompt used (can be overridden by state specific)
+                if value[2]:
+                    config["prompt_id"] = value[2]
+                # If there is a model override
+                if value[4]:
+                    config["model"] = value[4]
+                dictionary[key] = config
+        return dictionary
+
+    def csv_states(self, state_info: list, tailor_info: list) -> dict:
+        states = {}
+        for row in state_info:
+            key, value = row.popitem()
+            if not key:
+                continue
+            if key == "start":
+                states[key] = {"initial": True}
+            elif key == "end":
+                states[key] = {"final": True}
+            else:
+                states[key] = {}
+                if value[0] and value[0] != "none":
+                    agent_list = {name.strip(): {} for name in value[0].split(",")}
+                    states[key]["structures"] = agent_list
+        for row in tailor_info:
+            tailor, value = row.popitem()
+            if not tailor:
+                continue
+            structures = (
+                states[tailor]["structures"]
+                if tailor in states and "structures" in states[tailor]
+                else {}
+            )
+            structure = value
+            structure_name = structure[0]
+            # if ruleset
+            try:
+                structure_ruleset = structure[1]
+                structure_ruleset_list = []
+                for item in structure_ruleset.split(","):
+                    if item.strip() != "":
+                        structure_ruleset_list.append(item.strip())
+                if len(structure_ruleset_list):
+                    structures[structure_name] = {
+                        "ruleset_ids": structure_ruleset_list,
+                    }
+            except KeyError:
+                structures[structure_name] = {}
+            try:
+                if structure[2]:
+                    structures[structure_name]["prompt_id"] = structure[2]
+            except KeyError:
+                pass
+            states[tailor] = {"structures": structures}
+        return states
+
+    def csv_transition_id(self, transition_info: list) -> dict:
+        events = {}
+        for row in transition_info:
+            key, value = row.popitem()
+            if key and value[0] and value[1]:
+                if key in events:
+                    # Add the transition if there already are transitions
+                    events[key]["transitions"].append(
+                        {"from": value[0], "to": value[1]}
+                    )
+                else:
+                    # create the first transition
+                    events[key] = {
+                        "transitions": [
+                            {"from": value[0], "to": value[1]},
+                        ]
+                    }
+        return events
+
+    def update_and_save(self, config: dict) -> None:
+        with self.yaml_path.open("w") as file:
+            yaml.dump(config, file, default_flow_style=False, line_break="\n")
+
+
+if __name__ == "__main__":
+    CsvParser("uw_programmatic").csv_parser()
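
The CSV layout split_csv() expects is implicit in the code: the first cell of the first row names a section (e.g. "Ruleset ID"), and every later row becomes a {first_cell: rest_of_row} entry under that section. A sketch with fabricated sample rows (not from the commit); split_csv never touches instance state, so the sketch bypasses __init__, which would otherwise require a real csv_files directory:

import io

from griptape_statemachine.parsers import CsvParser

sample = io.StringIO(
    "Ruleset ID,Name,Rules\n"
    'expert_rules,Expert,"- Answer in JSON only."\n'
)

parser = CsvParser.__new__(CsvParser)  # skip __init__ for this demonstration
all_information: dict = {}
parser.split_csv(sample, all_information)
print(all_information)
# {'Ruleset ID': [{'expert_rules': ['Expert', '- Answer in JSON only.']}]}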
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
pyproject.toml
ADDED
@@ -0,0 +1,75 @@
+[tool.poetry]
+name = "griptape_statemachine"
+version = "0.1.0"
+description = ""
+authors = ["Collin Dutter <[email protected]>", "Kate Forsberg <[email protected]>"]
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.11"
+griptape = "1.0"
+python-statemachine = {extras = ["diagrams"], version = "^2.3.6"}
+pyyaml = "^6.0.2"
+schema = "^0.7.7"
+python-dotenv = "^1.0.1"
+graphviz = "^0.20.3"
+gradio = "^5.6.0"
+pydot = "^3.0.2"
+pypdf = "^5.1.0"
+PyPDF2 = "^2.2.0"
+xlsxwriter = "3.2.0"
+
+
+[tool.poetry.group.dev.dependencies]
+pyright = "^1.1.380"
+ruff = "^0.6.4"
+typos = "^1.25.0"
+pre-commit = "^3.8.0"
+
+
+[tool.poetry.group.test.dependencies]
+pytest = "^8.3.3"
+
+[tool.ruff]
+line-length = 120
+
+[tool.ruff.lint]
+select = [
+    "ALL"
+]
+ignore = [
+    "D",
+    "COM812", # missing-trailing-comma -- See https://github.com/astral-sh/ruff/issues/9216
+    "ANN003",
+    'T201',
+    "TD",
+    "FIX",
+    "E501"
+]
+[tool.ruff.lint.per-file-ignores]
+"tests/*.py" = ["S101"]
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+
+[tool.ruff.lint.flake8-pytest-style]
+fixture-parentheses = true
+
+[tool.ruff.lint.flake8-tidy-imports.banned-api]
+"attr".msg = "The attr module is deprecated, use attrs instead."
+
+[tool.pyright]
+venvPath = "."
+venv = ".venv"
+include = [
+    "griptape_statemachine"
+]
+exclude = [
+    "**/__pycache__",
+]
+pythonVersion = "3.11"
+enableExperimentalFeatures = true
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
requirements.txt
ADDED
@@ -0,0 +1,11 @@
+griptape==1.0
+python-statemachine[diagrams]==2.3.6
+pyyaml==6.0.2
+schema==0.7.7
+python-dotenv==1.0.1
+graphviz==0.20.3
+gradio==5.6.0
+pydot==3.0.2
+pypdf==5.1.0
+PyPDF2==2.2.0
+xlsxwriter==3.2.0
uw_programmatic/__init__.py
ADDED
File without changes
uw_programmatic/base_machine.py
ADDED
@@ -0,0 +1,496 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import json
|
4 |
+
import logging
|
5 |
+
import os
|
6 |
+
import random
|
7 |
+
from abc import abstractmethod
|
8 |
+
from pathlib import Path
|
9 |
+
from typing import TYPE_CHECKING, cast
|
10 |
+
|
11 |
+
import requests
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
from griptape.artifacts import ListArtifact, TextArtifact
|
14 |
+
from griptape.configs import Defaults
|
15 |
+
from griptape.configs.drivers import (
|
16 |
+
OpenAiDriversConfig,
|
17 |
+
)
|
18 |
+
from griptape.drivers import (
|
19 |
+
GriptapeCloudVectorStoreDriver,
|
20 |
+
LocalStructureRunDriver,
|
21 |
+
OpenAiChatPromptDriver,
|
22 |
+
)
|
23 |
+
from griptape.engines.rag import RagEngine
|
24 |
+
from griptape.engines.rag.modules import (
|
25 |
+
TextChunksResponseRagModule,
|
26 |
+
VectorStoreRetrievalRagModule,
|
27 |
+
)
|
28 |
+
from griptape.engines.rag.stages import ResponseRagStage, RetrievalRagStage
|
29 |
+
from griptape.events import (
|
30 |
+
BaseEvent,
|
31 |
+
EventBus,
|
32 |
+
EventListener,
|
33 |
+
FinishStructureRunEvent,
|
34 |
+
)
|
35 |
+
from griptape.memory.structure import ConversationMemory
|
36 |
+
from griptape.rules import Rule, Ruleset
|
37 |
+
from griptape.structures import Agent, Workflow
|
38 |
+
from griptape.tasks import CodeExecutionTask, StructureRunTask, ToolTask
|
39 |
+
from griptape.tools import RagTool
|
40 |
+
from statemachine import State, StateMachine
|
41 |
+
from statemachine.factory import StateMachineMetaclass
|
42 |
+
|
43 |
+
from griptape_statemachine.parsers.uw_config_parser import UWConfigParser
|
44 |
+
|
45 |
+
logger = logging.getLogger(__name__)
|
46 |
+
logging.getLogger("griptape").setLevel(logging.ERROR)
|
47 |
+
|
48 |
+
if TYPE_CHECKING:
|
49 |
+
from griptape.structures import Structure
|
50 |
+
from griptape.tools import BaseTool
|
51 |
+
from statemachine.event import Event
|
52 |
+
|
53 |
+
load_dotenv()
|
54 |
+
|
55 |
+
Defaults.drivers_config = OpenAiDriversConfig(
|
56 |
+
prompt_driver=OpenAiChatPromptDriver(model="gpt-4o", max_tokens=4096)
|
57 |
+
)
|
58 |
+
|
59 |
+
|
60 |
+
def custom_dict_merge(dict1: dict, dict2: dict) -> dict:
|
61 |
+
result = dict1.copy()
|
62 |
+
for key, value in dict2.items():
|
63 |
+
if key in result and isinstance(result[key], list) and isinstance(value, list):
|
64 |
+
result[key] = result[key] + value
|
65 |
+
else:
|
66 |
+
result[key] = value
|
67 |
+
return result
|
68 |
+
|
69 |
+
|
70 |
+
class UWBaseMachine(StateMachine):
|
71 |
+
"""Base class for a machine.
|
72 |
+
|
73 |
+
|
74 |
+
Attributes:
|
75 |
+
config_file (Path): The path to the configuration file.
|
76 |
+
config (dict): The configuration data.
|
77 |
+
outputs_to_user (list[str]): Outputs to return to the user.
|
78 |
+
"""
|
79 |
+
|
80 |
+
def __init__(self, config_file: Path, **kwargs) -> None:
|
81 |
+
self.config_parser = UWConfigParser(config_file)
|
82 |
+
self.config = self.config_parser.parse()
|
83 |
+
self._structures = {}
|
84 |
+
self.vector_stores = {} # Store here in case needs multiple uses
|
85 |
+
self.question_list: list = []
|
86 |
+
# For the parameters necessary from the user
|
87 |
+
self.page_range: tuple = ()
|
88 |
+
self.question_number: int = 0
|
89 |
+
self.taxonomy: list = []
|
90 |
+
|
91 |
+
self.state_status: dict[str, bool] = {}
|
92 |
+
|
93 |
+
for key in self.state_transitions:
|
94 |
+
self.state_status[key] = False
|
95 |
+
|
96 |
+
def on_event(event: BaseEvent) -> None:
|
97 |
+
"""Takes in griptape events from eventbus and fixes them."""
|
98 |
+
print(f"Received Griptape event: {json.dumps(event.to_dict(), indent=2)}")
|
99 |
+
try:
|
100 |
+
self.send(
|
101 |
+
"process_event",
|
102 |
+
event_={"type": "griptape_event", "value": event.to_dict()},
|
103 |
+
)
|
104 |
+
except Exception as e:
|
105 |
+
errormsg = f"Would not allow process_event to be sent. Check to see if it is defined in the config.yaml. Error:{e}"
|
106 |
+
raise ValueError(errormsg) from e
|
107 |
+
|
108 |
+
EventBus.clear_event_listeners()
|
109 |
+
EventBus.add_event_listener(
|
110 |
+
EventListener(on_event, event_types=[FinishStructureRunEvent]),
|
111 |
+
)
|
112 |
+
super().__init__()
|
113 |
+
|
114 |
+
@property
|
115 |
+
def available_events(self) -> list[str]:
|
116 |
+
return self.current_state.transitions.unique_events
|
117 |
+
|
118 |
+
@property
|
119 |
+
@abstractmethod
|
120 |
+
def tools(self) -> dict[str, BaseTool]:
|
121 |
+
"""Returns the Tools for the machine."""
|
122 |
+
...
|
123 |
+
|
124 |
+
@property
|
125 |
+
def _current_state_config(self) -> dict:
|
126 |
+
return self.config["states"][self.current_state_value]
|
127 |
+
|
128 |
+
@classmethod
|
129 |
+
def from_definition( # noqa: C901, PLR0912
|
130 |
+
cls, definition: dict, **extra_kwargs
|
131 |
+
) -> UWBaseMachine:
|
132 |
+
try:
|
133 |
+
states_instances = {}
|
134 |
+
for state_id, state_kwargs in definition["states"].items():
|
135 |
+
# These are the relevant states that need GOAP.
|
136 |
+
states_instances[state_id] = State(**state_kwargs, value=state_id)
|
137 |
+
except Exception as e:
|
138 |
+
errormsg = f"""Error in state definition: {e}.
|
139 |
+
"""
|
140 |
+
raise ValueError(errormsg) from e
|
141 |
+
|
142 |
+
events = {}
|
143 |
+
state_transitions = {}
|
144 |
+
for event_name, transitions in definition["events"].items():
|
145 |
+
for transition_data in transitions:
|
146 |
+
try:
|
147 |
+
source_name = transition_data["from"]
|
148 |
+
source = states_instances[source_name]
|
149 |
+
target = states_instances[transition_data["to"]]
|
150 |
+
relevance = ""
|
151 |
+
if "relevance" in transition_data:
|
152 |
+
relevance = transition_data["relevance"]
|
153 |
+
if source_name not in state_transitions:
|
154 |
+
state_transitions[source_name] = {event_name: relevance}
|
155 |
+
else:
|
156 |
+
state_transitions[source_name][event_name] = relevance
|
157 |
+
except Exception as e:
|
158 |
+
errormsg = f"Error:{e}. Please check your transitions to be sure each transition has a source and destination."
|
159 |
+
raise ValueError(errormsg) from e
|
160 |
+
|
161 |
+
transition = source.to(
|
162 |
+
target,
|
163 |
+
event=event_name,
|
164 |
+
cond=transition_data.get("cond"),
|
165 |
+
unless=transition_data.get("unless"),
|
166 |
+
on=transition_data.get("on"),
|
167 |
+
internal=transition_data.get("internal"),
|
168 |
+
)
|
169 |
+
|
170 |
+
if event_name in events:
|
171 |
+
events[event_name] |= transition
|
172 |
+
else:
|
173 |
+
events[event_name] = transition
|
174 |
+
for state_id, state in states_instances.items():
|
175 |
+
if state_id not in ("end", "start"):
|
176 |
+
transition = state.to(
|
177 |
+
state,
|
178 |
+
event="process_event",
|
179 |
+
on=f"on_event_{state_id}",
|
180 |
+
internal=True,
|
181 |
+
)
|
182 |
+
if "process_event" in events:
|
183 |
+
events["process_event"] |= transition
|
184 |
+
else:
|
185 |
+
events["process_event"] = transition
|
186 |
+
|
187 |
+
attrs_mapper = {
|
188 |
+
**extra_kwargs,
|
189 |
+
**states_instances,
|
190 |
+
**events,
|
191 |
+
"state_transitions": state_transitions,
|
192 |
+
}
|
193 |
+
|
194 |
+
return cast(
|
195 |
+
UWBaseMachine,
|
196 |
+
StateMachineMetaclass(cls.__name__, (cls,), attrs_mapper)(**extra_kwargs),
|
197 |
+
)
|
198 |
+
|
199 |
+
@classmethod
|
200 |
+
def from_config_file(
|
201 |
+
cls,
|
202 |
+
config_file: Path,
|
203 |
+
**extra_kwargs,
|
204 |
+
) -> UWBaseMachine:
|
205 |
+
"""Creates a StateMachine class from a configuration file"""
|
206 |
+
config_parser = UWConfigParser(config_file)
|
207 |
+
config = config_parser.parse()
|
208 |
+
extra_kwargs["config_file"] = config_file
|
209 |
+
|
210 |
+
definition_states = {
|
211 |
+
state_id: {
|
212 |
+
"initial": state_value.get("initial", False),
|
213 |
+
"final": state_value.get("final", False),
|
214 |
+
}
|
215 |
+
for state_id, state_value in config["states"].items()
|
216 |
+
}
|
217 |
+
definition_events = {
|
218 |
+
event_name: list(event_value["transitions"])
|
219 |
+
for event_name, event_value in config["events"].items()
|
220 |
+
}
|
221 |
+
definition = {"states": definition_states, "events": definition_events}
|
222 |
+
|
223 |
+
return cls.from_definition(definition, **extra_kwargs)
|
224 |
+
|
225 |
+
@abstractmethod
|
226 |
+
def start_machine(self) -> None:
|
227 |
+
"""Starts the machine."""
|
228 |
+
...
|
229 |
+
|
230 |
+
def reset_structures(self) -> None:
|
231 |
+
"""Resets the structures."""
|
232 |
+
self._structures = {}
|
233 |
+
|
234 |
+
def on_enter_state(self, source: State, state: State, event: Event) -> None:
|
235 |
+
print(f"Transitioning from {source} to {state} with event {event}")
|
236 |
+
|
237 |
+
def get_structure(self, structure_id: str) -> Structure:
|
238 |
+
global_structure_config = self.config["structures"][structure_id]
|
239 |
+
state_structure_config = self._current_state_config.get("structures", {}).get(
|
240 |
+
structure_id, {}
|
241 |
+
)
|
242 |
+
structure_config = custom_dict_merge(
|
243 |
+
global_structure_config, state_structure_config
|
244 |
+
)
|
245 |
+
if structure_id not in self._structures:
|
246 |
+
# Initialize Structure with all the expensive setup
|
247 |
+
structure = Agent(
|
248 |
+
id=structure_id,
|
249 |
+
conversation_memory=ConversationMemory(),
|
250 |
+
)
|
251 |
+
self._structures[structure_id] = structure
|
252 |
+
|
253 |
+
# Create a new clone with state-specific stuff
|
254 |
+
structure = self._structures[structure_id]
|
255 |
+
structure = Agent(
|
256 |
+
id=structure.id,
|
257 |
+
prompt_driver=structure.prompt_driver,
|
258 |
+
conversation_memory=structure.conversation_memory,
|
259 |
+
rulesets=[
|
260 |
+
*self._get_structure_rulesets(structure_config.get("ruleset_ids", [])),
|
261 |
+
],
|
262 |
+
)
|
263 |
+
print(f"Structure: {structure_id}")
|
264 |
+
for ruleset in structure.rulesets:
|
265 |
+
for rule in ruleset.rules:
|
266 |
+
print(f"Rule: {rule.value}")
|
267 |
+
return structure
|
268 |
+
|
269 |
+
def _get_structure_rulesets(self, ruleset_ids: list[str]) -> list[Ruleset]:
|
270 |
+
ruleset_configs = [
|
271 |
+
self.config["rulesets"][ruleset_id] for ruleset_id in ruleset_ids
|
272 |
+
]
|
273 |
+
|
274 |
+
# Convert ruleset configs to Rulesets
|
275 |
+
return [
|
276 |
+
Ruleset(
|
277 |
+
name=ruleset_config["name"],
|
278 |
+
rules=[Rule(rule) for rule in ruleset_config["rules"]],
|
279 |
+
)
|
280 |
+
for ruleset_config in ruleset_configs
|
281 |
+
]
|
282 |
+
|
283 |
+
def get_prompt_by_structure(self, structure_id: str) -> str | None:
|
284 |
+
try:
|
285 |
+
state_structure_config = self._current_state_config.get(
|
286 |
+
"structures", {}
|
287 |
+
).get(structure_id, {})
|
288 |
+
global_structure_config = self.config["structures"][structure_id]
|
289 |
+
except KeyError:
|
290 |
+
return None
|
291 |
+
prompt_id = None
|
292 |
+
if "prompt_id" in global_structure_config:
|
293 |
+
prompt_id = global_structure_config["prompt_id"]
|
294 |
+
elif "prompt_id" in state_structure_config:
|
295 |
+
prompt_id = state_structure_config["prompt_id"]
|
296 |
+
else:
|
297 |
+
return None
|
298 |
+
return self.config["prompts"][prompt_id]["prompt"]
|
299 |
+
|
300 |
+
def get_prompt_by_id(self, prompt_id: str) -> str | None:
|
301 |
+
prompt_config = self.config["prompts"]
|
302 |
+
if prompt_id in prompt_config:
|
303 |
+
return prompt_config[prompt_id]["prompt"]
|
304 |
+
return None
|
305 |
+
|
306 |
+
# ALL METHODS RELATING TO THE WORKFLOW AND PIPELINE
|
307 |
+
def end_workflow(self, task: CodeExecutionTask) -> ListArtifact:
|
308 |
+
parent_outputs = task.parent_outputs
|
309 |
+
questions = []
|
310 |
+
for outputs in parent_outputs.values():
|
311 |
+
if outputs.type == "InfoArtifact":
|
312 |
+
continue
|
313 |
+
questions.append(outputs)
|
314 |
+
return ListArtifact(questions)
|
315 |
+
|
316 |
+
def get_questions_workflow(self) -> Workflow:
|
317 |
+
workflow = Workflow(id="create_question_workflow")
|
318 |
+
# How many questions still need to be created
|
319 |
+
for _ in range(self.question_number - len(self.question_list)):
|
320 |
+
task = StructureRunTask(
|
321 |
+
structure_run_driver=LocalStructureRunDriver(
|
322 |
+
create_structure=self.get_single_question
|
323 |
+
),
|
324 |
+
child_ids=["end_task"],
|
325 |
+
)
|
326 |
+
workflow.add_task(task)
|
327 |
+
end_task = CodeExecutionTask(id="end_task", on_run=self.end_workflow)
|
328 |
+
workflow.add_task(end_task)
|
329 |
+
return workflow
|
330 |
+
|
331 |
+
def single_question_last_task(self, task: CodeExecutionTask) -> TextArtifact:
|
332 |
+
parent_outputs = task.parent_outputs
|
333 |
+
wrong_answers = parent_outputs["wrong_answers"].value # Output is a list
|
334 |
+
wrong_answers = wrong_answers.split("\n")
|
335 |
+
question_and_answer = parent_outputs["get_question"].value # Output is a json
|
336 |
+
try:
|
337 |
+
question_and_answer = json.loads(question_and_answer)
|
338 |
+
except:
|
339 |
+
question_and_answer = question_and_answer.split("\n")[1:]
|
340 |
+
question_and_answer = "".join(question_and_answer)
|
341 |
+
question_and_answer = json.loads(question_and_answer)
|
342 |
+
inputs = task.input.value.split(",")
|
343 |
+
question = {
|
344 |
+
"Question": question_and_answer["Question"],
|
345 |
+
"Answer": question_and_answer["Answer"],
|
346 |
+
"Wrong Answers": wrong_answers,
|
347 |
+
"Page": inputs[0],
|
348 |
+
"Taxonomy": inputs[1],
|
349 |
+
}
|
350 |
+
return TextArtifact(question)
|
351 |
+
|
352 |
+
def get_question_for_wrong_answers(self, task: CodeExecutionTask) -> TextArtifact:
|
353 |
+
parent_outputs = task.parent_outputs
|
354 |
+
question = parent_outputs["get_question"].value
|
355 |
+
question = json.loads(question)["Question"]
|
356 |
+
return TextArtifact(question)
|
357 |
+
|
358 |
+
def get_separated_answer_for_wrong_answers(
|
359 |
+
self, task: CodeExecutionTask
|
360 |
+
) -> TextArtifact:
|
361 |
+
parent_outputs = task.parent_outputs
|
362 |
+
answer = parent_outputs["get_question"].value
|
363 |
+
print(answer)
|
364 |
+
answer = json.loads(answer)["Answer"]
|
365 |
+
return TextArtifact(answer)
|
366 |
+
|
367 |
+
def make_rag_structure(
|
368 |
+
self, vector_store: GriptapeCloudVectorStoreDriver
|
369 |
+
) -> Structure:
|
370 |
+
if vector_store:
|
371 |
+
tool = self.build_rag_tool(self.build_rag_engine(vector_store))
|
372 |
+
use_rag_task = ToolTask(tool=tool)
|
373 |
+
return Agent(tasks=[use_rag_task])
|
374 |
+
errormsg = "No Vector Store"
|
375 |
+
raise ValueError(errormsg)
|
376 |
+
|
377 |
+
def get_single_question(self) -> Workflow:
|
378 |
+
question_generator = Workflow(id="single_question")
|
379 |
+
taxonomy = random.choice(self.taxonomy)
|
380 |
+
taxonomyprompt = {
|
381 |
+
"Knowledge": "Generate a quiz question based ONLY on this information: {{parent_outputs['information_task']}}, then write the answer to the question. The interrogative verb for the question should be one of 'define', 'list', 'state', 'identify', or 'label'.",
|
382 |
+
"Comprehension": "Generate a quiz question based ONLY on this information: {{parent_outputs['information_task']}}, then write the answer to the question. The interrogative verb for the question should be one of 'explain', 'predict', 'interpret', 'infer', 'summarize', 'convert', or 'give an example of x'.",
|
383 |
+
"Application": "Generate a quiz question based ONLY on this information: {{parent_outputs['information_task']}}, then write the answer to the question. The structure of the question should be one of 'How could x be used to y?' or 'How would you show/make use of/modify/demonstrate/solve/apply x to conditions y?'",
|
384 |
+
}
|
385 |
+
pages, driver = self.get_vector_store_id_from_page()
|
386 |
+
get_information = StructureRunTask(
|
387 |
+
id="information_task",
|
388 |
+
input="What is the information in KB?",
|
389 |
+
structure_run_driver=LocalStructureRunDriver(
|
390 |
+
create_structure=lambda: self.make_rag_structure(driver)
|
391 |
+
),
|
392 |
+
child_ids=["get_question"],
|
393 |
+
)
|
394 |
+
# Get KBs and select it, assign it to the structure or create the structure right here.
|
395 |
+
# Rules for subject matter expert: return only a json with question and answer as keys.
|
396 |
+
generate_q_task = StructureRunTask(
|
397 |
+
id="get_question",
|
398 |
+
input=taxonomyprompt[taxonomy],
|
399 |
+
structure_run_driver=LocalStructureRunDriver(
|
400 |
+
create_structure=lambda: self.get_structure("subject_matter_expert")
|
401 |
+
),
|
402 |
+
parent_ids=["information_task"],
|
403 |
+
)
|
404 |
+
get_question_code_task = CodeExecutionTask(
|
405 |
+
id="get_only_question",
|
406 |
+
on_run=self.get_question_for_wrong_answers,
|
407 |
+
parent_ids=["get_question"],
|
408 |
+
child_ids=["wrong_answers"],
|
409 |
+
)
|
410 |
+
get_separated_answer_code_task = CodeExecutionTask(
|
411 |
+
id="get_separated_answer",
|
412 |
+
on_run=self.get_separated_answer_for_wrong_answers,
|
413 |
+
parent_ids=["get_question"],
|
414 |
+
child_ids=["wrong_answers"],
|
415 |
+
)
|
416 |
+
generate_wrong_answers = StructureRunTask(
|
417 |
+
id="wrong_answers",
|
418 |
+
input="""Write and return three incorrect answers for this question: {{parent_outputs['get_separated_question']}}. The correct answer to the question is: {{parent_outputs['get_separated_answer']}}, and incorrect answers should have the same structure as this answer whilst still being incorrect. Use this information as context to write the incorrect answers: {{parent_outputs['information_task']}}""",
|
419 |
+
structure_run_driver=LocalStructureRunDriver(
|
420 |
+
create_structure=lambda: self.get_structure("wrong_answers_generator")
|
421 |
+
),
|
422 |
+
parent_ids=["get_only_question", "information_task"],
|
423 |
+
)
|
424 |
+
compile_task = CodeExecutionTask(
|
425 |
+
id="compile_task",
|
426 |
+
input=f"{pages}, {taxonomy}",
|
427 |
+
on_run=self.single_question_last_task,
|
428 |
+
parent_ids=["wrong_answers", "get_question"],
|
429 |
+
)
|
430 |
+
question_generator.add_tasks(
|
431 |
+
get_information,
|
432 |
+
generate_q_task,
|
433 |
+
get_question_code_task,
|
434 |
+
get_separated_answer_code_task,
|
435 |
+
generate_wrong_answers,
|
436 |
+
compile_task,
|
437 |
+
)
|
438 |
+
return question_generator
|
439 |
+
|
+    def get_vector_store_id_from_page(
+        self,
+    ) -> tuple[str, GriptapeCloudVectorStoreDriver]:
+        base_url = "https://cloud.griptape.ai/api"
+        kb_url = f"{base_url}/knowledge-bases"
+        headers = {"Authorization": f"Bearer {os.getenv('GT_CLOUD_API_KEY')}"}
+        # TODO: This needs to change when I have my own bucket. Right now, I'm using the 10 most recently made KBs.
+        response = requests.get(url=kb_url, headers=headers)
+        response.raise_for_status()
+        if response.status_code == 200:
+            data = response.json()
+            possible_kbs = {}
+            for kb in data["knowledge_bases"]:
+                name = kb["name"]
+                if "KB_section" not in name:
+                    continue
+                page_nums = name.split("p")[1:]
+                start_page = int(page_nums[0].split("-")[0])
+                end_page = int(page_nums[1])
+                if end_page <= self.page_range[1] and start_page >= self.page_range[0]:
+                    possible_kbs[kb["knowledge_base_id"]] = f"{start_page}-{end_page}"
+            kb_id = random.choice(list(possible_kbs.keys()))
+            page_value = possible_kbs[kb_id]  # TODO: This won't help at all actually
+            return page_value, GriptapeCloudVectorStoreDriver(
+                api_key=os.getenv("GT_CLOUD_API_KEY", ""),
+                knowledge_base_id=kb_id,
+            )
+        else:
+            raise ValueError(response.status_code)
+
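For reference, the split logic above implies knowledge-base names carry two 'p'-prefixed page markers. A worked example with a hypothetical name (the real naming convention lives in Griptape Cloud):

    name = "KB_section_p10-p20"                   # hypothetical KB name
    page_nums = name.split("p")[1:]               # ["10-", "20"]
    start_page = int(page_nums[0].split("-")[0])  # 10
    end_page = int(page_nums[1])                  # 20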
+    def get_taxonomy_vs(self) -> GriptapeCloudVectorStoreDriver:
+        return GriptapeCloudVectorStoreDriver(
+            api_key=os.getenv("GT_CLOUD_API_KEY", ""),
+            knowledge_base_id="2c3a6f19-51a8-43c3-8445-c7fbe06bf460",
+        )
+
+    def build_rag_engine(
+        self, vector_store_driver: GriptapeCloudVectorStoreDriver
+    ) -> RagEngine:
+        return RagEngine(
+            retrieval_stage=RetrievalRagStage(
+                retrieval_modules=[
+                    VectorStoreRetrievalRagModule(
+                        vector_store_driver=vector_store_driver,
+                    )
+                ],
+            ),
+            response_stage=ResponseRagStage(
+                response_modules=[TextChunksResponseRagModule()]
+            ),
+        )
+
+    def build_rag_tool(self, engine: RagEngine) -> RagTool:
+        return RagTool(
+            description="Contains information about the textbook. Use it ONLY for context.",
+            rag_engine=engine,
+        )
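A minimal sketch of how these three builders compose at a call site, written standalone rather than as the methods above, with a placeholder knowledge-base id:

    import os
    from griptape.drivers import GriptapeCloudVectorStoreDriver
    from griptape.structures import Agent

    driver = GriptapeCloudVectorStoreDriver(
        api_key=os.getenv("GT_CLOUD_API_KEY", ""),
        knowledge_base_id="<knowledge-base-id>",  # placeholder
    )
    # The engine retrieves text chunks; the tool exposes them to the agent.
    tool = build_rag_tool(build_rag_engine(driver))
    agent = Agent(tools=[tool])
    agent.run("What topics does the textbook cover?")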
uw_programmatic/config.yaml
ADDED
@@ -0,0 +1,119 @@
+events:
+  enter_first_state:
+    transitions:
+    - from: start
+      to: gather_parameters
+  finish_state:
+    transitions:
+    - from: evaluate_q_count
+      to: output_q
+  next_state:
+    transitions:
+    - from: gather_parameters
+      to: evaluate_q_count
+    - from: evaluate_q_count
+      to: need_more_q
+    - from: need_more_q
+      to: assess_generated_q
+    - from: assess_generated_q
+      to: evaluate_q_count
+    - from: output_q
+      to: gather_parameters
+  end_state:
+    transitions:
+    - from: output_q
+      to: end
+rulesets:
+  frame_question_best_practices:
+    name: Frame Question with KB
+    rules:
+    - '"Return a string with the reformatted question'
+    - '"No commentary, no code, no backticks'
+    - '"Use the information from your knowledge base'
+    - '"Do not change the content of the question'
+  incorrect_answers_creator:
+    name: Create Wrong Answers
+    rules:
+    - '"Return ONLY a list of 3 incorrect answers. No markdown, no commentary, no
+      backticks.'
+    - '"All incorrect answers should be different, but plausible answers to the question.'
+    - '"Incorrect answers may reference material from the knowledge base, but must
+      not be correct answers to the question'
+    - '"Length of incorrect answers should be 10 words max, 5 words minimum'
+  similarity_checker:
+    name: Check Similarity
+    rules:
+    - '''"you are adept at comparing questions to check whether they are similar'''
+    - '''"you will be given a list of questions. If two questions assess very similar
+      subjects in a very similar way, remove one of them from the list.'''
+    - '''"do not change anything else in the list.'''
+    - '''"output only the edited list.'''
+    - '''Return ONLY a json'''
+    - '''No markdown, no commentary, no code, no backticks.'''
+    - '"Use \" for quotes within the JSON'
+  specific_question_creator:
+    name: Create Question
+    rules:
+    - '"Return ONLY a json with ''Question'' and ''Answer'' as keys.'
+    - " No markdown, no commentary, no code, no backticks."
+    - '"Query to knowledge base should always be ''find information for quiz question'''
+    - '"Question should be a multiple choice quiz style question that assesses a student''s
+      knowledge of the information in the knowledge base (which should be referred
+      to as ''the textbook''). Answer should be a correct answer to the question that
+      uses information from the knowledge base. Do not return incorrect answers.'
+    - '"The length of the question should be 30 words at most.'
+    - '"Question should never reference or ask about an entire section, never reference
+      or ask about a quote in the knowledge base, never ask for the page number of
+      some information, and never ask for information about the file, document, or
+      knowledge base.'
+    - '"The answer to the question should be short, but should not omit important
+      information.'
+  taxonomy_prompter:
+    name: Decide Taxonomy
+    rules:
+    - '"behave as if you were a user asking an AI chatbot to generate a question for
+      you'
+states:
+  assess_generated_q:
+    structures:
+      similarity_auditor: {}
+  end:
+    final: true
+  evaluate_q_count: {}
+  gather_parameters: {}
+  need_more_q:
+    structures:
+      best_practices_expert: {}
+      subject_matter_expert: {}
+      taxonomy_expert: {}
+  output_q: {}
+  start:
+    initial: true
+structures:
+  best_practices_expert:
+    model: gpt-4o
+    prompt_id: best_practices_question
+    ruleset_ids:
+    - frame_question_best_practices
+    vector_stores:
+    - best_practices
+  similarity_auditor:
+    model: gpt-4o
+    prompt_id: similarity_auditor_prompt
+    ruleset_ids:
+    - similarity_checker
+  subject_matter_expert:
+    model: gpt-4o
+    prompt_id: scope_question_subject_expert
+    ruleset_ids:
+    - specific_question_creator
+  taxonomy_expert:
+    model: gpt-4o
+    prompt_id: scope_question_taxonomy
+    ruleset_ids:
+    - taxonomy_prompter
+  wrong_answers_generator:
+    model: gpt-4o
+    prompt_id: write_incorrect_answers
+    ruleset_ids:
+    - incorrect_answers_creator
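As a sanity check, the event/transition section above can be loaded and inverted into a (state, event) -> next-state lookup. A short sketch, assuming PyYAML is available:

    import yaml

    with open("uw_programmatic/config.yaml") as f:
        config = yaml.safe_load(f)

    # {(current_state, event): next_state} built from the transitions above.
    transitions = {
        (t["from"], event): t["to"]
        for event, spec in config["events"].items()
        for t in spec["transitions"]
    }
    assert transitions[("start", "enter_first_state")] == "gather_parameters"
    assert transitions[("output_q", "end_state")] == "end"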
uw_programmatic/question_pipeline.py
ADDED
@@ -0,0 +1,300 @@
+from __future__ import annotations
+
+import ast
+import json
+import logging
+import os
+import random
+
+import requests
+from dotenv import load_dotenv
+from griptape.artifacts import ListArtifact, TextArtifact
+from griptape.configs import Defaults
+from griptape.configs.drivers import OpenAiDriversConfig
+from griptape.configs.logging import TruncateLoggingFilter
+from griptape.drivers import (
+    GriptapeCloudVectorStoreDriver,
+    LocalStructureRunDriver,
+    OpenAiChatPromptDriver,
+)
+from griptape.engines.rag import RagEngine
+from griptape.engines.rag.modules import (
+    TextChunksResponseRagModule,
+    VectorStoreRetrievalRagModule,
+)
+from griptape.engines.rag.stages import ResponseRagStage, RetrievalRagStage
+from griptape.rules import Rule, Ruleset
+from griptape.structures import Agent, Structure, Workflow
+from griptape.tasks import CodeExecutionTask, StructureRunTask
+from griptape.tools import RagTool
+
+from griptape_statemachine.parsers.uw_csv_parser import CsvParser
+
+load_dotenv()
+
+# OpenAI default config: pass in a new OpenAI driver.
+Defaults.drivers_config = OpenAiDriversConfig(
+    prompt_driver=OpenAiChatPromptDriver(model="gpt-4o", max_tokens=4096)
+)
+# logger = logging.getLogger(Defaults.logging_config.logger_name)
+# logger.setLevel(logging.ERROR)
+# logger.addFilter(TruncateLoggingFilter(max_log_length=5000))
+
+
+# ALL METHODS RELATING TO THE WORKFLOW AND PIPELINE
+def end_workflow(task: CodeExecutionTask) -> ListArtifact:
+    parent_outputs = task.parent_outputs
+    questions = []
+    for output in parent_outputs.values():
+        output = output.value
+        try:
+            output = ast.literal_eval(output)
+            question = {output["Question"]: output}
+            questions.append(TextArtifact(question))
+        except (SyntaxError, ValueError):
+            pass
+    return ListArtifact(questions)
+
+
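Each parent artifact reaching end_workflow is the str() of a question dict, so literal_eval round-trips it. A worked example with a made-up question:

    raw = "{'Question': 'What is Python?', 'Answer': 'A programming language'}"
    parsed = ast.literal_eval(raw)        # back to a dict
    entry = {parsed["Question"]: parsed}  # keyed by question text, as above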
+def get_questions_workflow() -> Workflow:
+    workflow = Workflow(id="create_question_workflow")
+    # One StructureRunTask per question still to be created.
+    for _ in range(10):
+        task = StructureRunTask(
+            structure_run_driver=LocalStructureRunDriver(
+                create_structure=get_single_question
+            ),
+            child_ids=["end_task"],
+        )
+        workflow.add_task(task)
+    end_task = CodeExecutionTask(id="end_task", on_run=end_workflow)
+    workflow.add_task(end_task)
+    return workflow
+
+
+def single_question_last_task(task: CodeExecutionTask) -> TextArtifact:
+    parent_outputs = task.parent_outputs
+    print(f"PARENT OUTPUTS ARE: {parent_outputs}")
+    wrong_answers = parent_outputs["wrong_answers"].value  # Output is a newline-separated list
+    wrong_answers = wrong_answers.split("\n")
+    question_and_answer = parent_outputs["get_question"].value  # Output is a JSON string
+    question_and_answer = json.loads(question_and_answer)
+    inputs = task.input.value.split(",")
+    question = {
+        "Question": question_and_answer["Question"],
+        "Answer": question_and_answer["Answer"],
+        "Wrong Answers": wrong_answers,
+        "Page": int(inputs[0]),
+        "Taxonomy": inputs[1].strip(),
+    }
+    return TextArtifact(question)
+
+
+def get_question_for_wrong_answers(task: CodeExecutionTask) -> TextArtifact:
+    parent_outputs = task.parent_outputs
+    question = parent_outputs["get_question"].value
+    print(question)
+    question = json.loads(question)["Question"]
+    return TextArtifact(question)
+
+
+def get_single_question() -> Workflow:
+    question_generator = Workflow()
+    page_number = random.choice(list(range(1, 9)))
+    taxonomy = random.choice(["Knowledge", "Comprehension", "Application"])
+    taxonomyprompt = {
+        "Knowledge": "Generate a quiz question based ONLY on this information: {{parent_outputs['information_task']}}, then write the answer to the question. The interrogative verb for the question should be one of 'define', 'list', 'state', 'identify', or 'label'.",
+        "Comprehension": "Generate a quiz question based ONLY on this information: {{parent_outputs['information_task']}}, then write the answer to the question. The interrogative verb for the question should be one of 'explain', 'predict', 'interpret', 'infer', 'summarize', 'convert', or 'give an example of x'.",
+        "Application": "Generate a quiz question based ONLY on this information: {{parent_outputs['information_task']}}, then write the answer to the question. The structure of the question should be one of 'How could x be used to y?' or 'How would you show/make use of/modify/demonstrate/solve/apply x to conditions y?'",
+    }
+    # Get KBs and select one, assign it to the structure or create the structure right here.
+    # Rules for subject matter expert: return only a JSON with question and answer as keys.
+    generate_q_task = StructureRunTask(
+        id="get_question",
+        input=taxonomyprompt[taxonomy],
+        structure_run_driver=LocalStructureRunDriver(
+            create_structure=lambda: get_structure("subject_matter_expert", page_number)
+        ),
+    )
+
+    get_question_code_task = CodeExecutionTask(
+        id="get_only_question",
+        on_run=get_question_for_wrong_answers,
+        parent_ids=["get_question"],
+        child_ids=["wrong_answers"],
+    )
+    # This will use the same KB as the previous task
+    generate_wrong_answers = StructureRunTask(
+        id="wrong_answers",
+        input="""Write and return three incorrect answers for this question: {{parent_outputs['get_only_question']}} with this context: {{parent_outputs['information_task']}}""",
+        structure_run_driver=LocalStructureRunDriver(
+            create_structure=lambda: get_structure("wrong_answers_generator")
+        ),
+        parent_ids=["get_only_question"],
+    )
+    compile_task = CodeExecutionTask(
+        id="compile_task",
+        input=f"{page_number}, {taxonomy}",
+        on_run=single_question_last_task,
+        parent_ids=["wrong_answers", "get_question"],
+    )
+    question_generator.add_tasks(
+        generate_q_task,
+        get_question_code_task,
+        generate_wrong_answers,
+        compile_task,
+    )
+    return question_generator
+
+
+def get_structure(structure_id: str, page_number: int = 0) -> Structure:
+    match structure_id:
+        case "subject_matter_expert":
+            rulesets = Ruleset(
+                name="specific_question_creator",
+                rules=[
+                    Rule(
+                        "Return ONLY a json with 'Question' and 'Answer' as keys. No markdown, no commentary, no code, no backticks."
+                    ),
+                    Rule(
+                        "Query to knowledge base should always be 'find information for quiz question'"
+                    ),
+                    Rule("Use ONLY information from your knowledge base"),
+                    Rule(
+                        "Question should be a question based on the knowledge base. Answer should be from knowledge base."
+                    ),
+                    Rule(
+                        "The answer to the question should be short, but should not omit important information."
+                    ),
+                    Rule("Answer length should be 10 words maximum, 5 words minimum"),
+                ],
+            )
+            _, kb_driver = get_vector_store_id_from_page(page_number)
+            tool = build_rag_tool(build_rag_engine(kb_driver))
+            structure = Agent(
+                id="subject_matter_expert",
+                prompt_driver=OpenAiChatPromptDriver(model="gpt-4o"),
+                rulesets=[rulesets],
+                tools=[tool],
+            )
+        case "taxonomy_expert":
+            rulesets = Ruleset(
+                name="KB Rules",
+                rules=[
+                    Rule(
+                        "Use only your knowledge base. Do not make up any additional information."
+                    ),
+                    Rule("Maximum 10 words."),
+                    Rule(
+                        "Return information an AI chatbot could use to write a question on a subject."
+                    ),
+                ],
+            )
+            kb_driver = get_taxonomy_vs()
+            tool = build_rag_tool(build_rag_engine(kb_driver))
+            structure = Agent(
+                id="taxonomy_expert",
+                prompt_driver=OpenAiChatPromptDriver(model="gpt-4o"),
+                tools=[tool],
+            )
+        case "wrong_answers_generator":
+            rulesets = Ruleset(
+                name="incorrect_answers_creator",
+                rules=[
+                    Rule(
+                        "Return ONLY a list of 3 incorrect answers. No markdown, no commentary, no backticks."
+                    ),
+                    Rule(
+                        "All incorrect answers should be different, but plausible answers to the question."
+                    ),
+                    Rule(
+                        "Incorrect answers may reference material from the knowledge base, but must not be correct answers to the question"
+                    ),
+                    Rule(
+                        "Length of incorrect answers should be 10 words max, 5 words minimum"
+                    ),
+                ],
+            )
+            _, kb_driver = get_vector_store_id_from_page(page_number)
+            tool = build_rag_tool(build_rag_engine(kb_driver))
+            structure = Agent(
+                id="wrong_answers_generator",
+                prompt_driver=OpenAiChatPromptDriver(model="gpt-4o"),
+                rulesets=[rulesets],
+                tools=[tool],
+            )
+        case _:
+            structure = Agent(prompt_driver=OpenAiChatPromptDriver(model="gpt-4o"))
+    return structure
+
+
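A sketch of driving get_structure directly; the page number and prompt are made up, and the output shape follows the specific_question_creator rules:

    expert = get_structure("subject_matter_expert", page_number=3)
    expert.run("Generate a quiz question, then write its answer.")
    print(expert.output.value)  # expected: {"Question": "...", "Answer": "..."}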
+def get_vector_store_id_from_page(
+    page: int,
+) -> tuple[str, GriptapeCloudVectorStoreDriver]:
+    base_url = "https://cloud.griptape.ai/api"
+    kb_url = f"{base_url}/knowledge-bases"
+    headers = {"Authorization": f"Bearer {os.getenv('GT_CLOUD_API_KEY')}"}
+    # TODO: This needs to change when I have my own bucket. Right now, I'm using the 10 most recently made KBs.
+    response = requests.get(url=kb_url, headers=headers)
+    response.raise_for_status()
+    if response.status_code == 200:
+        data = response.json()
+        possible_kbs = {}
+        for kb in data["knowledge_bases"]:
+            name = kb["name"]
+            if "KB_section" not in name:
+                continue
+            page_nums = name.split("pg")[1].split("-")
+            start_page = int(page_nums[0])
+            end_page = int(page_nums[1])
+            if end_page <= 40 and start_page >= 1:  # NOTE: window hardcoded; `page` is currently unused
+                possible_kbs[kb["knowledge_base_id"]] = f"{start_page}-{end_page}"
+        kb_id = random.choice(list(possible_kbs.keys()))
+        page_value = possible_kbs[kb_id]
+        return page_value, GriptapeCloudVectorStoreDriver(
+            api_key=os.getenv("GT_CLOUD_API_KEY", ""),
+            knowledge_base_id=kb_id,
+        )
+    raise ValueError(response.status_code)
+
+
+def get_taxonomy_vs() -> GriptapeCloudVectorStoreDriver:
+    return GriptapeCloudVectorStoreDriver(
+        api_key=os.getenv("GT_CLOUD_API_KEY", ""),
+        knowledge_base_id="2c3a6f19-51a8-43c3-8445-c7fbe06bf460",
+    )
+
+
+def build_rag_engine(vector_store_driver: GriptapeCloudVectorStoreDriver) -> RagEngine:
+    return RagEngine(
+        retrieval_stage=RetrievalRagStage(
+            retrieval_modules=[
+                VectorStoreRetrievalRagModule(
+                    vector_store_driver=vector_store_driver,
+                    query_params={
+                        "count": 100,
+                    },
+                )
+            ],
+        ),
+        response_stage=ResponseRagStage(
+            response_modules=[TextChunksResponseRagModule()]
+        ),
+    )
+
+
+def build_rag_tool(engine: RagEngine) -> RagTool:
+    return RagTool(
+        description="Contains information about the textbook. Use it to answer any related questions.",
+        rag_engine=engine,
+    )
+
+
+if __name__ == "__main__":
+    # workflow = get_questions_workflow()
+    # workflow.run()
+    CsvParser("uw_programmatic").csv_parser()
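For an end-to-end smoke test of this module, the commented-out workflow in the __main__ block can be run and its fan-in output inspected. A minimal sketch:

    workflow = get_questions_workflow()
    workflow.run()
    # end_task returns a ListArtifact of {question_text: question_dict} entries.
    for artifact in workflow.output_task.output.value:
        print(artifact.value)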
uw_programmatic/uw_machine.py
ADDED
@@ -0,0 +1,265 @@
+from __future__ import annotations
+
+import ast
+import csv
+import json
+import random
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import pandas as pd
+import xlsxwriter  # noqa: F401  (used by pandas as the ExcelWriter engine)
+
+from base_machine import UWBaseMachine
+
+if TYPE_CHECKING:
+    from griptape.tools import BaseTool
+
+
+class UWMachine(UWBaseMachine):
+    """State machine with GOAP."""
+
+    @property
+    def tools(self) -> dict[str, BaseTool]:
+        return {}
+
+    def start_machine(self) -> None:
+        """Starts the machine."""
+        # Clear input history and the similarity-step CSV from any previous run.
+        with (Path.cwd() / "outputs/similarity_step.csv").open("w") as file:
+            file.write("")
+        self.send("enter_first_state")
+
+    def on_event_gather_parameters(self, event_: dict) -> None:
+        event_source = event_["type"]
+        event_value = event_["value"]
+        match event_source:
+            case "user_input":
+                parameters = event_value
+                self.page_range = parameters["page_range"]
+                self.question_number = parameters["question_number"]
+                self.taxonomy = parameters["taxonomy"]
+                self.send("next_state")
+            case _:
+                err_msg = f"Unexpected Transition Event ID: {event_value}."
+                raise ValueError(err_msg)
+
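The handler above expects event payloads shaped like the following; the parameter values are made up, and `machine` stands for an instantiated UWMachine:

    machine.on_event_gather_parameters(
        {
            "type": "user_input",
            "value": {
                "page_range": (1, 40),
                "question_number": 5,
                "taxonomy": "Knowledge",
            },
        }
    )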
+    def on_enter_evaluate_q_count(self) -> None:
+        if len(self.question_list) >= self.question_number:
+            self.send("finish_state")  # go to output questions
+        else:
+            self.send("next_state")  # go to need more questions
+
+    def on_event_evaluate_q_count(self, event_: dict) -> None:
+        pass
+
+    def on_enter_need_more_q(self) -> None:
+        # Build and run the entire workflow to create another batch of questions.
+        self.get_questions_workflow().run()
+
+    def on_event_need_more_q(self, event_: dict) -> None:
+        event_source = event_["type"]
+        event_value = event_["value"]
+        match event_source:
+            case "griptape_event":
+                event_type = event_value["type"]
+                match event_type:
+                    case "FinishStructureRunEvent":
+                        structure_id = event_value["structure_id"]
+                        match structure_id:
+                            case "create_question_workflow":
+                                values = event_value["output_task_output"]["value"]
+                                questions = [
+                                    ast.literal_eval(question["value"])
+                                    for question in values
+                                ]
+                                # The workflow's output is a ListArtifact of question dicts.
+                                self.most_recent_questions = questions
+                                self.send("next_state")
+                            case _:
+                                print(f"Error: {event_}")
+            case _:
+                print(f"Unexpected: {event_}")
+
+    def on_enter_assess_generated_q(self) -> None:
+        # TODO: Should it append to the list already and remove duplicates, or not?
+        # TODO: Merge incoming lists
+        with (Path.cwd() / "outputs/similarity_step.csv").open("a", newline="") as file:
+            writer = csv.DictWriter(
+                file,
+                fieldnames=[
+                    "Question",
+                    "Answer",
+                    "Wrong Answers",
+                    "Page",
+                    "Taxonomy",
+                ],
+            )
+            writer.writerow({"Question": "LIST OF QUESTIONS GENERATED THIS ROUND"})
+            writer.writerows(self.most_recent_questions)
+        merged_list = [*self.question_list, *self.most_recent_questions]
+        prompt = f"{merged_list}"
+        self.get_structure("similarity_auditor").run(prompt)
+
+    def on_event_assess_generated_q(self, event_: dict) -> None:
+        event_source = event_["type"]
+        event_value = event_["value"]
+        match event_source:
+            case "griptape_event":
+                event_type = event_value["type"]
+                match event_type:
+                    case "FinishStructureRunEvent":
+                        structure_id = event_value["structure_id"]
+                        match structure_id:
+                            case "similarity_auditor":
+                                new_question_list = event_value["output_task_output"]["value"]
+                                try:
+                                    # The auditor must return the list in JSON format.
+                                    new_question_list = json.loads(new_question_list)
+                                except json.JSONDecodeError:
+                                    new_question_list = self.question_list
+                                merged_list = [
+                                    *self.question_list,
+                                    *self.most_recent_questions,
+                                ]
+                                deleted_q = [
+                                    question1
+                                    for question1 in merged_list
+                                    if not any(
+                                        question2["Question"] == question1["Question"]
+                                        for question2 in new_question_list
+                                    )
+                                ]
+                                with (Path.cwd() / "outputs/similarity_step.csv").open(
+                                    "a", newline=""
+                                ) as file:
+                                    writer = csv.DictWriter(
+                                        file,
+                                        fieldnames=[
+                                            "Question",
+                                            "Answer",
+                                            "Wrong Answers",
+                                            "Page",
+                                            "Taxonomy",
+                                        ],
+                                    )
+                                    writer.writerow(
+                                        {"Question": "QUESTIONS REMOVED THIS ROUND!"}
+                                    )
+                                    if deleted_q:
+                                        writer.writerows(deleted_q)
+                                    else:
+                                        writer.writerow({"Question": "No questions removed"})
+                                self.question_list = new_question_list
+                                self.send("next_state")  # move on
+
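The removal bookkeeping in this handler reduces to a membership check on question text. A tiny worked example with invented questions:

    before = [{"Question": "What is Python?"}, {"Question": "Define Python."}]
    after = [{"Question": "What is Python?"}]  # auditor dropped the near-duplicate
    deleted = [
        q for q in before
        if not any(n["Question"] == q["Question"] for n in after)
    ]
    assert deleted == [{"Question": "Define Python."}]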
+    def on_enter_output_q(self) -> None:
+        columns = pd.MultiIndex.from_tuples(
+            [
+                ("Professor", "Page Range"),
+                ("Professor", "Taxonomy"),
+                ("Professor", "Question"),
+                ("Professor", "Answer"),
+                ("Professor", "Wrong Answers"),
+                ("Student", "Question"),
+                ("Student", "Answers"),
+            ]
+        )
+        data = pd.DataFrame(columns=columns)
+        for idx, question in enumerate(self.question_list):
+            shuffled_answers = [
+                question["Answer"],
+                *question["Wrong Answers"],
+            ]
+            random.shuffle(shuffled_answers)
+            shuffled_answers = "\n".join(shuffled_answers)
+            new_row = [
+                question["Page"],
+                question["Taxonomy"],
+                question["Question"],
+                question["Answer"],
+                question["Wrong Answers"],
+                question["Question"],
+                shuffled_answers,
+            ]
+            data.loc[idx] = new_row
+        data.columns = ["_".join(col).strip() for col in data.columns.values]
+        with pd.ExcelWriter("outputs/professor_guide.xlsx", engine="xlsxwriter") as writer:
+            data.to_excel(writer, sheet_name="Quiz Questions", index=False)
+        self.send("next_state")
+
+    def on_event_output_q(self, event_: dict) -> None:
+        pass
+
+    def on_exit_output_q(self) -> None:
+        # Reset the state machine values.
+        self.question_list = []
+        self.most_recent_questions = []
+
+if __name__ == "__main__":
+    question_list = [
+        {
+            "Page": "1-2",
+            "Taxonomy": "Knowledge",
+            "Question": "What is Python?",
+            "Answer": "A programming language",
+            "Wrong Answers": ["A snake", "A car brand", "A fruit"],
+        },
+        {
+            "Page": "3-4",
+            "Taxonomy": "Comprehension",
+            "Question": "What does HTML stand for?",
+            "Answer": "HyperText Markup Language",
+            "Wrong Answers": [
+                "High Text Machine Language",
+                "Hyperlink Text Mode Language",
+                "None of the above",
+            ],
+        },
+    ]
+
+    columns = pd.MultiIndex.from_tuples(
+        [
+            ("Professor", "Page Range"),
+            ("Professor", "Taxonomy"),
+            ("Professor", "Question"),
+            ("Professor", "Answer"),
+            ("Professor", "Wrong Answers"),
+            ("Student", "Question"),
+            ("Student", "Answers"),
+        ]
+    )
+    data = pd.DataFrame(columns=columns)
+    for idx, question in enumerate(question_list):
+        shuffled_answers = [
+            question["Answer"],
+            *question["Wrong Answers"],
+        ]
+        random.shuffle(shuffled_answers)
+        shuffled_answers = "\n".join(shuffled_answers)
+        new_row = [
+            question["Page"],
+            question["Taxonomy"],
+            question["Question"],
+            question["Answer"],
+            question["Wrong Answers"],
+            question["Question"],
+            shuffled_answers,
+        ]
+        data.loc[idx] = new_row
+    data.columns = ["_".join(col).strip() for col in data.columns.values]
+    with pd.ExcelWriter("outputs/professor_guide.xlsx", engine="xlsxwriter") as writer:
+        data.to_excel(writer, sheet_name="Quiz Questions", index=False)
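To spot-check the workbook this test harness writes, read it back; this assumes pandas' optional Excel dependencies are installed:

    import pandas as pd
    print(pd.read_excel("outputs/professor_guide.xlsx").head())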