YoanSallami
commited on
Commit
·
fe643f6
1
Parent(s):
10db5b3
Add notebooks
Browse files- Dockerfile +6 -4
- README.md +5 -5
- code_examples/1_basics/1_first_steps.py +192 -0
- code_examples/1_basics/2_first_programs.py +308 -0
- code_examples/1_basics/3_control_flow.py +487 -0
- code_examples/1_basics/4_conversational_applications.py +176 -0
- code_examples/1_basics/5_rewards_metrics_and_optimizers.py +194 -0
- code_examples/1_basics/6_training_programs.py +335 -0
- code_examples/2_advanced/1_implementing_custom_modules_and_programs_via_subclassing.py +340 -0
- examples/chatbot.py +0 -152
- examples/dataset_explorer.py +0 -108
- requirements.txt +2 -8
Dockerfile
CHANGED
@@ -10,6 +10,9 @@ ENV VIRTUAL_ENV=/opt/venv \
|
|
10 |
PATH="/opt/venv/bin:/home/user/.local/bin:$PATH" \
|
11 |
HOME=/home/user
|
12 |
|
|
|
|
|
|
|
13 |
# Install dependencies
|
14 |
COPY --chown=user:user ./requirements.txt requirements.txt
|
15 |
RUN uv venv $VIRTUAL_ENV \
|
@@ -31,7 +34,7 @@ RUN --mount=type=secret,id=MARIMO_PASSWORD \
|
|
31 |
WORKDIR /data
|
32 |
|
33 |
# Copy examples
|
34 |
-
COPY --chown=user:user ./
|
35 |
|
36 |
# Set user
|
37 |
USER user
|
@@ -47,7 +50,6 @@ dataframes = "rich"
|
|
47 |
theme = "light"
|
48 |
ENDCONFIG
|
49 |
|
50 |
-
|
51 |
# Uncomment to enable password protection
|
52 |
-
# CMD marimo edit /data --host=0.0.0.0 --port=7860 --token-password=$(cat $HOME/.marimo_password)
|
53 |
-
CMD marimo edit /data --host=0.0.0.0 --port=7860 --no-token
|
|
|
10 |
PATH="/opt/venv/bin:/home/user/.local/bin:$PATH" \
|
11 |
HOME=/home/user
|
12 |
|
13 |
+
# Install graphviz for dot visualization
|
14 |
+
RUN apt update && apt install -y graphviz
|
15 |
+
|
16 |
# Install dependencies
|
17 |
COPY --chown=user:user ./requirements.txt requirements.txt
|
18 |
RUN uv venv $VIRTUAL_ENV \
|
|
|
34 |
WORKDIR /data
|
35 |
|
36 |
# Copy examples
|
37 |
+
COPY --chown=user:user ./code_examples ./code_examples
|
38 |
|
39 |
# Set user
|
40 |
USER user
|
|
|
50 |
theme = "light"
|
51 |
ENDCONFIG
|
52 |
|
|
|
53 |
# Uncomment to enable password protection
|
54 |
+
# CMD ["marimo", "edit", "/data", "--host=0.0.0.0", "--port=7860", "--token-password=$(cat $HOME/.marimo_password)"]
|
55 |
+
CMD ["marimo", "edit", "/data", "--host=0.0.0.0", "--port=7860", "--no-token"]
|
README.md
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: yellow
|
5 |
colorTo: blue
|
6 |
sdk: docker
|
7 |
pinned: true
|
8 |
license: mit
|
9 |
-
short_description: A marimo Space to edit
|
10 |
---
|
11 |
|
12 |
-
Check out
|
13 |
-
Check out the
|
|
|
1 |
---
|
2 |
+
title: synalinks notebooks
|
3 |
+
emoji: 🧠🔗
|
4 |
colorFrom: yellow
|
5 |
colorTo: blue
|
6 |
sdk: docker
|
7 |
pinned: true
|
8 |
license: mit
|
9 |
+
short_description: A marimo Space to edit Synalinks 🧠🔗 notebooks
|
10 |
---
|
11 |
|
12 |
+
Check out the documentation at <https://synalinks.github.io/synalinks>
|
13 |
+
Check out the repository at <https://github.com/SynaLinks/synalinks>
|
code_examples/1_basics/1_first_steps.py
ADDED
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# First Steps
|
21 |
+
|
22 |
+
First, install Synalinks, the easiest way is using pip:
|
23 |
+
|
24 |
+
```shell
|
25 |
+
pip install synalinks
|
26 |
+
```
|
27 |
+
|
28 |
+
Or uv (recommended):
|
29 |
+
|
30 |
+
```shell
|
31 |
+
uv pip install synalinks
|
32 |
+
```
|
33 |
+
|
34 |
+
If you want to install it from source (for contributors), then do:
|
35 |
+
|
36 |
+
```shell
|
37 |
+
git clone https://github.com/SynaLinks/Synalinks
|
38 |
+
cd Synalinks
|
39 |
+
./shell/uv.sh # Install uv
|
40 |
+
./shell/install.sh # Create the virtual env and install Synalinks
|
41 |
+
```
|
42 |
+
|
43 |
+
After this, open a python file or notebook and check the install:
|
44 |
+
"""
|
45 |
+
)
|
46 |
+
return
|
47 |
+
|
48 |
+
|
49 |
+
@app.cell
|
50 |
+
def _(synalinks):
|
51 |
+
print(synalinks.__version__)
|
52 |
+
return
|
53 |
+
|
54 |
+
|
55 |
+
@app.cell(hide_code=True)
|
56 |
+
def _(mo):
|
57 |
+
mo.md(
|
58 |
+
r"""
|
59 |
+
Synalinks use a global context to ensure that each variable/module
|
60 |
+
have a unique name. Clear it at the beginning of your scripts to
|
61 |
+
ensure naming reproductability.
|
62 |
+
"""
|
63 |
+
)
|
64 |
+
return
|
65 |
+
|
66 |
+
|
67 |
+
@app.cell
|
68 |
+
def _(synalinks):
|
69 |
+
synalinks.backend.clear_session()
|
70 |
+
return
|
71 |
+
|
72 |
+
|
73 |
+
@app.cell(hide_code=True)
|
74 |
+
def _(mo):
|
75 |
+
mo.md(
|
76 |
+
r"""
|
77 |
+
Addtionally, you can install Ollama [here](https://ollama.com/) to run
|
78 |
+
Language Models (LMs) locally. You can run these notebooks locally by
|
79 |
+
|
80 |
+
|
81 |
+
## Prompting
|
82 |
+
|
83 |
+
You will notice that there is no traditional prompting involved in
|
84 |
+
Synalinks, everything is described as data models in and out.
|
85 |
+
However we use a prompt template, that will tell the system how to
|
86 |
+
construct the prompt automatically.
|
87 |
+
|
88 |
+
The prompt template is a jinja2 template that describe how to render
|
89 |
+
the examples, hints and how to convert them into chat messages:
|
90 |
+
"""
|
91 |
+
)
|
92 |
+
return
|
93 |
+
|
94 |
+
|
95 |
+
@app.cell
|
96 |
+
def _(synalinks):
|
97 |
+
print(synalinks.default_prompt_template())
|
98 |
+
return
|
99 |
+
|
100 |
+
|
101 |
+
@app.cell(hide_code=True)
|
102 |
+
def _(mo):
|
103 |
+
mo.md(
|
104 |
+
r"""
|
105 |
+
If you are making a conversational application, we provide the following template to use.
|
106 |
+
To use it, provide this template to the `prompt_template` argument of your `Generator` module.
|
107 |
+
Note that this template only works if your module has a `ChatMessages` input.
|
108 |
+
"""
|
109 |
+
)
|
110 |
+
|
111 |
+
|
112 |
+
@app.cell
|
113 |
+
def _(synalinks):
|
114 |
+
print(synalinks.chat_prompt_template())
|
115 |
+
return
|
116 |
+
|
117 |
+
|
118 |
+
@app.cell(hide_code=True)
|
119 |
+
def _(mo):
|
120 |
+
mo.md(
|
121 |
+
r"""
|
122 |
+
The template use the XML tags `<system>...</system>`, `<user>...</user>` and
|
123 |
+
`<assistant>...</assistant>` to know how to convert the prompt template
|
124 |
+
into messages. You can modify the default template used by using the
|
125 |
+
`prompt_template` argument in Synalinks modules. You can notice also,
|
126 |
+
that we send the inputs's and output's JSON schema to instruct the LMs
|
127 |
+
how to answer, you can enable/disable that behavior by using `use_inputs_schema`
|
128 |
+
and `use_outputs_schema` in Synalinks modules. Synalinks use constrained
|
129 |
+
structured output ensuring that the LMs answer respect the data models
|
130 |
+
specification (the JSON schema), and is ready to parse, so in theory
|
131 |
+
we don't need it, except if you use it to provide additional information
|
132 |
+
to the LMs. You can find more information in the
|
133 |
+
[`Generator`](https://synalinks.github.io/synalinks/Synalinks%20API/Modules%20API/Core%20Modules/Generator%20module/) documentation.
|
134 |
+
|
135 |
+
## Data Models
|
136 |
+
|
137 |
+
To provide additional information to the LMs, you can use the data models
|
138 |
+
`Field`. You can notice that Synalinks use Pydantic as default data backend.
|
139 |
+
Allowing Synalinks to be compatible out-of-the-box with structured output
|
140 |
+
and FastAPI.
|
141 |
+
"""
|
142 |
+
)
|
143 |
+
return
|
144 |
+
|
145 |
+
|
146 |
+
@app.cell
|
147 |
+
def _(synalinks):
|
148 |
+
class AnswerWithThinking(synalinks.DataModel):
|
149 |
+
thinking: str = synalinks.Field(
|
150 |
+
description="Your step by step thinking process",
|
151 |
+
)
|
152 |
+
answer: str = synalinks.Field(
|
153 |
+
description="The correct answer",
|
154 |
+
)
|
155 |
+
|
156 |
+
return (AnswerWithThinking,)
|
157 |
+
|
158 |
+
|
159 |
+
@app.cell(hide_code=True)
|
160 |
+
def _(mo):
|
161 |
+
mo.md(
|
162 |
+
r"""
|
163 |
+
|
164 |
+
## Conclusion
|
165 |
+
|
166 |
+
Usually that will be enough to instruct the LMs, you don't need to modify
|
167 |
+
the prompt template. Just by adding additional descriptions to the data
|
168 |
+
models fields you can instruct your system to behave as you want.
|
169 |
+
If the system needs general instructions about how to behave, you can
|
170 |
+
use the `hints` argument in Synalinks modules that will be formatted as
|
171 |
+
presented in the prompt template.
|
172 |
+
|
173 |
+
### Key Takeaways
|
174 |
+
|
175 |
+
- **Ease of Integration**: Synalinks seamlessly integrates with existing
|
176 |
+
Python projects, making it easy to incorporate advanced language
|
177 |
+
model capabilities without extensive modifications.
|
178 |
+
- **Structured Outputs**: By using data models and JSON schemas, Synalinks
|
179 |
+
ensures that the LMs responses are structured and ready for parsing,
|
180 |
+
reducing the need for additional post-processing.
|
181 |
+
- **Customizable Prompts**: The prompt templates in Synalinks are highly
|
182 |
+
customizable, allowing you to tailor the instructions provided to
|
183 |
+
the LMs based on your specific use case.
|
184 |
+
- **Compatibility**: Synalinks use Pydantic as the default data backend
|
185 |
+
ensures compatibility with structured output and FastAPI.
|
186 |
+
"""
|
187 |
+
)
|
188 |
+
return
|
189 |
+
|
190 |
+
|
191 |
+
if __name__ == "__main__":
|
192 |
+
app.run()
|
code_examples/1_basics/2_first_programs.py
ADDED
@@ -0,0 +1,308 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# Your first programs
|
21 |
+
|
22 |
+
The main concept of Synalinks, is that an application (we call it a `Program`)
|
23 |
+
is a computation graph with JSON data (called `JsonDataModel`) as edges and
|
24 |
+
`Operation`s as nodes. What set apart Synalinks from other similar frameworks
|
25 |
+
like DSPy or AdalFlow is that we focus on graph-based systems but also that
|
26 |
+
it allow users to declare the computation graph using a Functional API inherited
|
27 |
+
from [Keras](https://keras.io/).
|
28 |
+
|
29 |
+
About modules, similar to layers in deep learning applications, modules are
|
30 |
+
composable blocks that you can assemble in multiple ways. Providing a modular
|
31 |
+
and composable architecture to experiment and unlock creativity.
|
32 |
+
|
33 |
+
Note that each `Program` is also a `Module`! Allowing you to encapsulate them
|
34 |
+
as you want.
|
35 |
+
|
36 |
+
Many people think that what enabled the Deep Learning revolution was compute
|
37 |
+
and data, but in reality, frameworks also played a pivotal role as they enabled
|
38 |
+
researchers and engineers to create complex architectures without having to
|
39 |
+
re-implement everything from scatch.
|
40 |
+
"""
|
41 |
+
)
|
42 |
+
return
|
43 |
+
|
44 |
+
|
45 |
+
@app.cell
|
46 |
+
def _(synalinks):
|
47 |
+
# Now we can define the data models that we are going to use in the notebook.
|
48 |
+
# Note that Synalinks use Pydantic as default data backend, which is compatible with FastAPI and structured output.
|
49 |
+
|
50 |
+
class Query(synalinks.DataModel):
|
51 |
+
query: str = synalinks.Field(
|
52 |
+
description="The user query",
|
53 |
+
)
|
54 |
+
|
55 |
+
class AnswerWithThinking(synalinks.DataModel):
|
56 |
+
thinking: str = synalinks.Field(
|
57 |
+
description="Your step by step thinking process",
|
58 |
+
)
|
59 |
+
answer: str = synalinks.Field(
|
60 |
+
description="The correct answer",
|
61 |
+
)
|
62 |
+
|
63 |
+
return AnswerWithThinking, Query
|
64 |
+
|
65 |
+
|
66 |
+
@app.cell(hide_code=True)
|
67 |
+
def _(mo):
|
68 |
+
mo.md(
|
69 |
+
r"""
|
70 |
+
## Functional API
|
71 |
+
|
72 |
+
You can program your application using 3 different ways, let's start with the
|
73 |
+
Functional way.
|
74 |
+
|
75 |
+
In this case, you start from `Input` and you chain modules calls to specify the
|
76 |
+
programs's structure, and finally, you create your program from inputs and outputs:
|
77 |
+
"""
|
78 |
+
)
|
79 |
+
return
|
80 |
+
|
81 |
+
|
82 |
+
@app.cell
|
83 |
+
async def _(AnswerWithThinking, Query, synalinks):
|
84 |
+
|
85 |
+
language_model = synalinks.LanguageModel(
|
86 |
+
model="openai/gpt-4o-mini",
|
87 |
+
)
|
88 |
+
|
89 |
+
_x0 = synalinks.Input(data_model=Query)
|
90 |
+
_x1 = await synalinks.Generator(
|
91 |
+
data_model=AnswerWithThinking,
|
92 |
+
language_model=language_model,
|
93 |
+
)(_x0)
|
94 |
+
|
95 |
+
program = synalinks.Program(
|
96 |
+
inputs=_x0,
|
97 |
+
outputs=_x1,
|
98 |
+
name="chain_of_thought",
|
99 |
+
description="Usefull to answer in a step by step manner.",
|
100 |
+
)
|
101 |
+
return language_model, program
|
102 |
+
|
103 |
+
|
104 |
+
@app.cell
|
105 |
+
def _(program):
|
106 |
+
# You can print a summary of your program in a table format
|
107 |
+
# which is really usefull to have a quick overview of your application
|
108 |
+
|
109 |
+
program.summary()
|
110 |
+
return
|
111 |
+
|
112 |
+
|
113 |
+
@app.cell
|
114 |
+
def _(mo, program, synalinks):
|
115 |
+
# Or plot your program in a graph format
|
116 |
+
|
117 |
+
synalinks.utils.plot_program(
|
118 |
+
program,
|
119 |
+
show_module_names=True,
|
120 |
+
show_trainable=True,
|
121 |
+
show_schemas=True,
|
122 |
+
)
|
123 |
+
return
|
124 |
+
|
125 |
+
|
126 |
+
@app.cell(hide_code=True)
|
127 |
+
def _(mo):
|
128 |
+
mo.md(
|
129 |
+
r"""
|
130 |
+
## Subclassing the `Program` class
|
131 |
+
|
132 |
+
Now let's try to program it using another method, subclassing the `Program`
|
133 |
+
class.
|
134 |
+
|
135 |
+
In that case, you should define your modules in `__init__()` and you should
|
136 |
+
implement the program's structure in `call()`.
|
137 |
+
"""
|
138 |
+
)
|
139 |
+
return
|
140 |
+
|
141 |
+
|
142 |
+
@app.cell
|
143 |
+
def _(AnswerWithThinking, language_model, synalinks):
|
144 |
+
|
145 |
+
class ChainOfThought(synalinks.Program):
|
146 |
+
"""Usefull to answer in a step by step manner.
|
147 |
+
|
148 |
+
The first line of the docstring is provided as description for the program
|
149 |
+
if not provided in the `super().__init__()`. In a similar way the name is
|
150 |
+
automatically infered based on the class name if not provided.
|
151 |
+
"""
|
152 |
+
|
153 |
+
def __init__(self, language_model=None):
|
154 |
+
super().__init__()
|
155 |
+
self.answer = synalinks.Generator(
|
156 |
+
data_model=AnswerWithThinking, language_model=language_model
|
157 |
+
)
|
158 |
+
|
159 |
+
async def call(self, inputs, training=False):
|
160 |
+
x = await self.answer(inputs)
|
161 |
+
return x
|
162 |
+
|
163 |
+
def get_config(self):
|
164 |
+
config = {
|
165 |
+
"name": self.name,
|
166 |
+
"description": self.description,
|
167 |
+
"trainable": self.trainable,
|
168 |
+
}
|
169 |
+
language_model_config = {
|
170 |
+
"language_model": synalinks.saving.serialize_synalinks_object(
|
171 |
+
self.language_model
|
172 |
+
)
|
173 |
+
}
|
174 |
+
return {**config, **language_model_config}
|
175 |
+
|
176 |
+
@classmethod
|
177 |
+
def from_config(cls, config):
|
178 |
+
language_model = synalinks.saving.deserialize_synalinks_object(
|
179 |
+
config.pop("language_model")
|
180 |
+
)
|
181 |
+
return cls(language_model=language_model, **config)
|
182 |
+
|
183 |
+
program_1 = ChainOfThought(language_model=language_model)
|
184 |
+
return ChainOfThought, program_1
|
185 |
+
|
186 |
+
|
187 |
+
@app.cell
|
188 |
+
def _(program_1):
|
189 |
+
program_1.summary()
|
190 |
+
return
|
191 |
+
|
192 |
+
|
193 |
+
@app.cell(hide_code=True)
|
194 |
+
def _(mo):
|
195 |
+
mo.md(
|
196 |
+
r"""
|
197 |
+
Note that the program isn't actually built, this behavior is intended its
|
198 |
+
means that it can accept any king of input, making the program truly
|
199 |
+
generalizable. Now we can explore the last way of programming as well as
|
200 |
+
illustrate one of the key feature of Synalinks, composability.
|
201 |
+
|
202 |
+
## Using `Sequential` program
|
203 |
+
|
204 |
+
In addition to the other ways of programming, `Sequential` is a special
|
205 |
+
case of programs where the program is purely a stack of single-input,
|
206 |
+
single-output modules.
|
207 |
+
|
208 |
+
In this example, we are going to re-use the `ChainOfThought` program that
|
209 |
+
we defined previously, illustrating the modularity of the framework.
|
210 |
+
"""
|
211 |
+
)
|
212 |
+
return
|
213 |
+
|
214 |
+
|
215 |
+
@app.cell
|
216 |
+
def _(ChainOfThought, Query, language_model, synalinks):
|
217 |
+
program_2 = synalinks.Sequential(
|
218 |
+
[
|
219 |
+
synalinks.Input(data_model=Query),
|
220 |
+
ChainOfThought(language_model=language_model),
|
221 |
+
],
|
222 |
+
name="chain_of_thought",
|
223 |
+
description="Usefull to answer in a step by step manner.",
|
224 |
+
)
|
225 |
+
program_2.summary()
|
226 |
+
return (program_2,)
|
227 |
+
|
228 |
+
|
229 |
+
@app.cell(hide_code=True)
|
230 |
+
def _(mo):
|
231 |
+
mo.md(
|
232 |
+
r"""
|
233 |
+
## Running your programs
|
234 |
+
|
235 |
+
In order to run your program, you just have to call it with the input data model
|
236 |
+
as argument.
|
237 |
+
"""
|
238 |
+
)
|
239 |
+
return
|
240 |
+
|
241 |
+
|
242 |
+
@app.cell(hide_code=True)
|
243 |
+
def _(mo):
|
244 |
+
openai_api_key = mo.ui.text_area(placeholder="Your OpenAI API key...").form()
|
245 |
+
openai_api_key
|
246 |
+
return
|
247 |
+
|
248 |
+
|
249 |
+
@app.cell(hide_code=True)
|
250 |
+
def _(mo, openai_api_key):
|
251 |
+
import os
|
252 |
+
mo.stop(not openai_api_key.value)
|
253 |
+
os.environ["OPENAI_API_KEY"] = openai_api_key.value
|
254 |
+
return
|
255 |
+
|
256 |
+
|
257 |
+
@app.cell(hide_code=True)
|
258 |
+
def _(mo):
|
259 |
+
run_button = mo.ui.run_button(label="Run program")
|
260 |
+
run_button.center()
|
261 |
+
return run_button
|
262 |
+
|
263 |
+
|
264 |
+
@app.cell
|
265 |
+
async def _(Query, program_2):
|
266 |
+
mo.stop(not openai_api_key.value, mo.md("Provide your OpenAI API key"))
|
267 |
+
mo.stop(not run_button.value, mo.md("Click on the run button above"))
|
268 |
+
|
269 |
+
result = await program_2(
|
270 |
+
Query(query="What are the key aspects of human cognition?"),
|
271 |
+
)
|
272 |
+
|
273 |
+
print(result.pretty_json())
|
274 |
+
|
275 |
+
return (result,)
|
276 |
+
|
277 |
+
|
278 |
+
@app.cell(hide_code=True)
|
279 |
+
def _(mo):
|
280 |
+
mo.md(
|
281 |
+
r"""
|
282 |
+
## Conclusion
|
283 |
+
|
284 |
+
Congratulations! You've successfully explored the fundamental concepts of programming
|
285 |
+
applications using Synalinks. By understanding and implementing the Functional API,
|
286 |
+
subclassing the `Program` class, and using `Sequential` programs, you've gained a
|
287 |
+
solid foundation in creating modular and composable applications.
|
288 |
+
|
289 |
+
Now that we know how to program applications, you can learn how to control
|
290 |
+
the data flow in the next notebook.
|
291 |
+
|
292 |
+
### Key Takeaways
|
293 |
+
|
294 |
+
- **Functional API**: Allows you to chain modules to define the program's structure,
|
295 |
+
providing a clear and intuitive way to build applications.
|
296 |
+
- **Subclassing**: Offers flexibility and control by defining modules and implementing
|
297 |
+
the program's structure from scratch within a class.
|
298 |
+
- **Sequential Programs**: Simplifies the creation of linear workflows, making it easy
|
299 |
+
to stack single-input, single-output modules.
|
300 |
+
- **Modularity and Composability**: Enables the reuse of components, fostering creativity
|
301 |
+
and efficiency in application development.
|
302 |
+
"""
|
303 |
+
)
|
304 |
+
return
|
305 |
+
|
306 |
+
|
307 |
+
if __name__ == "__main__":
|
308 |
+
app.run()
|
code_examples/1_basics/3_control_flow.py
ADDED
@@ -0,0 +1,487 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# Control Flow
|
21 |
+
|
22 |
+
Controlling the flow of information in a program is an essential feature of any LM framework.
|
23 |
+
In Synalinks, we implemented it in circuit-like fashion, where the flow of information can be
|
24 |
+
conditionaly or logically restricted to only flow in a subset of a computation graph.
|
25 |
+
|
26 |
+
## Parallel Branches
|
27 |
+
|
28 |
+
To create parallel branches, all you need to do is using the same inputs when declaring the modules.
|
29 |
+
Then Synalinks will automatically detect them and run them in parrallel with asyncio.
|
30 |
+
"""
|
31 |
+
)
|
32 |
+
return
|
33 |
+
|
34 |
+
|
35 |
+
@app.cell
|
36 |
+
async def _(synalinks):
|
37 |
+
class Query(synalinks.DataModel):
|
38 |
+
query: str = synalinks.Field(
|
39 |
+
description="The user query",
|
40 |
+
)
|
41 |
+
|
42 |
+
class AnswerWithThinking(synalinks.DataModel):
|
43 |
+
thinking: str = synalinks.Field(
|
44 |
+
description="Your step by step thinking process",
|
45 |
+
)
|
46 |
+
answer: str = synalinks.Field(
|
47 |
+
description="The correct answer",
|
48 |
+
)
|
49 |
+
|
50 |
+
language_model = synalinks.LanguageModel(model="ollama_chat/deepseek-r1")
|
51 |
+
_x0 = synalinks.Input(data_model=Query)
|
52 |
+
_x1 = await synalinks.Generator(
|
53 |
+
data_model=AnswerWithThinking,
|
54 |
+
language_model=language_model,
|
55 |
+
)(_x0)
|
56 |
+
_x2 = await synalinks.Generator(
|
57 |
+
data_model=AnswerWithThinking,
|
58 |
+
language_model=language_model,
|
59 |
+
)(_x0)
|
60 |
+
|
61 |
+
program = synalinks.Program(
|
62 |
+
inputs=_x0,
|
63 |
+
outputs=[_x1, _x2],
|
64 |
+
name="parallel_branches",
|
65 |
+
description="Illustrate the use of parallel branching",
|
66 |
+
)
|
67 |
+
return AnswerWithThinking, Query, language_model, program, synalinks
|
68 |
+
|
69 |
+
|
70 |
+
@app.cell
|
71 |
+
def _(mo, program, synalinks):
|
72 |
+
synalinks.utils.plot_program(
|
73 |
+
program,
|
74 |
+
show_module_names=True,
|
75 |
+
show_schemas=True,
|
76 |
+
show_trainable=True,
|
77 |
+
)
|
78 |
+
return
|
79 |
+
|
80 |
+
|
81 |
+
@app.cell(hide_code=True)
|
82 |
+
def _(mo):
|
83 |
+
mo.md(
|
84 |
+
r"""
|
85 |
+
## Decisions
|
86 |
+
|
87 |
+
Decisions in Synalinks can be viewed as a single label classification, they allow
|
88 |
+
the system to classify the inputs based on a question and labels to choose from.
|
89 |
+
The labels are used to create on the fly a Enum schema that ensure, thanks to
|
90 |
+
constrained structured output, that the system will answer one of the provided labels.
|
91 |
+
"""
|
92 |
+
)
|
93 |
+
return
|
94 |
+
|
95 |
+
|
96 |
+
@app.cell
|
97 |
+
async def _(Query, language_model, synalinks):
|
98 |
+
_x0 = synalinks.Input(data_model=Query)
|
99 |
+
_x1 = await synalinks.Decision(
|
100 |
+
question="Evaluate the difficulty to answer the provided query",
|
101 |
+
labels=["easy", "difficult"],
|
102 |
+
language_model=language_model,
|
103 |
+
)(_x0)
|
104 |
+
|
105 |
+
program_1 = synalinks.Program(
|
106 |
+
inputs=_x0,
|
107 |
+
outputs=_x1,
|
108 |
+
name="decision_making",
|
109 |
+
description="Illustrate the decision making process",
|
110 |
+
)
|
111 |
+
return (program_1,)
|
112 |
+
|
113 |
+
|
114 |
+
@app.cell
|
115 |
+
def _(mo, program_1, synalinks):
|
116 |
+
synalinks.utils.plot_program(
|
117 |
+
program_1,
|
118 |
+
show_module_names=True,
|
119 |
+
show_schemas=True,
|
120 |
+
show_trainable=True,
|
121 |
+
)
|
122 |
+
return
|
123 |
+
|
124 |
+
|
125 |
+
@app.cell(hide_code=True)
|
126 |
+
def _(mo):
|
127 |
+
mo.md(
|
128 |
+
r"""
|
129 |
+
## Conditional Branches
|
130 |
+
|
131 |
+
To make conditional branches, we will need the help of a core module: The Branch
|
132 |
+
module. This module use a decision and route the input data model to the selected
|
133 |
+
branch. When a branch is not selected, that branch output a None.
|
134 |
+
"""
|
135 |
+
)
|
136 |
+
return
|
137 |
+
|
138 |
+
|
139 |
+
@app.cell
|
140 |
+
async def _(AnswerWithThinking, Query, language_model, synalinks):
|
141 |
+
class Answer(synalinks.DataModel):
|
142 |
+
answer: str = synalinks.Field(
|
143 |
+
description="The correct answer",
|
144 |
+
)
|
145 |
+
|
146 |
+
_x0 = synalinks.Input(data_model=Query)
|
147 |
+
(_x1, _x2) = await synalinks.Branch(
|
148 |
+
question="Evaluate the difficulty to answer the provided query",
|
149 |
+
labels=["easy", "difficult"],
|
150 |
+
branches=[
|
151 |
+
synalinks.Generator(
|
152 |
+
data_model=Answer,
|
153 |
+
language_model=language_model,
|
154 |
+
),
|
155 |
+
synalinks.Generator(
|
156 |
+
data_model=AnswerWithThinking,
|
157 |
+
language_model=language_model,
|
158 |
+
),
|
159 |
+
],
|
160 |
+
)(_x0)
|
161 |
+
|
162 |
+
program_2 = synalinks.Program(
|
163 |
+
inputs=_x0,
|
164 |
+
outputs=[_x1, _x2],
|
165 |
+
name="conditional_branches",
|
166 |
+
description="Illustrate the conditional branches",
|
167 |
+
)
|
168 |
+
return Answer, program_2
|
169 |
+
|
170 |
+
|
171 |
+
@app.cell
|
172 |
+
def _(mo, program_2, synalinks):
|
173 |
+
synalinks.utils.plot_program(
|
174 |
+
program_2,
|
175 |
+
show_module_names=True,
|
176 |
+
show_schemas=True,
|
177 |
+
show_trainable=True,
|
178 |
+
)
|
179 |
+
return
|
180 |
+
|
181 |
+
|
182 |
+
@app.cell(hide_code=True)
|
183 |
+
def _(mo):
|
184 |
+
mo.md(
|
185 |
+
r"""
|
186 |
+
## Data Models Operators
|
187 |
+
|
188 |
+
Synalinks implement few operators that works with data models, some of them are
|
189 |
+
straightforward, like the concatenation, implemented in the Python `+` operator.
|
190 |
+
But others like the `logical_and` and `logical_or` implemented respectively
|
191 |
+
in the `&` and `|` operator are more difficult to grasp at first. As explained
|
192 |
+
above, in the conditional branches, the branch not selected will have a None
|
193 |
+
as output. To account that fact and to implement logical flows, we need operators
|
194 |
+
that can work with them. See the [Ops API](https://synalinks.github.io/synalinks/Synalinks%20API/Ops%20API/)
|
195 |
+
section for an extensive list of all data model operations.
|
196 |
+
|
197 |
+
### Concatenation
|
198 |
+
|
199 |
+
The concatenation, consist in creating a data model that have the fields of both
|
200 |
+
inputs. When one of the input is `None`, it raise an exception. Note that you can
|
201 |
+
use the concatenation, like any other operator, at a meta-class level, meaning
|
202 |
+
you can actually concatenate data model types.
|
203 |
+
|
204 |
+
### Concatenation Table
|
205 |
+
"""
|
206 |
+
)
|
207 |
+
return
|
208 |
+
|
209 |
+
|
210 |
+
@app.cell(hide_code=True)
|
211 |
+
def _(mo):
|
212 |
+
mo.md(
|
213 |
+
r"""
|
214 |
+
| `x1` | `x2` | Concat (`+`) |
|
215 |
+
| ------ | ------ | ----------------- |
|
216 |
+
| `x1` | `x2` | `x1 + x2` |
|
217 |
+
| `x1` | `None` | `Exception` |
|
218 |
+
| `None` | `x2` | `Exception` |
|
219 |
+
| `None` | `None` | `Exception` |
|
220 |
+
"""
|
221 |
+
).center()
|
222 |
+
return
|
223 |
+
|
224 |
+
|
225 |
+
@app.cell(hide_code=True)
|
226 |
+
def _(mo):
|
227 |
+
mo.md(
|
228 |
+
r"""
|
229 |
+
### Concatenation Example
|
230 |
+
"""
|
231 |
+
)
|
232 |
+
return
|
233 |
+
|
234 |
+
|
235 |
+
@app.cell
async def _(AnswerWithThinking, Query, language_model, synalinks):
    # Demonstrates the concatenation operator (`+`): two generators run on the
    # same input, and their outputs are merged into one data model.
    _x0 = synalinks.Input(data_model=Query)
    # First generator: produces an AnswerWithThinking from the input query.
    _x1 = await synalinks.Generator(
        data_model=AnswerWithThinking,
        language_model=language_model,
    )(_x0)
    # Second generator: same specification, applied to the same input.
    _x2 = await synalinks.Generator(
        data_model=AnswerWithThinking,
        language_model=language_model,
    )(_x0)
    # Concatenation: `_x3` carries the fields of both `_x1` and `_x2`.
    # Per the table above, it raises an exception if either operand is None.
    _x3 = _x1 + _x2

    program_3 = synalinks.Program(
        inputs=_x0,
        outputs=_x3,
        name="concatenation",
        description="Illustrate the use of concatenate",
    )
    return (program_3,)
|
255 |
+
|
256 |
+
|
257 |
+
@app.cell
|
258 |
+
def _(mo, program_3, synalinks):
|
259 |
+
synalinks.utils.plot_program(
|
260 |
+
program_3,
|
261 |
+
show_module_names=True,
|
262 |
+
show_schemas=True,
|
263 |
+
show_trainable=True,
|
264 |
+
)
|
265 |
+
return
|
266 |
+
|
267 |
+
|
268 |
+
@app.cell(hide_code=True)
|
269 |
+
def _(mo):
|
270 |
+
mo.md(
|
271 |
+
r"""
|
272 |
+
### Logical And
|
273 |
+
|
274 |
+
The `logical_and` is a concatenation that instead of raising an `Exception`,
|
275 |
+
output a `None`. This operator should be used when you have to concatenate
|
276 |
+
a data model with another one that can be `None`, like a `Branch` output.
|
277 |
+
|
278 |
+
### Logical And Table
|
279 |
+
"""
|
280 |
+
)
|
281 |
+
return
|
282 |
+
|
283 |
+
@app.cell(hide_code=True)
|
284 |
+
def _(mo):
|
285 |
+
mo.md(
|
286 |
+
r"""
|
287 |
+
| `x1` | `x2` | Logical And (`&`) |
|
288 |
+
| ------ | ------ | ----------------- |
|
289 |
+
| `x1` | `x2` | `x1 + x2` |
|
290 |
+
| `x1` | `None` | `None` |
|
291 |
+
| `None` | `x2` | `None` |
|
292 |
+
| `None` | `None` | `None` |
|
293 |
+
"""
|
294 |
+
).center()
|
295 |
+
return
|
296 |
+
|
297 |
+
|
298 |
+
@app.cell(hide_code=True)
|
299 |
+
def _(mo):
|
300 |
+
mo.md(
|
301 |
+
r"""
|
302 |
+
### Logical And Example
|
303 |
+
"""
|
304 |
+
)
|
305 |
+
return
|
306 |
+
|
307 |
+
@app.cell
|
308 |
+
async def _(Answer, AnswerWithThinking, Query, language_model, synalinks):
|
309 |
+
class Critique(synalinks.DataModel):
|
310 |
+
critique: str = synalinks.Field(
|
311 |
+
description="The critique of the answer",
|
312 |
+
)
|
313 |
+
|
314 |
+
_x0 = synalinks.Input(data_model=Query)
|
315 |
+
(_x1, _x2) = await synalinks.Branch(
|
316 |
+
question="Evaluate the difficulty to answer the provided query",
|
317 |
+
labels=["easy", "difficult"],
|
318 |
+
branches=[
|
319 |
+
synalinks.Generator(
|
320 |
+
data_model=Answer,
|
321 |
+
language_model=language_model,
|
322 |
+
),
|
323 |
+
synalinks.Generator(
|
324 |
+
data_model=AnswerWithThinking,
|
325 |
+
language_model=language_model,
|
326 |
+
),
|
327 |
+
],
|
328 |
+
return_decision=False,
|
329 |
+
)(_x0)
|
330 |
+
_x3 = _x0 & _x1
|
331 |
+
_x4 = _x0 & _x2
|
332 |
+
_x5 = await synalinks.Generator(
|
333 |
+
data_model=Critique,
|
334 |
+
language_model=language_model,
|
335 |
+
return_inputs=True,
|
336 |
+
)(_x3)
|
337 |
+
_x6 = await synalinks.Generator(
|
338 |
+
data_model=Critique,
|
339 |
+
language_model=language_model,
|
340 |
+
return_inputs=True,
|
341 |
+
)(_x4)
|
342 |
+
_x7 = _x5 | _x6
|
343 |
+
_x8 = await synalinks.Generator(
|
344 |
+
data_model=Answer,
|
345 |
+
language_model=language_model,
|
346 |
+
)(_x7)
|
347 |
+
|
348 |
+
program_4 = synalinks.Program(
|
349 |
+
inputs=_x0,
|
350 |
+
outputs=_x8,
|
351 |
+
name="logical_and",
|
352 |
+
description="Illustrate the use of logical and",
|
353 |
+
)
|
354 |
+
return Critique, program_4
|
355 |
+
|
356 |
+
|
357 |
+
@app.cell
|
358 |
+
def _(mo, program_4, synalinks):
|
359 |
+
synalinks.utils.plot_program(
|
360 |
+
program_4,
|
361 |
+
show_module_names=True,
|
362 |
+
show_schemas=True,
|
363 |
+
show_trainable=True,
|
364 |
+
)
|
365 |
+
return
|
366 |
+
|
367 |
+
|
368 |
+
@app.cell(hide_code=True)
|
369 |
+
def _(mo):
|
370 |
+
mo.md(
|
371 |
+
r"""
|
372 |
+
### Logical Or
|
373 |
+
|
374 |
+
The `logical_or` is used when you want to combine two data models, but you can
|
375 |
+
accommodate that one of them is `None`. Another use is to gather the outputs of
|
376 |
+
a `Branch`: as only one branch is active, it allows you to merge the branches' outputs
|
377 |
+
into a unique data model.
|
378 |
+
|
379 |
+
|
380 |
+
### Logical Or Table
|
381 |
+
"""
|
382 |
+
)
|
383 |
+
return
|
384 |
+
|
385 |
+
|
386 |
+
@app.cell(hide_code=True)
|
387 |
+
def _(mo):
|
388 |
+
mo.md(
|
389 |
+
r"""
|
390 |
+
| `x1` | `x2` | Logical Or (`|`) |
|
391 |
+
| ------ | ------ | ---------------- |
|
392 |
+
| `x1` | `x2` | `x1 + x2` |
|
393 |
+
| `x1` | `None` | `x1` |
|
394 |
+
| `None` | `x2` | `x2` |
|
395 |
+
| `None` | `None` | `None` |
|
396 |
+
"""
|
397 |
+
).center()
|
398 |
+
return
|
399 |
+
|
400 |
+
|
401 |
+
@app.cell(hide_code=True)
|
402 |
+
def _(mo):
|
403 |
+
mo.md(
|
404 |
+
r"""
|
405 |
+
### Logical Or Example
|
406 |
+
"""
|
407 |
+
)
|
408 |
+
return
|
409 |
+
|
410 |
+
|
411 |
+
@app.cell
|
412 |
+
async def _(Answer, AnswerWithThinking, Query, language_model, synalinks):
|
413 |
+
_x0 = synalinks.Input(data_model=Query)
|
414 |
+
(_x1, _x2) = await synalinks.Branch(
|
415 |
+
question="Evaluate the difficulty to answer the provided query",
|
416 |
+
labels=["easy", "difficult"],
|
417 |
+
branches=[
|
418 |
+
synalinks.Generator(
|
419 |
+
data_model=Answer,
|
420 |
+
language_model=language_model,
|
421 |
+
),
|
422 |
+
synalinks.Generator(
|
423 |
+
data_model=AnswerWithThinking, language_model=language_model
|
424 |
+
),
|
425 |
+
],
|
426 |
+
return_decision=False,
|
427 |
+
)(_x0)
|
428 |
+
_x3 = _x1 | _x2
|
429 |
+
|
430 |
+
program_5 = synalinks.Program(
|
431 |
+
inputs=_x0,
|
432 |
+
outputs=_x3,
|
433 |
+
name="logical_or",
|
434 |
+
description="Illustrate the use of logical or",
|
435 |
+
)
|
436 |
+
return (program_5,)
|
437 |
+
|
438 |
+
|
439 |
+
@app.cell
|
440 |
+
def _(mo, program_5, synalinks):
|
441 |
+
synalinks.utils.plot_program(
|
442 |
+
program_5,
|
443 |
+
show_module_names=True,
|
444 |
+
show_schemas=True,
|
445 |
+
show_trainable=True,
|
446 |
+
)
|
447 |
+
return
|
448 |
+
|
449 |
+
|
450 |
+
@app.cell(hide_code=True)
|
451 |
+
async def _(mo):
|
452 |
+
mo.md(
|
453 |
+
r"""
|
454 |
+
## Conclusion
|
455 |
+
|
456 |
+
In this notebook, we explored the fundamental concepts of controlling information
|
457 |
+
flow within Synalinks programs. We introduced the creation of parallel branches,
|
458 |
+
decision-making processes, and conditional branching, all of which are essential
|
459 |
+
for building dynamic and robust applications.
|
460 |
+
|
461 |
+
### Key Takeaways
|
462 |
+
|
463 |
+
- **Parallel Branches**: We demonstrated how to run modules in parallel using
|
464 |
+
the same inputs, leveraging asyncio for concurrent execution.
|
465 |
+
This approach enhances performance and allows for simultaneous processing of tasks.
|
466 |
+
|
467 |
+
- **Decision-Making**: We introduced decision-making as a form of single-label
|
468 |
+
classification, enabling the system to classify inputs based on predefined
|
469 |
+
questions and labels. This ensures that the system's responses are structured
|
470 |
+
and adhere to the specified schemas.
|
471 |
+
|
472 |
+
- **Conditional Branching**: We explored the use of the Branch module to route
|
473 |
+
input data models based on decisions, allowing for conditional execution of
|
474 |
+
branches. This feature is essential for creating adaptive and context-aware
|
475 |
+
applications.
|
476 |
+
|
477 |
+
- **Data Model Operators**: We discussed various data model operators, such as
|
478 |
+
concatenation, logical AND, and logical OR. These operators enable
|
479 |
+
sophisticated data manipulation and flow control, ensuring robust program
|
480 |
+
execution even when branches output None.
|
481 |
+
"""
|
482 |
+
)
|
483 |
+
return
|
484 |
+
|
485 |
+
|
486 |
+
if __name__ == "__main__":
|
487 |
+
app.run()
|
code_examples/1_basics/4_conversational_applications.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# Conversational Applications
|
21 |
+
|
22 |
+
Synalinks is designed to handle conversational applications as well as
|
23 |
+
query-based systems. In the case of a conversational applications, the
|
24 |
+
input data model is a list of chat messages, and the output an individual
|
25 |
+
chat message. The `Program` is in that case responsible of handling a
|
26 |
+
**single conversation turn**.
|
27 |
+
"""
|
28 |
+
)
|
29 |
+
return
|
30 |
+
|
31 |
+
|
32 |
+
@app.cell(hide_code=True)
|
33 |
+
def _(mo):
|
34 |
+
mo.md(
|
35 |
+
r"""
|
36 |
+
Now we can program our application like you would do with any `Program`. For this example,
|
37 |
+
we are going to make a very simple chatbot.
|
38 |
+
|
39 |
+
By default, if no data_model/schema is provided to the `Generator` it will output a `ChatMessage` like output.
|
40 |
+
If the data model is `None`, then you can enable streaming.
|
41 |
+
|
42 |
+
**Note:** Streaming is disabled during training and should only be used in the **last** `Generator` of your pipeline.
|
43 |
+
"""
|
44 |
+
)
|
45 |
+
return
|
46 |
+
|
47 |
+
|
48 |
+
@app.cell
|
49 |
+
async def _(synalinks):
|
50 |
+
from synalinks.backend import ChatMessage
|
51 |
+
from synalinks.backend import ChatRole
|
52 |
+
from synalinks.backend import ChatMessages
|
53 |
+
|
54 |
+
language_model = synalinks.LanguageModel(
|
55 |
+
model="openai/gpt-4o-mini",
|
56 |
+
)
|
57 |
+
|
58 |
+
_x0 = synalinks.Input(data_model=ChatMessages)
|
59 |
+
_x1 = await synalinks.Generator(
|
60 |
+
language_model=language_model,
|
61 |
+
prompt_template=synalinks.chat_prompt_template(),
|
62 |
+
streaming=False, # Marimo chat don't handle streaming yet
|
63 |
+
)(_x0)
|
64 |
+
|
65 |
+
program = synalinks.Program(
|
66 |
+
inputs=_x0,
|
67 |
+
outputs=_x1,
|
68 |
+
)
|
69 |
+
|
70 |
+
# Let's plot this program to understand it
|
71 |
+
|
72 |
+
synalinks.utils.plot_program(
|
73 |
+
program,
|
74 |
+
show_module_names=True,
|
75 |
+
show_trainable=True,
|
76 |
+
show_schemas=True,
|
77 |
+
)
|
78 |
+
return ChatMessage, ChatMessages, ChatRole, language_model, program
|
79 |
+
|
80 |
+
|
81 |
+
@app.cell(hide_code=True)
|
82 |
+
def _(mo):
|
83 |
+
mo.md(
|
84 |
+
r"""
|
85 |
+
## Running the chatbot inside the notebook
|
86 |
+
|
87 |
+
In this example, we will show you how to run the conversational application inside this reactive notebook.
|
88 |
+
"""
|
89 |
+
)
|
90 |
+
return
|
91 |
+
|
92 |
+
|
93 |
+
@app.cell(hide_code=True)
def _(mo):
    # Form asking for the OpenAI API key; the value is read by the next cell
    # to populate the process environment.
    openai_api_key = mo.ui.text_area(placeholder="Your OpenAI API key...").form()
    openai_api_key
    # Bug fix: the cell below declares `openai_api_key` as a parameter, so it
    # must be exported here. The original bare `return` exported nothing,
    # which made the dependent cell fail to resolve the name.
    return (openai_api_key,)
|
98 |
+
|
99 |
+
|
100 |
+
@app.cell(hide_code=True)
|
101 |
+
def _(mo, openai_api_key):
|
102 |
+
import os
|
103 |
+
mo.stop(not openai_api_key.value)
|
104 |
+
os.environ["OPENAI_API_KEY"] = openai_api_key.value
|
105 |
+
return
|
106 |
+
|
107 |
+
|
108 |
+
@app.cell(hide_code=True)
def _(ChatMessage, ChatMessages, ChatRole, mo, program):
    import os

    # Bug fix: the original guard read `openai_api_key.value`, but that UI
    # element is defined in another cell and was never declared as a
    # parameter here (NameError at runtime). The key is written to the
    # environment by the cell above, so check the environment instead.
    mo.stop(not os.environ.get("OPENAI_API_KEY"), mo.md("Provide your OpenAI API key"))

    def cleanup_assistant_message(msg):
        """Strip the HTML wrapper marimo adds around assistant messages."""
        start_tok = '<span class="paragraph">'
        end_tok = "</span>"
        # Bug fix: `str.find` returns -1 when the substring is absent; the
        # original `> 0` test silently skipped a marker located at index 0.
        if msg.content.find(start_tok) != -1:
            msg.content = msg.content[msg.content.find(start_tok) + len(start_tok) :]
        # Searching from index 1, so a hit can never be 0; `!= -1` is the
        # explicit "found" test.
        if msg.content.find(end_tok, 1) != -1:
            msg.content = msg.content[: msg.content.find(end_tok, 1)]
        return msg

    async def synalinks_program(messages, config):
        """Handle one conversation turn: rebuild the chat history from the
        marimo messages, then run the Synalinks program on it."""
        chat_history = ChatMessages()
        for msg in messages:
            if msg.role == "user":
                chat_history.messages.append(
                    ChatMessage(
                        role=ChatRole.USER,
                        content=msg.content,
                    )
                )
            else:
                # Assistant messages come back HTML-wrapped; clean them before
                # feeding them back into the history.
                msg = cleanup_assistant_message(msg)
                chat_history.messages.append(
                    ChatMessage(
                        role=ChatRole.ASSISTANT,
                        content=msg.content,
                    )
                )
        result = await program(chat_history)
        return result.get("content")

    chat = mo.ui.chat(synalinks_program)
    chat
    return chat, cleanup_assistant_message, synalinks_program
|
145 |
+
|
146 |
+
|
147 |
+
@app.cell(hide_code=True)
|
148 |
+
async def _(mo):
|
149 |
+
mo.md(
|
150 |
+
r"""
|
151 |
+
## Conclusion
|
152 |
+
|
153 |
+
In this notebook, we explored how Synalinks handle conversational applications.
|
154 |
+
You have now a solid understanding to create chatbots and conversational agents.
|
155 |
+
|
156 |
+
### Key Takeaways
|
157 |
+
|
158 |
+
- **Conversational Flow Management**: Synalinks effectively manages conversational
|
159 |
+
applications by handling inputs as a list of chat messages and generating
|
160 |
+
individual chat messages as outputs. This structure allows for efficient
|
161 |
+
processing of conversation turns.
|
162 |
+
|
163 |
+
- **Streaming and Real-Time Interaction**: Synalinks supports streaming for
|
164 |
+
real-time interactions, enhancing user engagement. However, streaming is
|
165 |
+
disabled during training and should be used only in the final `Generator`.
|
166 |
+
|
167 |
+
- **Customizable Prompt Templates**: The prompt templates can be tailored to fit
|
168 |
+
conversational contexts, guiding the language model to produce coherent and
|
169 |
+
relevant responses.
|
170 |
+
"""
|
171 |
+
)
|
172 |
+
return
|
173 |
+
|
174 |
+
|
175 |
+
if __name__ == "__main__":
|
176 |
+
app.run()
|
code_examples/1_basics/5_rewards_metrics_and_optimizers.py
ADDED
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# Rewards, Metrics & Optimizers
|
21 |
+
|
22 |
+
## Understanding Rewards
|
23 |
+
|
24 |
+
`Reward`s are an essential part of reinforcement learning frameworks.
|
25 |
+
They are typically float values (usually between 0.0 and 1.0, but they can be
|
26 |
+
negative also) that guide the process into making more efficient decisions or
|
27 |
+
predictions. During training, the goal is to maximize the reward function.
|
28 |
+
The reward gives the system an indication of how well it performed for that task.
|
29 |
+
"""
|
30 |
+
)
|
31 |
+
return
|
32 |
+
|
33 |
+
|
34 |
+
@app.cell(hide_code=True)
|
35 |
+
def _(mo):
|
36 |
+
mo.mermaid(
|
37 |
+
r"""
|
38 |
+
graph LR
|
39 |
+
A[Training Data] -->|Provide x:DataModel| B[Program];
|
40 |
+
B -->|Generate y_pred:JsonDataModel| C[Reward];
|
41 |
+
A -->|Provide y_true:DataModel| C;
|
42 |
+
C -->|Compute reward:Float| D[Optimizer];
|
43 |
+
D -->|Update trainable_variable:Variable| B;
|
44 |
+
"""
|
45 |
+
)
|
46 |
+
return
|
47 |
+
|
48 |
+
|
49 |
+
@app.cell(hide_code=True)
|
50 |
+
def _(mo):
|
51 |
+
mo.md(
|
52 |
+
r"""
|
53 |
+
This reinforcement loop is what makes possible for the system to learn by
|
54 |
+
repeatedly making predictions and refining its knowledge/methodology in order
|
55 |
+
to maximize the reward.
|
56 |
+
|
57 |
+
All rewards consist of a function or program that takes two inputs:
|
58 |
+
|
59 |
+
- `y_pred`: The prediction of the program.
|
60 |
+
- `y_true`: The ground truth/target value provided by the training data.
|
61 |
+
|
62 |
+
In Synalinks, we provide for several built-in rewards but it is also possible to
|
63 |
+
easily create new rewards if you needs to. Overall the choice will depend on the
|
64 |
+
task to perform. You can have a look at the rewards provided in the
|
65 |
+
[API section](https://synalinks.github.io/synalinks/Synalinks%20API/Rewards/).
|
66 |
+
|
67 |
+
### Understanding Metrics
|
68 |
+
|
69 |
+
`Metric`s are scalar values that are monitored during training and evaluation.
|
70 |
+
These values are used to know which program is best, in order to save it. Or to
|
71 |
+
provide additional information to compare different architectures with each others.
|
72 |
+
Unlike `Reward`s, a `Metric` is not used during training, meaning the metric value
|
73 |
+
is not backpropagated. Additionaly every reward function can be used as metric.
|
74 |
+
You can have a look at the metrics provided in the
|
75 |
+
[API section](https://synalinks.github.io/synalinks/Synalinks%20API/Metrics/).
|
76 |
+
|
77 |
+
### Predictions Filtering
|
78 |
+
|
79 |
+
Sometimes, your program have to output a complex JSON but you want to evaluate
|
80 |
+
just part of it. This could be because your training data only include a subset
|
81 |
+
of the JSON, or because the additional fields were added only to help the LMs.
|
82 |
+
In that case, you have to filter out or filter in your predictions and ground
|
83 |
+
truth. Meaning that you want to remove or keep respectively only specific fields
|
84 |
+
of your JSON data. This can be achieved by adding a `out_mask` or `in_mask` list
|
85 |
+
parameter containing the keys to remove or keep for evaluation. This parameters
|
86 |
+
can be added to both reward and metrics. Like in the above example where we only
|
87 |
+
keep the field `answer` to compute the rewards and metrics.
|
88 |
+
|
89 |
+
### Understanding Optimizers
|
90 |
+
|
91 |
+
Optimizers are systems that handle the update of the module's state in order to
|
92 |
+
make them more performant. They are in charge of backpropagating the rewards
|
93 |
+
from the training process and select or generate examples and hints for the LMs.
|
94 |
+
|
95 |
+
Here is an example of program compilation, which is how you configure the reward,
|
96 |
+
metrics, and optimizer:
|
97 |
+
"""
|
98 |
+
)
|
99 |
+
return
|
100 |
+
|
101 |
+
|
102 |
+
@app.cell
def _(synalinks):

    class Query(synalinks.DataModel):
        # Input data model: the raw user question.
        query: str = synalinks.Field(
            description="The user query",
        )

    class AnswerWithThinking(synalinks.DataModel):
        # Output data model: chain-of-thought text plus the final answer.
        thinking: str = synalinks.Field(
            description="Your step by step thinking process",
        )
        answer: str = synalinks.Field(
            description="The correct answer",
        )
    return Query, AnswerWithThinking
|
118 |
+
|
119 |
+
|
120 |
+
@app.cell
async def _(AnswerWithThinking, Query, synalinks):
    # Bug fix: `Query` and `AnswerWithThinking` are defined in the previous
    # cell and must be declared as cell parameters; the original signature
    # `async def _(synalinks)` made them unresolved names (NameError).

    language_model = synalinks.LanguageModel(
        model="openai/gpt-4o-mini",
    )

    _x0 = synalinks.Input(data_model=Query)
    _x1 = await synalinks.Generator(
        data_model=AnswerWithThinking,
        language_model=language_model,
    )(_x0)

    program = synalinks.Program(
        inputs=_x0,
        outputs=_x1,
        name="chain_of_thought",
        description="Usefull to answer in a step by step manner.",
    )

    # Compilation wires the reward, optimizer and metrics used for
    # training/evaluation; `in_mask` restricts scoring to the "answer" field.
    program.compile(
        reward=synalinks.rewards.CosineSimilarity(in_mask=["answer"]),
        optimizer=synalinks.optimizers.RandomFewShot(),
        metrics=[
            synalinks.metrics.F1Score(in_mask=["answer"]),
        ],
    )
    # Bug fix: marimo cells export a tuple matching their definitions; the
    # bare `return program` did not match the expected one-element tuple
    # (compare the other cells, e.g. `return (program_3,)`).
    return (program,)
|
148 |
+
|
149 |
+
|
150 |
+
@app.cell(hide_code=True)
|
151 |
+
async def _(mo):
|
152 |
+
mo.md(
|
153 |
+
r"""
|
154 |
+
## Conclusion
|
155 |
+
|
156 |
+
In this notebook, we explored the fundamental concepts of training and
|
157 |
+
optimizing Synalinks programs using rewards, metrics, and optimizers.
|
158 |
+
These components are crucial for building efficient and adaptive language
|
159 |
+
model applications.
|
160 |
+
|
161 |
+
### Key Takeaways
|
162 |
+
|
163 |
+
- **Rewards**: `Reward`s guide the reinforcement learning process by
|
164 |
+
providing feedback on the system's performance. They are typically
|
165 |
+
float values that indicate how well the system performed a task,
|
166 |
+
with the goal of maximizing the reward function during training.
|
167 |
+
Synalinks offers built-in rewards and allows for custom reward
|
168 |
+
functions to suit specific tasks.
|
169 |
+
|
170 |
+
- **Metrics**: `Metric`s are scalar values monitored during training
|
171 |
+
and evaluation to determine the best-performing program. Unlike
|
172 |
+
rewards, metrics are not used for backpropagation. They provide
|
173 |
+
additional insights for comparing different architectures and
|
174 |
+
saving the optimal model.
|
175 |
+
|
176 |
+
- **Optimizers**: `Optimizer`s update the module's state to improve
|
177 |
+
performance. They handle the backpropagation of rewards and
|
178 |
+
select or generate examples and hints for the language models.
|
179 |
+
Proper configuration of optimizers is essential for effective
|
180 |
+
training.
|
181 |
+
|
182 |
+
- **Filtering Outputs**: When dealing with complex JSON outputs,
|
183 |
+
filtering predictions and ground truths using `out_mask` or
|
184 |
+
`in_mask` parameters ensures that only relevant fields are
|
185 |
+
evaluated. This is particularly useful when the training data
|
186 |
+
includes a subset of the JSON or when additional fields are
|
187 |
+
used to aid the language models.
|
188 |
+
"""
|
189 |
+
)
|
190 |
+
return
|
191 |
+
|
192 |
+
|
193 |
+
if __name__ == "__main__":
|
194 |
+
app.run()
|
code_examples/1_basics/6_training_programs.py
ADDED
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# Training Programs
|
21 |
+
|
22 |
+
Like in machine learning, a LM application needs to be trained. In that case, we
|
23 |
+
don't update the weights of the model, but optimize the prompts by automatically
|
24 |
+
picking the best examples or generate hints in order to help the program to
|
25 |
+
perform better on your dataset.
|
26 |
+
|
27 |
+
For this lesson we are going to work on GSM8k a well known dataset of grade school
|
28 |
+
math word problems. Nowadays, most (all?) public datasets have been leaked, meaning
|
29 |
+
that their test set have been included in the LM trainset. This basically means
|
30 |
+
that the baseline score won't give you much information about the reasoning abilities
|
31 |
+
of the underlying language model (but more about its capability to remember),
|
32 |
+
however it is still interesting to have it as a baseline to evaluate the progress
|
33 |
+
of the programs training and the neuro-symbolic methods used or if you use small
|
34 |
+
models like here.
|
35 |
+
|
36 |
+
First, let's have a look at the dataset.
|
37 |
+
"""
|
38 |
+
)
|
39 |
+
return
|
40 |
+
|
41 |
+
|
42 |
+
@app.cell
|
43 |
+
def _(synalinks):
|
44 |
+
gsm8k_input_data_model = synalinks.datasets.gsm8k.get_input_data_model()
|
45 |
+
print("GSM8K input schema:\n")
|
46 |
+
print(gsm8k_input_data_model.pretty_schema())
|
47 |
+
return (gsm8k_input_data_model,)
|
48 |
+
|
49 |
+
|
50 |
+
@app.cell
|
51 |
+
def _(synalinks):
|
52 |
+
gsm8k_output_data_model = synalinks.datasets.gsm8k.get_output_data_model()
|
53 |
+
print("GSM8K output schema:\n")
|
54 |
+
print(gsm8k_output_data_model.pretty_schema())
|
55 |
+
return (gsm8k_output_data_model,)
|
56 |
+
|
57 |
+
|
58 |
+
@app.cell(hide_code=True)
|
59 |
+
def _(mo):
|
60 |
+
mo.md(
|
61 |
+
r"""
|
62 |
+
## Programming the pipeline
|
63 |
+
|
64 |
+
Now let's make a simple baseline program like in the first lessons
|
65 |
+
For this example we are going to use the data models from GSM8k.
|
66 |
+
"""
|
67 |
+
)
|
68 |
+
return
|
69 |
+
|
70 |
+
|
71 |
+
@app.cell
|
72 |
+
async def _(gsm8k_input_data_model, gsm8k_output_data_model, synalinks):
|
73 |
+
|
74 |
+
language_model = synalinks.LanguageModel(
|
75 |
+
model="openai/gpt-4o-mini",
|
76 |
+
)
|
77 |
+
|
78 |
+
_x0 = synalinks.Input(data_model=gsm8k_input_data_model)
|
79 |
+
_x1 = await synalinks.Generator(
|
80 |
+
data_model=gsm8k_output_data_model,
|
81 |
+
language_model=language_model,
|
82 |
+
)(_x0)
|
83 |
+
|
84 |
+
program = synalinks.Program(
|
85 |
+
inputs=_x0,
|
86 |
+
outputs=_x1,
|
87 |
+
name="chain_of_thought",
|
88 |
+
description="Usefull to answer in a step by step manner.",
|
89 |
+
)
|
90 |
+
return language_model, program
|
91 |
+
|
92 |
+
|
93 |
+
@app.cell(hide_code=True)
|
94 |
+
def _(mo):
|
95 |
+
mo.md(
|
96 |
+
r"""
|
97 |
+
## Compiling the program
|
98 |
+
|
99 |
+
For this example, we are going to select the `RandomFewShot` optimizer.
|
100 |
+
The reward function will be `ExactMatch` masked to match only the numerical answer.
|
101 |
+
While the additional metric will be the `F1Score` masked to process only the LMs thinking.
|
102 |
+
|
103 |
+
This metric will give us an indication to see if the chain of thought match with the dataset one.
|
104 |
+
"""
|
105 |
+
)
|
106 |
+
return
|
107 |
+
|
108 |
+
|
109 |
+
@app.cell
|
110 |
+
def _(program, synalinks):
|
111 |
+
program.compile(
|
112 |
+
optimizer=synalinks.optimizers.RandomFewShot(),
|
113 |
+
reward=synalinks.rewards.ExactMatch(in_mask=["answer"]),
|
114 |
+
metrics=[
|
115 |
+
synalinks.metrics.F1Score(in_mask=["thinking"]),
|
116 |
+
],
|
117 |
+
)
|
118 |
+
return
|
119 |
+
|
120 |
+
|
121 |
+
@app.cell(hide_code=True)
|
122 |
+
def _(mo):
|
123 |
+
mo.md(
|
124 |
+
r"""
|
125 |
+
## Training
|
126 |
+
|
127 |
+
### What do "sample", "batch", and "epoch" mean?
|
128 |
+
|
129 |
+
- **Sample**: A sample is one element of a dataset. For example, one DataModel
|
130 |
+
is one sample.
|
131 |
+
- **Batch**: A batch is a set of N samples. The samples in a batch are processed
|
132 |
+
independently, in parallel. During training, a batch result in only one
|
133 |
+
program update. A batch approximates the input distribution better than a
|
134 |
+
single input. The larger the batch, the better the approximation; however a
|
135 |
+
larger batch will take longer to process and still result in only one update.
|
136 |
+
- **Epochs**: An epoch is an arbitrary cutoff, generally defined as "one pass
|
137 |
+
over the entire dataset", used to separate training into distinct phases,
|
138 |
+
which is useful for logging and periodic evaluation. When using
|
139 |
+
`validation_split` or `validation_data` with the `fit` method of Synalinks
|
140 |
+
programs, evaluation will be run at the end of every epoch.
|
141 |
+
"""
|
142 |
+
)
|
143 |
+
return
|
144 |
+
|
145 |
+
|
146 |
+
@app.cell(hide_code=True)
|
147 |
+
def _(mo):
|
148 |
+
load_data = mo.ui.run_button(label="Load dataset")
|
149 |
+
load_data.center()
|
150 |
+
return (load_data,)
|
151 |
+
|
152 |
+
|
153 |
+
@app.cell
|
154 |
+
def _(load_data, mo, synalinks):
|
155 |
+
mo.stop(not load_data.value, mo.md("Click on the load button above"))
|
156 |
+
# Now we can load the dataset
|
157 |
+
with mo.status.spinner(title="Loading dataset...") as _spinner:
|
158 |
+
(x_train, y_train), (x_test, y_test) = synalinks.datasets.gsm8k.load_data()
|
159 |
+
_spinner.update("Done.")
|
160 |
+
return x_test, x_train, y_test, y_train
|
161 |
+
|
162 |
+
|
163 |
+
@app.cell(hide_code=True)
|
164 |
+
def _(mo, x_test, x_train):
|
165 |
+
epochs = mo.ui.slider(start=1, stop=64, value=5, label="Epochs")
|
166 |
+
batch_size = mo.ui.slider(start=1, stop=64, value=32, label="Batch size")
|
167 |
+
train_samples = mo.ui.slider(
|
168 |
+
start=1, stop=len(x_train), value=50, label="Train Samples"
|
169 |
+
)
|
170 |
+
test_samples = mo.ui.slider(start=1, stop=len(x_test), value=50, label="Test Samples")
|
171 |
+
return batch_size, epochs, test_samples, train_samples
|
172 |
+
|
173 |
+
|
174 |
+
@app.cell(hide_code=True)
def _(epochs, mo):
    # Bug fix: `mo` was used without being declared as a cell parameter,
    # which raises a NameError under marimo's dependency model.
    mo.hstack([epochs, mo.md(f"Epochs: {epochs.value}")])
    return
|
178 |
+
|
179 |
+
@app.cell(hide_code=True)
def _(batch_size, mo):
    # Bug fix: `mo` was used without being declared as a cell parameter,
    # which raises a NameError under marimo's dependency model.
    mo.hstack([batch_size, mo.md(f"Batch size: {batch_size.value}")])
    return
|
183 |
+
|
184 |
+
@app.cell(hide_code=True)
def _(mo, train_samples):
    # Bug fix: `mo` was used without being declared as a cell parameter,
    # which raises a NameError under marimo's dependency model.
    mo.hstack([train_samples, mo.md(f"Nb train samples: {train_samples.value}")])
    return
|
188 |
+
|
189 |
+
@app.cell(hide_code=True)
def _(mo, test_samples):
    # Bug fix: `mo` was used without being declared as a cell parameter,
    # which raises a NameError under marimo's dependency model.
    mo.hstack([test_samples, mo.md(f"Nb test samples: {test_samples.value}")])
    return
|
193 |
+
|
194 |
+
@app.cell(hide_code=True)
def _(mo):
    # Form asking for the OpenAI API key; the value is read by the next cell
    # to populate the process environment.
    openai_api_key = mo.ui.text_area(placeholder="Your OpenAI API key...").form()
    openai_api_key
    # Bug fix: the cell below declares `openai_api_key` as a parameter, so it
    # must be exported here. The original bare `return` exported nothing,
    # which made the dependent cell fail to resolve the name.
    return (openai_api_key,)
|
199 |
+
|
200 |
+
|
201 |
+
@app.cell(hide_code=True)
|
202 |
+
def _(mo, openai_api_key):
|
203 |
+
import os
|
204 |
+
mo.stop(not openai_api_key.value)
|
205 |
+
os.environ["OPENAI_API_KEY"] = openai_api_key.value
|
206 |
+
return
|
207 |
+
|
208 |
+
|
209 |
+
@app.cell(hide_code=True)
|
210 |
+
def _(mo):
|
211 |
+
train_button = mo.ui.run_button(label="Train")
|
212 |
+
train_button.center()
|
213 |
+
return (train_button,)
|
214 |
+
|
215 |
+
|
216 |
+
@app.cell
async def train(
    batch_size,
    epochs,
    mo,
    openai_api_key,
    program,
    train_button,
    synalinks,
    test_samples,
    train_samples,
    x_test,
    x_train,
    y_test,
    y_train,
):
    # BUGFIX: `openai_api_key` was used in the body without being declared
    # as a cell parameter, which raises a NameError when the cell runs.
    mo.stop(not openai_api_key.value, mo.md("Provide your OpenAI API key"))
    mo.stop(not train_button.value, mo.md("Click on the train button above"))
    # Where to save the best performing program
    checkpoint_filepath = "checkpoint.program.json"

    # Keep only the program that maximizes the validation reward.
    _program_checkpoint_callback = synalinks.callbacks.ProgramCheckpoint(
        filepath=checkpoint_filepath,
        monitor="val_reward",
        mode="max",
        save_best_only=True,
    )

    # For the purpose of the tutorial, we'll only train on the first N samples

    history = await program.fit(
        epochs=epochs.value,
        batch_size=batch_size.value,
        x=x_train[: train_samples.value],
        y=y_train[: train_samples.value],
        validation_data=(x_test[: test_samples.value], y_test[: test_samples.value]),
        callbacks=[_program_checkpoint_callback],
    )
    return checkpoint_filepath, history
|
254 |
+
|
255 |
+
|
256 |
+
@app.cell
|
257 |
+
def _(history, synalinks):
|
258 |
+
synalinks.utils.plot_history(history)
|
259 |
+
return
|
260 |
+
|
261 |
+
|
262 |
+
@app.cell(hide_code=True)
def _(mo):
    # BUGFIX: this cell uses `mo`, not `synalinks`; the original declared
    # the wrong dependency and would raise a NameError on `mo`.
    mo.md(
        r"""
        ## Evaluate Checkpoint
        """
    )
    return
|
270 |
+
|
271 |
+
|
272 |
+
@app.cell
def _(
    checkpoint_filepath,
    train,
    x_test,
    y_test,
    test_samples,
    synalinks,
):
    # NOTE: `train` is declared only to order this cell after training.
    # Load the JSON serialized program from disk
    loaded_program = synalinks.Program.load(checkpoint_filepath)

    # BUGFIX: slice with the slider's value, not the slider widget itself
    # (slicing a list with a UI element raises a TypeError).
    metrics = loaded_program.evaluate(
        x=x_test[: test_samples.value],
        y=y_test[: test_samples.value],
    )

    synalinks.utils.plot_metrics(metrics)
    return
|
290 |
+
|
291 |
+
@app.cell(hide_code=True)
|
292 |
+
async def _(mo):
|
293 |
+
mo.md(
|
294 |
+
r"""
|
295 |
+
## Conclusion
|
296 |
+
|
297 |
+
In this notebook, we explored the process of training Synalinks programs
|
298 |
+
to optimize their performance on specific datasets. By leveraging the GSM8k
|
299 |
+
dataset of grade school math word problems, we demonstrated how to train a
|
300 |
+
language model application to improve its reasoning abilities and accuracy.
|
301 |
+
|
302 |
+
### Key Takeaways
|
303 |
+
|
304 |
+
- **Rewards**: `Reward`s guide the reinforcement learning process by
|
305 |
+
providing feedback on the system's performance. They are typically
|
306 |
+
float values that indicate how well the system performed a task,
|
307 |
+
with the goal of maximizing the reward function during training.
|
308 |
+
Synalinks offers built-in rewards and allows for custom reward
|
309 |
+
functions to suit specific tasks.
|
310 |
+
|
311 |
+
- **Metrics**: `Metric`s are scalar values monitored during training
|
312 |
+
and evaluation to determine the best-performing program. Unlike
|
313 |
+
rewards, metrics are not used for backpropagation. They provide
|
314 |
+
additional insights for comparing different architectures and
|
315 |
+
saving the optimal model.
|
316 |
+
|
317 |
+
- **Optimizers**: `Optimizer`s update the module's state to improve
|
318 |
+
performance. They handle the backpropagation of rewards and
|
319 |
+
select or generate examples and hints for the language models.
|
320 |
+
Proper configuration of optimizers is essential for effective
|
321 |
+
training.
|
322 |
+
|
323 |
+
- **Filtering Outputs**: When dealing with complex JSON outputs,
|
324 |
+
filtering predictions and ground truths using `out_mask` or
|
325 |
+
`in_mask` parameters ensures that only relevant fields are
|
326 |
+
evaluated. This is particularly useful when the training data
|
327 |
+
includes a subset of the JSON or when additional fields are
|
328 |
+
used to aid the language models.
|
329 |
+
"""
|
330 |
+
)
|
331 |
+
return
|
332 |
+
|
333 |
+
|
334 |
+
if __name__ == "__main__":
|
335 |
+
app.run()
|
code_examples/2_advanced/1_implementing_custom_modules_and_programs_via_subclassing.py
ADDED
@@ -0,0 +1,340 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# Implementing custom modules & programs via subclassing
|
21 |
+
|
22 |
+
This tutorial, is for more advanced users, it will cover how to
|
23 |
+
create custom modules/programs via subclassing.
|
24 |
+
|
25 |
+
In this tutorial, we will cover the following themes:
|
26 |
+
|
27 |
+
- The `Module` class
|
28 |
+
- The `add_variable()` method
|
29 |
+
- Trainable and non-trainable variables
|
30 |
+
- The `compute_output_spec()` and `build()` method
|
31 |
+
- The training argument in `call()`
|
32 |
+
- Making sure your module/program can be serialized
|
33 |
+
|
34 |
+
---
|
35 |
+
|
36 |
+
One of the main abstraction of Synalinks is the `Module` class.
|
37 |
+
A `Module` encapsulate both a state (the module's variables) and
|
38 |
+
a transformation from inputs to outputs (the `call()` method).
|
39 |
+
|
40 |
+
For this tutorial, we are going to make a simple neuro-symbolic component
|
41 |
+
called `BacktrackingOfThought`. This component is an adaptation of the
|
42 |
+
famous backtracking algorithm, used a lot in symbolic planning/reasoning,
|
43 |
+
combined with chain of thought, nowadays most used technique to enhance
|
44 |
+
the LMs' predictions.
|
45 |
+
|
46 |
+
The principle is straightforward: the component will have to "think", then
|
47 |
+
we will critique at runtime the thinking and aggregate it to
|
48 |
+
the current chain of thinking only if it is above the given threshold.
|
49 |
+
This mechanism will allow the system to discard bad thinking to resume
|
50 |
+
at the previous step. Additionally we will add a stop condition.
|
51 |
+
|
52 |
+
This algorithm is a simplified version of the popular `TreeOfThought` that,
|
53 |
+
instead of being a tree structure, is only a sequential chain of thinking.
|
54 |
+
"""
|
55 |
+
)
|
56 |
+
return
|
57 |
+
|
58 |
+
|
59 |
+
@app.cell
def _(synalinks):

    class Thinking(synalinks.DataModel):
        # Intermediate reasoning step produced by the language model.
        thinking: str = synalinks.Field(
            description="Your step by step thinking"
        )

    class CritiqueWithReward(synalinks.DataModel):
        # Self-critique of a thinking step plus a scalar reward in [0.0, 1.0].
        critique: str = synalinks.Field(description="The step by step critique")
        reward: float = synalinks.Field(
            description="The reward corresponding to the critique between [0.0, 1.0]",
            le=1.0,
            ge=0.0,
        )

    class BacktrackingOfThought(synalinks.Module):
        """Sequential chain-of-thought with backtracking.

        At each iteration the module generates a thinking step and scores it,
        either with an external `critique_program` or with a reward embedded
        in the thinking step itself. The step is kept (concatenated to the
        inputs) only when the reward exceeds `backtracking_threshold`; the
        loop stops early when it exceeds `stop_threshold`. A final generator
        then produces the answer from the accumulated inputs.
        """

        def __init__(
            self,
            schema=None,
            data_model=None,
            language_model=None,
            backtracking_threshold=0.5,
            stop_threshold=0.8,
            max_iterations=5,
            critique_program=None,
            prompt_template=None,
            examples=None,
            hints=None,
            use_inputs_schema=False,
            use_outputs_schema=False,
            name=None,
            description=None,
            trainable=None,
        ):
            super().__init__(
                name=name,
                description=description,
                trainable=trainable,
            )
            # Accept either an explicit JSON schema or a DataModel class.
            if not schema and data_model:
                schema = data_model.schema()
            self.schema = schema
            self.language_model = language_model
            self.backtracking_threshold = backtracking_threshold
            self.stop_threshold = stop_threshold
            self.max_iterations = max_iterations
            self.critique_program = critique_program
            self.prompt_template = prompt_template
            self.examples = examples
            self.hints = hints
            self.use_inputs_schema = use_inputs_schema
            self.use_outputs_schema = use_outputs_schema
            if not self.critique_program:
                # If no critique program is provided
                # we compute the reward in the thinking step itself.
                thinking_data_model = (
                    Thinking
                    + synalinks.SymbolicDataModel(schema=self.schema)
                    + CritiqueWithReward
                )
            else:
                thinking_data_model = Thinking + synalinks.SymbolicDataModel(
                    schema=self.schema
                )
            # This is for generating the intermediary steps
            self.thinking = synalinks.Generator(
                data_model=thinking_data_model,
                language_model=self.language_model,
                prompt_template=self.prompt_template,
                examples=self.examples,
                hints=self.hints,
                use_inputs_schema=self.use_inputs_schema,
                use_outputs_schema=self.use_outputs_schema,
                name=self.name + "_thinking_generator",
            )
            # This is going to be the final generator
            self.generator = synalinks.Generator(
                schema=self.schema,
                language_model=self.language_model,
                prompt_template=self.prompt_template,
                examples=self.examples,
                hints=self.hints,
                use_inputs_schema=self.use_inputs_schema,
                use_outputs_schema=self.use_outputs_schema,
                name=self.name + "_generator",
            )

        async def call(self, inputs, training=False):
            """Run the backtracking loop and return the final generation.

            Returns None when `inputs` is falsy so the module can be
            short-circuited inside logical flows.
            """
            if not inputs:
                # This is to allow logical flows
                # (don't run the module if no inputs provided)
                return None
            # BUGFIX: `self.max_iterations` is an int; the original iterated
            # it directly (`for i in self.max_iterations`), a TypeError.
            for i in range(self.max_iterations):
                thinking = await self.thinking(inputs)
                if self.critique_program:
                    critique = await self.critique_program(thinking)
                    reward = critique.get("reward")
                else:
                    reward = thinking.get("reward")
                if reward > self.backtracking_threshold:
                    if reward > self.stop_threshold:
                        # Good enough: stop thinking and produce the answer.
                        break
                    # Keep this thinking step for the next iteration.
                    inputs = await synalinks.ops.concat(
                        inputs,
                        thinking,
                        name=self.name + f"_inputs_with_thinking_{i}",
                    )
            return await self.generator(inputs)

        async def compute_output_spec(self, _, training=False):
            # The output always follows the configured schema.
            # NOTE: pass `schema=` by keyword, consistent with every other
            # SymbolicDataModel call site in this module.
            return synalinks.SymbolicDataModel(schema=self.schema)

        def get_config(self):
            """Return a JSON-serializable config to recreate this module."""
            config = {
                "schema": self.schema,
                "backtracking_threshold": self.backtracking_threshold,
                "stop_threshold": self.stop_threshold,
                "max_iterations": self.max_iterations,
                "prompt_template": self.prompt_template,
                "examples": self.examples,
                "hints": self.hints,
                "use_inputs_schema": self.use_inputs_schema,
                "use_outputs_schema": self.use_outputs_schema,
                "name": self.name,
                "description": self.description,
                "trainable": self.trainable,
            }
            language_model_config = {
                "language_model": synalinks.saving.serialize_synalinks_object(
                    self.language_model,
                )
            }
            if self.critique_program:
                critique_program_config = {
                    "critique_program": synalinks.saving.serialize_synalinks_object(
                        self.critique_program,
                    )
                }
            else:
                critique_program_config = {
                    "critique_program": None,
                }
            return {**config, **language_model_config, **critique_program_config}

        @classmethod
        def from_config(cls, config):
            """Recreate the module from a `get_config()` dictionary."""
            language_model = synalinks.saving.deserialize_synalinks_object(
                config.pop("language_model")
            )
            # BUGFIX: always pop the key. The original only popped it when
            # truthy, so a None value stayed in `config` and was passed twice
            # (explicitly and via **config), raising a TypeError.
            critique_program_config = config.pop("critique_program", None)
            if critique_program_config:
                critique_program = synalinks.saving.deserialize_synalinks_object(
                    critique_program_config
                )
            else:
                critique_program = None
            return cls(
                language_model=language_model,
                critique_program=critique_program,
                **config,
            )

    return BacktrackingOfThought, CritiqueWithReward, Thinking
|
223 |
+
|
224 |
+
|
225 |
+
@app.cell(hide_code=True)
|
226 |
+
def _(mo):
|
227 |
+
mo.md(
|
228 |
+
r"""
|
229 |
+
### The `__init__()` function
|
230 |
+
|
231 |
+
First, let's explain the `__init__()` function. When implementing modules that
|
232 |
+
use a `Generator`, you want to externalize the generator's parameters
|
233 |
+
(`prompt_template`, `hints`, `examples`, `use_inputs_schema`, `use_outputs_schema`)
|
234 |
+
to give maximum flexibility to your module when possible.
|
235 |
+
Then, you have to include the default arguments of a module (`name`, `description`, `trainable`)
|
236 |
+
that will be provided to the `super().__init__()`.
|
237 |
+
Although the name and description are inferred automatically it is a good practice to
|
238 |
+
let the user personalize them. The `trainable` argument, will indicate if the module
|
239 |
+
is frozen or not, meaning that their variables could be updated by the optimizer,
|
240 |
+
by default, a module should be trainable.
|
241 |
+
|
242 |
+
And finally, you can add any relevant information, whether for the initialization of
|
243 |
+
the variables, or a config parameter like here.
|
244 |
+
|
245 |
+
To add a variable to the module, you have to use the `add_variables` function,
|
246 |
+
this function can only be used in the `__init__()` or in the `build()` function.
|
247 |
+
The build function is useful to create variables, or initialize your module/program
|
248 |
+
based on the actual inputs, that is not known at this stage, remember the module can
|
249 |
+
accept any inputs.
|
250 |
+
|
251 |
+
### How to know when using a `Variable`?
|
252 |
+
|
253 |
+
As a rule of thumb, the variables should be anything that evolve over time during
|
254 |
+
inference/training. These variables could be updated by the module itself, or by
|
255 |
+
the optimizer if you have an optimizer designed for that. They will be serialized
|
256 |
+
when you save your program so you can recover the state of your program by loading
|
257 |
+
a JSON file. In this example, the variables are encapsulated in the `Generator`.
|
258 |
+
|
259 |
+
### The `call()` function
|
260 |
+
|
261 |
+
The `call()` function is the core of the `Module` class. It defines the computation
|
262 |
+
performed at every call of the module.
|
263 |
+
This function takes `inputs` and an optional `training` argument, which indicates
|
264 |
+
whether the module is in training mode or not.
|
265 |
+
|
266 |
+
In the `BacktrackingOfThought` module, the `call()` function implements the
|
267 |
+
backtracking logic:
|
268 |
+
|
269 |
+
- It iterates up to `max_iterations` times.
|
270 |
+
- In each iteration, it generates a "thinking" step using the `thinking` generator.
|
271 |
+
- It then critiques the generated thinking using either a provided critique program or
|
272 |
+
a reward value embedded in the thinking step.
|
273 |
+
- If the reward exceeds the `backtracking_threshold`, the thinking step is concatenated
|
274 |
+
with the inputs for the next iteration.
|
275 |
+
- If the reward exceeds the `stop_threshold`, the iteration stops early.
|
276 |
+
- Finally, the `generator` produces the final output based on the accumulated inputs.
|
277 |
+
|
278 |
+
### The `compute_output_spec()` function
|
279 |
+
|
280 |
+
The `compute_output_spec()` function is responsible for defining the output data model
|
281 |
+
of the module/program. It allows the system to understand the structure of the data
|
282 |
+
produced by this module.
|
283 |
+
|
284 |
+
In this example, `compute_output_spec()` returns a `SymbolicDataModel` based on the module's
|
285 |
+
schema, indicating the expected structure of the output data.
|
286 |
+
|
287 |
+
As a rule of thumb, if you access a data model field (using `get()`) you will have to
|
288 |
+
implement it otherwise, Synalinks will infer the output spec by running the call
|
289 |
+
function with symbolic data models. If you have any doubt, do not implement it and the system will
|
290 |
+
raise an error if you need to.
|
291 |
+
|
292 |
+
### Serialization and Deserialization
|
293 |
+
|
294 |
+
To ensure that your module can be saved and loaded correctly, you need to implement serialization
|
295 |
+
and deserialization methods. This is crucial for saving the state of your module, including
|
296 |
+
any trainable variables, and restoring it later.
|
297 |
+
|
298 |
+
- The `get_config()` method should return a dictionary containing all the information needed
|
299 |
+
to recreate the module. This includes the module's configuration and any serialized
|
300 |
+
sub-components like the language model or critique program in this case.
|
301 |
+
- The `from_config()` class method should be able to reconstruct the module from the
|
302 |
+
configuration dictionary returned by `get_config()`.
|
303 |
+
|
304 |
+
## Conclusion
|
305 |
+
|
306 |
+
By following these guidelines, you can create custom modules in Synalinks that are flexible,
|
307 |
+
reusable, and can be integrated into larger programs. The `BacktrackingOfThought` module
|
308 |
+
demonstrates how to combine symbolic reasoning with language model predictions to enhance
|
309 |
+
the decision-making process.
|
310 |
+
|
311 |
+
### Key Takeaways
|
312 |
+
|
313 |
+
- **Module Class**: The `Module` class in Synalinks encapsulates both state (variables)
|
314 |
+
and transformation logic (`call()` method), serving as a foundational abstraction for
|
315 |
+
building custom components.
|
316 |
+
- **Initialization and Variables**: The __init__() function initializes the module,
|
317 |
+
externalizing generator parameters for flexibility. Trainable and non-trainable
|
318 |
+
variables are managed using the add_variables function, ensuring that the
|
319 |
+
module's state can evolve over time and be serialized.
|
320 |
+
- **Call Function**: The `call()` function defines the core computation of the module,
|
321 |
+
handling inputs and producing outputs. In `BacktrackingOfThought`, it implements
|
322 |
+
backtracking logic, iteratively generating and critiquing thinking steps to refine
|
323 |
+
the output.
|
324 |
+
- **Output Specification**: The `compute_output_spec()` function defines the output data
|
325 |
+
model, allowing the system to understand the structure of the produced data.
|
326 |
+
Implementing this function is crucial when accessing data model fields directly.
|
327 |
+
- **Serialization**: Proper serialization and deserialization methods (`get_config()`
|
328 |
+
and `from_config()`) ensure that the module's state can be saved and restored,
|
329 |
+
facilitating reuse and integration into larger programs.
|
330 |
+
- **Flexibility and Reusability**: By following these guidelines, you can create
|
331 |
+
custom modules that are flexible, reusable, and easily integrated into neuro-symbolic
|
332 |
+
programs. The `BacktrackingOfThought` module exemplifies how to combine symbolic
|
333 |
+
reasoning with language models to improve decision-making processes.
|
334 |
+
"""
|
335 |
+
)
|
336 |
+
return
|
337 |
+
|
338 |
+
|
339 |
+
if __name__ == "__main__":
|
340 |
+
app.run()
|
examples/chatbot.py
DELETED
@@ -1,152 +0,0 @@
|
|
1 |
-
import marimo
|
2 |
-
|
3 |
-
__generated_with = "0.9.14"
|
4 |
-
app = marimo.App(width="medium")
|
5 |
-
|
6 |
-
|
7 |
-
@app.cell
|
8 |
-
def __():
|
9 |
-
import marimo as mo
|
10 |
-
import os
|
11 |
-
from huggingface_hub import InferenceClient
|
12 |
-
return InferenceClient, mo, os
|
13 |
-
|
14 |
-
|
15 |
-
@app.cell
|
16 |
-
def __():
|
17 |
-
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
|
18 |
-
return (MODEL_NAME,)
|
19 |
-
|
20 |
-
|
21 |
-
@app.cell(hide_code=True)
|
22 |
-
def __(MODEL_NAME, mo):
|
23 |
-
mo.md(f"""
|
24 |
-
# Chat with **{MODEL_NAME}**
|
25 |
-
""")
|
26 |
-
return
|
27 |
-
|
28 |
-
|
29 |
-
@app.cell
|
30 |
-
def __(max_tokens, mo, system_message, temperature, top_p):
|
31 |
-
mo.hstack(
|
32 |
-
[
|
33 |
-
system_message,
|
34 |
-
mo.vstack([temperature, top_p, max_tokens], align="end"),
|
35 |
-
],
|
36 |
-
)
|
37 |
-
return
|
38 |
-
|
39 |
-
|
40 |
-
@app.cell
|
41 |
-
def __(mo, respond):
|
42 |
-
chat = mo.ui.chat(
|
43 |
-
model=respond,
|
44 |
-
prompts=["Tell me a joke.", "What is the square root of {{number}}?"],
|
45 |
-
)
|
46 |
-
chat
|
47 |
-
return (chat,)
|
48 |
-
|
49 |
-
|
50 |
-
@app.cell
|
51 |
-
def __(InferenceClient, MODEL_NAME, os):
|
52 |
-
"""
|
53 |
-
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.26.2/en/guides/inference
|
54 |
-
"""
|
55 |
-
|
56 |
-
hf_token = os.environ.get("HF_TOKEN")
|
57 |
-
if not hf_token:
|
58 |
-
print("HF_TOKEN not set, may have limited access.")
|
59 |
-
|
60 |
-
client = InferenceClient(
|
61 |
-
MODEL_NAME,
|
62 |
-
token=hf_token,
|
63 |
-
)
|
64 |
-
return client, hf_token
|
65 |
-
|
66 |
-
|
67 |
-
@app.cell
|
68 |
-
def __(client, mo):
|
69 |
-
# Create UI controls
|
70 |
-
system_message = mo.ui.text_area(
|
71 |
-
value="You are a friendly Chatbot.",
|
72 |
-
label="System message",
|
73 |
-
)
|
74 |
-
max_tokens = mo.ui.slider(
|
75 |
-
start=1,
|
76 |
-
stop=2048,
|
77 |
-
value=512,
|
78 |
-
step=1,
|
79 |
-
label="Max new tokens",
|
80 |
-
show_value=True,
|
81 |
-
)
|
82 |
-
temperature = mo.ui.slider(
|
83 |
-
start=0.1,
|
84 |
-
stop=4.0,
|
85 |
-
value=0.7,
|
86 |
-
step=0.1,
|
87 |
-
label="Temperature",
|
88 |
-
show_value=True,
|
89 |
-
)
|
90 |
-
top_p = mo.ui.slider(
|
91 |
-
start=0.1,
|
92 |
-
stop=1.0,
|
93 |
-
value=0.95,
|
94 |
-
step=0.05,
|
95 |
-
label="Top-p (nucleus sampling)",
|
96 |
-
show_value=True,
|
97 |
-
)
|
98 |
-
|
99 |
-
# Add more configuration options if needed.
|
100 |
-
|
101 |
-
|
102 |
-
# Create chat callback
|
103 |
-
def respond(messages: list[mo.ai.ChatMessage], config):
|
104 |
-
chat_messages = [{"role": "system", "content": system_message.value}]
|
105 |
-
|
106 |
-
for message in messages:
|
107 |
-
parts = []
|
108 |
-
# Add text
|
109 |
-
parts.append({"type": "text", "text": message.content})
|
110 |
-
|
111 |
-
# Add attachments
|
112 |
-
if message.attachments:
|
113 |
-
for attachment in message.attachments:
|
114 |
-
content_type = attachment.content_type or ""
|
115 |
-
# This example only supports image attachments
|
116 |
-
if content_type.startswith("image"):
|
117 |
-
parts.append(
|
118 |
-
{
|
119 |
-
"type": "image_url",
|
120 |
-
"image_url": {"url": attachment.url},
|
121 |
-
}
|
122 |
-
)
|
123 |
-
else:
|
124 |
-
raise ValueError(
|
125 |
-
f"Unsupported content type {content_type}"
|
126 |
-
)
|
127 |
-
|
128 |
-
chat_messages.append({"role": message.role, "content": parts})
|
129 |
-
|
130 |
-
response = client.chat_completion(
|
131 |
-
chat_messages,
|
132 |
-
max_tokens=max_tokens.value,
|
133 |
-
temperature=temperature.value,
|
134 |
-
top_p=top_p.value,
|
135 |
-
stream=False,
|
136 |
-
)
|
137 |
-
|
138 |
-
# You can return strings, markdown, charts, tables, dataframes, and more.
|
139 |
-
return response.choices[0].message.content
|
140 |
-
return max_tokens, respond, system_message, temperature, top_p
|
141 |
-
|
142 |
-
|
143 |
-
@app.cell
|
144 |
-
def __():
|
145 |
-
# If you need to do anything _reactively_ to the chat messages,
|
146 |
-
# you can access the chat messages using the `chat.value` attribute.
|
147 |
-
# chat.value
|
148 |
-
return
|
149 |
-
|
150 |
-
|
151 |
-
if __name__ == "__main__":
|
152 |
-
app.run()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/dataset_explorer.py
DELETED
@@ -1,108 +0,0 @@
|
|
1 |
-
import marimo
|
2 |
-
|
3 |
-
__generated_with = "0.9.18"
|
4 |
-
app = marimo.App(width="full")
|
5 |
-
|
6 |
-
|
7 |
-
@app.cell
|
8 |
-
def __():
|
9 |
-
datasets = [
|
10 |
-
# Add your own HF datasets
|
11 |
-
"scikit-learn/iris/Iris.csv",
|
12 |
-
"scikit-learn/adult-census-income/adult.csv",
|
13 |
-
"scikit-learn/auto-mpg/auto-mpg.csv",
|
14 |
-
"scikit-learn/credit-card-clients/UCI_Credit_Card.csv",
|
15 |
-
"scikit-learn/Fish/Fish.csv",
|
16 |
-
"scikit-learn/tips/tips.csv",
|
17 |
-
]
|
18 |
-
return (datasets,)
|
19 |
-
|
20 |
-
|
21 |
-
@app.cell(hide_code=True)
|
22 |
-
def __(mo):
|
23 |
-
mo.md(r"""## Select a dataset""")
|
24 |
-
return
|
25 |
-
|
26 |
-
|
27 |
-
@app.cell(hide_code=True)
|
28 |
-
def __(datasets, mo):
|
29 |
-
dataset = mo.ui.dropdown(datasets, value=datasets[0], label="Select a dataset")
|
30 |
-
no_limit = mo.ui.switch(label="Limit 1000", value=True)
|
31 |
-
mo.hstack([dataset, no_limit])
|
32 |
-
return dataset, no_limit
|
33 |
-
|
34 |
-
|
35 |
-
@app.cell
|
36 |
-
def __(dataset, mo, no_limit):
|
37 |
-
explore = mo.sql(
|
38 |
-
f"""
|
39 |
-
CREATE OR REPLACE TEMP TABLE explore
|
40 |
-
AS (FROM 'hf://datasets/{dataset.value}')
|
41 |
-
{'LIMIT 1000' if no_limit.value else ''};
|
42 |
-
|
43 |
-
FROM explore;
|
44 |
-
"""
|
45 |
-
)
|
46 |
-
return (explore,)
|
47 |
-
|
48 |
-
|
49 |
-
@app.cell(hide_code=True)
|
50 |
-
def __(mo):
|
51 |
-
mo.md(r"""## Summary""")
|
52 |
-
return
|
53 |
-
|
54 |
-
|
55 |
-
@app.cell(hide_code=True)
|
56 |
-
def __(explore, mo):
|
57 |
-
_schema = mo.accordion({"Schema": explore.schema})
|
58 |
-
|
59 |
-
mo.md(f"""
|
60 |
-
* Total rows: **{len(explore):,}**
|
61 |
-
* Total columns: **{len(explore.columns)}**
|
62 |
-
|
63 |
-
{_schema}
|
64 |
-
""")
|
65 |
-
return
|
66 |
-
|
67 |
-
|
68 |
-
@app.cell
|
69 |
-
def __(explore):
|
70 |
-
explore.describe()
|
71 |
-
return
|
72 |
-
|
73 |
-
|
74 |
-
@app.cell(hide_code=True)
|
75 |
-
def __(mo):
|
76 |
-
mo.md("""## Manipulate the data""")
|
77 |
-
return
|
78 |
-
|
79 |
-
|
80 |
-
@app.cell
|
81 |
-
def __(explore, mo):
|
82 |
-
transformed = mo.ui.dataframe(explore)
|
83 |
-
transformed
|
84 |
-
return (transformed,)
|
85 |
-
|
86 |
-
|
87 |
-
@app.cell(hide_code=True)
|
88 |
-
def __(mo):
|
89 |
-
mo.md(r"""## Explore the data""")
|
90 |
-
return
|
91 |
-
|
92 |
-
|
93 |
-
@app.cell
|
94 |
-
def __(mo, transformed):
|
95 |
-
mo.ui.data_explorer(transformed.value)
|
96 |
-
return
|
97 |
-
|
98 |
-
|
99 |
-
@app.cell(hide_code=True)
|
100 |
-
def __():
|
101 |
-
# Imports
|
102 |
-
import marimo as mo
|
103 |
-
import polars
|
104 |
-
return mo, polars
|
105 |
-
|
106 |
-
|
107 |
-
if __name__ == "__main__":
|
108 |
-
app.run()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,10 +1,4 @@
|
|
1 |
huggingface-hub==0.26.2
|
2 |
marimo[sql]
|
3 |
-
|
4 |
-
|
5 |
-
openai
|
6 |
-
pyarrow
|
7 |
-
# Or a specific version
|
8 |
-
# marimo>=0.9.0
|
9 |
-
|
10 |
-
# Add other dependencies as needed
|
|
|
1 |
huggingface-hub==0.26.2
|
2 |
marimo[sql]
|
3 |
+
datasets
|
4 |
+
synalinks
|
|
|
|
|
|
|
|
|
|
|
|