YoanSallami commited on
Commit
fe643f6
·
1 Parent(s): 10db5b3

Add notebooks

Browse files
Dockerfile CHANGED
@@ -10,6 +10,9 @@ ENV VIRTUAL_ENV=/opt/venv \
10
  PATH="/opt/venv/bin:/home/user/.local/bin:$PATH" \
11
  HOME=/home/user
12
 
 
 
 
13
  # Install dependencies
14
  COPY --chown=user:user ./requirements.txt requirements.txt
15
  RUN uv venv $VIRTUAL_ENV \
@@ -31,7 +34,7 @@ RUN --mount=type=secret,id=MARIMO_PASSWORD \
31
  WORKDIR /data
32
 
33
  # Copy examples
34
- COPY --chown=user:user ./examples ./examples
35
 
36
  # Set user
37
  USER user
@@ -47,7 +50,6 @@ dataframes = "rich"
47
  theme = "light"
48
  ENDCONFIG
49
 
50
-
51
  # Uncomment to enable password protection
52
- # CMD marimo edit /data --host=0.0.0.0 --port=7860 --token-password=$(cat $HOME/.marimo_password)
53
- CMD marimo edit /data --host=0.0.0.0 --port=7860 --no-token
 
10
  PATH="/opt/venv/bin:/home/user/.local/bin:$PATH" \
11
  HOME=/home/user
12
 
13
+ # Install graphviz for dot visualization
14
+ RUN apt update && apt install -y graphviz
15
+
16
  # Install dependencies
17
  COPY --chown=user:user ./requirements.txt requirements.txt
18
  RUN uv venv $VIRTUAL_ENV \
 
34
  WORKDIR /data
35
 
36
  # Copy examples
37
+ COPY --chown=user:user ./code_examples ./code_examples
38
 
39
  # Set user
40
  USER user
 
50
  theme = "light"
51
  ENDCONFIG
52
 
 
53
  # Uncomment to enable password protection
54
+ # CMD ["marimo", "edit", "/data", "--host=0.0.0.0", "--port=7860", "--token-password=$(cat $HOME/.marimo_password)"]
55
+ CMD ["marimo", "edit", "/data", "--host=0.0.0.0", "--port=7860", "--no-token"]
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: marimo server template
3
- emoji: 📝
4
  colorFrom: yellow
5
  colorTo: blue
6
  sdk: docker
7
  pinned: true
8
  license: mit
9
- short_description: A marimo Space to edit marimo notebooks
10
  ---
11
 
12
- Check out marimo at <https://github.com/marimo-team/marimo>
13
- Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
 
1
  ---
2
+ title: synalinks notebooks
3
+ emoji: 🧠🔗
4
  colorFrom: yellow
5
  colorTo: blue
6
  sdk: docker
7
  pinned: true
8
  license: mit
9
+ short_description: A marimo Space to edit Synalinks 🧠🔗 notebooks
10
  ---
11
 
12
+ Check out the documentation at <https://synalinks.github.io/synalinks>
13
+ Check out the repository at <https://github.com/SynaLinks/synalinks>
code_examples/1_basics/1_first_steps.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import marimo
2
+
3
+ __generated_with = "0.11.9"
4
+ app = marimo.App()
5
+
6
+
7
+ @app.cell(hide_code=True)
8
+ def _():
9
+ import marimo as mo
10
+ import synalinks
11
+
12
+ synalinks.backend.clear_session()
13
+ return mo, synalinks
14
+
15
+
16
+ @app.cell(hide_code=True)
17
+ def _(mo):
18
+ mo.md(
19
+ r"""
20
+ # First Steps
21
+
22
+ First, install Synalinks, the easiest way is using pip:
23
+
24
+ ```shell
25
+ pip install synalinks
26
+ ```
27
+
28
+ Or uv (recommended):
29
+
30
+ ```shell
31
+ uv pip install synalinks
32
+ ```
33
+
34
+ If you want to install it from source (for contributors), then do:
35
+
36
+ ```shell
37
+ git clone https://github.com/SynaLinks/Synalinks
38
+ cd Synalinks
39
+ ./shell/uv.sh # Install uv
40
+ ./shell/install.sh # Create the virtual env and install Synalinks
41
+ ```
42
+
43
+ After this, open a python file or notebook and check the install:
44
+ """
45
+ )
46
+ return
47
+
48
+
49
+ @app.cell
50
+ def _(synalinks):
51
+ print(synalinks.__version__)
52
+ return
53
+
54
+
55
+ @app.cell(hide_code=True)
56
+ def _(mo):
57
+ mo.md(
58
+ r"""
59
+ Synalinks uses a global context to ensure that each variable/module
60
+ has a unique name. Clear it at the beginning of your scripts to
61
+ ensure naming reproducibility.
62
+ """
63
+ )
64
+ return
65
+
66
+
67
+ @app.cell
68
+ def _(synalinks):
69
+ synalinks.backend.clear_session()
70
+ return
71
+
72
+
73
+ @app.cell(hide_code=True)
74
+ def _(mo):
75
+ mo.md(
76
+ r"""
77
+ Additionally, you can install Ollama [here](https://ollama.com/) to run
78
+ Language Models (LMs) locally, which lets you run these notebooks on your own machine.
79
+
80
+
81
+ ## Prompting
82
+
83
+ You will notice that there is no traditional prompting involved in
84
+ Synalinks, everything is described as data models in and out.
85
+ However we use a prompt template, that will tell the system how to
86
+ construct the prompt automatically.
87
+
88
+ The prompt template is a jinja2 template that describe how to render
89
+ the examples, hints and how to convert them into chat messages:
90
+ """
91
+ )
92
+ return
93
+
94
+
95
+ @app.cell
96
+ def _(synalinks):
97
+ print(synalinks.default_prompt_template())
98
+ return
99
+
100
+
101
+ @app.cell(hide_code=True)
102
+ def _(mo):
103
+ mo.md(
104
+ r"""
105
+ If you are making a conversational application, we provide the following template to use.
106
+ To use it, provide this template to the `prompt_template` argument of your `Generator` module.
107
+ Note that this template only works if your module has a `ChatMessages` input.
108
+ """
109
+ )
110
+
111
+
112
+ @app.cell
113
+ def _(synalinks):
114
+ print(synalinks.chat_prompt_template())
115
+ return
116
+
117
+
118
+ @app.cell(hide_code=True)
119
+ def _(mo):
120
+ mo.md(
121
+ r"""
122
+ The template use the XML tags `<system>...</system>`, `<user>...</user>` and
123
+ `<assistant>...</assistant>` to know how to convert the prompt template
124
+ into messages. You can modify the default template used by using the
125
+ `prompt_template` argument in Synalinks modules. You can notice also,
126
+ that we send the inputs's and output's JSON schema to instruct the LMs
127
+ how to answer, you can enable/disable that behavior by using `use_inputs_schema`
128
+ and `use_outputs_schema` in Synalinks modules. Synalinks use constrained
129
+ structured output ensuring that the LMs answer respect the data models
130
+ specification (the JSON schema), and is ready to parse, so in theory
131
+ we don't need it, except if you use it to provide additional information
132
+ to the LMs. You can find more information in the
133
+ [`Generator`](https://synalinks.github.io/synalinks/Synalinks%20API/Modules%20API/Core%20Modules/Generator%20module/) documentation.
134
+
135
+ ## Data Models
136
+
137
+ To provide additional information to the LMs, you can use the data models
138
+ `Field`. You can notice that Synalinks use Pydantic as default data backend.
139
+ Allowing Synalinks to be compatible out-of-the-box with structured output
140
+ and FastAPI.
141
+ """
142
+ )
143
+ return
144
+
145
+
146
+ @app.cell
147
+ def _(synalinks):
148
+ class AnswerWithThinking(synalinks.DataModel):
149
+ thinking: str = synalinks.Field(
150
+ description="Your step by step thinking process",
151
+ )
152
+ answer: str = synalinks.Field(
153
+ description="The correct answer",
154
+ )
155
+
156
+ return (AnswerWithThinking,)
157
+
158
+
159
+ @app.cell(hide_code=True)
160
+ def _(mo):
161
+ mo.md(
162
+ r"""
163
+
164
+ ## Conclusion
165
+
166
+ Usually that will be enough to instruct the LMs, you don't need to modify
167
+ the prompt template. Just by adding additional descriptions to the data
168
+ models fields you can instruct your system to behave as you want.
169
+ If the system needs general instructions about how to behave, you can
170
+ use the `hints` argument in Synalinks modules that will be formatted as
171
+ presented in the prompt template.
172
+
173
+ ### Key Takeaways
174
+
175
+ - **Ease of Integration**: Synalinks seamlessly integrates with existing
176
+ Python projects, making it easy to incorporate advanced language
177
+ model capabilities without extensive modifications.
178
+ - **Structured Outputs**: By using data models and JSON schemas, Synalinks
179
+ ensures that the LMs responses are structured and ready for parsing,
180
+ reducing the need for additional post-processing.
181
+ - **Customizable Prompts**: The prompt templates in Synalinks are highly
182
+ customizable, allowing you to tailor the instructions provided to
183
+ the LMs based on your specific use case.
184
+ - **Compatibility**: Synalinks use Pydantic as the default data backend
185
+ ensures compatibility with structured output and FastAPI.
186
+ """
187
+ )
188
+ return
189
+
190
+
191
+ if __name__ == "__main__":
192
+ app.run()
code_examples/1_basics/2_first_programs.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import marimo
2
+
3
+ __generated_with = "0.11.9"
4
+ app = marimo.App()
5
+
6
+
7
+ @app.cell(hide_code=True)
8
+ def _():
9
+ import marimo as mo
10
+ import synalinks
11
+
12
+ synalinks.backend.clear_session()
13
+ return mo, synalinks
14
+
15
+
16
+ @app.cell(hide_code=True)
17
+ def _(mo):
18
+ mo.md(
19
+ r"""
20
+ # Your first programs
21
+
22
+ The main concept of Synalinks, is that an application (we call it a `Program`)
23
+ is a computation graph with JSON data (called `JsonDataModel`) as edges and
24
+ `Operation`s as nodes. What set apart Synalinks from other similar frameworks
25
+ like DSPy or AdalFlow is that we focus on graph-based systems but also that
26
+ it allow users to declare the computation graph using a Functional API inherited
27
+ from [Keras](https://keras.io/).
28
+
29
+ About modules, similar to layers in deep learning applications, modules are
30
+ composable blocks that you can assemble in multiple ways. Providing a modular
31
+ and composable architecture to experiment and unlock creativity.
32
+
33
+ Note that each `Program` is also a `Module`! Allowing you to encapsulate them
34
+ as you want.
35
+
36
+ Many people think that what enabled the Deep Learning revolution was compute
37
+ and data, but in reality, frameworks also played a pivotal role as they enabled
38
+ researchers and engineers to create complex architectures without having to
39
+ re-implement everything from scratch.
40
+ """
41
+ )
42
+ return
43
+
44
+
45
+ @app.cell
46
+ def _(synalinks):
47
+ # Now we can define the data models that we are going to use in the notebook.
48
+ # Note that Synalinks use Pydantic as default data backend, which is compatible with FastAPI and structured output.
49
+
50
+ class Query(synalinks.DataModel):
51
+ query: str = synalinks.Field(
52
+ description="The user query",
53
+ )
54
+
55
+ class AnswerWithThinking(synalinks.DataModel):
56
+ thinking: str = synalinks.Field(
57
+ description="Your step by step thinking process",
58
+ )
59
+ answer: str = synalinks.Field(
60
+ description="The correct answer",
61
+ )
62
+
63
+ return AnswerWithThinking, Query
64
+
65
+
66
+ @app.cell(hide_code=True)
67
+ def _(mo):
68
+ mo.md(
69
+ r"""
70
+ ## Functional API
71
+
72
+ You can program your application using 3 different ways, let's start with the
73
+ Functional way.
74
+
75
+ In this case, you start from `Input` and you chain modules calls to specify the
76
+ program's structure, and finally, you create your program from inputs and outputs:
77
+ """
78
+ )
79
+ return
80
+
81
+
82
+ @app.cell
83
+ async def _(AnswerWithThinking, Query, synalinks):
84
+
85
+ language_model = synalinks.LanguageModel(
86
+ model="openai/gpt-4o-mini",
87
+ )
88
+
89
+ _x0 = synalinks.Input(data_model=Query)
90
+ _x1 = await synalinks.Generator(
91
+ data_model=AnswerWithThinking,
92
+ language_model=language_model,
93
+ )(_x0)
94
+
95
+ program = synalinks.Program(
96
+ inputs=_x0,
97
+ outputs=_x1,
98
+ name="chain_of_thought",
99
+ description="Usefull to answer in a step by step manner.",
100
+ )
101
+ return language_model, program
102
+
103
+
104
+ @app.cell
105
+ def _(program):
106
+ # You can print a summary of your program in a table format
107
+ # which is really usefull to have a quick overview of your application
108
+
109
+ program.summary()
110
+ return
111
+
112
+
113
+ @app.cell
114
+ def _(mo, program, synalinks):
115
+ # Or plot your program in a graph format
116
+
117
+ synalinks.utils.plot_program(
118
+ program,
119
+ show_module_names=True,
120
+ show_trainable=True,
121
+ show_schemas=True,
122
+ )
123
+ return
124
+
125
+
126
+ @app.cell(hide_code=True)
127
+ def _(mo):
128
+ mo.md(
129
+ r"""
130
+ ## Subclassing the `Program` class
131
+
132
+ Now let's try to program it using another method, subclassing the `Program`
133
+ class.
134
+
135
+ In that case, you should define your modules in `__init__()` and you should
136
+ implement the program's structure in `call()`.
137
+ """
138
+ )
139
+ return
140
+
141
+
142
+ @app.cell
143
+ def _(AnswerWithThinking, language_model, synalinks):
144
+
145
+ class ChainOfThought(synalinks.Program):
146
+ """Usefull to answer in a step by step manner.
147
+
148
+ The first line of the docstring is provided as description for the program
149
+ if not provided in the `super().__init__()`. In a similar way the name is
150
+ automatically infered based on the class name if not provided.
151
+ """
152
+
153
+ def __init__(self, language_model=None):
154
+ super().__init__()
155
+ self.answer = synalinks.Generator(
156
+ data_model=AnswerWithThinking, language_model=language_model
157
+ )
158
+
159
+ async def call(self, inputs, training=False):
160
+ x = await self.answer(inputs)
161
+ return x
162
+
163
+ def get_config(self):
164
+ config = {
165
+ "name": self.name,
166
+ "description": self.description,
167
+ "trainable": self.trainable,
168
+ }
169
+ language_model_config = {
170
+ "language_model": synalinks.saving.serialize_synalinks_object(
171
+ self.language_model
172
+ )
173
+ }
174
+ return {**config, **language_model_config}
175
+
176
+ @classmethod
177
+ def from_config(cls, config):
178
+ language_model = synalinks.saving.deserialize_synalinks_object(
179
+ config.pop("language_model")
180
+ )
181
+ return cls(language_model=language_model, **config)
182
+
183
+ program_1 = ChainOfThought(language_model=language_model)
184
+ return ChainOfThought, program_1
185
+
186
+
187
+ @app.cell
188
+ def _(program_1):
189
+ program_1.summary()
190
+ return
191
+
192
+
193
+ @app.cell(hide_code=True)
194
+ def _(mo):
195
+ mo.md(
196
+ r"""
197
+ Note that the program isn't actually built; this behavior is intended and
198
+ means that it can accept any kind of input, making the program truly
199
+ generalizable. Now we can explore the last way of programming as well as
200
+ illustrate one of the key features of Synalinks, composability.
201
+
202
+ ## Using `Sequential` program
203
+
204
+ In addition to the other ways of programming, `Sequential` is a special
205
+ case of programs where the program is purely a stack of single-input,
206
+ single-output modules.
207
+
208
+ In this example, we are going to re-use the `ChainOfThought` program that
209
+ we defined previously, illustrating the modularity of the framework.
210
+ """
211
+ )
212
+ return
213
+
214
+
215
+ @app.cell
216
+ def _(ChainOfThought, Query, language_model, synalinks):
217
+ program_2 = synalinks.Sequential(
218
+ [
219
+ synalinks.Input(data_model=Query),
220
+ ChainOfThought(language_model=language_model),
221
+ ],
222
+ name="chain_of_thought",
223
+ description="Usefull to answer in a step by step manner.",
224
+ )
225
+ program_2.summary()
226
+ return (program_2,)
227
+
228
+
229
+ @app.cell(hide_code=True)
230
+ def _(mo):
231
+ mo.md(
232
+ r"""
233
+ ## Running your programs
234
+
235
+ In order to run your program, you just have to call it with the input data model
236
+ as argument.
237
+ """
238
+ )
239
+ return
240
+
241
+
242
+ @app.cell(hide_code=True)
243
+ def _(mo):
244
+ openai_api_key = mo.ui.text_area(placeholder="Your OpenAI API key...").form()
245
+ openai_api_key
246
+ return
247
+
248
+
249
+ @app.cell(hide_code=True)
250
+ def _(mo, openai_api_key):
251
+ import os
252
+ mo.stop(not openai_api_key.value)
253
+ os.environ["OPENAI_API_KEY"] = openai_api_key.value
254
+ return
255
+
256
+
257
+ @app.cell(hide_code=True)
258
+ def _(mo):
259
+ run_button = mo.ui.run_button(label="Run program")
260
+ run_button.center()
261
+ return run_button
262
+
263
+
264
+ @app.cell
265
+ async def _(Query, program_2):
266
+ mo.stop(not openai_api_key.value, mo.md("Provide your OpenAI API key"))
267
+ mo.stop(not run_button.value, mo.md("Click on the run button above"))
268
+
269
+ result = await program_2(
270
+ Query(query="What are the key aspects of human cognition?"),
271
+ )
272
+
273
+ print(result.pretty_json())
274
+
275
+ return (result,)
276
+
277
+
278
+ @app.cell(hide_code=True)
279
+ def _(mo):
280
+ mo.md(
281
+ r"""
282
+ ## Conclusion
283
+
284
+ Congratulations! You've successfully explored the fundamental concepts of programming
285
+ applications using Synalinks. By understanding and implementing the Functional API,
286
+ subclassing the `Program` class, and using `Sequential` programs, you've gained a
287
+ solid foundation in creating modular and composable applications.
288
+
289
+ Now that we know how to program applications, you can learn how to control
290
+ the data flow in the next notebook.
291
+
292
+ ### Key Takeaways
293
+
294
+ - **Functional API**: Allows you to chain modules to define the program's structure,
295
+ providing a clear and intuitive way to build applications.
296
+ - **Subclassing**: Offers flexibility and control by defining modules and implementing
297
+ the program's structure from scratch within a class.
298
+ - **Sequential Programs**: Simplifies the creation of linear workflows, making it easy
299
+ to stack single-input, single-output modules.
300
+ - **Modularity and Composability**: Enables the reuse of components, fostering creativity
301
+ and efficiency in application development.
302
+ """
303
+ )
304
+ return
305
+
306
+
307
+ if __name__ == "__main__":
308
+ app.run()
code_examples/1_basics/3_control_flow.py ADDED
@@ -0,0 +1,487 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import marimo
2
+
3
+ __generated_with = "0.11.9"
4
+ app = marimo.App()
5
+
6
+
7
+ @app.cell(hide_code=True)
8
+ def _():
9
+ import marimo as mo
10
+ import synalinks
11
+
12
+ synalinks.backend.clear_session()
13
+ return mo, synalinks
14
+
15
+
16
+ @app.cell(hide_code=True)
17
+ def _(mo):
18
+ mo.md(
19
+ r"""
20
+ # Control Flow
21
+
22
+ Controlling the flow of information in a program is an essential feature of any LM framework.
23
+ In Synalinks, we implemented it in a circuit-like fashion, where the flow of information can be
24
+ conditionally or logically restricted to only flow in a subset of a computation graph.
25
+
26
+ ## Parallel Branches
27
+
28
+ To create parallel branches, all you need to do is using the same inputs when declaring the modules.
29
+ Then Synalinks will automatically detect them and run them in parallel with asyncio.
30
+ """
31
+ )
32
+ return
33
+
34
+
35
+ @app.cell
36
+ async def _(synalinks):
37
+ class Query(synalinks.DataModel):
38
+ query: str = synalinks.Field(
39
+ description="The user query",
40
+ )
41
+
42
+ class AnswerWithThinking(synalinks.DataModel):
43
+ thinking: str = synalinks.Field(
44
+ description="Your step by step thinking process",
45
+ )
46
+ answer: str = synalinks.Field(
47
+ description="The correct answer",
48
+ )
49
+
50
+ language_model = synalinks.LanguageModel(model="ollama_chat/deepseek-r1")
51
+ _x0 = synalinks.Input(data_model=Query)
52
+ _x1 = await synalinks.Generator(
53
+ data_model=AnswerWithThinking,
54
+ language_model=language_model,
55
+ )(_x0)
56
+ _x2 = await synalinks.Generator(
57
+ data_model=AnswerWithThinking,
58
+ language_model=language_model,
59
+ )(_x0)
60
+
61
+ program = synalinks.Program(
62
+ inputs=_x0,
63
+ outputs=[_x1, _x2],
64
+ name="parallel_branches",
65
+ description="Illustrate the use of parallel branching",
66
+ )
67
+ return AnswerWithThinking, Query, language_model, program, synalinks
68
+
69
+
70
+ @app.cell
71
+ def _(mo, program, synalinks):
72
+ synalinks.utils.plot_program(
73
+ program,
74
+ show_module_names=True,
75
+ show_schemas=True,
76
+ show_trainable=True,
77
+ )
78
+ return
79
+
80
+
81
+ @app.cell(hide_code=True)
82
+ def _(mo):
83
+ mo.md(
84
+ r"""
85
+ ## Decisions
86
+
87
+ Decisions in Synalinks can be viewed as a single-label classification; they allow
88
+ the system to classify the inputs based on a question and labels to choose from.
89
+ The labels are used to create on the fly an Enum schema that ensures, thanks to
90
+ constrained structured output, that the system will answer one of the provided labels.
91
+ """
92
+ )
93
+ return
94
+
95
+
96
+ @app.cell
97
+ async def _(Query, language_model, synalinks):
98
+ _x0 = synalinks.Input(data_model=Query)
99
+ _x1 = await synalinks.Decision(
100
+ question="Evaluate the difficulty to answer the provided query",
101
+ labels=["easy", "difficult"],
102
+ language_model=language_model,
103
+ )(_x0)
104
+
105
+ program_1 = synalinks.Program(
106
+ inputs=_x0,
107
+ outputs=_x1,
108
+ name="decision_making",
109
+ description="Illustrate the decision making process",
110
+ )
111
+ return (program_1,)
112
+
113
+
114
+ @app.cell
115
+ def _(mo, program_1, synalinks):
116
+ synalinks.utils.plot_program(
117
+ program_1,
118
+ show_module_names=True,
119
+ show_schemas=True,
120
+ show_trainable=True,
121
+ )
122
+ return
123
+
124
+
125
+ @app.cell(hide_code=True)
126
+ def _(mo):
127
+ mo.md(
128
+ r"""
129
+ ## Conditional Branches
130
+
131
+ To make conditional branches, we will need the help of a core module: The Branch
132
+ module. This module uses a decision and routes the input data model to the selected
133
+ branch. When a branch is not selected, that branch outputs a None.
134
+ """
135
+ )
136
+ return
137
+
138
+
139
+ @app.cell
140
+ async def _(AnswerWithThinking, Query, language_model, synalinks):
141
+ class Answer(synalinks.DataModel):
142
+ answer: str = synalinks.Field(
143
+ description="The correct answer",
144
+ )
145
+
146
+ _x0 = synalinks.Input(data_model=Query)
147
+ (_x1, _x2) = await synalinks.Branch(
148
+ question="Evaluate the difficulty to answer the provided query",
149
+ labels=["easy", "difficult"],
150
+ branches=[
151
+ synalinks.Generator(
152
+ data_model=Answer,
153
+ language_model=language_model,
154
+ ),
155
+ synalinks.Generator(
156
+ data_model=AnswerWithThinking,
157
+ language_model=language_model,
158
+ ),
159
+ ],
160
+ )(_x0)
161
+
162
+ program_2 = synalinks.Program(
163
+ inputs=_x0,
164
+ outputs=[_x1, _x2],
165
+ name="conditional_branches",
166
+ description="Illustrate the conditional branches",
167
+ )
168
+ return Answer, program_2
169
+
170
+
171
+ @app.cell
172
+ def _(mo, program_2, synalinks):
173
+ synalinks.utils.plot_program(
174
+ program_2,
175
+ show_module_names=True,
176
+ show_schemas=True,
177
+ show_trainable=True,
178
+ )
179
+ return
180
+
181
+
182
+ @app.cell(hide_code=True)
183
+ def _(mo):
184
+ mo.md(
185
+ r"""
186
+ ## Data Models Operators
187
+
188
+ Synalinks implements a few operators that work with data models; some of them are
189
+ straightforward, like the concatenation, implemented in the Python `+` operator.
190
+ But others like the `logical_and` and `logical_or` implemented respectively
191
+ in the `&` and `|` operator are more difficult to grasp at first. As explained
192
+ above, in the conditional branches, the branch not selected will have a None
193
+ as output. To account for that fact and to implement logical flows, we need operators
194
+ that can work with them. See the [Ops API](https://synalinks.github.io/synalinks/Synalinks%20API/Ops%20API/)
195
+ section for an extensive list of all data model operations.
196
+
197
+ ### Concatenation
198
+
199
+ The concatenation, consist in creating a data model that have the fields of both
200
+ inputs. When one of the inputs is `None`, it raises an exception. Note that you can
201
+ use the concatenation, like any other operator, at a meta-class level, meaning
202
+ you can actually concatenate data model types.
203
+
204
+ ### Concatenation Table
205
+ """
206
+ )
207
+ return
208
+
209
+
210
+ @app.cell(hide_code=True)
211
+ def _(mo):
212
+ mo.md(
213
+ r"""
214
+ | `x1` | `x2` | Concat (`+`) |
215
+ | ------ | ------ | ----------------- |
216
+ | `x1` | `x2` | `x1 + x2` |
217
+ | `x1` | `None` | `Exception` |
218
+ | `None` | `x2` | `Exception` |
219
+ | `None` | `None` | `Exception` |
220
+ """
221
+ ).center()
222
+ return
223
+
224
+
225
+ @app.cell(hide_code=True)
226
+ def _(mo):
227
+ mo.md(
228
+ r"""
229
+ ### Concatenation Example
230
+ """
231
+ )
232
+ return
233
+
234
+
235
+ @app.cell
236
+ async def _(AnswerWithThinking, Query, language_model, synalinks):
237
+ _x0 = synalinks.Input(data_model=Query)
238
+ _x1 = await synalinks.Generator(
239
+ data_model=AnswerWithThinking,
240
+ language_model=language_model,
241
+ )(_x0)
242
+ _x2 = await synalinks.Generator(
243
+ data_model=AnswerWithThinking,
244
+ language_model=language_model,
245
+ )(_x0)
246
+ _x3 = _x1 + _x2
247
+
248
+ program_3 = synalinks.Program(
249
+ inputs=_x0,
250
+ outputs=_x3,
251
+ name="concatenation",
252
+ description="Illustrate the use of concatenate",
253
+ )
254
+ return (program_3,)
255
+
256
+
257
+ @app.cell
258
+ def _(mo, program_3, synalinks):
259
+ synalinks.utils.plot_program(
260
+ program_3,
261
+ show_module_names=True,
262
+ show_schemas=True,
263
+ show_trainable=True,
264
+ )
265
+ return
266
+
267
+
268
+ @app.cell(hide_code=True)
269
+ def _(mo):
270
+ mo.md(
271
+ r"""
272
+ ### Logical And
273
+
274
+ The `logical_and` is a concatenation that, instead of raising an `Exception`,
275
+ outputs a `None`. This operator should be used when you have to concatenate
276
+ a data model with another one that can be `None`, like a `Branch` output.
277
+
278
+ ### Logical And Table
279
+ """
280
+ )
281
+ return
282
+
283
+ @app.cell(hide_code=True)
284
+ def _(mo):
285
+ mo.md(
286
+ r"""
287
+ | `x1` | `x2` | Logical And (`&`) |
288
+ | ------ | ------ | ----------------- |
289
+ | `x1` | `x2` | `x1 + x2` |
290
+ | `x1` | `None` | `None` |
291
+ | `None` | `x2` | `None` |
292
+ | `None` | `None` | `None` |
293
+ """
294
+ ).center()
295
+ return
296
+
297
+
298
+ @app.cell(hide_code=True)
299
+ def _(mo):
300
+ mo.md(
301
+ r"""
302
+ ### Logical And Example
303
+ """
304
+ )
305
+ return
306
+
307
+ @app.cell
308
+ async def _(Answer, AnswerWithThinking, Query, language_model, synalinks):
309
+ class Critique(synalinks.DataModel):
310
+ critique: str = synalinks.Field(
311
+ description="The critique of the answer",
312
+ )
313
+
314
+ _x0 = synalinks.Input(data_model=Query)
315
+ (_x1, _x2) = await synalinks.Branch(
316
+ question="Evaluate the difficulty to answer the provided query",
317
+ labels=["easy", "difficult"],
318
+ branches=[
319
+ synalinks.Generator(
320
+ data_model=Answer,
321
+ language_model=language_model,
322
+ ),
323
+ synalinks.Generator(
324
+ data_model=AnswerWithThinking,
325
+ language_model=language_model,
326
+ ),
327
+ ],
328
+ return_decision=False,
329
+ )(_x0)
330
+ _x3 = _x0 & _x1
331
+ _x4 = _x0 & _x2
332
+ _x5 = await synalinks.Generator(
333
+ data_model=Critique,
334
+ language_model=language_model,
335
+ return_inputs=True,
336
+ )(_x3)
337
+ _x6 = await synalinks.Generator(
338
+ data_model=Critique,
339
+ language_model=language_model,
340
+ return_inputs=True,
341
+ )(_x4)
342
+ _x7 = _x5 | _x6
343
+ _x8 = await synalinks.Generator(
344
+ data_model=Answer,
345
+ language_model=language_model,
346
+ )(_x7)
347
+
348
+ program_4 = synalinks.Program(
349
+ inputs=_x0,
350
+ outputs=_x8,
351
+ name="logical_and",
352
+ description="Illustrate the use of logical and",
353
+ )
354
+ return Critique, program_4
355
+
356
+
357
+ @app.cell
358
+ def _(mo, program_4, synalinks):
359
+ synalinks.utils.plot_program(
360
+ program_4,
361
+ show_module_names=True,
362
+ show_schemas=True,
363
+ show_trainable=True,
364
+ )
365
+ return
366
+
367
+
368
+ @app.cell(hide_code=True)
369
+ def _(mo):
370
+ mo.md(
371
+ r"""
372
+ ### Logical Or
373
+
374
+ The `logical_or` is used when you want to combine two data models, but you can
375
+ accommodate that one of them is `None`. Another use is to gather the outputs of
376
+ a `Branch`: as only one branch is active, it allows you to merge the branches' outputs
377
+ into a unique data model.
378
+
379
+
380
+ ### Logical Or Table
381
+ """
382
+ )
383
+ return
384
+
385
+
386
+ @app.cell(hide_code=True)
387
+ def _(mo):
388
+ mo.md(
389
+ r"""
390
+ | `x1` | `x2` | Logical Or (`|`) |
391
+ | ------ | ------ | ---------------- |
392
+ | `x1` | `x2` | `x1 + x2` |
393
+ | `x1` | `None` | `x1` |
394
+ | `None` | `x2` | `x2` |
395
+ | `None` | `None` | `None` |
396
+ """
397
+ ).center()
398
+ return
399
+
400
+
401
+ @app.cell(hide_code=True)
402
+ def _(mo):
403
+ mo.md(
404
+ r"""
405
+ ### Logical Or Example
406
+ """
407
+ )
408
+ return
409
+
410
+
411
+ @app.cell
412
+ async def _(Answer, AnswerWithThinking, Query, language_model, synalinks):
413
+ _x0 = synalinks.Input(data_model=Query)
414
+ (_x1, _x2) = await synalinks.Branch(
415
+ question="Evaluate the difficulty to answer the provided query",
416
+ labels=["easy", "difficult"],
417
+ branches=[
418
+ synalinks.Generator(
419
+ data_model=Answer,
420
+ language_model=language_model,
421
+ ),
422
+ synalinks.Generator(
423
+ data_model=AnswerWithThinking, language_model=language_model
424
+ ),
425
+ ],
426
+ return_decision=False,
427
+ )(_x0)
428
+ _x3 = _x1 | _x2
429
+
430
+ program_5 = synalinks.Program(
431
+ inputs=_x0,
432
+ outputs=_x3,
433
+ name="logical_or",
434
+ description="Illustrate the use of logical or",
435
+ )
436
+ return (program_5,)
437
+
438
+
439
+ @app.cell
440
+ def _(mo, program_5, synalinks):
441
+ synalinks.utils.plot_program(
442
+ program_5,
443
+ show_module_names=True,
444
+ show_schemas=True,
445
+ show_trainable=True,
446
+ )
447
+ return
448
+
449
+
450
+ @app.cell(hide_code=True)
451
+ async def _(mo):
452
+ mo.md(
453
+ r"""
454
+ ## Conclusion
455
+
456
+ In this notebook, we explored the fundamental concepts of controlling information
457
+ flow within Synalinks programs. We introduced the creation of parallel branches,
458
+ decision-making processes, and conditional branching, all of which are essential
459
+ for building dynamic and robust applications.
460
+
461
+ ### Key Takeaways
462
+
463
+ - **Parallel Branches**: We demonstrated how to run modules in parallel using
464
+ the same inputs, leveraging asyncio for concurrent execution.
465
+ This approach enhances performance and allows for simultaneous processing of tasks.
466
+
467
+ - **Decision-Making**: We introduced decision-making as a form of single-label
468
+ classification, enabling the system to classify inputs based on predefined
469
+ questions and labels. This ensures that the system's responses are structured
470
+ and adhere to the specified schemas.
471
+
472
+ - **Conditional Branching**: We explored the use of the Branch module to route
473
+ input data models based on decisions, allowing for conditional execution of
474
+ branches. This feature is essential for creating adaptive and context-aware
475
+ applications.
476
+
477
+ - **Data Model Operators**: We discussed various data model operators, such as
478
+ concatenation, logical AND, and logical OR. These operators enable
479
+ sophisticated data manipulation and flow control, ensuring robust program
480
+ execution even when branches output None.
481
+ """
482
+ )
483
+ return
484
+
485
+
486
+ if __name__ == "__main__":
487
+ app.run()
code_examples/1_basics/4_conversational_applications.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import marimo

# Version of marimo that generated this notebook (used for compatibility checks).
__generated_with = "0.11.9"
app = marimo.App()
5
+
6
+
7
@app.cell(hide_code=True)
def _():
    # Notebook-wide imports; clear_session resets synalinks' global state so
    # re-running the notebook starts from a clean slate.
    import marimo as mo
    import synalinks

    synalinks.backend.clear_session()
    return mo, synalinks
14
+
15
+
16
@app.cell(hide_code=True)
def _(mo):
    # Intro markdown. Fixes: "a conversational applications" -> singular,
    # "responsible of handling" -> "responsible for handling".
    mo.md(
        r"""
        # Conversational Applications

        Synalinks is designed to handle conversational applications as well as
        query-based systems. In the case of a conversational application, the
        input data model is a list of chat messages, and the output an individual
        chat message. The `Program` is in that case responsible for handling a
        **single conversation turn**.
        """
    )
    return
30
+
31
+
32
@app.cell(hide_code=True)
def _(mo):
    # Markdown notes on the default Generator output and on streaming limits.
    mo.md(
        r"""
        Now we can program our application like you would do with any `Program`. For this example,
        we are going to make a very simple chatbot.

        By default, if no data_model/schema is provided to the `Generator` it will output a `ChatMessage` like output.
        If the data model is `None`, then you can enable streaming.

        **Note:** Streaming is disabled during training and should only be used in the **last** `Generator` of your pipeline.
        """
    )
    return
46
+
47
+
48
@app.cell
async def _(synalinks):
    # Chat data models: a whole history in, a single message out.
    from synalinks.backend import ChatMessage, ChatRole, ChatMessages

    language_model = synalinks.LanguageModel(
        model="openai/gpt-4o-mini",
    )

    # Single-turn pipeline: the chat history flows into one Generator that
    # produces the assistant's next message.
    _chat_in = synalinks.Input(data_model=ChatMessages)
    _chat_out = await synalinks.Generator(
        language_model=language_model,
        prompt_template=synalinks.chat_prompt_template(),
        streaming=False,  # Marimo chat don't handle streaming yet
    )(_chat_in)

    program = synalinks.Program(
        inputs=_chat_in,
        outputs=_chat_out,
    )

    # Let's plot this program to understand it

    synalinks.utils.plot_program(
        program,
        show_module_names=True,
        show_trainable=True,
        show_schemas=True,
    )
    return ChatMessage, ChatMessages, ChatRole, language_model, program
79
+
80
+
81
@app.cell(hide_code=True)
def _(mo):
    # Section header for the interactive chat demo below.
    mo.md(
        r"""
        ## Running the chatbot inside the notebook

        In this example, we will show you how to run the conversational application inside this reactive notebook.
        """
    )
    return
91
+
92
+
93
@app.cell(hide_code=True)
def _(mo):
    # Form input for the user's OpenAI API key (kept out of the source).
    openai_api_key = mo.ui.text_area(placeholder="Your OpenAI API key...").form()
    openai_api_key
    # Fix: the original cell had a bare `return`, so `openai_api_key` was never
    # exported and the downstream cells that declare it as a parameter would
    # fail to resolve it.
    return (openai_api_key,)
98
+
99
+
100
@app.cell(hide_code=True)
def _(mo, openai_api_key):
    # Export the submitted key for the OpenAI client; halt the cell (and its
    # dependents) until the form has a value.
    import os
    mo.stop(not openai_api_key.value)
    os.environ["OPENAI_API_KEY"] = openai_api_key.value
    return
106
+
107
+
108
@app.cell(hide_code=True)
def _(ChatMessage, ChatMessages, ChatRole, mo, openai_api_key, program):
    # Fix: `openai_api_key` was referenced but not declared as a cell
    # parameter, which raises NameError in a marimo cell.
    mo.stop(not openai_api_key.value, mo.md("Provide your OpenAI API key"))

    def cleanup_assistant_message(msg):
        # Strip the HTML wrapper marimo adds around rendered assistant
        # messages so only the raw text is fed back to the model.
        start_tok = '<span class="paragraph">'
        end_tok = "</span>"
        if msg.content.find(start_tok) > 0:
            msg.content = msg.content[msg.content.find(start_tok) + len(start_tok) :]
        if msg.content.find(end_tok, 1) > 0:
            msg.content = msg.content[: msg.content.find(end_tok, 1)]
        return msg

    async def synalinks_program(messages, config):
        # Rebuild the full chat history as a ChatMessages data model and run
        # one conversation turn through the program.
        chat_history = ChatMessages()
        for msg in messages:
            if msg.role == "user":
                chat_history.messages.append(
                    ChatMessage(
                        role=ChatRole.USER,
                        content=msg.content,
                    )
                )
            else:
                msg = cleanup_assistant_message(msg)
                chat_history.messages.append(
                    ChatMessage(
                        role=ChatRole.ASSISTANT,
                        content=msg.content,
                    )
                )
        result = await program(chat_history)
        return result.get("content")

    chat = mo.ui.chat(synalinks_program)
    chat
    return chat, cleanup_assistant_message, synalinks_program
145
+
146
+
147
@app.cell(hide_code=True)
async def _(mo):
    # Closing markdown cell. Fixes: "Synalinks handle" -> "handles",
    # "You have now a solid understanding to create" -> smoother phrasing.
    mo.md(
        r"""
        ## Conclusion

        In this notebook, we explored how Synalinks handles conversational applications.
        You now have a solid understanding of how to create chatbots and conversational agents.

        ### Key Takeaways

        - **Conversational Flow Management**: Synalinks effectively manages conversational
          applications by handling inputs as a list of chat messages and generating
          individual chat messages as outputs. This structure allows for efficient
          processing of conversation turns.

        - **Streaming and Real-Time Interaction**: Synalinks supports streaming for
          real-time interactions, enhancing user engagement. However, streaming is
          disabled during training and should be used only in the final `Generator`.

        - **Customizable Prompt Templates**: The prompt templates can be tailored to fit
          conversational contexts, guiding the language model to produce coherent and
          relevant responses.
        """
    )
    return
173
+
174
+
175
# Run the marimo app when this notebook is executed as a script.
if __name__ == "__main__":
    app.run()
code_examples/1_basics/5_rewards_metrics_and_optimizers.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import marimo

# Version of marimo that generated this notebook (used for compatibility checks).
__generated_with = "0.11.9"
app = marimo.App()
5
+
6
+
7
@app.cell(hide_code=True)
def _():
    # Notebook-wide imports; clear_session resets synalinks' global state.
    import marimo as mo
    import synalinks

    synalinks.backend.clear_session()
    return mo, synalinks
14
+
15
+
16
@app.cell(hide_code=True)
def _(mo):
    # Markdown: what a Reward is and its role in the training loop.
    mo.md(
        r"""
        # Rewards, Metrics & Optimizers

        ## Understanding Rewards

        `Reward`s are an essential part of reinforcement learning frameworks.
        They are typically float values (usually between 0.0 and 1.0, but they can be
        negative also) that guide the process into making more efficient decisions or
        predictions. During training, the goal is to maximize the reward function.
        The reward gives the system an indication of how well it performed for that task.
        """
    )
    return
32
+
33
+
34
@app.cell(hide_code=True)
def _(mo):
    # Diagram of the training loop: data -> program -> reward -> optimizer -> program.
    mo.mermaid(
        r"""
        graph LR
        A[Training Data] -->|Provide x:DataModel| B[Program];
        B -->|Generate y_pred:JsonDataModel| C[Reward];
        A -->|Provide y_true:DataModel| C;
        C -->|Compute reward:Float| D[Optimizer];
        D -->|Update trainable_variable:Variable| B;
        """
    )
    return
47
+
48
+
49
@app.cell(hide_code=True)
def _(mo):
    # Long-form markdown on rewards, metrics, masking and optimizers.
    # Fixes: several typos/grammar issues ("makes possible", "provide for",
    # "if you needs", "each others", "Additionaly", "have to output",
    # "additonal", "a `out_mask`", "This parameters", and "above example"
    # pointing at an example that actually follows this cell).
    mo.md(
        r"""
        This reinforcement loop is what makes it possible for the system to learn by
        repeatedly making predictions and refining its knowledge/methodology in order
        to maximize the reward.

        All rewards consist of a function or program that takes two inputs:

        - `y_pred`: The prediction of the program.
        - `y_true`: The ground truth/target value provided by the training data.

        In Synalinks, we provide several built-in rewards but it is also possible to
        easily create new rewards if you need to. Overall the choice will depend on the
        task to perform. You can have a look at the rewards provided in the
        [API section](https://synalinks.github.io/synalinks/Synalinks%20API/Rewards/).

        ### Understanding Metrics

        `Metric`s are scalar values that are monitored during training and evaluation.
        These values are used to know which program is best, in order to save it. Or to
        provide additional information to compare different architectures with each other.
        Unlike `Reward`s, a `Metric` is not used during training, meaning the metric value
        is not backpropagated. Additionally, every reward function can be used as a metric.
        You can have a look at the metrics provided in the
        [API section](https://synalinks.github.io/synalinks/Synalinks%20API/Metrics/).

        ### Predictions Filtering

        Sometimes, your program has to output a complex JSON but you want to evaluate
        just part of it. This could be because your training data only includes a subset
        of the JSON, or because the additional fields were added only to help the LMs.
        In that case, you have to filter out or filter in your predictions and ground
        truth. Meaning that you want to remove or keep respectively only specific fields
        of your JSON data. This can be achieved by adding an `out_mask` or `in_mask` list
        parameter containing the keys to remove or keep for evaluation. These parameters
        can be added to both rewards and metrics. Like in the example below, where we only
        keep the field `answer` to compute the rewards and metrics.

        ### Understanding Optimizers

        Optimizers are systems that handle the update of the module's state in order to
        make them more performant. They are in charge of backpropagating the rewards
        from the training process and select or generate examples and hints for the LMs.

        Here is an example of program compilation, which is how you configure the reward,
        metrics, and optimizer:
        """
    )
    return
100
+
101
+
102
@app.cell
def _(synalinks):
    # Data models for the QA task: the user's input query and a structured
    # answer that carries an explicit chain-of-thought field.
    class Query(synalinks.DataModel):
        query: str = synalinks.Field(
            description="The user query",
        )

    class AnswerWithThinking(synalinks.DataModel):
        thinking: str = synalinks.Field(
            description="Your step by step thinking process",
        )
        answer: str = synalinks.Field(
            description="The correct answer",
        )
    return Query, AnswerWithThinking
118
+
119
+
120
@app.cell
async def _(AnswerWithThinking, Query, synalinks):
    # Fixes: `Query` and `AnswerWithThinking` were used without being declared
    # as cell parameters (NameError in marimo); "Usefull" typo in the
    # user-facing description; bare `return program` changed to the tuple form
    # marimo's generated cells use.
    language_model = synalinks.LanguageModel(
        model="openai/gpt-4o-mini",
    )

    # Chain-of-thought pipeline: Query -> Generator -> AnswerWithThinking.
    _x0 = synalinks.Input(data_model=Query)
    _x1 = await synalinks.Generator(
        data_model=AnswerWithThinking,
        language_model=language_model,
    )(_x0)

    program = synalinks.Program(
        inputs=_x0,
        outputs=_x1,
        name="chain_of_thought",
        description="Useful to answer in a step by step manner.",
    )

    # Reward and metric are both masked to the `answer` field only.
    program.compile(
        reward=synalinks.rewards.CosineSimilarity(in_mask=["answer"]),
        optimizer=synalinks.optimizers.RandomFewShot(),
        metrics=[
            synalinks.metrics.F1Score(in_mask=["answer"]),
        ],
    )
    return (program,)
148
+
149
+
150
@app.cell(hide_code=True)
async def _(mo):
    # Closing markdown cell summarizing rewards, metrics, optimizers, masking.
    mo.md(
        r"""
        ## Conclusion

        In this notebook, we explored the fundamental concepts of training and
        optimizing Synalinks programs using rewards, metrics, and optimizers.
        These components are crucial for building efficient and adaptive language
        model applications.

        ### Key Takeaways

        - **Rewards**: `Reward`s guide the reinforcement learning process by
          providing feedback on the system's performance. They are typically
          float values that indicate how well the system performed a task,
          with the goal of maximizing the reward function during training.
          Synalinks offers built-in rewards and allows for custom reward
          functions to suit specific tasks.

        - **Metrics**: `Metric`s are scalar values monitored during training
          and evaluation to determine the best-performing program. Unlike
          rewards, metrics are not used for backpropagation. They provide
          additional insights for comparing different architectures and
          saving the optimal model.

        - **Optimizers**: `Optimizer`s update the module's state to improve
          performance. They handle the backpropagation of rewards and
          select or generate examples and hints for the language models.
          Proper configuration of optimizers is essential for effective
          training.

        - **Filtering Outputs**: When dealing with complex JSON outputs,
          filtering predictions and ground truths using `out_mask` or
          `in_mask` parameters ensures that only relevant fields are
          evaluated. This is particularly useful when the training data
          includes a subset of the JSON or when additional fields are
          used to aid the language models.
        """
    )
    return
191
+
192
+
193
# Run the marimo app when this notebook is executed as a script.
if __name__ == "__main__":
    app.run()
code_examples/1_basics/6_training_programs.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import marimo

# Version of marimo that generated this notebook (used for compatibility checks).
__generated_with = "0.11.9"
app = marimo.App()
5
+
6
+
7
@app.cell(hide_code=True)
def _():
    # Notebook-wide imports; clear_session resets synalinks' global state.
    import marimo as mo
    import synalinks

    synalinks.backend.clear_session()
    return mo, synalinks
14
+
15
+
16
@app.cell(hide_code=True)
def _(mo):
    # Intro markdown. Fixes typos: "Nowedays", "interesing", and the
    # agreement error "their test set have been included".
    mo.md(
        r"""
        # Training Programs

        Like in machine learning, a LM application needs to be trained. In that case, we
        don't update the weights of the model, but optimize the prompts by automatically
        picking the best examples or generate hints in order to help the program to
        perform better on your dataset.

        For this lesson we are going to work on GSM8k a well known dataset of grade school
        math word problems. Nowadays, most (all?) public datasets have been leaked, meaning
        that their test sets have been included in the LM trainset. This basically means
        that the baseline score won't give you much information about the reasoning abilities
        of the underlying language model (but more about its capability to remember),
        however it is still interesting to have it as a baseline to evaluate the progress
        of the programs training and the neuro-symbolic methods used or if you use small
        models like here.

        First, let's have a look at the dataset.
        """
    )
    return
40
+
41
+
42
@app.cell
def _(synalinks):
    # Inspect the input side of the GSM8K dataset (the question schema).
    gsm8k_input_data_model = synalinks.datasets.gsm8k.get_input_data_model()
    print("GSM8K input schema:\n")
    print(gsm8k_input_data_model.pretty_schema())
    return (gsm8k_input_data_model,)
48
+
49
+
50
@app.cell
def _(synalinks):
    # Inspect the output side of the GSM8K dataset (thinking + answer schema).
    gsm8k_output_data_model = synalinks.datasets.gsm8k.get_output_data_model()
    print("GSM8K output schema:\n")
    print(gsm8k_output_data_model.pretty_schema())
    return (gsm8k_output_data_model,)
56
+
57
+
58
@app.cell(hide_code=True)
def _(mo):
    # Section header for the baseline pipeline built below.
    mo.md(
        r"""
        ## Programming the pipeline

        Now let's make a simple baseline program like in the first lessons
        For this example we are going to use the data models from GSM8k.
        """
    )
    return
69
+
70
+
71
@app.cell
async def _(gsm8k_input_data_model, gsm8k_output_data_model, synalinks):
    # Baseline chain-of-thought program over the GSM8K data models.
    # Fix: "Usefull" -> "Useful" in the user-facing description.
    language_model = synalinks.LanguageModel(
        model="openai/gpt-4o-mini",
    )

    _x0 = synalinks.Input(data_model=gsm8k_input_data_model)
    _x1 = await synalinks.Generator(
        data_model=gsm8k_output_data_model,
        language_model=language_model,
    )(_x0)

    program = synalinks.Program(
        inputs=_x0,
        outputs=_x1,
        name="chain_of_thought",
        description="Useful to answer in a step by step manner.",
    )
    return language_model, program
91
+
92
+
93
@app.cell(hide_code=True)
def _(mo):
    # Markdown describing the compilation choices.
    # Fixes: "fucntion" -> "function", "match with the dataset one" -> grammar.
    mo.md(
        r"""
        ## Compiling the program

        For this example, we are going to select the `RandomFewShot` optimizer.
        The reward function will be `ExactMatch` masked to match only the numerical answer.
        While the additional metric will be the `F1Score` masked to process only the LMs thinking.

        This metric will give us an indication to see if the chain of thought matches the dataset's one.
        """
    )
    return
107
+
108
+
109
@app.cell
def _(program, synalinks):
    # Compile: ExactMatch on the numeric answer drives training, while F1 on
    # the thinking trace is tracked as an extra (non-backpropagated) metric.
    program.compile(
        optimizer=synalinks.optimizers.RandomFewShot(),
        reward=synalinks.rewards.ExactMatch(in_mask=["answer"]),
        metrics=[
            synalinks.metrics.F1Score(in_mask=["thinking"]),
        ],
    )
    return
119
+
120
+
121
@app.cell(hide_code=True)
def _(mo):
    # Glossary markdown. Fixes: "a batch result in" -> "results in",
    # "A epochs is an arbitrarly cutoff" -> "An epoch is an arbitrary cutoff",
    # "usefull" -> "useful".
    mo.md(
        r"""
        ## Training

        ### What do "sample", "batch", and "epoch" mean?

        - **Sample**: A sample is one element of a dataset. For example, one DataModel
          is one sample.
        - **Batch**: A batch is a set of N samples. The samples in a batch are processed
          independently, in parallel. During training, a batch results in only one
          program update. A batch approximates the input distribution better than a
          single input. The larger the batch, the better the approximation; however a
          larger batch will take longer to process and still result in only one update.
        - **Epochs**: An epoch is an arbitrary cutoff, generally defined as "one pass
          over the entire dataset", used to separate training into distinct phases,
          which is useful for logging and periodic evaluation. When using
          `validation_split` or `validation_data` with the `fit` method of Synalinks
          programs, evaluation will be run at the end of every epoch.
        """
    )
    return
144
+
145
+
146
@app.cell(hide_code=True)
def _(mo):
    # Button that gates the (potentially slow) dataset download below.
    load_data = mo.ui.run_button(label="Load dataset")
    load_data.center()
    return (load_data,)
151
+
152
+
153
@app.cell
def _(load_data, mo, synalinks):
    mo.stop(not load_data.value, mo.md("Click on the load button above"))
    # Now we can load the dataset
    # NOTE(review): load_data() may download GSM8K on first use — confirm
    # caching behavior before running offline.
    with mo.status.spinner(title="Loading dataset...") as _spinner:
        (x_train, y_train), (x_test, y_test) = synalinks.datasets.gsm8k.load_data()
        _spinner.update("Done.")
    return x_test, x_train, y_test, y_train
161
+
162
+
163
@app.cell(hide_code=True)
def _(mo, x_test, x_train):
    # Hyper-parameter sliders; sample counts are capped at the dataset sizes.
    epochs = mo.ui.slider(start=1, stop=64, value=5, label="Epochs")
    batch_size = mo.ui.slider(start=1, stop=64, value=32, label="Batch size")
    train_samples = mo.ui.slider(
        start=1, stop=len(x_train), value=50, label="Train Samples"
    )
    test_samples = mo.ui.slider(start=1, stop=len(x_test), value=50, label="Test Samples")
    return batch_size, epochs, test_samples, train_samples
172
+
173
+
174
@app.cell(hide_code=True)
def _(epochs, mo):
    # Fix: `mo` was used without being declared as a cell parameter (NameError).
    mo.hstack([epochs, mo.md(f"Epochs: {epochs.value}")])
    return
178
+
179
@app.cell(hide_code=True)
def _(batch_size, mo):
    # Fix: `mo` was used without being declared as a cell parameter (NameError).
    mo.hstack([batch_size, mo.md(f"Batch size: {batch_size.value}")])
    return
183
+
184
@app.cell(hide_code=True)
def _(mo, train_samples):
    # Fix: `mo` was used without being declared as a cell parameter (NameError).
    mo.hstack([train_samples, mo.md(f"Nb train samples: {train_samples.value}")])
    return
188
+
189
@app.cell(hide_code=True)
def _(mo, test_samples):
    # Fix: `mo` was used without being declared as a cell parameter (NameError).
    mo.hstack([test_samples, mo.md(f"Nb test samples: {test_samples.value}")])
    return
193
+
194
@app.cell(hide_code=True)
def _(mo):
    # Form input for the user's OpenAI API key (kept out of the source).
    openai_api_key = mo.ui.text_area(placeholder="Your OpenAI API key...").form()
    openai_api_key
    # Fix: the original cell had a bare `return`, so `openai_api_key` was never
    # exported to the cells that depend on it.
    return (openai_api_key,)
199
+
200
+
201
@app.cell(hide_code=True)
def _(mo, openai_api_key):
    # Export the submitted key for the OpenAI client; halt dependents until
    # the form has a value.
    import os
    mo.stop(not openai_api_key.value)
    os.environ["OPENAI_API_KEY"] = openai_api_key.value
    return
207
+
208
+
209
@app.cell(hide_code=True)
def _(mo):
    # Button that gates the (slow, API-billed) training run below.
    train_button = mo.ui.run_button(label="Train")
    train_button.center()
    return (train_button,)
214
+
215
+
216
@app.cell
async def train(
    batch_size,
    epochs,
    mo,
    openai_api_key,
    program,
    train_button,
    synalinks,
    test_samples,
    train_samples,
    x_test,
    x_train,
    y_test,
    y_train,
):
    # Fix: `openai_api_key` was referenced in the first mo.stop() but not
    # declared as a cell parameter, which raises NameError in a marimo cell.
    mo.stop(not openai_api_key.value, mo.md("Provide your OpenAI API key"))
    mo.stop(not train_button.value, mo.md("Click on the train button above"))
    # Where to save the best performing program
    checkpoint_filepath = "checkpoint.program.json"

    # Keep only the best program (highest validation reward) on disk.
    _program_checkpoint_callback = synalinks.callbacks.ProgramCheckpoint(
        filepath=checkpoint_filepath,
        monitor="val_reward",
        mode="max",
        save_best_only=True,
    )

    # For the purpose of the tutorial, we'll only train on the first N samples
    history = await program.fit(
        epochs=epochs.value,
        batch_size=batch_size.value,
        x=x_train[: train_samples.value],
        y=y_train[: train_samples.value],
        validation_data=(x_test[: test_samples.value], y_test[: test_samples.value]),
        callbacks=[_program_checkpoint_callback],
    )
    return checkpoint_filepath, history
254
+
255
+
256
@app.cell
def _(history, synalinks):
    # Plot reward/metric curves recorded by program.fit().
    synalinks.utils.plot_history(history)
    return
260
+
261
+
262
@app.cell(hide_code=True)
def _(mo):
    # Fix: the cell declared `synalinks` as its dependency but actually uses
    # `mo` (NameError in a marimo cell).
    mo.md(
        r"""
        ## Evaluate Checkpoint
        """
    )
    return
270
+
271
+
272
@app.cell
async def _(
    checkpoint_filepath,
    train,  # unused value, but declares a dependency so this runs after training
    x_test,
    y_test,
    test_samples,
    synalinks,
):
    # Fixes: the test slices used the slider object itself (`x_test[: test_samples]`
    # raises TypeError) instead of its `.value`; the cell was missing its final
    # `return`; evaluate() is awaited like fit() elsewhere in this notebook
    # (synalinks' async API) — NOTE(review): confirm evaluate is a coroutine.
    # Load the JSON serialized program from disk
    loaded_program = synalinks.Program.load(checkpoint_filepath)

    metrics = await loaded_program.evaluate(
        x=x_test[: test_samples.value],
        y=y_test[: test_samples.value],
    )

    synalinks.utils.plot_metrics(metrics)
    return
290
+
291
@app.cell(hide_code=True)
async def _(mo):
    # Closing markdown cell summarizing the training workflow.
    mo.md(
        r"""
        ## Conclusion

        In this notebook, we explored the process of training Synalinks programs
        to optimize their performance on specific datasets. By leveraging the GSM8k
        dataset of grade school math word problems, we demonstrated how to train a
        language model application to improve its reasoning abilities and accuracy.

        ### Key Takeaways

        - **Rewards**: `Reward`s guide the reinforcement learning process by
          providing feedback on the system's performance. They are typically
          float values that indicate how well the system performed a task,
          with the goal of maximizing the reward function during training.
          Synalinks offers built-in rewards and allows for custom reward
          functions to suit specific tasks.

        - **Metrics**: `Metric`s are scalar values monitored during training
          and evaluation to determine the best-performing program. Unlike
          rewards, metrics are not used for backpropagation. They provide
          additional insights for comparing different architectures and
          saving the optimal model.

        - **Optimizers**: `Optimizer`s update the module's state to improve
          performance. They handle the backpropagation of rewards and
          select or generate examples and hints for the language models.
          Proper configuration of optimizers is essential for effective
          training.

        - **Filtering Outputs**: When dealing with complex JSON outputs,
          filtering predictions and ground truths using `out_mask` or
          `in_mask` parameters ensures that only relevant fields are
          evaluated. This is particularly useful when the training data
          includes a subset of the JSON or when additional fields are
          used to aid the language models.
        """
    )
    return
332
+
333
+
334
# Run the marimo app when this notebook is executed as a script.
if __name__ == "__main__":
    app.run()
code_examples/2_advanced/1_implementing_custom_modules_and_programs_via_subclassing.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import marimo

# Version of marimo that generated this notebook (used for compatibility checks).
__generated_with = "0.11.9"
app = marimo.App()
5
+
6
+
7
@app.cell(hide_code=True)
def _():
    # Notebook-wide imports; clear_session resets synalinks' global state.
    import marimo as mo
    import synalinks

    synalinks.backend.clear_session()
    return mo, synalinks
14
+
15
+
16
@app.cell(hide_code=True)
def _(mo):
    # Intro markdown. Fixes multiple typos and grammar issues:
    # "This tutorial, is", "main abstraction", "encapsulate", "predicitons",
    # "straitforward", "previsous", "This algorithm a", "strucutre".
    mo.md(
        r"""
        # Implementing custom modules & programs via subclassing

        This tutorial is for more advanced users; it will cover how to
        create custom modules/programs via subclassing.

        In this tutorial, we will cover the following themes:

        - The `Module` class
        - The `add_variable()` method
        - Trainable and non-trainable variables
        - The `compute_output_spec()` and `build()` methods
        - The training argument in `call()`
        - Making sure your module/program can be serialized

        ---

        One of the main abstractions of Synalinks is the `Module` class.
        A `Module` encapsulates both a state (the module's variables) and
        a transformation from inputs to outputs (the `call()` method).

        For this tutorial, we are going to make a simple neuro-symbolic component
        called `BacktrackingOfThought`. This component is an adaptation of the
        famous backtracking algorithm, used a lot in symbolic planning/reasoning,
        combined with chain of thought, nowadays the most used technique to enhance
        the LMs predictions.

        The principle is straightforward: the component will have to "think" then
        we will critique at runtime the thinking and aggregate it to
        the current chain of thinking only if it is above the given threshold.
        This mechanism will allow the system to discard bad thinking to resume
        at the previous step. Additionally we will add a stop condition.

        This algorithm is a simplified version of the popular `TreeOfThought` that
        instead of being a tree structure, is only a sequential chain of thinking.
        """
    )
    return
57
+
58
+
59
@app.cell
def _(synalinks):
    class Thinking(synalinks.DataModel):
        # One intermediate reasoning step produced by the LM.
        thinking: str = synalinks.Field(
            description="Your step by step thinking"
        )

    class CritiqueWithReward(synalinks.DataModel):
        # Critique of a thinking step plus a scalar score in [0.0, 1.0].
        critique: str = synalinks.Field(description="The step by step critique")
        reward: float = synalinks.Field(
            description="The reward corresponding to the critique between [0.0, 1.0]",
            le=1.0,
            ge=0.0,
        )

    # Sequential chain-of-thought with backtracking: generate a thinking step,
    # score it, keep it only when the score clears `backtracking_threshold`,
    # and stop iterating once it clears `stop_threshold`.
    # (Comments are used instead of a class docstring so the module's
    # auto-inferred description is not changed.)
    #
    # Fixes vs. the original:
    # - `for i in self.max_iterations:` iterated over an int (TypeError);
    #   now `range(self.max_iterations)`.
    # - `from_config` only popped "critique_program" when truthy, so a None
    #   value stayed in **config and was passed twice (TypeError); now popped
    #   unconditionally.
    # - `compute_output_spec` passes the schema by keyword, consistent with
    #   every other SymbolicDataModel construction in this cell.
    class BacktrackingOfThought(synalinks.Module):
        def __init__(
            self,
            schema=None,
            data_model=None,
            language_model=None,
            backtracking_threshold=0.5,
            stop_threshold=0.8,
            max_iterations=5,
            critique_program=None,
            prompt_template=None,
            examples=None,
            hints=None,
            use_inputs_schema=False,
            use_outputs_schema=False,
            name=None,
            description=None,
            trainable=None,
        ):
            super().__init__(
                name=name,
                description=description,
                trainable=trainable,
            )
            if not schema and data_model:
                schema = data_model.schema()
            self.schema = schema
            self.language_model = language_model
            self.backtracking_threshold = backtracking_threshold
            self.stop_threshold = stop_threshold
            self.max_iterations = max_iterations
            self.critique_program = critique_program
            self.prompt_template = prompt_template
            self.examples = examples
            self.hints = hints
            self.use_inputs_schema = use_inputs_schema
            self.use_outputs_schema = use_outputs_schema
            if not self.critique_program:
                # If no critique program is provided
                # We compute the reward in the thinking step
                thinking_data_model = (
                    Thinking
                    + synalinks.SymbolicDataModel(schema=self.schema)
                    + CritiqueWithReward
                )
            else:
                thinking_data_model = Thinking + synalinks.SymbolicDataModel(
                    schema=self.schema
                )
            # This is for generating the intermediary steps
            self.thinking = synalinks.Generator(
                data_model=thinking_data_model,
                language_model=self.language_model,
                prompt_template=self.prompt_template,
                examples=self.examples,
                hints=self.hints,
                use_inputs_schema=self.use_inputs_schema,
                use_outputs_schema=self.use_outputs_schema,
                name=self.name + "_thinking_generator",
            )
            # This is going to be the final generator
            self.generator = synalinks.Generator(
                schema=self.schema,
                language_model=self.language_model,
                prompt_template=self.prompt_template,
                examples=self.examples,
                hints=self.hints,
                use_inputs_schema=self.use_inputs_schema,
                use_outputs_schema=self.use_outputs_schema,
                name=self.name + "_generator",
            )

        async def call(self, inputs, training=False):
            if not inputs:
                # This is to allow logical flows
                # (don't run the module if no inputs provided)
                return None
            for i in range(self.max_iterations):
                thinking = await self.thinking(inputs)
                reward = 0.0
                if self.critique_program:
                    critique = await self.critique_program(thinking)
                    reward = critique.get("reward")
                else:
                    reward = thinking.get("reward")
                if reward > self.backtracking_threshold:
                    if reward > self.stop_threshold:
                        # NOTE(review): the step that triggers the stop is NOT
                        # concatenated before breaking — confirm this is the
                        # intended behavior.
                        break
                    inputs = await synalinks.ops.concat(
                        inputs,
                        thinking,
                        name=self.name + f"_inputs_with_thinking_{i}",
                    )
            return await self.generator(inputs)

        async def compute_output_spec(self, _, training=False):
            # The module always emits a data model matching the target schema.
            return synalinks.SymbolicDataModel(schema=self.schema)

        def get_config(self):
            config = {
                "schema": self.schema,
                "backtracking_threshold": self.backtracking_threshold,
                "stop_threshold": self.stop_threshold,
                "max_iterations": self.max_iterations,
                "prompt_template": self.prompt_template,
                "examples": self.examples,
                "hints": self.hints,
                "use_inputs_schema": self.use_inputs_schema,
                "use_outputs_schema": self.use_outputs_schema,
                "name": self.name,
                "description": self.description,
                "trainable": self.trainable,
            }
            language_model_config = {
                "language_model": synalinks.saving.serialize_synalinks_object(
                    self.language_model,
                )
            }
            if self.critique_program:
                critique_program_config = {
                    "critique_program": synalinks.saving.serialize_synalinks_object(
                        self.critique_program,
                    )
                }
            else:
                critique_program_config = {
                    "critique_program": None,
                }
            return {**config, **language_model_config, **critique_program_config}

        @classmethod
        def from_config(cls, config):
            language_model = synalinks.saving.deserialize_synalinks_object(
                config.pop("language_model")
            )
            # Pop unconditionally so the key is never passed twice via **config.
            critique_program = config.pop("critique_program", None)
            if critique_program:
                critique_program = synalinks.saving.deserialize_synalinks_object(
                    critique_program
                )
            return cls(
                language_model=language_model,
                critique_program=critique_program,
                **config,
            )

    return BacktrackingOfThought, CritiqueWithReward, Thinking
223
+
224
+
225
+ @app.cell(hide_code=True)
226
+ def _(mo):
227
+ mo.md(
228
+ r"""
229
+ ### The `__init__()` function
230
+
231
+ First, let's explain the `__init__()` function. When implementing modules that
232
+ use a `Generator`, you want to externalize the generator's parameters
233
+ (`prompt_template`, `hints`, `examples`, `use_inputs_schema`, `use_outputs_schema`)
234
+ to give maximum flexibility to your module when possible.
235
+ Then, you have to include the default arguments of a module (`name`, `description`, `trainable`)
236
+ that will be provided to the `super().__init__()`.
237
+ Although the name and description are inferred automatically it is a good practice to
238
+ let the user personalize them. The `trainable` argument will indicate whether the module
239
+ is frozen or not, i.e. whether its variables can be updated by the optimizer;
240
+ by default, a module should be trainable.
241
+
242
+ And finally, you can add any relevant information, whether for the initialization of
243
+ the variables, or a config parameter like here.
244
+
245
+ To add a variable to the module, you have to use the `add_variables` function;
246
+ this function can only be used in the `__init__()` or in the `build()` function.
247
+ The `build()` function is useful to create variables or initialize your module/program
248
+ based on the actual inputs, which are not known at this stage; remember, the module can
249
+ accept any inputs.
250
+
251
+ ### How to know when using a `Variable`?
252
+
253
+ As a rule of thumb, the variables should be anything that evolves over time during
254
+ inference/training. These variables could be updated by the module itself, or by
255
+ the optimizer if you have an optimizer designed for that. They will be serialized
256
+ when you save your program so you can recover the state of your program by loading
257
+ a JSON file. In this example, the variables are encapsulated in the `Generator`.
258
+
259
+ ### The `call()` function
260
+
261
+ The `call()` function is the core of the `Module` class. It defines the computation
262
+ performed at every call of the module.
263
+ This function takes `inputs` and an optional `training` argument, which indicates
264
+ whether the module is in training mode or not.
265
+
266
+ In the `BacktrackingOfThought` module, the `call()` function implements the
267
+ backtracking logic:
268
+
269
+ - It iterates up to `max_iterations` times.
270
+ - In each iteration, it generates a "thinking" step using the `thinking` generator.
271
+ - It then critiques the generated thinking using either a provided critique program or
272
+ a reward value embedded in the thinking step.
273
+ - If the reward exceeds the `backtracking_threshold`, the thinking step is concatenated
274
+ with the inputs for the next iteration.
275
+ - If the reward exceeds the `stop_threshold`, the iteration stops early.
276
+ - Finally, the `generator` produces the final output based on the accumulated inputs.
277
+
278
+ ### The `compute_output_spec()` function
279
+
280
+ The `compute_output_spec()` function is responsible for defining the output data model
281
+ of the module/program. It allows the system to understand the structure of the data
282
+ produced by this module.
283
+
284
+ In this example, `compute_output_spec()` returns a `SymbolicDataModel` based on the module's
285
+ schema, indicating the expected structure of the output data.
286
+
287
+ As a rule of thumb, if you access a data model field (using `get()`) you will have to
288
+ implement it otherwise, Synalinks will infer the output spec by running the call
289
+ function with symbolic data models. If you have any doubt, do not implement it and the system will
290
+ raise an error if you need to.
291
+
292
+ ### Serialization and Deserialization
293
+
294
+ To ensure that your module can be saved and loaded correctly, you need to implement serialization
295
+ and deserialization methods. This is crucial for saving the state of your module, including
296
+ any trainable variables, and restoring it later.
297
+
298
+ - The `get_config()` method should return a dictionary containing all the information needed
299
+ to recreate the module. This includes the module's configuration and any serialized
300
+ sub-components like the language model or critique program in this case.
301
+ - The `from_config()` class method should be able to reconstruct the module from the
302
+ configuration dictionary returned by `get_config()`.
303
+
304
+ ## Conclusion
305
+
306
+ By following these guidelines, you can create custom modules in Synalinks that are flexible,
307
+ reusable, and can be integrated into larger programs. The `BacktrackingOfThought` module
308
+ demonstrates how to combine symbolic reasoning with language model predictions to enhance
309
+ the decision-making process.
310
+
311
+ ### Key Takeaways
312
+
313
+ - **Module Class**: The `Module` class in Synalinks encapsulates both state (variables)
314
+ and transformation logic (`call()` method), serving as a foundational abstraction for
315
+ building custom components.
316
+ - **Initialization and Variables**: The `__init__()` function initializes the module,
317
+ externalizing generator parameters for flexibility. Trainable and non-trainable
318
+ variables are managed using the `add_variables` function, ensuring that the
319
+ module's state can evolve over time and be serialized.
320
+ - **Call Function**: The `call()` function defines the core computation of the module,
321
+ handling inputs and producing outputs. In `BacktrackingOfThought`, it implements
322
+ backtracking logic, iteratively generating and critiquing thinking steps to refine
323
+ the output.
324
+ - **Output Specification**: The `compute_output_spec()` function defines the output data
325
+ model, allowing the system to understand the structure of the produced data.
326
+ Implementing this function is crucial when accessing data model fields directly.
327
+ - **Serialization**: Proper serialization and deserialization methods (`get_config()`
328
+ and `from_config()`) ensure that the module's state can be saved and restored,
329
+ facilitating reuse and integration into larger programs.
330
+ - **Flexibility and Reusability**: By following these guidelines, you can create
331
+ custom modules that are flexible, reusable, and easily integrated into neuro-symbolic
332
+ programs. The `BacktrackingOfThought` module exemplifies how to combine symbolic
333
+ reasoning with language models to improve decision-making processes.
334
+ """
335
+ )
336
+ return
337
+
338
+
339
+ if __name__ == "__main__":
340
+ app.run()
examples/chatbot.py DELETED
@@ -1,152 +0,0 @@
1
- import marimo
2
-
3
- __generated_with = "0.9.14"
4
- app = marimo.App(width="medium")
5
-
6
-
7
- @app.cell
8
- def __():
9
- import marimo as mo
10
- import os
11
- from huggingface_hub import InferenceClient
12
- return InferenceClient, mo, os
13
-
14
-
15
- @app.cell
16
- def __():
17
- MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
18
- return (MODEL_NAME,)
19
-
20
-
21
- @app.cell(hide_code=True)
22
- def __(MODEL_NAME, mo):
23
- mo.md(f"""
24
- # Chat with **{MODEL_NAME}**
25
- """)
26
- return
27
-
28
-
29
- @app.cell
30
- def __(max_tokens, mo, system_message, temperature, top_p):
31
- mo.hstack(
32
- [
33
- system_message,
34
- mo.vstack([temperature, top_p, max_tokens], align="end"),
35
- ],
36
- )
37
- return
38
-
39
-
40
- @app.cell
41
- def __(mo, respond):
42
- chat = mo.ui.chat(
43
- model=respond,
44
- prompts=["Tell me a joke.", "What is the square root of {{number}}?"],
45
- )
46
- chat
47
- return (chat,)
48
-
49
-
50
- @app.cell
51
- def __(InferenceClient, MODEL_NAME, os):
52
- """
53
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.26.2/en/guides/inference
54
- """
55
-
56
- hf_token = os.environ.get("HF_TOKEN")
57
- if not hf_token:
58
- print("HF_TOKEN not set, may have limited access.")
59
-
60
- client = InferenceClient(
61
- MODEL_NAME,
62
- token=hf_token,
63
- )
64
- return client, hf_token
65
-
66
-
67
- @app.cell
68
- def __(client, mo):
69
- # Create UI controls
70
- system_message = mo.ui.text_area(
71
- value="You are a friendly Chatbot.",
72
- label="System message",
73
- )
74
- max_tokens = mo.ui.slider(
75
- start=1,
76
- stop=2048,
77
- value=512,
78
- step=1,
79
- label="Max new tokens",
80
- show_value=True,
81
- )
82
- temperature = mo.ui.slider(
83
- start=0.1,
84
- stop=4.0,
85
- value=0.7,
86
- step=0.1,
87
- label="Temperature",
88
- show_value=True,
89
- )
90
- top_p = mo.ui.slider(
91
- start=0.1,
92
- stop=1.0,
93
- value=0.95,
94
- step=0.05,
95
- label="Top-p (nucleus sampling)",
96
- show_value=True,
97
- )
98
-
99
- # Add more configuration options if needed.
100
-
101
-
102
- # Create chat callback
103
- def respond(messages: list[mo.ai.ChatMessage], config):
104
- chat_messages = [{"role": "system", "content": system_message.value}]
105
-
106
- for message in messages:
107
- parts = []
108
- # Add text
109
- parts.append({"type": "text", "text": message.content})
110
-
111
- # Add attachments
112
- if message.attachments:
113
- for attachment in message.attachments:
114
- content_type = attachment.content_type or ""
115
- # This example only supports image attachments
116
- if content_type.startswith("image"):
117
- parts.append(
118
- {
119
- "type": "image_url",
120
- "image_url": {"url": attachment.url},
121
- }
122
- )
123
- else:
124
- raise ValueError(
125
- f"Unsupported content type {content_type}"
126
- )
127
-
128
- chat_messages.append({"role": message.role, "content": parts})
129
-
130
- response = client.chat_completion(
131
- chat_messages,
132
- max_tokens=max_tokens.value,
133
- temperature=temperature.value,
134
- top_p=top_p.value,
135
- stream=False,
136
- )
137
-
138
- # You can return strings, markdown, charts, tables, dataframes, and more.
139
- return response.choices[0].message.content
140
- return max_tokens, respond, system_message, temperature, top_p
141
-
142
-
143
- @app.cell
144
- def __():
145
- # If you need to do anything _reactively_ to the chat messages,
146
- # you can access the chat messages using the `chat.value` attribute.
147
- # chat.value
148
- return
149
-
150
-
151
- if __name__ == "__main__":
152
- app.run()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/dataset_explorer.py DELETED
@@ -1,108 +0,0 @@
1
- import marimo
2
-
3
- __generated_with = "0.9.18"
4
- app = marimo.App(width="full")
5
-
6
-
7
- @app.cell
8
- def __():
9
- datasets = [
10
- # Add your own HF datasets
11
- "scikit-learn/iris/Iris.csv",
12
- "scikit-learn/adult-census-income/adult.csv",
13
- "scikit-learn/auto-mpg/auto-mpg.csv",
14
- "scikit-learn/credit-card-clients/UCI_Credit_Card.csv",
15
- "scikit-learn/Fish/Fish.csv",
16
- "scikit-learn/tips/tips.csv",
17
- ]
18
- return (datasets,)
19
-
20
-
21
- @app.cell(hide_code=True)
22
- def __(mo):
23
- mo.md(r"""## Select a dataset""")
24
- return
25
-
26
-
27
- @app.cell(hide_code=True)
28
- def __(datasets, mo):
29
- dataset = mo.ui.dropdown(datasets, value=datasets[0], label="Select a dataset")
30
- no_limit = mo.ui.switch(label="Limit 1000", value=True)
31
- mo.hstack([dataset, no_limit])
32
- return dataset, no_limit
33
-
34
-
35
- @app.cell
36
- def __(dataset, mo, no_limit):
37
- explore = mo.sql(
38
- f"""
39
- CREATE OR REPLACE TEMP TABLE explore
40
- AS (FROM 'hf://datasets/{dataset.value}')
41
- {'LIMIT 1000' if no_limit.value else ''};
42
-
43
- FROM explore;
44
- """
45
- )
46
- return (explore,)
47
-
48
-
49
- @app.cell(hide_code=True)
50
- def __(mo):
51
- mo.md(r"""## Summary""")
52
- return
53
-
54
-
55
- @app.cell(hide_code=True)
56
- def __(explore, mo):
57
- _schema = mo.accordion({"Schema": explore.schema})
58
-
59
- mo.md(f"""
60
- * Total rows: **{len(explore):,}**
61
- * Total columns: **{len(explore.columns)}**
62
-
63
- {_schema}
64
- """)
65
- return
66
-
67
-
68
- @app.cell
69
- def __(explore):
70
- explore.describe()
71
- return
72
-
73
-
74
- @app.cell(hide_code=True)
75
- def __(mo):
76
- mo.md("""## Manipulate the data""")
77
- return
78
-
79
-
80
- @app.cell
81
- def __(explore, mo):
82
- transformed = mo.ui.dataframe(explore)
83
- transformed
84
- return (transformed,)
85
-
86
-
87
- @app.cell(hide_code=True)
88
- def __(mo):
89
- mo.md(r"""## Explore the data""")
90
- return
91
-
92
-
93
- @app.cell
94
- def __(mo, transformed):
95
- mo.ui.data_explorer(transformed.value)
96
- return
97
-
98
-
99
- @app.cell(hide_code=True)
100
- def __():
101
- # Imports
102
- import marimo as mo
103
- import polars
104
- return mo, polars
105
-
106
-
107
- if __name__ == "__main__":
108
- app.run()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,10 +1,4 @@
1
  huggingface-hub==0.26.2
2
  marimo[sql]
3
- polars
4
- altair
5
- openai
6
- pyarrow
7
- # Or a specific version
8
- # marimo>=0.9.0
9
-
10
- # Add other dependencies as needed
 
1
  huggingface-hub==0.26.2
2
  marimo[sql]
3
+ datasets
4
+ synalinks