YoanSallami
commited on
Commit
·
fe643f6
1
Parent(s):
10db5b3
Add notebooks
Browse files- Dockerfile +6 -4
- README.md +5 -5
- code_examples/1_basics/1_first_steps.py +192 -0
- code_examples/1_basics/2_first_programs.py +308 -0
- code_examples/1_basics/3_control_flow.py +487 -0
- code_examples/1_basics/4_conversational_applications.py +176 -0
- code_examples/1_basics/5_rewards_metrics_and_optimizers.py +194 -0
- code_examples/1_basics/6_training_programs.py +335 -0
- code_examples/2_advanced/1_implementing_custom_modules_and_programs_via_subclassing.py +340 -0
- examples/chatbot.py +0 -152
- examples/dataset_explorer.py +0 -108
- requirements.txt +2 -8
Dockerfile
CHANGED
@@ -10,6 +10,9 @@ ENV VIRTUAL_ENV=/opt/venv \
|
|
10 |
PATH="/opt/venv/bin:/home/user/.local/bin:$PATH" \
|
11 |
HOME=/home/user
|
12 |
|
|
|
|
|
|
|
13 |
# Install dependencies
|
14 |
COPY --chown=user:user ./requirements.txt requirements.txt
|
15 |
RUN uv venv $VIRTUAL_ENV \
|
@@ -31,7 +34,7 @@ RUN --mount=type=secret,id=MARIMO_PASSWORD \
|
|
31 |
WORKDIR /data
|
32 |
|
33 |
# Copy examples
|
34 |
-
COPY --chown=user:user ./
|
35 |
|
36 |
# Set user
|
37 |
USER user
|
@@ -47,7 +50,6 @@ dataframes = "rich"
|
|
47 |
theme = "light"
|
48 |
ENDCONFIG
|
49 |
|
50 |
-
|
51 |
# Uncomment to enable password protection
|
52 |
-
# CMD marimo edit /data --host=0.0.0.0 --port=7860 --token-password=$(cat $HOME/.marimo_password)
|
53 |
-
CMD marimo edit /data --host=0.0.0.0 --port=7860 --no-token
|
|
|
10 |
PATH="/opt/venv/bin:/home/user/.local/bin:$PATH" \
|
11 |
HOME=/home/user
|
12 |
|
13 |
+
# Install graphviz for dot visualization
|
14 |
+
RUN apt update && apt install -y graphviz
|
15 |
+
|
16 |
# Install dependencies
|
17 |
COPY --chown=user:user ./requirements.txt requirements.txt
|
18 |
RUN uv venv $VIRTUAL_ENV \
|
|
|
34 |
WORKDIR /data
|
35 |
|
36 |
# Copy examples
|
37 |
+
COPY --chown=user:user ./code_examples ./code_examples
|
38 |
|
39 |
# Set user
|
40 |
USER user
|
|
|
50 |
theme = "light"
|
51 |
ENDCONFIG
|
52 |
|
|
|
53 |
# Uncomment to enable password protection
|
54 |
+
# CMD ["marimo", "edit", "/data", "--host=0.0.0.0", "--port=7860", "--token-password=$(cat $HOME/.marimo_password)"]
|
55 |
+
CMD ["marimo", "edit", "/data", "--host=0.0.0.0", "--port=7860", "--no-token"]
|
README.md
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: yellow
|
5 |
colorTo: blue
|
6 |
sdk: docker
|
7 |
pinned: true
|
8 |
license: mit
|
9 |
-
short_description: A marimo Space to edit
|
10 |
---
|
11 |
|
12 |
-
Check out
|
13 |
-
Check out the
|
|
|
1 |
---
|
2 |
+
title: synalinks notebooks
|
3 |
+
emoji: 🧠🔗
|
4 |
colorFrom: yellow
|
5 |
colorTo: blue
|
6 |
sdk: docker
|
7 |
pinned: true
|
8 |
license: mit
|
9 |
+
short_description: A marimo Space to edit Synalinks 🧠🔗 notebooks
|
10 |
---
|
11 |
|
12 |
+
Check out the documentation at <https://synalinks.github.io/synalinks>
|
13 |
+
Check out the repository at <https://github.com/SynaLinks/synalinks>
|
code_examples/1_basics/1_first_steps.py
ADDED
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# First Steps
|
21 |
+
|
22 |
+
First, install Synalinks, the easiest way is using pip:
|
23 |
+
|
24 |
+
```shell
|
25 |
+
pip install synalinks
|
26 |
+
```
|
27 |
+
|
28 |
+
Or uv (recommended):
|
29 |
+
|
30 |
+
```shell
|
31 |
+
uv pip install synalinks
|
32 |
+
```
|
33 |
+
|
34 |
+
If you want to install it from source (for contributors), then do:
|
35 |
+
|
36 |
+
```shell
|
37 |
+
git clone https://github.com/SynaLinks/Synalinks
|
38 |
+
cd Synalinks
|
39 |
+
./shell/uv.sh # Install uv
|
40 |
+
./shell/install.sh # Create the virtual env and install Synalinks
|
41 |
+
```
|
42 |
+
|
43 |
+
After this, open a python file or notebook and check the install:
|
44 |
+
"""
|
45 |
+
)
|
46 |
+
return
|
47 |
+
|
48 |
+
|
49 |
+
@app.cell
|
50 |
+
def _(synalinks):
|
51 |
+
print(synalinks.__version__)
|
52 |
+
return
|
53 |
+
|
54 |
+
|
55 |
+
@app.cell(hide_code=True)
|
56 |
+
def _(mo):
|
57 |
+
mo.md(
|
58 |
+
r"""
|
59 |
+
Synalinks use a global context to ensure that each variable/module
|
60 |
+
have a unique name. Clear it at the beginning of your scripts to
|
61 |
+
ensure naming reproductability.
|
62 |
+
"""
|
63 |
+
)
|
64 |
+
return
|
65 |
+
|
66 |
+
|
67 |
+
@app.cell
|
68 |
+
def _(synalinks):
|
69 |
+
synalinks.backend.clear_session()
|
70 |
+
return
|
71 |
+
|
72 |
+
|
73 |
+
@app.cell(hide_code=True)
|
74 |
+
def _(mo):
|
75 |
+
mo.md(
|
76 |
+
r"""
|
77 |
+
Addtionally, you can install Ollama [here](https://ollama.com/) to run
|
78 |
+
Language Models (LMs) locally. You can run these notebooks locally by
|
79 |
+
|
80 |
+
|
81 |
+
## Prompting
|
82 |
+
|
83 |
+
You will notice that there is no traditional prompting involved in
|
84 |
+
Synalinks, everything is described as data models in and out.
|
85 |
+
However we use a prompt template, that will tell the system how to
|
86 |
+
construct the prompt automatically.
|
87 |
+
|
88 |
+
The prompt template is a jinja2 template that describe how to render
|
89 |
+
the examples, hints and how to convert them into chat messages:
|
90 |
+
"""
|
91 |
+
)
|
92 |
+
return
|
93 |
+
|
94 |
+
|
95 |
+
@app.cell
|
96 |
+
def _(synalinks):
|
97 |
+
print(synalinks.default_prompt_template())
|
98 |
+
return
|
99 |
+
|
100 |
+
|
101 |
+
@app.cell(hide_code=True)
|
102 |
+
def _(mo):
|
103 |
+
mo.md(
|
104 |
+
r"""
|
105 |
+
If you are making a conversational application, we provide the following template to use.
|
106 |
+
To use it, provide this template to the `prompt_template` argument of your `Generator` module.
|
107 |
+
Note that this template only works if your module has a `ChatMessages` input.
|
108 |
+
"""
|
109 |
+
)
|
110 |
+
|
111 |
+
|
112 |
+
@app.cell
|
113 |
+
def _(synalinks):
|
114 |
+
print(synalinks.chat_prompt_template())
|
115 |
+
return
|
116 |
+
|
117 |
+
|
118 |
+
@app.cell(hide_code=True)
|
119 |
+
def _(mo):
|
120 |
+
mo.md(
|
121 |
+
r"""
|
122 |
+
The template use the XML tags `<system>...</system>`, `<user>...</user>` and
|
123 |
+
`<assistant>...</assistant>` to know how to convert the prompt template
|
124 |
+
into messages. You can modify the default template used by using the
|
125 |
+
`prompt_template` argument in Synalinks modules. You can notice also,
|
126 |
+
that we send the inputs's and output's JSON schema to instruct the LMs
|
127 |
+
how to answer, you can enable/disable that behavior by using `use_inputs_schema`
|
128 |
+
and `use_outputs_schema` in Synalinks modules. Synalinks use constrained
|
129 |
+
structured output ensuring that the LMs answer respect the data models
|
130 |
+
specification (the JSON schema), and is ready to parse, so in theory
|
131 |
+
we don't need it, except if you use it to provide additional information
|
132 |
+
to the LMs. You can find more information in the
|
133 |
+
[`Generator`](https://synalinks.github.io/synalinks/Synalinks%20API/Modules%20API/Core%20Modules/Generator%20module/) documentation.
|
134 |
+
|
135 |
+
## Data Models
|
136 |
+
|
137 |
+
To provide additional information to the LMs, you can use the data models
|
138 |
+
`Field`. You can notice that Synalinks use Pydantic as default data backend.
|
139 |
+
Allowing Synalinks to be compatible out-of-the-box with structured output
|
140 |
+
and FastAPI.
|
141 |
+
"""
|
142 |
+
)
|
143 |
+
return
|
144 |
+
|
145 |
+
|
146 |
+
@app.cell
|
147 |
+
def _(synalinks):
|
148 |
+
class AnswerWithThinking(synalinks.DataModel):
|
149 |
+
thinking: str = synalinks.Field(
|
150 |
+
description="Your step by step thinking process",
|
151 |
+
)
|
152 |
+
answer: str = synalinks.Field(
|
153 |
+
description="The correct answer",
|
154 |
+
)
|
155 |
+
|
156 |
+
return (AnswerWithThinking,)
|
157 |
+
|
158 |
+
|
159 |
+
@app.cell(hide_code=True)
|
160 |
+
def _(mo):
|
161 |
+
mo.md(
|
162 |
+
r"""
|
163 |
+
|
164 |
+
## Conclusion
|
165 |
+
|
166 |
+
Usually that will be enough to instruct the LMs, you don't need to modify
|
167 |
+
the prompt template. Just by adding additional descriptions to the data
|
168 |
+
models fields you can instruct your system to behave as you want.
|
169 |
+
If the system needs general instructions about how to behave, you can
|
170 |
+
use the `hints` argument in Synalinks modules that will be formatted as
|
171 |
+
presented in the prompt template.
|
172 |
+
|
173 |
+
### Key Takeaways
|
174 |
+
|
175 |
+
- **Ease of Integration**: Synalinks seamlessly integrates with existing
|
176 |
+
Python projects, making it easy to incorporate advanced language
|
177 |
+
model capabilities without extensive modifications.
|
178 |
+
- **Structured Outputs**: By using data models and JSON schemas, Synalinks
|
179 |
+
ensures that the LMs responses are structured and ready for parsing,
|
180 |
+
reducing the need for additional post-processing.
|
181 |
+
- **Customizable Prompts**: The prompt templates in Synalinks are highly
|
182 |
+
customizable, allowing you to tailor the instructions provided to
|
183 |
+
the LMs based on your specific use case.
|
184 |
+
- **Compatibility**: Synalinks use Pydantic as the default data backend
|
185 |
+
ensures compatibility with structured output and FastAPI.
|
186 |
+
"""
|
187 |
+
)
|
188 |
+
return
|
189 |
+
|
190 |
+
|
191 |
+
if __name__ == "__main__":
|
192 |
+
app.run()
|
code_examples/1_basics/2_first_programs.py
ADDED
@@ -0,0 +1,308 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# Your first programs
|
21 |
+
|
22 |
+
The main concept of Synalinks, is that an application (we call it a `Program`)
|
23 |
+
is a computation graph with JSON data (called `JsonDataModel`) as edges and
|
24 |
+
`Operation`s as nodes. What set apart Synalinks from other similar frameworks
|
25 |
+
like DSPy or AdalFlow is that we focus on graph-based systems but also that
|
26 |
+
it allow users to declare the computation graph using a Functional API inherited
|
27 |
+
from [Keras](https://keras.io/).
|
28 |
+
|
29 |
+
About modules, similar to layers in deep learning applications, modules are
|
30 |
+
composable blocks that you can assemble in multiple ways. Providing a modular
|
31 |
+
and composable architecture to experiment and unlock creativity.
|
32 |
+
|
33 |
+
Note that each `Program` is also a `Module`! Allowing you to encapsulate them
|
34 |
+
as you want.
|
35 |
+
|
36 |
+
Many people think that what enabled the Deep Learning revolution was compute
|
37 |
+
and data, but in reality, frameworks also played a pivotal role as they enabled
|
38 |
+
researchers and engineers to create complex architectures without having to
|
39 |
+
re-implement everything from scatch.
|
40 |
+
"""
|
41 |
+
)
|
42 |
+
return
|
43 |
+
|
44 |
+
|
45 |
+
@app.cell
|
46 |
+
def _(synalinks):
|
47 |
+
# Now we can define the data models that we are going to use in the notebook.
|
48 |
+
# Note that Synalinks use Pydantic as default data backend, which is compatible with FastAPI and structured output.
|
49 |
+
|
50 |
+
class Query(synalinks.DataModel):
|
51 |
+
query: str = synalinks.Field(
|
52 |
+
description="The user query",
|
53 |
+
)
|
54 |
+
|
55 |
+
class AnswerWithThinking(synalinks.DataModel):
|
56 |
+
thinking: str = synalinks.Field(
|
57 |
+
description="Your step by step thinking process",
|
58 |
+
)
|
59 |
+
answer: str = synalinks.Field(
|
60 |
+
description="The correct answer",
|
61 |
+
)
|
62 |
+
|
63 |
+
return AnswerWithThinking, Query
|
64 |
+
|
65 |
+
|
66 |
+
@app.cell(hide_code=True)
|
67 |
+
def _(mo):
|
68 |
+
mo.md(
|
69 |
+
r"""
|
70 |
+
## Functional API
|
71 |
+
|
72 |
+
You can program your application using 3 different ways, let's start with the
|
73 |
+
Functional way.
|
74 |
+
|
75 |
+
In this case, you start from `Input` and you chain modules calls to specify the
|
76 |
+
programs's structure, and finally, you create your program from inputs and outputs:
|
77 |
+
"""
|
78 |
+
)
|
79 |
+
return
|
80 |
+
|
81 |
+
|
82 |
+
@app.cell
|
83 |
+
async def _(AnswerWithThinking, Query, synalinks):
|
84 |
+
|
85 |
+
language_model = synalinks.LanguageModel(
|
86 |
+
model="openai/gpt-4o-mini",
|
87 |
+
)
|
88 |
+
|
89 |
+
_x0 = synalinks.Input(data_model=Query)
|
90 |
+
_x1 = await synalinks.Generator(
|
91 |
+
data_model=AnswerWithThinking,
|
92 |
+
language_model=language_model,
|
93 |
+
)(_x0)
|
94 |
+
|
95 |
+
program = synalinks.Program(
|
96 |
+
inputs=_x0,
|
97 |
+
outputs=_x1,
|
98 |
+
name="chain_of_thought",
|
99 |
+
description="Usefull to answer in a step by step manner.",
|
100 |
+
)
|
101 |
+
return language_model, program
|
102 |
+
|
103 |
+
|
104 |
+
@app.cell
|
105 |
+
def _(program):
|
106 |
+
# You can print a summary of your program in a table format
|
107 |
+
# which is really usefull to have a quick overview of your application
|
108 |
+
|
109 |
+
program.summary()
|
110 |
+
return
|
111 |
+
|
112 |
+
|
113 |
+
@app.cell
|
114 |
+
def _(mo, program, synalinks):
|
115 |
+
# Or plot your program in a graph format
|
116 |
+
|
117 |
+
synalinks.utils.plot_program(
|
118 |
+
program,
|
119 |
+
show_module_names=True,
|
120 |
+
show_trainable=True,
|
121 |
+
show_schemas=True,
|
122 |
+
)
|
123 |
+
return
|
124 |
+
|
125 |
+
|
126 |
+
@app.cell(hide_code=True)
|
127 |
+
def _(mo):
|
128 |
+
mo.md(
|
129 |
+
r"""
|
130 |
+
## Subclassing the `Program` class
|
131 |
+
|
132 |
+
Now let's try to program it using another method, subclassing the `Program`
|
133 |
+
class.
|
134 |
+
|
135 |
+
In that case, you should define your modules in `__init__()` and you should
|
136 |
+
implement the program's structure in `call()`.
|
137 |
+
"""
|
138 |
+
)
|
139 |
+
return
|
140 |
+
|
141 |
+
|
142 |
+
@app.cell
|
143 |
+
def _(AnswerWithThinking, language_model, synalinks):
|
144 |
+
|
145 |
+
class ChainOfThought(synalinks.Program):
|
146 |
+
"""Usefull to answer in a step by step manner.
|
147 |
+
|
148 |
+
The first line of the docstring is provided as description for the program
|
149 |
+
if not provided in the `super().__init__()`. In a similar way the name is
|
150 |
+
automatically infered based on the class name if not provided.
|
151 |
+
"""
|
152 |
+
|
153 |
+
def __init__(self, language_model=None):
|
154 |
+
super().__init__()
|
155 |
+
self.answer = synalinks.Generator(
|
156 |
+
data_model=AnswerWithThinking, language_model=language_model
|
157 |
+
)
|
158 |
+
|
159 |
+
async def call(self, inputs, training=False):
|
160 |
+
x = await self.answer(inputs)
|
161 |
+
return x
|
162 |
+
|
163 |
+
def get_config(self):
|
164 |
+
config = {
|
165 |
+
"name": self.name,
|
166 |
+
"description": self.description,
|
167 |
+
"trainable": self.trainable,
|
168 |
+
}
|
169 |
+
language_model_config = {
|
170 |
+
"language_model": synalinks.saving.serialize_synalinks_object(
|
171 |
+
self.language_model
|
172 |
+
)
|
173 |
+
}
|
174 |
+
return {**config, **language_model_config}
|
175 |
+
|
176 |
+
@classmethod
|
177 |
+
def from_config(cls, config):
|
178 |
+
language_model = synalinks.saving.deserialize_synalinks_object(
|
179 |
+
config.pop("language_model")
|
180 |
+
)
|
181 |
+
return cls(language_model=language_model, **config)
|
182 |
+
|
183 |
+
program_1 = ChainOfThought(language_model=language_model)
|
184 |
+
return ChainOfThought, program_1
|
185 |
+
|
186 |
+
|
187 |
+
@app.cell
|
188 |
+
def _(program_1):
|
189 |
+
program_1.summary()
|
190 |
+
return
|
191 |
+
|
192 |
+
|
193 |
+
@app.cell(hide_code=True)
|
194 |
+
def _(mo):
|
195 |
+
mo.md(
|
196 |
+
r"""
|
197 |
+
Note that the program isn't actually built, this behavior is intended its
|
198 |
+
means that it can accept any king of input, making the program truly
|
199 |
+
generalizable. Now we can explore the last way of programming as well as
|
200 |
+
illustrate one of the key feature of Synalinks, composability.
|
201 |
+
|
202 |
+
## Using `Sequential` program
|
203 |
+
|
204 |
+
In addition to the other ways of programming, `Sequential` is a special
|
205 |
+
case of programs where the program is purely a stack of single-input,
|
206 |
+
single-output modules.
|
207 |
+
|
208 |
+
In this example, we are going to re-use the `ChainOfThought` program that
|
209 |
+
we defined previously, illustrating the modularity of the framework.
|
210 |
+
"""
|
211 |
+
)
|
212 |
+
return
|
213 |
+
|
214 |
+
|
215 |
+
@app.cell
|
216 |
+
def _(ChainOfThought, Query, language_model, synalinks):
|
217 |
+
program_2 = synalinks.Sequential(
|
218 |
+
[
|
219 |
+
synalinks.Input(data_model=Query),
|
220 |
+
ChainOfThought(language_model=language_model),
|
221 |
+
],
|
222 |
+
name="chain_of_thought",
|
223 |
+
description="Usefull to answer in a step by step manner.",
|
224 |
+
)
|
225 |
+
program_2.summary()
|
226 |
+
return (program_2,)
|
227 |
+
|
228 |
+
|
229 |
+
@app.cell(hide_code=True)
|
230 |
+
def _(mo):
|
231 |
+
mo.md(
|
232 |
+
r"""
|
233 |
+
## Running your programs
|
234 |
+
|
235 |
+
In order to run your program, you just have to call it with the input data model
|
236 |
+
as argument.
|
237 |
+
"""
|
238 |
+
)
|
239 |
+
return
|
240 |
+
|
241 |
+
|
242 |
+
@app.cell(hide_code=True)
|
243 |
+
def _(mo):
|
244 |
+
openai_api_key = mo.ui.text_area(placeholder="Your OpenAI API key...").form()
|
245 |
+
openai_api_key
|
246 |
+
return
|
247 |
+
|
248 |
+
|
249 |
+
@app.cell(hide_code=True)
|
250 |
+
def _(mo, openai_api_key):
|
251 |
+
import os
|
252 |
+
mo.stop(not openai_api_key.value)
|
253 |
+
os.environ["OPENAI_API_KEY"] = openai_api_key.value
|
254 |
+
return
|
255 |
+
|
256 |
+
|
257 |
+
@app.cell(hide_code=True)
|
258 |
+
def _(mo):
|
259 |
+
run_button = mo.ui.run_button(label="Run program")
|
260 |
+
run_button.center()
|
261 |
+
return run_button
|
262 |
+
|
263 |
+
|
264 |
+
@app.cell
|
265 |
+
async def _(Query, program_2):
|
266 |
+
mo.stop(not openai_api_key.value, mo.md("Provide your OpenAI API key"))
|
267 |
+
mo.stop(not run_button.value, mo.md("Click on the run button above"))
|
268 |
+
|
269 |
+
result = await program_2(
|
270 |
+
Query(query="What are the key aspects of human cognition?"),
|
271 |
+
)
|
272 |
+
|
273 |
+
print(result.pretty_json())
|
274 |
+
|
275 |
+
return (result,)
|
276 |
+
|
277 |
+
|
278 |
+
@app.cell(hide_code=True)
|
279 |
+
def _(mo):
|
280 |
+
mo.md(
|
281 |
+
r"""
|
282 |
+
## Conclusion
|
283 |
+
|
284 |
+
Congratulations! You've successfully explored the fundamental concepts of programming
|
285 |
+
applications using Synalinks. By understanding and implementing the Functional API,
|
286 |
+
subclassing the `Program` class, and using `Sequential` programs, you've gained a
|
287 |
+
solid foundation in creating modular and composable applications.
|
288 |
+
|
289 |
+
Now that we know how to program applications, you can learn how to control
|
290 |
+
the data flow in the next notebook.
|
291 |
+
|
292 |
+
### Key Takeaways
|
293 |
+
|
294 |
+
- **Functional API**: Allows you to chain modules to define the program's structure,
|
295 |
+
providing a clear and intuitive way to build applications.
|
296 |
+
- **Subclassing**: Offers flexibility and control by defining modules and implementing
|
297 |
+
the program's structure from scratch within a class.
|
298 |
+
- **Sequential Programs**: Simplifies the creation of linear workflows, making it easy
|
299 |
+
to stack single-input, single-output modules.
|
300 |
+
- **Modularity and Composability**: Enables the reuse of components, fostering creativity
|
301 |
+
and efficiency in application development.
|
302 |
+
"""
|
303 |
+
)
|
304 |
+
return
|
305 |
+
|
306 |
+
|
307 |
+
if __name__ == "__main__":
|
308 |
+
app.run()
|
code_examples/1_basics/3_control_flow.py
ADDED
@@ -0,0 +1,487 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# Control Flow
|
21 |
+
|
22 |
+
Controlling the flow of information in a program is an essential feature of any LM framework.
|
23 |
+
In Synalinks, we implemented it in circuit-like fashion, where the flow of information can be
|
24 |
+
conditionaly or logically restricted to only flow in a subset of a computation graph.
|
25 |
+
|
26 |
+
## Parallel Branches
|
27 |
+
|
28 |
+
To create parallel branches, all you need to do is using the same inputs when declaring the modules.
|
29 |
+
Then Synalinks will automatically detect them and run them in parrallel with asyncio.
|
30 |
+
"""
|
31 |
+
)
|
32 |
+
return
|
33 |
+
|
34 |
+
|
35 |
+
@app.cell
|
36 |
+
async def _(synalinks):
|
37 |
+
class Query(synalinks.DataModel):
|
38 |
+
query: str = synalinks.Field(
|
39 |
+
description="The user query",
|
40 |
+
)
|
41 |
+
|
42 |
+
class AnswerWithThinking(synalinks.DataModel):
|
43 |
+
thinking: str = synalinks.Field(
|
44 |
+
description="Your step by step thinking process",
|
45 |
+
)
|
46 |
+
answer: str = synalinks.Field(
|
47 |
+
description="The correct answer",
|
48 |
+
)
|
49 |
+
|
50 |
+
language_model = synalinks.LanguageModel(model="ollama_chat/deepseek-r1")
|
51 |
+
_x0 = synalinks.Input(data_model=Query)
|
52 |
+
_x1 = await synalinks.Generator(
|
53 |
+
data_model=AnswerWithThinking,
|
54 |
+
language_model=language_model,
|
55 |
+
)(_x0)
|
56 |
+
_x2 = await synalinks.Generator(
|
57 |
+
data_model=AnswerWithThinking,
|
58 |
+
language_model=language_model,
|
59 |
+
)(_x0)
|
60 |
+
|
61 |
+
program = synalinks.Program(
|
62 |
+
inputs=_x0,
|
63 |
+
outputs=[_x1, _x2],
|
64 |
+
name="parallel_branches",
|
65 |
+
description="Illustrate the use of parallel branching",
|
66 |
+
)
|
67 |
+
return AnswerWithThinking, Query, language_model, program, synalinks
|
68 |
+
|
69 |
+
|
70 |
+
@app.cell
|
71 |
+
def _(mo, program, synalinks):
|
72 |
+
synalinks.utils.plot_program(
|
73 |
+
program,
|
74 |
+
show_module_names=True,
|
75 |
+
show_schemas=True,
|
76 |
+
show_trainable=True,
|
77 |
+
)
|
78 |
+
return
|
79 |
+
|
80 |
+
|
81 |
+
@app.cell(hide_code=True)
|
82 |
+
def _(mo):
|
83 |
+
mo.md(
|
84 |
+
r"""
|
85 |
+
## Decisions
|
86 |
+
|
87 |
+
Decisions in Synalinks can be viewed as a single label classification, they allow
|
88 |
+
the system to classify the inputs based on a question and labels to choose from.
|
89 |
+
The labels are used to create on the fly a Enum schema that ensure, thanks to
|
90 |
+
constrained structured output, that the system will answer one of the provided labels.
|
91 |
+
"""
|
92 |
+
)
|
93 |
+
return
|
94 |
+
|
95 |
+
|
96 |
+
@app.cell
|
97 |
+
async def _(Query, language_model, synalinks):
|
98 |
+
_x0 = synalinks.Input(data_model=Query)
|
99 |
+
_x1 = await synalinks.Decision(
|
100 |
+
question="Evaluate the difficulty to answer the provided query",
|
101 |
+
labels=["easy", "difficult"],
|
102 |
+
language_model=language_model,
|
103 |
+
)(_x0)
|
104 |
+
|
105 |
+
program_1 = synalinks.Program(
|
106 |
+
inputs=_x0,
|
107 |
+
outputs=_x1,
|
108 |
+
name="decision_making",
|
109 |
+
description="Illustrate the decision making process",
|
110 |
+
)
|
111 |
+
return (program_1,)
|
112 |
+
|
113 |
+
|
114 |
+
@app.cell
|
115 |
+
def _(mo, program_1, synalinks):
|
116 |
+
synalinks.utils.plot_program(
|
117 |
+
program_1,
|
118 |
+
show_module_names=True,
|
119 |
+
show_schemas=True,
|
120 |
+
show_trainable=True,
|
121 |
+
)
|
122 |
+
return
|
123 |
+
|
124 |
+
|
125 |
+
@app.cell(hide_code=True)
|
126 |
+
def _(mo):
|
127 |
+
mo.md(
|
128 |
+
r"""
|
129 |
+
## Conditional Branches
|
130 |
+
|
131 |
+
To make conditional branches, we will need the help of a core module: The Branch
|
132 |
+
module. This module use a decision and route the input data model to the selected
|
133 |
+
branch. When a branch is not selected, that branch output a None.
|
134 |
+
"""
|
135 |
+
)
|
136 |
+
return
|
137 |
+
|
138 |
+
|
139 |
+
@app.cell
|
140 |
+
async def _(AnswerWithThinking, Query, language_model, synalinks):
|
141 |
+
class Answer(synalinks.DataModel):
|
142 |
+
answer: str = synalinks.Field(
|
143 |
+
description="The correct answer",
|
144 |
+
)
|
145 |
+
|
146 |
+
_x0 = synalinks.Input(data_model=Query)
|
147 |
+
(_x1, _x2) = await synalinks.Branch(
|
148 |
+
question="Evaluate the difficulty to answer the provided query",
|
149 |
+
labels=["easy", "difficult"],
|
150 |
+
branches=[
|
151 |
+
synalinks.Generator(
|
152 |
+
data_model=Answer,
|
153 |
+
language_model=language_model,
|
154 |
+
),
|
155 |
+
synalinks.Generator(
|
156 |
+
data_model=AnswerWithThinking,
|
157 |
+
language_model=language_model,
|
158 |
+
),
|
159 |
+
],
|
160 |
+
)(_x0)
|
161 |
+
|
162 |
+
program_2 = synalinks.Program(
|
163 |
+
inputs=_x0,
|
164 |
+
outputs=[_x1, _x2],
|
165 |
+
name="conditional_branches",
|
166 |
+
description="Illustrate the conditional branches",
|
167 |
+
)
|
168 |
+
return Answer, program_2
|
169 |
+
|
170 |
+
|
171 |
+
@app.cell
|
172 |
+
def _(mo, program_2, synalinks):
|
173 |
+
synalinks.utils.plot_program(
|
174 |
+
program_2,
|
175 |
+
show_module_names=True,
|
176 |
+
show_schemas=True,
|
177 |
+
show_trainable=True,
|
178 |
+
)
|
179 |
+
return
|
180 |
+
|
181 |
+
|
182 |
+
@app.cell(hide_code=True)
|
183 |
+
def _(mo):
|
184 |
+
mo.md(
|
185 |
+
r"""
|
186 |
+
## Data Models Operators
|
187 |
+
|
188 |
+
Synalinks implement few operators that works with data models, some of them are
|
189 |
+
straightforward, like the concatenation, implemented in the Python `+` operator.
|
190 |
+
But others like the `logical_and` and `logical_or` implemented respectively
|
191 |
+
in the `&` and `|` operator are more difficult to grasp at first. As explained
|
192 |
+
above, in the conditional branches, the branch not selected will have a None
|
193 |
+
as output. To account that fact and to implement logical flows, we need operators
|
194 |
+
that can work with them. See the [Ops API](https://synalinks.github.io/synalinks/Synalinks%20API/Ops%20API/)
|
195 |
+
section for an extensive list of all data model operations.
|
196 |
+
|
197 |
+
### Concatenation
|
198 |
+
|
199 |
+
The concatenation, consist in creating a data model that have the fields of both
|
200 |
+
inputs. When one of the input is `None`, it raise an exception. Note that you can
|
201 |
+
use the concatenation, like any other operator, at a meta-class level, meaning
|
202 |
+
you can actually concatenate data model types.
|
203 |
+
|
204 |
+
### Concatenation Table
|
205 |
+
"""
|
206 |
+
)
|
207 |
+
return
|
208 |
+
|
209 |
+
|
210 |
+
@app.cell(hide_code=True)
|
211 |
+
def _(mo):
|
212 |
+
mo.md(
|
213 |
+
r"""
|
214 |
+
| `x1` | `x2` | Concat (`+`) |
|
215 |
+
| ------ | ------ | ----------------- |
|
216 |
+
| `x1` | `x2` | `x1 + x2` |
|
217 |
+
| `x1` | `None` | `Exception` |
|
218 |
+
| `None` | `x2` | `Exception` |
|
219 |
+
| `None` | `None` | `Exception` |
|
220 |
+
"""
|
221 |
+
).center()
|
222 |
+
return
|
223 |
+
|
224 |
+
|
225 |
+
@app.cell(hide_code=True)
|
226 |
+
def _(mo):
|
227 |
+
mo.md(
|
228 |
+
r"""
|
229 |
+
### Concatenation Example
|
230 |
+
"""
|
231 |
+
)
|
232 |
+
return
|
233 |
+
|
234 |
+
|
235 |
+
@app.cell
async def _(AnswerWithThinking, Query, language_model, synalinks):
    # Demonstrates the concatenation operator (`+`): two generators run on the
    # same input, and their outputs are merged into one data model.
    _x0 = synalinks.Input(data_model=Query)
    # First generator: produces an AnswerWithThinking from the input query.
    _x1 = await synalinks.Generator(
        data_model=AnswerWithThinking,
        language_model=language_model,
    )(_x0)
    # Second generator: same specification, applied to the same input.
    _x2 = await synalinks.Generator(
        data_model=AnswerWithThinking,
        language_model=language_model,
    )(_x0)
    # Concatenation: `_x3` carries the fields of both `_x1` and `_x2`.
    # Per the table above, it raises an exception if either operand is None.
    _x3 = _x1 + _x2

    program_3 = synalinks.Program(
        inputs=_x0,
        outputs=_x3,
        name="concatenation",
        description="Illustrate the use of concatenate",
    )
    return (program_3,)
|
255 |
+
|
256 |
+
|
257 |
+
@app.cell
|
258 |
+
def _(mo, program_3, synalinks):
|
259 |
+
synalinks.utils.plot_program(
|
260 |
+
program_3,
|
261 |
+
show_module_names=True,
|
262 |
+
show_schemas=True,
|
263 |
+
show_trainable=True,
|
264 |
+
)
|
265 |
+
return
|
266 |
+
|
267 |
+
|
268 |
+
@app.cell(hide_code=True)
|
269 |
+
def _(mo):
|
270 |
+
mo.md(
|
271 |
+
r"""
|
272 |
+
### Logical And
|
273 |
+
|
274 |
+
The `logical_and` is a concatenation that instead of raising an `Exception`,
|
275 |
+
output a `None`. This operator should be used when you have to concatenate
|
276 |
+
a data model with another one that can be `None`, like a `Branch` output.
|
277 |
+
|
278 |
+
### Logical And Table
|
279 |
+
"""
|
280 |
+
)
|
281 |
+
return
|
282 |
+
|
283 |
+
@app.cell(hide_code=True)
|
284 |
+
def _(mo):
|
285 |
+
mo.md(
|
286 |
+
r"""
|
287 |
+
| `x1` | `x2` | Logical And (`&`) |
|
288 |
+
| ------ | ------ | ----------------- |
|
289 |
+
| `x1` | `x2` | `x1 + x2` |
|
290 |
+
| `x1` | `None` | `None` |
|
291 |
+
| `None` | `x2` | `None` |
|
292 |
+
| `None` | `None` | `None` |
|
293 |
+
"""
|
294 |
+
).center()
|
295 |
+
return
|
296 |
+
|
297 |
+
|
298 |
+
@app.cell(hide_code=True)
|
299 |
+
def _(mo):
|
300 |
+
mo.md(
|
301 |
+
r"""
|
302 |
+
### Logical And Example
|
303 |
+
"""
|
304 |
+
)
|
305 |
+
return
|
306 |
+
|
307 |
+
@app.cell
|
308 |
+
async def _(Answer, AnswerWithThinking, Query, language_model, synalinks):
|
309 |
+
class Critique(synalinks.DataModel):
|
310 |
+
critique: str = synalinks.Field(
|
311 |
+
description="The critique of the answer",
|
312 |
+
)
|
313 |
+
|
314 |
+
_x0 = synalinks.Input(data_model=Query)
|
315 |
+
(_x1, _x2) = await synalinks.Branch(
|
316 |
+
question="Evaluate the difficulty to answer the provided query",
|
317 |
+
labels=["easy", "difficult"],
|
318 |
+
branches=[
|
319 |
+
synalinks.Generator(
|
320 |
+
data_model=Answer,
|
321 |
+
language_model=language_model,
|
322 |
+
),
|
323 |
+
synalinks.Generator(
|
324 |
+
data_model=AnswerWithThinking,
|
325 |
+
language_model=language_model,
|
326 |
+
),
|
327 |
+
],
|
328 |
+
return_decision=False,
|
329 |
+
)(_x0)
|
330 |
+
_x3 = _x0 & _x1
|
331 |
+
_x4 = _x0 & _x2
|
332 |
+
_x5 = await synalinks.Generator(
|
333 |
+
data_model=Critique,
|
334 |
+
language_model=language_model,
|
335 |
+
return_inputs=True,
|
336 |
+
)(_x3)
|
337 |
+
_x6 = await synalinks.Generator(
|
338 |
+
data_model=Critique,
|
339 |
+
language_model=language_model,
|
340 |
+
return_inputs=True,
|
341 |
+
)(_x4)
|
342 |
+
_x7 = _x5 | _x6
|
343 |
+
_x8 = await synalinks.Generator(
|
344 |
+
data_model=Answer,
|
345 |
+
language_model=language_model,
|
346 |
+
)(_x7)
|
347 |
+
|
348 |
+
program_4 = synalinks.Program(
|
349 |
+
inputs=_x0,
|
350 |
+
outputs=_x8,
|
351 |
+
name="logical_and",
|
352 |
+
description="Illustrate the use of logical and",
|
353 |
+
)
|
354 |
+
return Critique, program_4
|
355 |
+
|
356 |
+
|
357 |
+
@app.cell
|
358 |
+
def _(mo, program_4, synalinks):
|
359 |
+
synalinks.utils.plot_program(
|
360 |
+
program_4,
|
361 |
+
show_module_names=True,
|
362 |
+
show_schemas=True,
|
363 |
+
show_trainable=True,
|
364 |
+
)
|
365 |
+
return
|
366 |
+
|
367 |
+
|
368 |
+
@app.cell(hide_code=True)
|
369 |
+
def _(mo):
|
370 |
+
mo.md(
|
371 |
+
r"""
|
372 |
+
### Logical Or
|
373 |
+
|
374 |
+
The `logical_or` is used when you want to combine two data models, but you can
|
375 |
+
accommodate that one of them is `None`. Another use is to gather the outputs of
|
376 |
+
a `Branch`: as only one branch is active, it allows you to merge the branches' outputs
|
377 |
+
into a unique data model.
|
378 |
+
|
379 |
+
|
380 |
+
### Logical Or Table
|
381 |
+
"""
|
382 |
+
)
|
383 |
+
return
|
384 |
+
|
385 |
+
|
386 |
+
@app.cell(hide_code=True)
|
387 |
+
def _(mo):
|
388 |
+
mo.md(
|
389 |
+
r"""
|
390 |
+
| `x1` | `x2` | Logical Or (`|`) |
|
391 |
+
| ------ | ------ | ---------------- |
|
392 |
+
| `x1` | `x2` | `x1 + x2` |
|
393 |
+
| `x1` | `None` | `x1` |
|
394 |
+
| `None` | `x2` | `x2` |
|
395 |
+
| `None` | `None` | `None` |
|
396 |
+
"""
|
397 |
+
).center()
|
398 |
+
return
|
399 |
+
|
400 |
+
|
401 |
+
@app.cell(hide_code=True)
|
402 |
+
def _(mo):
|
403 |
+
mo.md(
|
404 |
+
r"""
|
405 |
+
### Logical Or Example
|
406 |
+
"""
|
407 |
+
)
|
408 |
+
return
|
409 |
+
|
410 |
+
|
411 |
+
@app.cell
|
412 |
+
async def _(Answer, AnswerWithThinking, Query, language_model, synalinks):
|
413 |
+
_x0 = synalinks.Input(data_model=Query)
|
414 |
+
(_x1, _x2) = await synalinks.Branch(
|
415 |
+
question="Evaluate the difficulty to answer the provided query",
|
416 |
+
labels=["easy", "difficult"],
|
417 |
+
branches=[
|
418 |
+
synalinks.Generator(
|
419 |
+
data_model=Answer,
|
420 |
+
language_model=language_model,
|
421 |
+
),
|
422 |
+
synalinks.Generator(
|
423 |
+
data_model=AnswerWithThinking, language_model=language_model
|
424 |
+
),
|
425 |
+
],
|
426 |
+
return_decision=False,
|
427 |
+
)(_x0)
|
428 |
+
_x3 = _x1 | _x2
|
429 |
+
|
430 |
+
program_5 = synalinks.Program(
|
431 |
+
inputs=_x0,
|
432 |
+
outputs=_x3,
|
433 |
+
name="logical_or",
|
434 |
+
description="Illustrate the use of logical or",
|
435 |
+
)
|
436 |
+
return (program_5,)
|
437 |
+
|
438 |
+
|
439 |
+
@app.cell
|
440 |
+
def _(mo, program_5, synalinks):
|
441 |
+
synalinks.utils.plot_program(
|
442 |
+
program_5,
|
443 |
+
show_module_names=True,
|
444 |
+
show_schemas=True,
|
445 |
+
show_trainable=True,
|
446 |
+
)
|
447 |
+
return
|
448 |
+
|
449 |
+
|
450 |
+
@app.cell(hide_code=True)
|
451 |
+
async def _(mo):
|
452 |
+
mo.md(
|
453 |
+
r"""
|
454 |
+
## Conclusion
|
455 |
+
|
456 |
+
In this notebook, we explored the fundamental concepts of controlling information
|
457 |
+
flow within Synalinks programs. We introduced the creation of parallel branches,
|
458 |
+
decision-making processes, and conditional branching, all of which are essential
|
459 |
+
for building dynamic and robust applications.
|
460 |
+
|
461 |
+
### Key Takeaways
|
462 |
+
|
463 |
+
- **Parallel Branches**: We demonstrated how to run modules in parallel using
|
464 |
+
the same inputs, leveraging asyncio for concurrent execution.
|
465 |
+
This approach enhances performance and allows for simultaneous processing of tasks.
|
466 |
+
|
467 |
+
- **Decision-Making**: We introduced decision-making as a form of single-label
|
468 |
+
classification, enabling the system to classify inputs based on predefined
|
469 |
+
questions and labels. This ensures that the system's responses are structured
|
470 |
+
and adhere to the specified schemas.
|
471 |
+
|
472 |
+
- **Conditional Branching**: We explored the use of the Branch module to route
|
473 |
+
input data models based on decisions, allowing for conditional execution of
|
474 |
+
branches. This feature is essential for creating adaptive and context-aware
|
475 |
+
applications.
|
476 |
+
|
477 |
+
- **Data Model Operators**: We discussed various data model operators, such as
|
478 |
+
concatenation, logical AND, and logical OR. These operators enable
|
479 |
+
sophisticated data manipulation and flow control, ensuring robust program
|
480 |
+
execution even when branches output None.
|
481 |
+
"""
|
482 |
+
)
|
483 |
+
return
|
484 |
+
|
485 |
+
|
486 |
+
if __name__ == "__main__":
|
487 |
+
app.run()
|
code_examples/1_basics/4_conversational_applications.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# Conversational Applications
|
21 |
+
|
22 |
+
Synalinks is designed to handle conversational applications as well as
|
23 |
+
query-based systems. In the case of a conversational applications, the
|
24 |
+
input data model is a list of chat messages, and the output an individual
|
25 |
+
chat message. The `Program` is in that case responsible of handling a
|
26 |
+
**single conversation turn**.
|
27 |
+
"""
|
28 |
+
)
|
29 |
+
return
|
30 |
+
|
31 |
+
|
32 |
+
@app.cell(hide_code=True)
|
33 |
+
def _(mo):
|
34 |
+
mo.md(
|
35 |
+
r"""
|
36 |
+
Now we can program our application like you would do with any `Program`. For this example,
|
37 |
+
we are going to make a very simple chatbot.
|
38 |
+
|
39 |
+
By default, if no data_model/schema is provided to the `Generator` it will output a `ChatMessage` like output.
|
40 |
+
If the data model is `None`, then you can enable streaming.
|
41 |
+
|
42 |
+
**Note:** Streaming is disabled during training and should only be used in the **last** `Generator` of your pipeline.
|
43 |
+
"""
|
44 |
+
)
|
45 |
+
return
|
46 |
+
|
47 |
+
|
48 |
+
@app.cell
|
49 |
+
async def _(synalinks):
|
50 |
+
from synalinks.backend import ChatMessage
|
51 |
+
from synalinks.backend import ChatRole
|
52 |
+
from synalinks.backend import ChatMessages
|
53 |
+
|
54 |
+
language_model = synalinks.LanguageModel(
|
55 |
+
model="openai/gpt-4o-mini",
|
56 |
+
)
|
57 |
+
|
58 |
+
_x0 = synalinks.Input(data_model=ChatMessages)
|
59 |
+
_x1 = await synalinks.Generator(
|
60 |
+
language_model=language_model,
|
61 |
+
prompt_template=synalinks.chat_prompt_template(),
|
62 |
+
streaming=False, # Marimo chat don't handle streaming yet
|
63 |
+
)(_x0)
|
64 |
+
|
65 |
+
program = synalinks.Program(
|
66 |
+
inputs=_x0,
|
67 |
+
outputs=_x1,
|
68 |
+
)
|
69 |
+
|
70 |
+
# Let's plot this program to understand it
|
71 |
+
|
72 |
+
synalinks.utils.plot_program(
|
73 |
+
program,
|
74 |
+
show_module_names=True,
|
75 |
+
show_trainable=True,
|
76 |
+
show_schemas=True,
|
77 |
+
)
|
78 |
+
return ChatMessage, ChatMessages, ChatRole, language_model, program
|
79 |
+
|
80 |
+
|
81 |
+
@app.cell(hide_code=True)
|
82 |
+
def _(mo):
|
83 |
+
mo.md(
|
84 |
+
r"""
|
85 |
+
## Running the chatbot inside the notebook
|
86 |
+
|
87 |
+
In this example, we will show you how to run the conversational application inside this reactive notebook.
|
88 |
+
"""
|
89 |
+
)
|
90 |
+
return
|
91 |
+
|
92 |
+
|
93 |
+
@app.cell(hide_code=True)
def _(mo):
    # Form asking for the OpenAI API key; the value is read by the next cell
    # to populate the process environment.
    openai_api_key = mo.ui.text_area(placeholder="Your OpenAI API key...").form()
    openai_api_key
    # Bug fix: the cell below declares `openai_api_key` as a parameter, so it
    # must be exported here. The original bare `return` exported nothing,
    # which made the dependent cell fail to resolve the name.
    return (openai_api_key,)
|
98 |
+
|
99 |
+
|
100 |
+
@app.cell(hide_code=True)
|
101 |
+
def _(mo, openai_api_key):
|
102 |
+
import os
|
103 |
+
mo.stop(not openai_api_key.value)
|
104 |
+
os.environ["OPENAI_API_KEY"] = openai_api_key.value
|
105 |
+
return
|
106 |
+
|
107 |
+
|
108 |
+
@app.cell(hide_code=True)
def _(ChatMessage, ChatMessages, ChatRole, mo, program):
    import os

    # Bug fix: the original guard read `openai_api_key.value`, but that UI
    # element is defined in another cell and was never declared as a
    # parameter here (NameError at runtime). The key is written to the
    # environment by the cell above, so check the environment instead.
    mo.stop(not os.environ.get("OPENAI_API_KEY"), mo.md("Provide your OpenAI API key"))

    def cleanup_assistant_message(msg):
        """Strip the HTML wrapper marimo adds around assistant messages."""
        start_tok = '<span class="paragraph">'
        end_tok = "</span>"
        # Bug fix: `str.find` returns -1 when the substring is absent; the
        # original `> 0` test silently skipped a marker located at index 0.
        if msg.content.find(start_tok) != -1:
            msg.content = msg.content[msg.content.find(start_tok) + len(start_tok) :]
        # Searching from index 1, so a hit can never be 0; `!= -1` is the
        # explicit "found" test.
        if msg.content.find(end_tok, 1) != -1:
            msg.content = msg.content[: msg.content.find(end_tok, 1)]
        return msg

    async def synalinks_program(messages, config):
        """Handle one conversation turn: rebuild the chat history from the
        marimo messages, then run the Synalinks program on it."""
        chat_history = ChatMessages()
        for msg in messages:
            if msg.role == "user":
                chat_history.messages.append(
                    ChatMessage(
                        role=ChatRole.USER,
                        content=msg.content,
                    )
                )
            else:
                # Assistant messages come back HTML-wrapped; clean them before
                # feeding them back into the history.
                msg = cleanup_assistant_message(msg)
                chat_history.messages.append(
                    ChatMessage(
                        role=ChatRole.ASSISTANT,
                        content=msg.content,
                    )
                )
        result = await program(chat_history)
        return result.get("content")

    chat = mo.ui.chat(synalinks_program)
    chat
    return chat, cleanup_assistant_message, synalinks_program
|
145 |
+
|
146 |
+
|
147 |
+
@app.cell(hide_code=True)
|
148 |
+
async def _(mo):
|
149 |
+
mo.md(
|
150 |
+
r"""
|
151 |
+
## Conclusion
|
152 |
+
|
153 |
+
In this notebook, we explored how Synalinks handle conversational applications.
|
154 |
+
You have now a solid understanding to create chatbots and conversational agents.
|
155 |
+
|
156 |
+
### Key Takeaways
|
157 |
+
|
158 |
+
- **Conversational Flow Management**: Synalinks effectively manages conversational
|
159 |
+
applications by handling inputs as a list of chat messages and generating
|
160 |
+
individual chat messages as outputs. This structure allows for efficient
|
161 |
+
processing of conversation turns.
|
162 |
+
|
163 |
+
- **Streaming and Real-Time Interaction**: Synalinks supports streaming for
|
164 |
+
real-time interactions, enhancing user engagement. However, streaming is
|
165 |
+
disabled during training and should be used only in the final `Generator`.
|
166 |
+
|
167 |
+
- **Customizable Prompt Templates**: The prompt templates can be tailored to fit
|
168 |
+
conversational contexts, guiding the language model to produce coherent and
|
169 |
+
relevant responses.
|
170 |
+
"""
|
171 |
+
)
|
172 |
+
return
|
173 |
+
|
174 |
+
|
175 |
+
if __name__ == "__main__":
|
176 |
+
app.run()
|
code_examples/1_basics/5_rewards_metrics_and_optimizers.py
ADDED
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# Rewards, Metrics & Optimizers
|
21 |
+
|
22 |
+
## Understanding Rewards
|
23 |
+
|
24 |
+
`Reward`s are an essential part of reinforcement learning frameworks.
|
25 |
+
They are typically float values (usually between 0.0 and 1.0, but they can be
|
26 |
+
negative also) that guide the process into making more efficient decisions or
|
27 |
+
predictions. During training, the goal is to maximize the reward function.
|
28 |
+
The reward gives the system an indication of how well it performed for that task.
|
29 |
+
"""
|
30 |
+
)
|
31 |
+
return
|
32 |
+
|
33 |
+
|
34 |
+
@app.cell(hide_code=True)
|
35 |
+
def _(mo):
|
36 |
+
mo.mermaid(
|
37 |
+
r"""
|
38 |
+
graph LR
|
39 |
+
A[Training Data] -->|Provide x:DataModel| B[Program];
|
40 |
+
B -->|Generate y_pred:JsonDataModel| C[Reward];
|
41 |
+
A -->|Provide y_true:DataModel| C;
|
42 |
+
C -->|Compute reward:Float| D[Optimizer];
|
43 |
+
D -->|Update trainable_variable:Variable| B;
|
44 |
+
"""
|
45 |
+
)
|
46 |
+
return
|
47 |
+
|
48 |
+
|
49 |
+
@app.cell(hide_code=True)
|
50 |
+
def _(mo):
|
51 |
+
mo.md(
|
52 |
+
r"""
|
53 |
+
This reinforcement loop is what makes possible for the system to learn by
|
54 |
+
repeatedly making predictions and refining its knowledge/methodology in order
|
55 |
+
to maximize the reward.
|
56 |
+
|
57 |
+
All rewards consist of a function or program that takes two inputs:
|
58 |
+
|
59 |
+
- `y_pred`: The prediction of the program.
|
60 |
+
- `y_true`: The ground truth/target value provided by the training data.
|
61 |
+
|
62 |
+
In Synalinks, we provide for several built-in rewards but it is also possible to
|
63 |
+
easily create new rewards if you needs to. Overall the choice will depend on the
|
64 |
+
task to perform. You can have a look at the rewards provided in the
|
65 |
+
[API section](https://synalinks.github.io/synalinks/Synalinks%20API/Rewards/).
|
66 |
+
|
67 |
+
### Understanding Metrics
|
68 |
+
|
69 |
+
`Metric`s are scalar values that are monitored during training and evaluation.
|
70 |
+
These values are used to know which program is best, in order to save it. Or to
|
71 |
+
provide additional information to compare different architectures with each others.
|
72 |
+
Unlike `Reward`s, a `Metric` is not used during training, meaning the metric value
|
73 |
+
is not backpropagated. Additionaly every reward function can be used as metric.
|
74 |
+
You can have a look at the metrics provided in the
|
75 |
+
[API section](https://synalinks.github.io/synalinks/Synalinks%20API/Metrics/).
|
76 |
+
|
77 |
+
### Predictions Filtering
|
78 |
+
|
79 |
+
Sometimes, your program have to output a complex JSON but you want to evaluate
|
80 |
+
just part of it. This could be because your training data only include a subset
|
81 |
+
of the JSON, or because the additional fields were added only to help the LMs.
|
82 |
+
In that case, you have to filter out or filter in your predictions and ground
|
83 |
+
truth. Meaning that you want to remove or keep respectively only specific fields
|
84 |
+
of your JSON data. This can be achieved by adding a `out_mask` or `in_mask` list
|
85 |
+
parameter containing the keys to remove or keep for evaluation. This parameters
|
86 |
+
can be added to both reward and metrics. Like in the above example where we only
|
87 |
+
keep the field `answer` to compute the rewards and metrics.
|
88 |
+
|
89 |
+
### Understanding Optimizers
|
90 |
+
|
91 |
+
Optimizers are systems that handle the update of the module's state in order to
|
92 |
+
make them more performant. They are in charge of backpropagating the rewards
|
93 |
+
from the training process and select or generate examples and hints for the LMs.
|
94 |
+
|
95 |
+
Here is an example of program compilation, which is how you configure the reward,
|
96 |
+
metrics, and optimizer:
|
97 |
+
"""
|
98 |
+
)
|
99 |
+
return
|
100 |
+
|
101 |
+
|
102 |
+
@app.cell
def _(synalinks):

    class Query(synalinks.DataModel):
        # Input data model: the raw user question.
        query: str = synalinks.Field(
            description="The user query",
        )

    class AnswerWithThinking(synalinks.DataModel):
        # Output data model: chain-of-thought text plus the final answer.
        thinking: str = synalinks.Field(
            description="Your step by step thinking process",
        )
        answer: str = synalinks.Field(
            description="The correct answer",
        )
    return Query, AnswerWithThinking
|
118 |
+
|
119 |
+
|
120 |
+
@app.cell
async def _(AnswerWithThinking, Query, synalinks):
    # Bug fix: `Query` and `AnswerWithThinking` are defined in the previous
    # cell and must be declared as cell parameters; the original signature
    # `async def _(synalinks)` made them unresolved names (NameError).

    language_model = synalinks.LanguageModel(
        model="openai/gpt-4o-mini",
    )

    _x0 = synalinks.Input(data_model=Query)
    _x1 = await synalinks.Generator(
        data_model=AnswerWithThinking,
        language_model=language_model,
    )(_x0)

    program = synalinks.Program(
        inputs=_x0,
        outputs=_x1,
        name="chain_of_thought",
        description="Usefull to answer in a step by step manner.",
    )

    # Compilation wires the reward, optimizer and metrics used for
    # training/evaluation; `in_mask` restricts scoring to the "answer" field.
    program.compile(
        reward=synalinks.rewards.CosineSimilarity(in_mask=["answer"]),
        optimizer=synalinks.optimizers.RandomFewShot(),
        metrics=[
            synalinks.metrics.F1Score(in_mask=["answer"]),
        ],
    )
    # Bug fix: marimo cells export a tuple matching their definitions; the
    # bare `return program` did not match the expected one-element tuple
    # (compare the other cells, e.g. `return (program_3,)`).
    return (program,)
|
148 |
+
|
149 |
+
|
150 |
+
@app.cell(hide_code=True)
|
151 |
+
async def _(mo):
|
152 |
+
mo.md(
|
153 |
+
r"""
|
154 |
+
## Conclusion
|
155 |
+
|
156 |
+
In this notebook, we explored the fundamental concepts of training and
|
157 |
+
optimizing Synalinks programs using rewards, metrics, and optimizers.
|
158 |
+
These components are crucial for building efficient and adaptive language
|
159 |
+
model applications.
|
160 |
+
|
161 |
+
### Key Takeaways
|
162 |
+
|
163 |
+
- **Rewards**: `Reward`s guide the reinforcement learning process by
|
164 |
+
providing feedback on the system's performance. They are typically
|
165 |
+
float values that indicate how well the system performed a task,
|
166 |
+
with the goal of maximizing the reward function during training.
|
167 |
+
Synalinks offers built-in rewards and allows for custom reward
|
168 |
+
functions to suit specific tasks.
|
169 |
+
|
170 |
+
- **Metrics**: `Metric`s are scalar values monitored during training
|
171 |
+
and evaluation to determine the best-performing program. Unlike
|
172 |
+
rewards, metrics are not used for backpropagation. They provide
|
173 |
+
additional insights for comparing different architectures and
|
174 |
+
saving the optimal model.
|
175 |
+
|
176 |
+
- **Optimizers**: `Optimizer`s update the module's state to improve
|
177 |
+
performance. They handle the backpropagation of rewards and
|
178 |
+
select or generate examples and hints for the language models.
|
179 |
+
Proper configuration of optimizers is essential for effective
|
180 |
+
training.
|
181 |
+
|
182 |
+
- **Filtering Outputs**: When dealing with complex JSON outputs,
|
183 |
+
filtering predictions and ground truths using `out_mask` or
|
184 |
+
`in_mask` parameters ensures that only relevant fields are
|
185 |
+
evaluated. This is particularly useful when the training data
|
186 |
+
includes a subset of the JSON or when additional fields are
|
187 |
+
used to aid the language models.
|
188 |
+
"""
|
189 |
+
)
|
190 |
+
return
|
191 |
+
|
192 |
+
|
193 |
+
if __name__ == "__main__":
|
194 |
+
app.run()
|
code_examples/1_basics/6_training_programs.py
ADDED
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# Training Programs
|
21 |
+
|
22 |
+
Like in machine learning, a LM application needs to be trained. In that case, we
|
23 |
+
don't update the weights of the model, but optimize the prompts by automatically
|
24 |
+
picking the best examples or generate hints in order to help the program to
|
25 |
+
perform better on your dataset.
|
26 |
+
|
27 |
+
For this lesson we are going to work on GSM8k a well known dataset of grade school
|
28 |
+
math word problems. Nowadays, most (all?) public datasets have been leaked, meaning
|
29 |
+
that their test set have been included in the LM trainset. This basically means
|
30 |
+
that the baseline score won't give you much information about the reasoning abilities
|
31 |
+
of the underlying language model (but more about its capability to remember),
|
32 |
+
however it is still interesting to have it as a baseline to evaluate the progress
|
33 |
+
of the programs training and the neuro-symbolic methods used or if you use small
|
34 |
+
models like here.
|
35 |
+
|
36 |
+
First, let's have a look at the dataset.
|
37 |
+
"""
|
38 |
+
)
|
39 |
+
return
|
40 |
+
|
41 |
+
|
42 |
+
@app.cell
|
43 |
+
def _(synalinks):
|
44 |
+
gsm8k_input_data_model = synalinks.datasets.gsm8k.get_input_data_model()
|
45 |
+
print("GSM8K input schema:\n")
|
46 |
+
print(gsm8k_input_data_model.pretty_schema())
|
47 |
+
return (gsm8k_input_data_model,)
|
48 |
+
|
49 |
+
|
50 |
+
@app.cell
|
51 |
+
def _(synalinks):
|
52 |
+
gsm8k_output_data_model = synalinks.datasets.gsm8k.get_output_data_model()
|
53 |
+
print("GSM8K output schema:\n")
|
54 |
+
print(gsm8k_output_data_model.pretty_schema())
|
55 |
+
return (gsm8k_output_data_model,)
|
56 |
+
|
57 |
+
|
58 |
+
@app.cell(hide_code=True)
|
59 |
+
def _(mo):
|
60 |
+
mo.md(
|
61 |
+
r"""
|
62 |
+
## Programming the pipeline
|
63 |
+
|
64 |
+
Now let's make a simple baseline program like in the first lessons
|
65 |
+
For this example we are going to use the data models from GSM8k.
|
66 |
+
"""
|
67 |
+
)
|
68 |
+
return
|
69 |
+
|
70 |
+
|
71 |
+
@app.cell
|
72 |
+
async def _(gsm8k_input_data_model, gsm8k_output_data_model, synalinks):
|
73 |
+
|
74 |
+
language_model = synalinks.LanguageModel(
|
75 |
+
model="openai/gpt-4o-mini",
|
76 |
+
)
|
77 |
+
|
78 |
+
_x0 = synalinks.Input(data_model=gsm8k_input_data_model)
|
79 |
+
_x1 = await synalinks.Generator(
|
80 |
+
data_model=gsm8k_output_data_model,
|
81 |
+
language_model=language_model,
|
82 |
+
)(_x0)
|
83 |
+
|
84 |
+
program = synalinks.Program(
|
85 |
+
inputs=_x0,
|
86 |
+
outputs=_x1,
|
87 |
+
name="chain_of_thought",
|
88 |
+
description="Usefull to answer in a step by step manner.",
|
89 |
+
)
|
90 |
+
return language_model, program
|
91 |
+
|
92 |
+
|
93 |
+
@app.cell(hide_code=True)
|
94 |
+
def _(mo):
|
95 |
+
mo.md(
|
96 |
+
r"""
|
97 |
+
## Compiling the program
|
98 |
+
|
99 |
+
For this example, we are going to select the `RandomFewShot` optimizer.
|
100 |
+
The reward function will be `ExactMatch` masked to match only the numerical answer.
|
101 |
+
While the additional metric will be the `F1Score` masked to process only the LMs thinking.
|
102 |
+
|
103 |
+
This metric will give us an indication to see if the chain of thought match with the dataset one.
|
104 |
+
"""
|
105 |
+
)
|
106 |
+
return
|
107 |
+
|
108 |
+
|
109 |
+
@app.cell
|
110 |
+
def _(program, synalinks):
|
111 |
+
program.compile(
|
112 |
+
optimizer=synalinks.optimizers.RandomFewShot(),
|
113 |
+
reward=synalinks.rewards.ExactMatch(in_mask=["answer"]),
|
114 |
+
metrics=[
|
115 |
+
synalinks.metrics.F1Score(in_mask=["thinking"]),
|
116 |
+
],
|
117 |
+
)
|
118 |
+
return
|
119 |
+
|
120 |
+
|
121 |
+
@app.cell(hide_code=True)
|
122 |
+
def _(mo):
|
123 |
+
mo.md(
|
124 |
+
r"""
|
125 |
+
## Training
|
126 |
+
|
127 |
+
### What do "sample", "batch", and "epoch" mean?
|
128 |
+
|
129 |
+
- **Sample**: A sample is one element of a dataset. For example, one DataModel
|
130 |
+
is one sample.
|
131 |
+
- **Batch**: A batch is a set of N samples. The samples in a batch are processed
|
132 |
+
independently, in parallel. During training, a batch result in only one
|
133 |
+
program update. A batch approximates the input distribution better than a
|
134 |
+
single input. The larger the batch, the better the approximation; however a
|
135 |
+
larger batch will take longer to process and still result in only one update.
|
136 |
+
- **Epochs**: An epoch is an arbitrary cutoff, generally defined as "one pass
|
137 |
+
over the entire dataset", used to separate training into distinct phases,
|
138 |
+
which is useful for logging and periodic evaluation. When using
|
139 |
+
`validation_split` or `validation_data` with the `fit` method of Synalinks
|
140 |
+
programs, evaluation will be run at the end of every epoch.
|
141 |
+
"""
|
142 |
+
)
|
143 |
+
return
|
144 |
+
|
145 |
+
|
146 |
+
@app.cell(hide_code=True)
|
147 |
+
def _(mo):
|
148 |
+
load_data = mo.ui.run_button(label="Load dataset")
|
149 |
+
load_data.center()
|
150 |
+
return (load_data,)
|
151 |
+
|
152 |
+
|
153 |
+
@app.cell
|
154 |
+
def _(load_data, mo, synalinks):
|
155 |
+
mo.stop(not load_data.value, mo.md("Click on the load button above"))
|
156 |
+
# Now we can load the dataset
|
157 |
+
with mo.status.spinner(title="Loading dataset...") as _spinner:
|
158 |
+
(x_train, y_train), (x_test, y_test) = synalinks.datasets.gsm8k.load_data()
|
159 |
+
_spinner.update("Done.")
|
160 |
+
return x_test, x_train, y_test, y_train
|
161 |
+
|
162 |
+
|
163 |
+
@app.cell(hide_code=True)
|
164 |
+
def _(mo, x_test, x_train):
|
165 |
+
epochs = mo.ui.slider(start=1, stop=64, value=5, label="Epochs")
|
166 |
+
batch_size = mo.ui.slider(start=1, stop=64, value=32, label="Batch size")
|
167 |
+
train_samples = mo.ui.slider(
|
168 |
+
start=1, stop=len(x_train), value=50, label="Train Samples"
|
169 |
+
)
|
170 |
+
test_samples = mo.ui.slider(start=1, stop=len(x_test), value=50, label="Test Samples")
|
171 |
+
return batch_size, epochs, test_samples, train_samples
|
172 |
+
|
173 |
+
|
174 |
+
@app.cell(hide_code=True)
def _(epochs, mo):
    # Bug fix: `mo` was used without being declared as a cell parameter,
    # which raises a NameError under marimo's dependency model.
    mo.hstack([epochs, mo.md(f"Epochs: {epochs.value}")])
    return
|
178 |
+
|
179 |
+
@app.cell(hide_code=True)
def _(batch_size, mo):
    # Bug fix: `mo` was used without being declared as a cell parameter,
    # which raises a NameError under marimo's dependency model.
    mo.hstack([batch_size, mo.md(f"Batch size: {batch_size.value}")])
    return
|
183 |
+
|
184 |
+
@app.cell(hide_code=True)
def _(mo, train_samples):
    # Bug fix: `mo` was used without being declared as a cell parameter,
    # which raises a NameError under marimo's dependency model.
    mo.hstack([train_samples, mo.md(f"Nb train samples: {train_samples.value}")])
    return
|
188 |
+
|
189 |
+
@app.cell(hide_code=True)
def _(mo, test_samples):
    # Bug fix: `mo` was used without being declared as a cell parameter,
    # which raises a NameError under marimo's dependency model.
    mo.hstack([test_samples, mo.md(f"Nb test samples: {test_samples.value}")])
    return
|
193 |
+
|
194 |
+
@app.cell(hide_code=True)
def _(mo):
    # Form asking for the OpenAI API key; the value is read by the next cell
    # to populate the process environment.
    openai_api_key = mo.ui.text_area(placeholder="Your OpenAI API key...").form()
    openai_api_key
    # Bug fix: the cell below declares `openai_api_key` as a parameter, so it
    # must be exported here. The original bare `return` exported nothing,
    # which made the dependent cell fail to resolve the name.
    return (openai_api_key,)
|
199 |
+
|
200 |
+
|
201 |
+
@app.cell(hide_code=True)
|
202 |
+
def _(mo, openai_api_key):
|
203 |
+
import os
|
204 |
+
mo.stop(not openai_api_key.value)
|
205 |
+
os.environ["OPENAI_API_KEY"] = openai_api_key.value
|
206 |
+
return
|
207 |
+
|
208 |
+
|
209 |
+
@app.cell(hide_code=True)
|
210 |
+
def _(mo):
|
211 |
+
train_button = mo.ui.run_button(label="Train")
|
212 |
+
train_button.center()
|
213 |
+
return (train_button,)
|
214 |
+
|
215 |
+
|
216 |
+
@app.cell
async def train(
    batch_size,
    epochs,
    mo,
    openai_api_key,
    program,
    train_button,
    synalinks,
    test_samples,
    train_samples,
    x_test,
    x_train,
    y_test,
    y_train,
):
    # BUGFIX: `openai_api_key` was used in the body without being declared
    # as a cell parameter, which raises a NameError when the cell runs.
    mo.stop(not openai_api_key.value, mo.md("Provide your OpenAI API key"))
    mo.stop(not train_button.value, mo.md("Click on the train button above"))
    # Where to save the best performing program
    checkpoint_filepath = "checkpoint.program.json"

    # Keep only the program that maximizes the validation reward.
    _program_checkpoint_callback = synalinks.callbacks.ProgramCheckpoint(
        filepath=checkpoint_filepath,
        monitor="val_reward",
        mode="max",
        save_best_only=True,
    )

    # For the purpose of the tutorial, we'll only train on the first N samples

    history = await program.fit(
        epochs=epochs.value,
        batch_size=batch_size.value,
        x=x_train[: train_samples.value],
        y=y_train[: train_samples.value],
        validation_data=(x_test[: test_samples.value], y_test[: test_samples.value]),
        callbacks=[_program_checkpoint_callback],
    )
    return checkpoint_filepath, history
|
254 |
+
|
255 |
+
|
256 |
+
@app.cell
|
257 |
+
def _(history, synalinks):
|
258 |
+
synalinks.utils.plot_history(history)
|
259 |
+
return
|
260 |
+
|
261 |
+
|
262 |
+
@app.cell(hide_code=True)
def _(mo):
    # BUGFIX: this cell uses `mo`, not `synalinks`; the original declared
    # the wrong dependency and would raise a NameError on `mo`.
    mo.md(
        r"""
        ## Evaluate Checkpoint
        """
    )
    return
|
270 |
+
|
271 |
+
|
272 |
+
@app.cell
def _(
    checkpoint_filepath,
    train,
    x_test,
    y_test,
    test_samples,
    synalinks,
):
    # NOTE: `train` is declared only to order this cell after training.
    # Load the JSON serialized program from disk
    loaded_program = synalinks.Program.load(checkpoint_filepath)

    # BUGFIX: slice with the slider's value, not the slider widget itself
    # (slicing a list with a UI element raises a TypeError).
    metrics = loaded_program.evaluate(
        x=x_test[: test_samples.value],
        y=y_test[: test_samples.value],
    )

    synalinks.utils.plot_metrics(metrics)
    return
|
290 |
+
|
291 |
+
@app.cell(hide_code=True)
|
292 |
+
async def _(mo):
|
293 |
+
mo.md(
|
294 |
+
r"""
|
295 |
+
## Conclusion
|
296 |
+
|
297 |
+
In this notebook, we explored the process of training Synalinks programs
|
298 |
+
to optimize their performance on specific datasets. By leveraging the GSM8k
|
299 |
+
dataset of grade school math word problems, we demonstrated how to train a
|
300 |
+
language model application to improve its reasoning abilities and accuracy.
|
301 |
+
|
302 |
+
### Key Takeaways
|
303 |
+
|
304 |
+
- **Rewards**: `Reward`s guide the reinforcement learning process by
|
305 |
+
providing feedback on the system's performance. They are typically
|
306 |
+
float values that indicate how well the system performed a task,
|
307 |
+
with the goal of maximizing the reward function during training.
|
308 |
+
Synalinks offers built-in rewards and allows for custom reward
|
309 |
+
functions to suit specific tasks.
|
310 |
+
|
311 |
+
- **Metrics**: `Metric`s are scalar values monitored during training
|
312 |
+
and evaluation to determine the best-performing program. Unlike
|
313 |
+
rewards, metrics are not used for backpropagation. They provide
|
314 |
+
additional insights for comparing different architectures and
|
315 |
+
saving the optimal model.
|
316 |
+
|
317 |
+
- **Optimizers**: `Optimizer`s update the module's state to improve
|
318 |
+
performance. They handle the backpropagation of rewards and
|
319 |
+
select or generate examples and hints for the language models.
|
320 |
+
Proper configuration of optimizers is essential for effective
|
321 |
+
training.
|
322 |
+
|
323 |
+
- **Filtering Outputs**: When dealing with complex JSON outputs,
|
324 |
+
filtering predictions and ground truths using `out_mask` or
|
325 |
+
`in_mask` parameters ensures that only relevant fields are
|
326 |
+
evaluated. This is particularly useful when the training data
|
327 |
+
includes a subset of the JSON or when additional fields are
|
328 |
+
used to aid the language models.
|
329 |
+
"""
|
330 |
+
)
|
331 |
+
return
|
332 |
+
|
333 |
+
|
334 |
+
if __name__ == "__main__":
|
335 |
+
app.run()
|
code_examples/2_advanced/1_implementing_custom_modules_and_programs_via_subclassing.py
ADDED
@@ -0,0 +1,340 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import marimo
|
2 |
+
|
3 |
+
__generated_with = "0.11.9"
|
4 |
+
app = marimo.App()
|
5 |
+
|
6 |
+
|
7 |
+
@app.cell(hide_code=True)
|
8 |
+
def _():
|
9 |
+
import marimo as mo
|
10 |
+
import synalinks
|
11 |
+
|
12 |
+
synalinks.backend.clear_session()
|
13 |
+
return mo, synalinks
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell(hide_code=True)
|
17 |
+
def _(mo):
|
18 |
+
mo.md(
|
19 |
+
r"""
|
20 |
+
# Implementing custom modules & programs via subclassing
|
21 |
+
|
22 |
+
This tutorial, is for more advanced users, it will cover how to
|
23 |
+
create custom modules/programs via subclassing.
|
24 |
+
|
25 |
+
In this tutorial, we will cover the following themes:
|
26 |
+
|
27 |
+
- The `Module` class
|
28 |
+
- The `add_variable()` method
|
29 |
+
- Trainable and non-trainable variables
|
30 |
+
- The `compute_output_spec()` and `build()` method
|
31 |
+
- The training argument in `call()`
|
32 |
+
- Making sure your module/program can be serialized
|
33 |
+
|
34 |
+
---
|
35 |
+
|
36 |
+
One of the main abstraction of Synalinks is the `Module` class.
|
37 |
+
A `Module` encapsulate both a state (the module's variables) and
|
38 |
+
a transformation from inputs to outputs (the `call()` method).
|
39 |
+
|
40 |
+
For this tutorial, we are going to make a simple neuro-symbolic component
|
41 |
+
called `BacktrackingOfThought`. This component is an adaptation of the
|
42 |
+
famous backtracking algorithm, used a lot in symbolic planning/reasoning,
|
43 |
+
combined with chain of thought, nowadays most used technique to enhance
|
44 |
+
the LMs' predictions.
|
45 |
+
|
46 |
+
The principle is straightforward: the component will have to "think", then
|
47 |
+
we will critique at runtime the thinking and aggregate it to
|
48 |
+
the current chain of thinking only if it is above the given threshold.
|
49 |
+
This mechanism will allow the system to discard bad thinking to resume
|
50 |
+
at the previous step. Additionally we will add a stop condition.
|
51 |
+
|
52 |
+
This algorithm is a simplified version of the popular `TreeOfThought` that,
|
53 |
+
instead of being a tree structure, is only a sequential chain of thinking.
|
54 |
+
"""
|
55 |
+
)
|
56 |
+
return
|
57 |
+
|
58 |
+
|
59 |
+
@app.cell
def _(synalinks):

    class Thinking(synalinks.DataModel):
        # Intermediate reasoning step produced by the language model.
        thinking: str = synalinks.Field(
            description="Your step by step thinking"
        )

    class CritiqueWithReward(synalinks.DataModel):
        # Self-critique of a thinking step plus a scalar reward in [0.0, 1.0].
        critique: str = synalinks.Field(description="The step by step critique")
        reward: float = synalinks.Field(
            description="The reward corresponding to the critique between [0.0, 1.0]",
            le=1.0,
            ge=0.0,
        )

    class BacktrackingOfThought(synalinks.Module):
        """Sequential chain-of-thought with backtracking.

        At each iteration the module generates a thinking step and scores it,
        either with an external `critique_program` or with a reward embedded
        in the thinking step itself. The step is kept (concatenated to the
        inputs) only when the reward exceeds `backtracking_threshold`; the
        loop stops early when it exceeds `stop_threshold`. A final generator
        then produces the answer from the accumulated inputs.
        """

        def __init__(
            self,
            schema=None,
            data_model=None,
            language_model=None,
            backtracking_threshold=0.5,
            stop_threshold=0.8,
            max_iterations=5,
            critique_program=None,
            prompt_template=None,
            examples=None,
            hints=None,
            use_inputs_schema=False,
            use_outputs_schema=False,
            name=None,
            description=None,
            trainable=None,
        ):
            super().__init__(
                name=name,
                description=description,
                trainable=trainable,
            )
            # Accept either an explicit JSON schema or a DataModel class.
            if not schema and data_model:
                schema = data_model.schema()
            self.schema = schema
            self.language_model = language_model
            self.backtracking_threshold = backtracking_threshold
            self.stop_threshold = stop_threshold
            self.max_iterations = max_iterations
            self.critique_program = critique_program
            self.prompt_template = prompt_template
            self.examples = examples
            self.hints = hints
            self.use_inputs_schema = use_inputs_schema
            self.use_outputs_schema = use_outputs_schema
            if not self.critique_program:
                # If no critique program is provided
                # we compute the reward in the thinking step itself.
                thinking_data_model = (
                    Thinking
                    + synalinks.SymbolicDataModel(schema=self.schema)
                    + CritiqueWithReward
                )
            else:
                thinking_data_model = Thinking + synalinks.SymbolicDataModel(
                    schema=self.schema
                )
            # This is for generating the intermediary steps
            self.thinking = synalinks.Generator(
                data_model=thinking_data_model,
                language_model=self.language_model,
                prompt_template=self.prompt_template,
                examples=self.examples,
                hints=self.hints,
                use_inputs_schema=self.use_inputs_schema,
                use_outputs_schema=self.use_outputs_schema,
                name=self.name + "_thinking_generator",
            )
            # This is going to be the final generator
            self.generator = synalinks.Generator(
                schema=self.schema,
                language_model=self.language_model,
                prompt_template=self.prompt_template,
                examples=self.examples,
                hints=self.hints,
                use_inputs_schema=self.use_inputs_schema,
                use_outputs_schema=self.use_outputs_schema,
                name=self.name + "_generator",
            )

        async def call(self, inputs, training=False):
            """Run the backtracking loop and return the final generation.

            Returns None when `inputs` is falsy so the module can be
            short-circuited inside logical flows.
            """
            if not inputs:
                # This is to allow logical flows
                # (don't run the module if no inputs provided)
                return None
            # BUGFIX: `self.max_iterations` is an int; the original iterated
            # it directly (`for i in self.max_iterations`), a TypeError.
            for i in range(self.max_iterations):
                thinking = await self.thinking(inputs)
                if self.critique_program:
                    critique = await self.critique_program(thinking)
                    reward = critique.get("reward")
                else:
                    reward = thinking.get("reward")
                if reward > self.backtracking_threshold:
                    if reward > self.stop_threshold:
                        # Good enough: stop thinking and produce the answer.
                        break
                    # Keep this thinking step for the next iteration.
                    inputs = await synalinks.ops.concat(
                        inputs,
                        thinking,
                        name=self.name + f"_inputs_with_thinking_{i}",
                    )
            return await self.generator(inputs)

        async def compute_output_spec(self, _, training=False):
            # The output always follows the configured schema.
            # NOTE: pass `schema=` by keyword, consistent with every other
            # SymbolicDataModel call site in this module.
            return synalinks.SymbolicDataModel(schema=self.schema)

        def get_config(self):
            """Return a JSON-serializable config to recreate this module."""
            config = {
                "schema": self.schema,
                "backtracking_threshold": self.backtracking_threshold,
                "stop_threshold": self.stop_threshold,
                "max_iterations": self.max_iterations,
                "prompt_template": self.prompt_template,
                "examples": self.examples,
                "hints": self.hints,
                "use_inputs_schema": self.use_inputs_schema,
                "use_outputs_schema": self.use_outputs_schema,
                "name": self.name,
                "description": self.description,
                "trainable": self.trainable,
            }
            language_model_config = {
                "language_model": synalinks.saving.serialize_synalinks_object(
                    self.language_model,
                )
            }
            if self.critique_program:
                critique_program_config = {
                    "critique_program": synalinks.saving.serialize_synalinks_object(
                        self.critique_program,
                    )
                }
            else:
                critique_program_config = {
                    "critique_program": None,
                }
            return {**config, **language_model_config, **critique_program_config}

        @classmethod
        def from_config(cls, config):
            """Recreate the module from a `get_config()` dictionary."""
            language_model = synalinks.saving.deserialize_synalinks_object(
                config.pop("language_model")
            )
            # BUGFIX: always pop the key. The original only popped it when
            # truthy, so a None value stayed in `config` and was passed twice
            # (explicitly and via **config), raising a TypeError.
            critique_program_config = config.pop("critique_program", None)
            if critique_program_config:
                critique_program = synalinks.saving.deserialize_synalinks_object(
                    critique_program_config
                )
            else:
                critique_program = None
            return cls(
                language_model=language_model,
                critique_program=critique_program,
                **config,
            )

    return BacktrackingOfThought, CritiqueWithReward, Thinking
|
223 |
+
|
224 |
+
|
225 |
+
@app.cell(hide_code=True)
|
226 |
+
def _(mo):
|
227 |
+
mo.md(
|
228 |
+
r"""
|
229 |
+
### The `__init__()` function
|
230 |
+
|
231 |
+
First, let's explain the `__init__()` function. When implementing modules that
|
232 |
+
use a `Generator`, you want to externalize the generator's parameters
|
233 |
+
(`prompt_template`, `hints`, `examples`, `use_inputs_schema`, `use_outputs_schema`)
|
234 |
+
to give maximum flexibility to your module when possible.
|
235 |
+
Then, you have to include the default arguments of a module (`name`, `description`, `trainable`)
|
236 |
+
that will be provided to the `super().__init__()`.
|
237 |
+
Although the name and description are inferred automatically it is a good practice to
|
238 |
+
let the user personalize them. The `trainable` argument, will indicate if the module
|
239 |
+
is frozen or not, meaning that their variables could be updated by the optimizer,
|
240 |
+
by default, a module should be trainable.
|
241 |
+
|
242 |
+
And finally, you can add any relevant information, whether for the initialization of
|
243 |
+
the variables, or a config parameter like here.
|
244 |
+
|
245 |
+
To add a variable to the module, you have to use the `add_variables` function,
|
246 |
+
this function can only be used in the `__init__()` or in the `build()` function.
|
247 |
+
The build function is useful to create variables, or initialize your module/program
|
248 |
+
based on the actual inputs, that is not known at this stage, remember the module can
|
249 |
+
accept any inputs.
|
250 |
+
|
251 |
+
### How to know when using a `Variable`?
|
252 |
+
|
253 |
+
As a rule of thumb, the variables should be anything that evolve over time during
|
254 |
+
inference/training. These variables could be updated by the module itself, or by
|
255 |
+
the optimizer if you have an optimizer designed for that. They will be serialized
|
256 |
+
when you save your program so you can recover the state of your program by loading
|
257 |
+
a JSON file. In this example, the variables are encapsulated in the `Generator`.
|
258 |
+
|
259 |
+
### The `call()` function
|
260 |
+
|
261 |
+
The `call()` function is the core of the `Module` class. It defines the computation
|
262 |
+
performed at every call of the module.
|
263 |
+
This function takes `inputs` and an optional `training` argument, which indicates
|
264 |
+
whether the module is in training mode or not.
|
265 |
+
|
266 |
+
In the `BacktrackingOfThought` module, the `call()` function implements the
|
267 |
+
backtracking logic:
|
268 |
+
|
269 |
+
- It iterates up to `max_iterations` times.
|
270 |
+
- In each iteration, it generates a "thinking" step using the `thinking` generator.
|
271 |
+
- It then critiques the generated thinking using either a provided critique program or
|
272 |
+
a reward value embedded in the thinking step.
|
273 |
+
- If the reward exceeds the `backtracking_threshold`, the thinking step is concatenated
|
274 |
+
with the inputs for the next iteration.
|
275 |
+
- If the reward exceeds the `stop_threshold`, the iteration stops early.
|
276 |
+
- Finally, the `generator` produces the final output based on the accumulated inputs.
|
277 |
+
|
278 |
+
### The `compute_output_spec()` function
|
279 |
+
|
280 |
+
The `compute_output_spec()` function is responsible for defining the output data model
|
281 |
+
of the module/program. It allows the system to understand the structure of the data
|
282 |
+
produced by this module.
|
283 |
+
|
284 |
+
In this example, `compute_output_spec()` returns a `SymbolicDataModel` based on the module's
|
285 |
+
schema, indicating the expected structure of the output data.
|
286 |
+
|
287 |
+
As a rule of thumb, if you access a data model field (using `get()`) you will have to
|
288 |
+
implement it otherwise, Synalinks will infer the output spec by running the call
|
289 |
+
function with symbolic data models. If you have any doubt, do not implement it and the system will
|
290 |
+
raise an error if you need to.
|
291 |
+
|
292 |
+
### Serialization and Deserialization
|
293 |
+
|
294 |
+
To ensure that your module can be saved and loaded correctly, you need to implement serialization
|
295 |
+
and deserialization methods. This is crucial for saving the state of your module, including
|
296 |
+
any trainable variables, and restoring it later.
|
297 |
+
|
298 |
+
- The `get_config()` method should return a dictionary containing all the information needed
|
299 |
+
to recreate the module. This includes the module's configuration and any serialized
|
300 |
+
sub-components like the language model or critique program in this case.
|
301 |
+
- The `from_config()` class method should be able to reconstruct the module from the
|
302 |
+
configuration dictionary returned by `get_config()`.
|
303 |
+
|
304 |
+
## Conclusion
|
305 |
+
|
306 |
+
By following these guidelines, you can create custom modules in Synalinks that are flexible,
|
307 |
+
reusable, and can be integrated into larger programs. The `BacktrackingOfThought` module
|
308 |
+
demonstrates how to combine symbolic reasoning with language model predictions to enhance
|
309 |
+
the decision-making process.
|
310 |
+
|
311 |
+
### Key Takeaways
|
312 |
+
|
313 |
+
- **Module Class**: The `Module` class in Synalinks encapsulates both state (variables)
|
314 |
+
and transformation logic (`call()` method), serving as a foundational abstraction for
|
315 |
+
building custom components.
|
316 |
+
- **Initialization and Variables**: The __init__() function initializes the module,
|
317 |
+
externalizing generator parameters for flexibility. Trainable and non-trainable
|
318 |
+
variables are managed using the add_variables function, ensuring that the
|
319 |
+
module's state can evolve over time and be serialized.
|
320 |
+
- **Call Function**: The `call()` function defines the core computation of the module,
|
321 |
+
handling inputs and producing outputs. In `BacktrackingOfThought`, it implements
|
322 |
+
backtracking logic, iteratively generating and critiquing thinking steps to refine
|
323 |
+
the output.
|
324 |
+
- **Output Specification**: The `compute_output_spec()` function defines the output data
|
325 |
+
model, allowing the system to understand the structure of the produced data.
|
326 |
+
Implementing this function is crucial when accessing data model fields directly.
|
327 |
+
- **Serialization**: Proper serialization and deserialization methods (`get_config()`
|
328 |
+
and `from_config()`) ensure that the module's state can be saved and restored,
|
329 |
+
facilitating reuse and integration into larger programs.
|
330 |
+
- **Flexibility and Reusability**: By following these guidelines, you can create
|
331 |
+
custom modules that are flexible, reusable, and easily integrated into neuro-symbolic
|
332 |
+
programs. The `BacktrackingOfThought` module exemplifies how to combine symbolic
|
333 |
+
reasoning with language models to improve decision-making processes.
|
334 |
+
"""
|
335 |
+
)
|
336 |
+
return
|
337 |
+
|
338 |
+
|
339 |
+
if __name__ == "__main__":
|
340 |
+
app.run()
|
examples/chatbot.py
DELETED
@@ -1,152 +0,0 @@
|
|
1 |
-
import marimo
|
2 |
-
|
3 |
-
__generated_with = "0.9.14"
|
4 |
-
app = marimo.App(width="medium")
|
5 |
-
|
6 |
-
|
7 |
-
@app.cell
|
8 |
-
def __():
|
9 |
-
import marimo as mo
|
10 |
-
import os
|
11 |
-
from huggingface_hub import InferenceClient
|
12 |
-
return InferenceClient, mo, os
|
13 |
-
|
14 |
-
|
15 |
-
@app.cell
|
16 |
-
def __():
|
17 |
-
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
|
18 |
-
return (MODEL_NAME,)
|
19 |
-
|
20 |
-
|
21 |
-
@app.cell(hide_code=True)
|
22 |
-
def __(MODEL_NAME, mo):
|
23 |
-
mo.md(f"""
|
24 |
-
# Chat with **{MODEL_NAME}**
|
25 |
-
""")
|
26 |
-
return
|
27 |
-
|
28 |
-
|
29 |
-
@app.cell
|
30 |
-
def __(max_tokens, mo, system_message, temperature, top_p):
|
31 |
-
mo.hstack(
|
32 |
-
[
|
33 |
-
system_message,
|
34 |
-
mo.vstack([temperature, top_p, max_tokens], align="end"),
|
35 |
-
],
|
36 |
-
)
|
37 |
-
return
|
38 |
-
|
39 |
-
|
40 |
-
@app.cell
|
41 |
-
def __(mo, respond):
|
42 |
-
chat = mo.ui.chat(
|
43 |
-
model=respond,
|
44 |
-
prompts=["Tell me a joke.", "What is the square root of {{number}}?"],
|
45 |
-
)
|
46 |
-
chat
|
47 |
-
return (chat,)
|
48 |
-
|
49 |
-
|
50 |
-
@app.cell
|
51 |
-
def __(InferenceClient, MODEL_NAME, os):
|
52 |
-
"""
|
53 |
-
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.26.2/en/guides/inference
|
54 |
-
"""
|
55 |
-
|
56 |
-
hf_token = os.environ.get("HF_TOKEN")
|
57 |
-
if not hf_token:
|
58 |
-
print("HF_TOKEN not set, may have limited access.")
|
59 |
-
|
60 |
-
client = InferenceClient(
|
61 |
-
MODEL_NAME,
|
62 |
-
token=hf_token,
|
63 |
-
)
|
64 |
-
return client, hf_token
|
65 |
-
|
66 |
-
|
67 |
-
@app.cell
|
68 |
-
def __(client, mo):
|
69 |
-
# Create UI controls
|
70 |
-
system_message = mo.ui.text_area(
|
71 |
-
value="You are a friendly Chatbot.",
|
72 |
-
label="System message",
|
73 |
-
)
|
74 |
-
max_tokens = mo.ui.slider(
|
75 |
-
start=1,
|
76 |
-
stop=2048,
|
77 |
-
value=512,
|
78 |
-
step=1,
|
79 |
-
label="Max new tokens",
|
80 |
-
show_value=True,
|
81 |
-
)
|
82 |
-
temperature = mo.ui.slider(
|
83 |
-
start=0.1,
|
84 |
-
stop=4.0,
|
85 |
-
value=0.7,
|
86 |
-
step=0.1,
|
87 |
-
label="Temperature",
|
88 |
-
show_value=True,
|
89 |
-
)
|
90 |
-
top_p = mo.ui.slider(
|
91 |
-
start=0.1,
|
92 |
-
stop=1.0,
|
93 |
-
value=0.95,
|
94 |
-
step=0.05,
|
95 |
-
label="Top-p (nucleus sampling)",
|
96 |
-
show_value=True,
|
97 |
-
)
|
98 |
-
|
99 |
-
# Add more configuration options if needed.
|
100 |
-
|
101 |
-
|
102 |
-
# Create chat callback
|
103 |
-
def respond(messages: list[mo.ai.ChatMessage], config):
|
104 |
-
chat_messages = [{"role": "system", "content": system_message.value}]
|
105 |
-
|
106 |
-
for message in messages:
|
107 |
-
parts = []
|
108 |
-
# Add text
|
109 |
-
parts.append({"type": "text", "text": message.content})
|
110 |
-
|
111 |
-
# Add attachments
|
112 |
-
if message.attachments:
|
113 |
-
for attachment in message.attachments:
|
114 |
-
content_type = attachment.content_type or ""
|
115 |
-
# This example only supports image attachments
|
116 |
-
if content_type.startswith("image"):
|
117 |
-
parts.append(
|
118 |
-
{
|
119 |
-
"type": "image_url",
|
120 |
-
"image_url": {"url": attachment.url},
|
121 |
-
}
|
122 |
-
)
|
123 |
-
else:
|
124 |
-
raise ValueError(
|
125 |
-
f"Unsupported content type {content_type}"
|
126 |
-
)
|
127 |
-
|
128 |
-
chat_messages.append({"role": message.role, "content": parts})
|
129 |
-
|
130 |
-
response = client.chat_completion(
|
131 |
-
chat_messages,
|
132 |
-
max_tokens=max_tokens.value,
|
133 |
-
temperature=temperature.value,
|
134 |
-
top_p=top_p.value,
|
135 |
-
stream=False,
|
136 |
-
)
|
137 |
-
|
138 |
-
# You can return strings, markdown, charts, tables, dataframes, and more.
|
139 |
-
return response.choices[0].message.content
|
140 |
-
return max_tokens, respond, system_message, temperature, top_p
|
141 |
-
|
142 |
-
|
143 |
-
@app.cell
|
144 |
-
def __():
|
145 |
-
# If you need to do anything _reactively_ to the chat messages,
|
146 |
-
# you can access the chat messages using the `chat.value` attribute.
|
147 |
-
# chat.value
|
148 |
-
return
|
149 |
-
|
150 |
-
|
151 |
-
if __name__ == "__main__":
|
152 |
-
app.run()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/dataset_explorer.py
DELETED
@@ -1,108 +0,0 @@
|
|
1 |
-
import marimo
|
2 |
-
|
3 |
-
__generated_with = "0.9.18"
|
4 |
-
app = marimo.App(width="full")
|
5 |
-
|
6 |
-
|
7 |
-
@app.cell
|
8 |
-
def __():
|
9 |
-
datasets = [
|
10 |
-
# Add your own HF datasets
|
11 |
-
"scikit-learn/iris/Iris.csv",
|
12 |
-
"scikit-learn/adult-census-income/adult.csv",
|
13 |
-
"scikit-learn/auto-mpg/auto-mpg.csv",
|
14 |
-
"scikit-learn/credit-card-clients/UCI_Credit_Card.csv",
|
15 |
-
"scikit-learn/Fish/Fish.csv",
|
16 |
-
"scikit-learn/tips/tips.csv",
|
17 |
-
]
|
18 |
-
return (datasets,)
|
19 |
-
|
20 |
-
|
21 |
-
@app.cell(hide_code=True)
|
22 |
-
def __(mo):
|
23 |
-
mo.md(r"""## Select a dataset""")
|
24 |
-
return
|
25 |
-
|
26 |
-
|
27 |
-
@app.cell(hide_code=True)
|
28 |
-
def __(datasets, mo):
|
29 |
-
dataset = mo.ui.dropdown(datasets, value=datasets[0], label="Select a dataset")
|
30 |
-
no_limit = mo.ui.switch(label="Limit 1000", value=True)
|
31 |
-
mo.hstack([dataset, no_limit])
|
32 |
-
return dataset, no_limit
|
33 |
-
|
34 |
-
|
35 |
-
@app.cell
|
36 |
-
def __(dataset, mo, no_limit):
|
37 |
-
explore = mo.sql(
|
38 |
-
f"""
|
39 |
-
CREATE OR REPLACE TEMP TABLE explore
|
40 |
-
AS (FROM 'hf://datasets/{dataset.value}')
|
41 |
-
{'LIMIT 1000' if no_limit.value else ''};
|
42 |
-
|
43 |
-
FROM explore;
|
44 |
-
"""
|
45 |
-
)
|
46 |
-
return (explore,)
|
47 |
-
|
48 |
-
|
49 |
-
@app.cell(hide_code=True)
|
50 |
-
def __(mo):
|
51 |
-
mo.md(r"""## Summary""")
|
52 |
-
return
|
53 |
-
|
54 |
-
|
55 |
-
@app.cell(hide_code=True)
|
56 |
-
def __(explore, mo):
|
57 |
-
_schema = mo.accordion({"Schema": explore.schema})
|
58 |
-
|
59 |
-
mo.md(f"""
|
60 |
-
* Total rows: **{len(explore):,}**
|
61 |
-
* Total columns: **{len(explore.columns)}**
|
62 |
-
|
63 |
-
{_schema}
|
64 |
-
""")
|
65 |
-
return
|
66 |
-
|
67 |
-
|
68 |
-
@app.cell
|
69 |
-
def __(explore):
|
70 |
-
explore.describe()
|
71 |
-
return
|
72 |
-
|
73 |
-
|
74 |
-
@app.cell(hide_code=True)
|
75 |
-
def __(mo):
|
76 |
-
mo.md("""## Manipulate the data""")
|
77 |
-
return
|
78 |
-
|
79 |
-
|
80 |
-
@app.cell
|
81 |
-
def __(explore, mo):
|
82 |
-
transformed = mo.ui.dataframe(explore)
|
83 |
-
transformed
|
84 |
-
return (transformed,)
|
85 |
-
|
86 |
-
|
87 |
-
@app.cell(hide_code=True)
|
88 |
-
def __(mo):
|
89 |
-
mo.md(r"""## Explore the data""")
|
90 |
-
return
|
91 |
-
|
92 |
-
|
93 |
-
@app.cell
|
94 |
-
def __(mo, transformed):
|
95 |
-
mo.ui.data_explorer(transformed.value)
|
96 |
-
return
|
97 |
-
|
98 |
-
|
99 |
-
@app.cell(hide_code=True)
|
100 |
-
def __():
|
101 |
-
# Imports
|
102 |
-
import marimo as mo
|
103 |
-
import polars
|
104 |
-
return mo, polars
|
105 |
-
|
106 |
-
|
107 |
-
if __name__ == "__main__":
|
108 |
-
app.run()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,10 +1,4 @@
|
|
1 |
huggingface-hub==0.26.2
|
2 |
marimo[sql]
|
3 |
-
|
4 |
-
|
5 |
-
openai
|
6 |
-
pyarrow
|
7 |
-
# Or a specific version
|
8 |
-
# marimo>=0.9.0
|
9 |
-
|
10 |
-
# Add other dependencies as needed
|
|
|
1 |
huggingface-hub==0.26.2
|
2 |
marimo[sql]
|
3 |
+
datasets
|
4 |
+
synalinks
|
|
|
|
|
|
|
|
|
|
|
|