File size: 6,144 Bytes
b3d9a99
3d94e2c
 
b3d9a99
 
 
3d94e2c
 
 
b0ba900
 
 
3d94e2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0ba900
3d94e2c
b0ba900
3d94e2c
 
 
 
1d9995e
3d94e2c
b0ba900
3d94e2c
 
 
 
b0ba900
3d94e2c
 
 
 
 
 
 
 
 
 
b0ba900
e05e85c
3d94e2c
b0ba900
 
 
b3d9a99
 
b0ba900
 
 
3d94e2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0ba900
 
e05e85c
b0ba900
3d94e2c
b0ba900
e05e85c
3d94e2c
e05e85c
3d94e2c
e05e85c
3d94e2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0ba900
3d94e2c
 
 
 
 
b0ba900
3d94e2c
 
b0ba900
b3d9a99
3d94e2c
 
 
 
 
 
b0ba900
3d94e2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0ba900
3d94e2c
 
 
 
 
 
 
 
 
b0ba900
3d94e2c
 
 
 
 
 
 
 
1d9995e
e05e85c
3d94e2c
1d9995e
3d94e2c
 
 
 
 
e05e85c
3d94e2c
e05e85c
 
3d94e2c
 
 
 
 
 
 
 
b0ba900
3d94e2c
 
 
 
 
 
 
 
b0ba900
3d94e2c
 
 
 
 
 
 
 
 
 
 
 
b0ba900
3d94e2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0ba900
3d94e2c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
using System;
using System.Collections.Generic;
using System.Text;
using Newtonsoft.Json;
using Unity.InferenceEngine;
using UnityEngine;

/// <summary>
/// Continues a text prompt one token per frame using a language model run through
/// the Unity Inference Engine, logging the growing story to the console.
/// The prompt is tokenised with the supplied vocabulary (JSON: token string to id)
/// and merge-rule list (one space-separated pair per line).
/// </summary>
public class RunTinyStories : MonoBehaviour
{
    public ModelAsset modelAsset;
    public TextAsset vocabAsset;
    public TextAsset mergesAsset;
    const BackendType backend = BackendType.GPUCompute;

    // Prompt the model continues from. Alternative prompt:
    //   "Once upon a time, there were three bears"
    string outputString = "One day an alien came down from Mars. It saw a chicken";

    // Size of the token window fed to the model on every step. It can be adjusted.
    const int maxTokens = 100;

    // Multiplier applied to the model output before sampling.
    // Make this smaller for more randomness.
    const float predictability = 5f;

    // Special tokens
    const int END_OF_TEXT = 50256;

    // Byte-preserving encoding used to move between raw bytes and chars 0..255.
    // Cached so it is not looked up by name on every conversion.
    static readonly Encoding latin1 = Encoding.GetEncoding("ISO-8859-1");

    // Vocabulary indexed by token id (inverse of `vocab`); unused ids stay null.
    string[] tokens;

    Worker engine;

    // Index in outputTokens of the most recent token (-1 when there is none).
    int currentToken;
    int[] outputTokens = new int[maxTokens];

    // Used for special character decoding:
    //   whiteSpaceCharacters[n] = byte value represented by code point 256 + n
    //   encodedCharacters[b]    = code point used to represent byte b
    int[] whiteSpaceCharacters = new int[256];
    int[] encodedCharacters = new int[256];

    // Number of valid entries in whiteSpaceCharacters.
    int whiteSpaceCount;

    bool runInference;

    // Stop after this many generated tokens.
    const int stopAfter = 100;

    // Number of tokens generated so far (prompt tokens are not counted).
    int totalTokens;

    string[] merges;
    Dictionary<string, int> vocab;

    /// <summary>
    /// Builds the byte/code-point tables, loads the vocabulary, wraps the model in a
    /// graph that also samples the next token, and tokenises the prompt.
    /// </summary>
    void Start()
    {
        if (modelAsset == null || vocabAsset == null || mergesAsset == null)
        {
            // Fail with a clear message instead of a NullReferenceException further down.
            Debug.LogError("RunTinyStories: modelAsset, vocabAsset and mergesAsset must all be assigned.");
            enabled = false;
            return;
        }

        SetupWhiteSpaceShifts();
        LoadVocabulary();

        var model1 = ModelLoader.Load(modelAsset);

        // Create a new model that also selects the random token: take the entry at
        // index `currentToken` along axis 1 of the first model output, scale it by
        // `predictability`, and draw a single sample from it.
        // NOTE(review): presumably the model output is (batch, position, vocabulary) - confirm.
        var graph = new FunctionalGraph();
        var input = graph.AddInput(model1, 0);
        var currentTokenInput = graph.AddInput<int>(new TensorShape(), "currentToken");
        var row = Functional.Select(Functional.Forward(model1, input)[0], 1, currentTokenInput);
        var output = Functional.Multinomial(predictability * row, 1);
        var model2 = graph.Compile(output);

        engine = new Worker(model2, backend);

        DecodePrompt(outputString);

        // Sampling is indexed by the last prompt token, so at least one is required.
        runInference = currentToken >= 0;
        if (!runInference)
        {
            Debug.LogWarning("RunTinyStories: the prompt produced no tokens; nothing to generate.");
        }
    }

    // Update is called once per frame; it generates one token per frame while running.
    void Update()
    {
        if (runInference)
        {
            RunInference();
        }
    }

    /// <summary>
    /// Runs the model once, appends the sampled token to the window and to the
    /// output text, and stops on END_OF_TEXT or after `stopAfter` tokens.
    /// </summary>
    void RunInference()
    {
        using var tokensSoFar = new Tensor<int>(new TensorShape(1, maxTokens), outputTokens);
        using var index = new Tensor<int>(new TensorShape(), new[] { currentToken });

        engine.Schedule(tokensSoFar, index);

        var sampledOutput = engine.PeekOutput() as Tensor<int>;
        if (sampledOutput == null)
        {
            Debug.LogError("RunTinyStories: the model output is not an int tensor; stopping generation.");
            runInference = false;
            return;
        }

        using var sampled = sampledOutput.ReadbackAndClone();
        int ID = sampled[0];

        // Shift the window down by one if we got to the end, dropping the oldest token.
        if (currentToken >= maxTokens - 1)
        {
            Array.Copy(outputTokens, 1, outputTokens, 0, maxTokens - 1);
            currentToken--;
        }

        outputTokens[++currentToken] = ID;
        totalTokens++;

        if (ID == END_OF_TEXT || totalTokens >= stopAfter)
        {
            runInference = false;
        }
        else if (ID >= 0 && ID < tokens.Length && tokens[ID] != null)
        {
            outputString += GetUnicodeText(tokens[ID]);
        }

        Debug.Log(outputString);
    }

    /// <summary>
    /// Tokenises <paramref name="text"/> into the start of the token window and
    /// points `currentToken` at the last prompt token (-1 if there are none).
    /// </summary>
    void DecodePrompt(string text)
    {
        var inputTokens = GetTokens(text);

        // A prompt longer than the window cannot fit; keep its most recent tokens.
        int skipped = Math.Max(0, inputTokens.Count - maxTokens);
        int kept = inputTokens.Count - skipped;
        if (skipped > 0)
        {
            Debug.LogWarning($"RunTinyStories: prompt has {inputTokens.Count} tokens; only the last {maxTokens} are used.");
        }

        inputTokens.CopyTo(skipped, outputTokens, 0, kept);
        currentToken = kept - 1;
    }

    /// <summary>
    /// Reads the vocabulary JSON into `vocab`, builds the id-to-token table
    /// `tokens`, and splits the merges asset into one rule per line.
    /// </summary>
    void LoadVocabulary()
    {
        var jsonText = vocabAsset.text;
        vocab = JsonConvert.DeserializeObject<Dictionary<string, int>>(jsonText);
        if (vocab == null)
        {
            Debug.LogError("RunTinyStories: the vocabulary asset did not contain a JSON object.");
            vocab = new Dictionary<string, int>();
        }

        // Size the table by the largest id rather than the entry count so that
        // non-contiguous ids cannot index out of range.
        int maxId = -1;
        foreach (var item in vocab)
        {
            if (item.Value > maxId) maxId = item.Value;
        }

        tokens = new string[maxId + 1];
        foreach (var item in vocab)
        {
            if (item.Value >= 0) tokens[item.Value] = item.Key;
        }

        // Accept both CRLF and LF line endings; blank lines are dropped.
        merges = mergesAsset.text.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    }

    // Translates a vocabulary token (encoded special characters) to Unicode text.
    string GetUnicodeText(string text)
    {
        var bytes = latin1.GetBytes(ShiftCharacterDown(text));
        return Encoding.UTF8.GetString(bytes);
    }

    // Translates Unicode text to the vocabulary's one-character-per-UTF-8-byte form.
    string GetASCIIText(string newText)
    {
        var bytes = Encoding.UTF8.GetBytes(newText);
        return ShiftCharacterUp(latin1.GetString(bytes));
    }

    /// <summary>
    /// Maps code points 256..(256 + whiteSpaceCount - 1) back to the byte values
    /// they stand for; every other character is returned unchanged.
    /// </summary>
    string ShiftCharacterDown(string text)
    {
        var outText = new StringBuilder(text.Length);
        foreach (char letter in text)
        {
            // Code point 256 itself is an encoded character (it stands for byte 0),
            // so the encoded range starts at 256, not 257.
            int shifted = letter - 256;
            bool isEncoded = shifted >= 0 && shifted < whiteSpaceCount;
            outText.Append(isEncoded ? (char)whiteSpaceCharacters[shifted] : letter);
        }
        return outText.ToString();
    }

    /// <summary>
    /// Replaces every character (expected to be in 0..255) with the code point
    /// assigned to it in `encodedCharacters`.
    /// </summary>
    string ShiftCharacterUp(string text)
    {
        var outText = new StringBuilder(text.Length);
        foreach (char letter in text)
        {
            outText.Append((char)encodedCharacters[letter]);
        }
        return outText.ToString();
    }

    /// <summary>
    /// Fills the two lookup tables: byte values flagged by IsWhiteSpace are assigned
    /// consecutive code points starting at 256; all other bytes map to themselves.
    /// </summary>
    void SetupWhiteSpaceShifts()
    {
        whiteSpaceCount = 0;
        for (int i = 0; i < 256; i++)
        {
            if (IsWhiteSpace(i))
            {
                encodedCharacters[i] = whiteSpaceCount + 256;
                whiteSpaceCharacters[whiteSpaceCount++] = i;
            }
            else
            {
                encodedCharacters[i] = i;
            }
        }
    }

    // Returns true for byte values that are represented by a shifted code point:
    // 0..32, 127..160 and 173.
    bool IsWhiteSpace(int i)
    {
        return i <= 32 || (i >= 127 && i <= 160) || i == 173;
    }

    /// <summary>
    /// Converts <paramref name="text"/> to vocabulary ids: one symbol per encoded
    /// byte, merged by the rules in `merges`, then looked up in `vocab`.
    /// Symbols missing from the vocabulary are skipped with a warning.
    /// </summary>
    List<int> GetTokens(string text)
    {
        text = GetASCIIText(text);

        // Start with a list of single characters
        var inputTokens = new List<string>(text.Length);
        foreach (var letter in text)
        {
            inputTokens.Add(letter.ToString());
        }

        ApplyMerges(inputTokens);

        // Find the ids of the words in the vocab
        var ids = new List<int>(inputTokens.Count);
        foreach (var token in inputTokens)
        {
            if (vocab.TryGetValue(token, out int id))
            {
                ids.Add(id);
            }
            else
            {
                Debug.LogWarning($"RunTinyStories: token '{token}' is not in the vocabulary and was skipped.");
            }
        }

        return ids;
    }

    /// <summary>
    /// Applies every merge rule in file order: wherever the rule's first symbol is
    /// immediately followed by its second, the two entries are joined in place.
    /// </summary>
    void ApplyMerges(List<string> inputTokens)
    {
        foreach (var merge in merges)
        {
            string[] pair = merge.Split(' ');

            // Skip lines that are not exactly "<left> <right>".
            if (pair.Length != 2 || pair[0].Length == 0 || pair[1].Length == 0)
            {
                continue;
            }

            // A match needs a right-hand neighbour, so stop at the last element.
            int n = 0;
            while (n < inputTokens.Count - 1)
            {
                n = inputTokens.IndexOf(pair[0], n);
                if (n < 0)
                {
                    break;
                }

                if (n < inputTokens.Count - 1 && inputTokens[n + 1] == pair[1])
                {
                    inputTokens[n] += inputTokens[n + 1];
                    inputTokens.RemoveAt(n + 1);
                }
                n++;
            }
        }
    }

    // Releases the inference worker when the component is destroyed.
    void OnDestroy()
    {
        engine?.Dispose();
    }
}