sanjayw Fisharp commited on
Commit
c9c9be5
·
0 Parent(s):

Duplicate from Fisharp/starcoder-playground

Browse files

Co-authored-by: Salvador Fisharp <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+
163
+ # Temporary ignoring notebook files, conda environment yaml and the vs code workspace settings
164
+ .vscode
165
+ **/*.ipynb
166
+ **/*conda*.yml
167
+
README.md ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: StarCoder Demo
3
+ emoji: 💫
4
+ colorFrom: gray
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.28.3
8
+ app_file: app.py
9
+ pinned: true
10
+ duplicated_from: Fisharp/starcoder-playground
11
+ ---
12
+
13
+
14
+ # ⭐StarCoder Demo💫
15
+
16
+ ## Code-Completion Playground 💻 with ⭐StarCoder Models
17
+
18
+ This is a demo playground to generate code with the power of ⭐[StarCoder](https://huggingface.co/bigcode/starcoder), a **15B**-parameter model for code generation in **80+** programming languages.
19
+
20
+ ℹ️ This is not an instruction model but just a code completion tool.
21
+
22
+ 🗣️For instruction and chatting you can chat with a prompted version of the model directly at the [HuggingFace🤗Chat💬(hf.co/chat)](https://huggingface.co/chat/?model=starcoder)
23
+
24
+ ---
25
+
26
+ **Intended Use**: this app and its [supporting model](https://huggingface.co/bigcode/starcoder) are provided for demonstration purposes only; not to serve as a replacement for human expertise. For more details on the model's limitations in terms of factuality and biases, please refer to the source [model card](https://hf.co/bigcode)
27
+
28
+ ⚠️ Any use or sharing of this demo constitutes your acceptance of the BigCode [OpenRAIL-M](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) License Agreement and the use restrictions included within.
29
+
30
+ ---
31
+
32
+ ## Model Formats
33
+
34
+ The model is pretrained on code and is formatted with special tokens in addition to the pure code data,\
35
+ such as prefixes specifying the source of the file or tokens separating code from a commit message.\
36
+ Use these templates to explore the model's capacities:
37
+
38
+ ### 1. Prefixes 🏷️
39
+
40
+ For pure code files, use any combination of the following prefixes:
41
+
42
+ ```xml
43
+ <reponame>REPONAME<filename>FILENAME<gh_stars>STARS\ncode<|endoftext|>
44
+ ```
45
+
46
+ STARS can be one of: 0, 1-10, 10-100, 100-1000, 1000+
47
+
48
+ ### 2. Commits 💾
49
+
50
+ The commits data is formatted as follows:
51
+
52
+ ```xml
53
+ <commit_before>code<commit_msg>text<commit_after>code<|endoftext|>
54
+ ```
55
+
56
+ ### 3. Jupyter Notebooks 📓
57
+
58
+ The model is trained on Jupyter notebooks as Python scripts and structured formats like:
59
+
60
+ ```xml
61
+ <start_jupyter><jupyter_text>text<jupyter_code>code<jupyter_output>output<jupyter_text>
62
+ ```
63
+
64
+ ### 4. Issues 🐛
65
+
66
+ We also trained on GitHub issues using the following formatting:
67
+
68
+ ```xml
69
+ <issue_start><issue_comment>text<issue_comment>...<issue_closed>
70
+ ```
71
+
72
+ ### 5. Fill-in-the-middle 🧩
73
+
74
+ Fill in the middle requires rearranging the model inputs. The playground handles this for you - all you need is to specify where to fill:
75
+
76
+ ```xml
77
+ code before<FILL_HERE>code after
78
+ ```
app.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import logging as log
4
+ from typing import Generator
5
+
6
+ import gradio as gr
7
+ from gradio.themes.utils import sizes
8
+ from text_generation import Client
9
+ from src.request import StarCoderRequest, StarCoderRequestConfig
10
+
11
+ from src.utils import (
12
+ get_file_as_string,
13
+ get_sections,
14
+ get_url_from_env_or_default_path,
15
+ preview
16
+ )
17
+ from constants import (
18
+ FIM_MIDDLE,
19
+ FIM_PREFIX,
20
+ FIM_SUFFIX,
21
+ END_OF_TEXT,
22
+ MIN_TEMPERATURE,
23
+ )
24
+ from settings import (
25
+ DEFAULT_PORT,
26
+ DEFAULT_STARCODER_API_PATH,
27
+ DEFAULT_STARCODER_BASE_API_PATH,
28
+ )
29
+
30
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
31
+ # Gracefully exit the app if the HF_TOKEN is not set,
32
+ # printing the error to standard error (`stderr`) instead of raising an exception
33
+ # and the expected behavior
34
+ if not HF_TOKEN:
35
+ ERR_MSG = """
36
+ Please set the HF_TOKEN environment variable with your Hugging Face API token.
37
+ You can get one by signing up at https://huggingface.co/join and then visiting
38
+ https://huggingface.co/settings/tokens."""
39
+ print(ERR_MSG, file=sys.stderr)
40
+ # gr.errors.GradioError(ERR_MSG)
41
+ # gr.close_all(verbose=False)
42
+ sys.exit(1)
43
+
44
+ API_URL_STAR = get_url_from_env_or_default_path("STARCODER_API", DEFAULT_STARCODER_API_PATH)
45
+ API_URL_BASE = get_url_from_env_or_default_path("STARCODER_BASE_API", DEFAULT_STARCODER_BASE_API_PATH)
46
+
47
+ preview("StarCoder Model URL", API_URL_STAR)
48
+ preview("StarCoderBase Model URL", API_URL_BASE)
49
+ preview("HF Token", HF_TOKEN, ofuscate=True)
50
+
51
+ _styles = get_file_as_string("styles.css")
52
+ _script = get_file_as_string("community-btn.js")
53
+ _sharing_icon_svg = get_file_as_string("community-icon.svg")
54
+ _loading_icon_svg = get_file_as_string("loading-icon.svg")
55
+
56
+ # Loads the whole content of the ./README.md file
57
+ # slicing/unpacking its different sections into their proper variables
58
+ readme_file_content = get_file_as_string("README.md", path='./')
59
+ (
60
+ manifest,
61
+ description,
62
+ disclaimer,
63
+ formats,
64
+ ) = get_sections(readme_file_content, "---", up_to=4)
65
+
66
+ theme = gr.themes.Monochrome(
67
+ primary_hue="indigo",
68
+ secondary_hue="blue",
69
+ neutral_hue="slate",
70
+ radius_size=sizes.radius_sm,
71
+ font=[
72
+ gr.themes.GoogleFont("IBM Plex Sans", [400, 600]),
73
+ "ui-sans-serif",
74
+ "system-ui",
75
+ "sans-serif",
76
+ ],
77
+ text_size=sizes.text_lg,
78
+ )
79
+
80
+ HEADERS = {
81
+ "Authorization": f"Bearer {HF_TOKEN}",
82
+ }
83
+ client_star = Client(API_URL_STAR, headers=HEADERS)
84
+ client_base = Client(API_URL_BASE, headers=HEADERS)
85
+
86
+ def get_tokens_collector(request: StarCoderRequest) -> Generator[str, None, None]:
87
+
88
+ model_client = client_star if request.settings.version == "StarCoder" else client_base
89
+ stream = model_client.generate_stream(request.prompt, **request.settings.kwargs())
90
+ for response in stream:
91
+ # print(response.token.id, response.token.text)
92
+ # if token.text != END_OF_TEXT:
93
+ if response.token.id != 0:
94
+ yield response.token.text
95
+
96
+ def get_tokens_accumulator(request: StarCoderRequest) -> Generator[str, None, None]:
97
+ # start with the prefix (if in fim_mode)
98
+ output = request.prefix if request.fim_mode else request.prompt
99
+ for token in get_tokens_collector(request=request):
100
+ output += token
101
+ yield output
102
+ # after the last token, append the suffix (if in fim_mode)
103
+ if request.fim_mode:
104
+ output += request.suffix
105
+ yield output
106
+ # Append an extra line at the end
107
+ yield output + '\n'
108
+
109
+ def get_tokens_linker(request: StarCoderRequest) -> str:
110
+ return "".join(list(get_tokens_collector(request)))
111
+
112
+ def generate(
113
+ prompt: str,
114
+ temperature = 0.9,
115
+ max_new_tokens = 256,
116
+ top_p = 0.95,
117
+ repetition_penalty = 1.0,
118
+ version = "StarCoder",
119
+ ) -> Generator[str, None, None]:
120
+ request = StarCoderRequest(
121
+ prompt=prompt,
122
+ settings=StarCoderRequestConfig(
123
+ version=version,
124
+ temperature=temperature,
125
+ max_new_tokens=max_new_tokens,
126
+ top_p=top_p,
127
+ repetition_penalty=repetition_penalty,
128
+ )
129
+ )
130
+ yield from get_tokens_accumulator(request)
131
+
132
def process_example(
    prompt: str,
    temperature = 0.9,
    max_new_tokens = 256,
    top_p = 0.95,
    repetition_penalty = 1.0,
    version = "StarCoder",
) -> Generator[str, None, None]:
    """Run a full (non-incremental) generation for one example prompt.

    Builds a StarCoderRequest from the given sampling settings and yields
    the complete generated text exactly once.

    Args:
        prompt (str): The code/text to complete.
        temperature: Sampling temperature forwarded to the request config.
        max_new_tokens: Upper bound on generated tokens.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Penalty applied to repeated tokens.
        version: Model selector ("StarCoder" or "StarCoderBase").

    Yields:
        str: The concatenation of every generated token.
    """
    request = StarCoderRequest(
        prompt=prompt,
        settings=StarCoderRequestConfig(
            version=version,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
        )
    )
    # get_tokens_linker returns one joined string. `yield from` on a str
    # would iterate it character by character, so yield the string itself.
    yield get_tokens_linker(request)
151
+
152
+ # todo: move it into the README too
153
+ examples = [
154
+ "X_train, y_train, X_test, y_test = train_test_split(X, y, test_size=0.1)\n\n# Train a logistic regression model, predict the labels on the test set and compute the accuracy score",
155
+ "// Returns every other value in the array as a new array.\nfunction everyOther(arr) {",
156
+ "def alternating(list1, list2):\n results = []\n for i in range(min(len(list1), len(list2))):\n results.append(list1[i])\n results.append(list2[i])\n if len(list1) > len(list2):\n <FILL_HERE>\n else:\n results.extend(list2[i+1:])\n return results",
157
+ ]
158
+
159
+ with gr.Blocks(theme=theme, analytics_enabled=False, css=_styles) as demo:
160
+ with gr.Column():
161
+ gr.Markdown(description)
162
+ with gr.Row():
163
+ with gr.Column():
164
+ instruction = gr.Textbox(
165
+ placeholder="Enter your code here",
166
+ label="Code",
167
+ elem_id="q-input",
168
+ )
169
+ submit = gr.Button("Generate", variant="primary")
170
+ output = gr.Code(elem_id="q-output", lines=30)
171
+ with gr.Row():
172
+ with gr.Column():
173
+ with gr.Accordion("Advanced settings", open=False):
174
+ with gr.Row():
175
+ column_1, column_2 = gr.Column(), gr.Column()
176
+ with column_1:
177
+ temperature = gr.Slider(
178
+ label="Temperature",
179
+ value=0.2,
180
+ minimum=0.0,
181
+ maximum=1.0,
182
+ step=0.05,
183
+ interactive=True,
184
+ info="Higher values produce more diverse outputs",
185
+ )
186
+ max_new_tokens = gr.Slider(
187
+ label="Max new tokens",
188
+ value=256,
189
+ minimum=0,
190
+ maximum=8192,
191
+ step=64,
192
+ interactive=True,
193
+ info="The maximum numbers of new tokens",
194
+ )
195
+ with column_2:
196
+ top_p = gr.Slider(
197
+ label="Top-p (nucleus sampling)",
198
+ value=0.90,
199
+ minimum=0.0,
200
+ maximum=1,
201
+ step=0.05,
202
+ interactive=True,
203
+ info="Higher values sample more low-probability tokens",
204
+ )
205
+ repetition_penalty = gr.Slider(
206
+ label="Repetition penalty",
207
+ value=1.2,
208
+ minimum=1.0,
209
+ maximum=2.0,
210
+ step=0.05,
211
+ interactive=True,
212
+ info="Penalize repeated tokens",
213
+ )
214
+ with gr.Column():
215
+ version = gr.Dropdown(
216
+ ["StarCoderBase", "StarCoder"],
217
+ value="StarCoder",
218
+ label="Version",
219
+ info="",
220
+ )
221
+ gr.Markdown(disclaimer)
222
+ with gr.Group(elem_id="share-btn-container"):
223
+ community_icon = gr.HTML(_sharing_icon_svg, visible=True)
224
+ loading_icon = gr.HTML(_loading_icon_svg, visible=True)
225
+ share_button = gr.Button(
226
+ "Share to community", elem_id="share-btn", visible=True
227
+ )
228
+ gr.Examples(
229
+ examples=examples,
230
+ inputs=[instruction],
231
+ cache_examples=False,
232
+ fn=process_example,
233
+ outputs=[output],
234
+ )
235
+ gr.Markdown(formats)
236
+
237
+ submit.click(
238
+ generate,
239
+ inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty, version],
240
+ outputs=[output],
241
+ # preprocess=False,
242
+ max_batch_size=8,
243
+ show_progress=True
244
+ )
245
+ share_button.click(None, [], [], _js=_script)
246
+
247
+ demo.queue(concurrency_count=16).launch(debug=True, server_port=DEFAULT_PORT)
constants.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ FIM_PREFIX = "<fim_prefix>"
2
+ FIM_MIDDLE = "<fim_middle>"
3
+ FIM_SUFFIX = "<fim_suffix>"
4
+ END_OF_TEXT = "<|endoftext|>"
5
+
6
+ # Near zero temperature to avoid division by zero
7
+ MIN_TEMPERATURE = 1e-4
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Gradio
2
+ gradio==3.28.2
3
+
4
+ # HuggingFace
5
+ huggingface_hub==0.14.1
6
+ text-generation==0.5.1
7
+ transformers==4.28.1
settings.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # URLs for the StarCoder Models/APIs
2
+ DEFAULT_HUGGINGFACE_MODELS_API_BASE_URL = "https://api-inference.huggingface.co/models/"
3
+ DEFAULT_STARCODER_API_PATH = "bigcode/starcoder/"
4
+ DEFAULT_STARCODER_BASE_API_PATH = "bigcode/starcoderbase/"
5
+ FIM_INDICATOR = "<FILL_HERE>"
6
+ DEFAULT_PORT = 7860
7
+
8
+ STATIC_PATH = "static"
9
+
10
+ DEFAULT_SETTINGS = dict(
11
+ temperature = 0.9,
12
+ max_new_tokens = 256,
13
+ top_p = 0.95,
14
+ repetition_penalty = 1.0,
15
+ version = "StarCoder",
16
+ )
src/request.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import Dict, Any, Union
3
+
4
+ from constants import (
5
+ FIM_MIDDLE,
6
+ FIM_PREFIX,
7
+ FIM_SUFFIX,
8
+ MIN_TEMPERATURE,
9
+ )
10
+ from settings import (
11
+ FIM_INDICATOR,
12
+ )
13
+
14
@dataclass
class StarCoderRequestConfig:
    """Sampling settings for one StarCoder generation request.

    The declared fields are normalized in ``__post_init__``; two extra
    attributes (``do_sample`` and ``seed``) are added there with fixed values.
    """
    temperature: float
    max_new_tokens: int
    top_p: float
    repetition_penalty: float
    version: str

    def __post_init__(self):
        """Coerce the numeric fields and set the fixed sampling attributes."""
        # MIN_TEMPERATURE is a lower bound (a temperature of zero must be
        # avoided), so clamp from below with max(); min() would cap every
        # request at MIN_TEMPERATURE and ignore the requested value.
        self.temperature = max(float(self.temperature), MIN_TEMPERATURE)
        self.max_new_tokens = int(self.max_new_tokens)
        self.top_p = float(self.top_p)
        self.repetition_penalty = float(self.repetition_penalty)
        # Not dataclass fields: always set to the same values for every request.
        self.do_sample = True
        self.seed = 42

    def __repr__(self) -> str:
        """Returns a custom string representation of the Configurations."""
        values = dict(
            model = self.version,
            temp = self.temperature,
            tokens = self.max_new_tokens,
            p = self.top_p,
            penalty = self.repetition_penalty,
            sample = self.do_sample,
            seed = self.seed,
        )
        return f"StarCoderRequestConfig({values})"

    def kwargs(self) -> Dict[str, Union[Any, float, int]]:
        """
        Returns a dictionary of every instance attribute except the model
        version (a copy, so mutating it does not affect this object).
        """
        values = vars(self).copy()
        values.pop("version")
        return values
51
+
52
+ @dataclass
53
+ class StarCoderRequest:
54
+ prompt: str
55
+ settings: StarCoderRequestConfig
56
+
57
+ def __post_init__(self):
58
+ self.fim_mode = FIM_INDICATOR in self.prompt
59
+ self.prefix, self.suffix = None, None
60
+ if self.fim_mode:
61
+ try:
62
+ self.prefix, self.suffix = self.prompt.split(FIM_INDICATOR)
63
+ except Exception as err:
64
+ print(str(err))
65
+ raise ValueError(f"Only one {FIM_INDICATOR} allowed in prompt!") from err
66
+ self.prompt = f"{FIM_PREFIX}{self.prefix}{FIM_SUFFIX}{self.suffix}{FIM_MIDDLE}"
67
+
68
+ def __repr__(self) -> str:
69
+ """Returns a custom string representation of the Request."""
70
+ values = dict(
71
+ prompt = self.prompt,
72
+ configuration = self.settings,
73
+ )
74
+ return f"StarCoderRequest({values})"
src/utils.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List
3
+ from urllib.parse import urljoin
4
+
5
+ from settings import (
6
+ DEFAULT_HUGGINGFACE_MODELS_API_BASE_URL,
7
+ STATIC_PATH,
8
+ )
9
+
10
def masked(value: str, n_shown: int, length: int = None) -> str:
    """Returns a string with the first and last n_shown characters
    and the middle of the string replaced with '*'

    Args:
        value (str): The string to mask
        n_shown (int): The number of characters to show at the beginning and end of the string
        length (int, optional): The total length of the returned string. If not given
            (or zero), the length of the value is used. Defaults to None.

    Returns:
        str: The masked string. When nothing can be shown without revealing
            the whole value (n_shown <= 0, or the value is no longer than
            2 * n_shown), a string made only of '*' is returned.
    """
    total = length or len(value)
    # value[-0:] is the whole string, and a value no longer than 2*n_shown
    # would be revealed completely by its two visible ends: mask it all.
    if n_shown <= 0 or len(value) <= 2 * n_shown:
        return '*' * max(total, 0)
    # A negative repeat count gives '', so a small `total` only drops the stars.
    return value[:n_shown] + '*' * (total - 2 * n_shown) + value[-n_shown:]
24
+
25
+
26
+ def ofuscated(value: str) -> str:
27
+ """Returns a string with the first and last 4 characters
28
+ and the middle of the string replaced with '*'
29
+
30
+ Args:
31
+ value (str): The string to mask
32
+
33
+ Returns:
34
+ str: The masked string
35
+ """
36
+ return masked(value, 4, len(value)//2)
37
+
38
+
39
def preview(label: str, value: str, ofuscate=False) -> None:
    """Print a ``label = value`` line to standard output.
    If ofuscate is True, the value is passed through `ofuscated` first.

    Args:
        label (str): The text printed before the ``=`` sign
        value (str): The value to print; it is converted with ``str()`` first
        ofuscate (bool, optional): If True, it will ofuscate the value. Defaults to False.
    """
    str_value = ofuscated(str(value)) if ofuscate else str(value)
    print(f"{label} = {str_value}")
49
+
50
+ def get_url_from_env_or_default_path(env_name: str, api_path: str) -> str:
51
+ """Takes an url from the env variable (given the env name)
52
+ or combines with urljoin the default models base url
53
+ with the default path (given the path name)
54
+
55
+ Args:
56
+ env_name (str): The name of the environment variable to check
57
+ api_path (str): The default path to use if the environment variable is not set
58
+
59
+ Returns:
60
+ str: The url to use
61
+ """
62
+ return os.environ.get(env_name) or urljoin(
63
+ DEFAULT_HUGGINGFACE_MODELS_API_BASE_URL, api_path
64
+ )
65
+
66
def get_file_as_string(file_name: str, path=STATIC_PATH) -> str:
    """Loads the content of a file given its name
    and returns all of its lines as a single string.
    If a path is given, it is used instead of
    the default static path (STATIC_PATH, from settings).

    Args:
        file_name (str): The name of the file to load.
        path (str, optional): The directory that contains the file.
            Defaults to STATIC_PATH.

    Returns:
        str: The content of the file as a single string
    """
    # The file is decoded as UTF-8 and closed when the with-block exits.
    with open(os.path.join(path, file_name), mode='r', encoding='UTF-8') as f:
        return f.read()
81
+
82
+
83
+ def get_sections(string: str, delimiter: str, up_to: int = None) -> List[str]:
84
+ """Splits a string into sections given a delimiter
85
+
86
+ Args:
87
+ string (str): The string to split
88
+ delimiter (str): The delimiter to use
89
+ up_to (int, optional): The maximum number of sections to return.
90
+ Defaults to None (which means all sections)
91
+
92
+ Returns:
93
+ List[str]: The list of sections (up to the given limit, if any provided)
94
+ """
95
+ return [section.strip()
96
+ for section in string.split(delimiter)
97
+ if (section and not section.isspace())][:up_to]
static/community-btn.js ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Click handler for the "Share to community" button: reads the prompt and the
// generated code from the page and opens a pre-filled "new discussion" form
// in a new tab.
async () => {
    // POSTs a file to the uploads endpoint and resolves to the response body text.
    // Not called by the share flow below.
    async function uploadFile(file){
        const UPLOAD_URL = 'https://huggingface.co/uploads';
        const response = await fetch(UPLOAD_URL, {
            method: 'POST',
            headers: {
                'Content-Type': file.type,
                'X-Requested-With': 'XMLHttpRequest',
            },
            body: file, /// <- File inherits from Blob
        });
        const url = await response.text();
        return url;
    }

    // Fetches the image behind an <img> element and wraps it in a File,
    // as PNG when the source is a PNG data URL and as JPEG otherwise.
    // Not called by the share flow below.
    async function getInputImgFile(imgEl){
        const res = await fetch(imgEl.src);
        const blob = await res.blob();
        const imgId = Date.now() % 200;
        const isPng = imgEl.src.startsWith(`data:image/png`);
        if(isPng){
            // `${imgId}`: a doubled brace would interpolate an object
            // and produce "[object Object]" in the file name.
            const fileName = `sd-perception-${imgId}.png`;
            return new File([blob], fileName, { type: 'image/png' });
        }else{
            const fileName = `sd-perception-${imgId}.jpg`;
            return new File([blob], fileName, { type: 'image/jpeg' });
        }
    }

    // const gradioEl = document.querySelector('body > gradio-app');
    const gradioEl = document.querySelector("gradio-app");
    const inputTxt = gradioEl.querySelector('#q-input textarea').value;
    const rawOutputTxt = gradioEl.querySelector('#q-output .codemirror-wrapper .cm-scroller > div:nth-of-type(2)').innerText;

    // Check for emptiness BEFORE wrapping the output in <pre>: the wrapped
    // string is never empty, so testing it could not stop an empty share.
    if(!inputTxt || !rawOutputTxt || !rawOutputTxt.trim()){
        return;
    }
    const outputTxt = `<pre>${rawOutputTxt}</pre>`;

    // The discussion title is the prompt, truncated to a fixed length.
    const titleLength = 150;
    let titleTxt = inputTxt;
    if(titleTxt.length > titleLength){
        titleTxt = titleTxt.slice(0, titleLength) + ' ...';
    }

    const shareBtnEl = gradioEl.querySelector('#share-btn');
    const shareIconEl = gradioEl.querySelector('#share-btn-share-icon');
    const loadingIconEl = gradioEl.querySelector('#share-btn-loading-icon');

    // Disable the button and swap the share icon for the loading icon.
    shareBtnEl.style.pointerEvents = 'none';
    shareIconEl.style.display = 'none';
    loadingIconEl.style.removeProperty('display');

    const descriptionMd = `### Question:
${inputTxt}

### Answer:

${outputTxt}`;

    const params = {
        title: titleTxt,
        description: descriptionMd,
    };

    // Both keys and values are URL-encoded before being joined into the query string.
    const paramsStr = Object.entries(params)
        .map(([key, value]) => `${encodeURIComponent(key)}=${encodeURIComponent(value)}`)
        .join('&');

    window.open(`https://huggingface.co/spaces/fisharp/starcoder-playground/discussions/new?${paramsStr}`, '_blank');

    // Restore the button to its idle state.
    shareBtnEl.style.removeProperty('pointer-events');
    shareIconEl.style.removeProperty('display');
    loadingIconEl.style.display = 'none';
}
static/community-icon.svg ADDED
static/loading-icon.svg ADDED
static/styles.css ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600;700&display=swap');
2
+
3
+ h1, h2 {
4
+ font-family: 'IBM Plex Mono', sans-serif;
5
+ }
6
+
7
+ .generating {
8
+ visibility: hidden
9
+ }
10
+
11
+ .gradio-container {
12
+ color: black
13
+ }
14
+
15
+ /* monospace_css */
16
+ #q-input textarea {
17
+ font-family: monospace, 'Consolas', Courier, monospace;
18
+ }
19
+
20
+ /* Share Button */
21
+
22
+ /* it was hidden directly inside the svg xml content */
23
+ #share-btn-loading-icon {
24
+ display: none;
25
+ }
26
+
27
+ a {
28
+ text-decoration-line: underline;
29
+ font-weight: 600;
30
+ }
31
+
32
+ .animate-spin {
33
+ animation: spin 1s linear infinite;
34
+ }
35
+
36
+ @keyframes spin {
37
+ from {
38
+ transform: rotate(0deg);
39
+ }
40
+ to {
41
+ transform: rotate(360deg);
42
+ }
43
+ }
44
+
45
+ #share-btn-container {
46
+ display: flex;
47
+ padding-left: 0.5rem !important;
48
+ padding-right: 0.5rem !important;
49
+ background-color: #000000;
50
+ justify-content: center;
51
+ align-items: center;
52
+ border-radius: 9999px !important;
53
+ width: 15rem;
54
+ }
55
+
56
+ #share-btn {
57
+ all: initial;
58
+ color: #ffffff;
59
+ font-weight: 600;
60
+ cursor: pointer;
61
+ font-family: 'IBM Plex Sans', sans-serif;
62
+ margin-left: 0.5rem !important;
63
+ padding-top: 0.25rem !important;
64
+ padding-bottom: 0.25rem !important;
65
+ }
66
+
67
+ #share-btn * {
68
+ all: unset;
69
+ }
70
+
71
+ #share-btn-container div:nth-child(-n+2) {
72
+ width: auto !important;
73
+ min-height: 0px !important;
74
+ }
75
+
76
+ #share-btn-container .wrap {
77
+ display: none !important;
78
+ }