update app
- app.py +11 -21
- utils/languages.json +3 -3
- utils/table_contents.md +1 -1
app.py
CHANGED
@@ -39,7 +39,7 @@ def load_model(values, language):
     model = values["model"]
     if not model:
         text = f"""No model is available for {language.capitalize()}. If you trained a model on this language, let us know in\
-        in the [Community tab](https://huggingface.co/spaces/loubnabnl/the-stack-bot/discussions) to feature your model!\n\
+        in the [Community tab](https://huggingface.co/spaces/loubnabnl/the-stack-bot/discussions) to feature your model!\n\n\
         You can also train your own model on The Stack using the instructions below π"""
         st.write(text)
         if st.button("Fine-tune your own model", key=4):
@@ -50,8 +50,8 @@ def load_model(values, language):
    ```python
    from transformers import AutoModelForCausalLM, AutoTokenizer

-    tokenizer = AutoTokenizer.from_pretrained({model})
-    model = AutoModelForCausalLM.from_pretrained({model}, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained("{model}")
+    model = AutoModelForCausalLM.from_pretrained("{model}", trust_remote_code=True)

    inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt")
    outputs = model.generate(inputs)
@@ -60,7 +60,6 @@ def load_model(values, language):
     """
     st.markdown(text)
     st.markdown(code)
-    st.write(f"The scores of this model are the following: {values['scores']}")

 def generate_code(
     demo, gen_prompt, max_new_tokens=40, temperature=0.2, seed=0
@@ -78,31 +77,24 @@ def generate_code(
         generated_text = ""
     return generated_text

-def init_nested_buttons():
-    if "Models trained on dataset" not in st.session_state:
-        st.session_state["Models trained on dataset"] = False
-
-    if "Generate code" not in st.session_state:
-        st.session_state["Generate code"] = False
-
-    if st.button("Models trained on dataset"):
-        st.session_state["Models trained on dataset"] = not st.session_state["Models trained on dataset"]
-
-
 languages = load_languages()

+st.header("Languages of The Stack π")
+st.markdown("The Stack contains over 6TB of permissively-licensed source code files covering 358 programming languages. Select one to get started:")
 col1, col2 = st.columns([1, 1.5])
 with col1:
-    selected_language = st.selectbox("
+    selected_language = st.selectbox("Programming Language", list(languages.keys()), label_visibility="collapsed", key=1)

 st.write(f"Here's how you can load the {selected_language.capitalize()} subset of The Stack:")
 code = how_to_load(selected_language)
-
+
+with st.expander("More info about the dataset"):
     st.write(f"The dataset contains {languages[selected_language]['num_examples']} examples.")
     # we can add some stats about files

-
-
+st.header("Models trained on The Stack π€")
+st.write("Here we show models trained on the language you select as part of BigCode project.")
+with st.expander(f"Models trained on {selected_language.capitalize()}"):
     load_model(languages[selected_language], selected_language)

 if languages[selected_language]["model"] and languages[selected_language]["gradio_demo"]:
@@ -114,8 +106,6 @@ if st.session_state["Models trained on dataset"]:
     ).strip()

     if st.button("Generate code"):
-        st.session_state["Generate code"] = not st.session_state["Generate code"]
-    if st.session_state["Generate code"]:
         with st.spinner("Generating code..."):
             generated_text = generate_code(
                 demo=languages[selected_language]["gradio_demo"],
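For reference, once the `{model}` placeholder is filled in from `utils/languages.json`, the snippet the app renders expands to something like the sketch below (shown with `bigcode/santacoder`; the final decode step is an assumed addition so the example is self-contained):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "bigcode/santacoder"

# SantaCoder ships custom modeling code, hence trust_remote_code=True.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True)

inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt")
outputs = model.generate(inputs)

# Assumed addition: decode the generated token ids back to text.
print(tokenizer.decode(outputs[0]))
```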
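The removed `init_nested_buttons` helper implemented the usual Streamlit trick for keeping a button-gated section open: the script reruns top-to-bottom on every interaction and `st.button` is only `True` on the run triggered by the click, so a flag in `st.session_state` has to persist the open state. A minimal sketch of that pattern, for context on what the commit drops:

```python
import streamlit as st

# st.button returns True only on the rerun triggered by the click,
# so persist a toggle in session_state to keep the section visible.
if "Generate code" not in st.session_state:
    st.session_state["Generate code"] = False

if st.button("Generate code"):
    st.session_state["Generate code"] = not st.session_state["Generate code"]

if st.session_state["Generate code"]:
    st.write("This section stays open across reruns.")
```

After this commit the app runs generation directly inside `if st.button(...)`, which is simpler at the cost of the output disappearing on the next interaction-triggered rerun.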
utils/languages.json
CHANGED
@@ -1,6 +1,6 @@
-{"python": {"num_examples": 10, "model": "bigcode/santacoder", "
-"java": {"num_examples": 10, "model": "bigcode/santacoder", "
-"javascript": {"num_examples": 10, "model": "bigcode/santacoder", "
+{"python": {"num_examples": 10, "model": "bigcode/santacoder", "gradio_demo": "https://loubnabnl-santa-demo.hf.space"},
+"java": {"num_examples": 10, "model": "bigcode/santacoder", "gradio_demo": "https://loubnabnl-santa-demo.hf.space"},
+"javascript": {"num_examples": 10, "model": "bigcode/santacoder", "gradio_demo": "https://loubnabnl-santa-demo.hf.space"},
 "typescript": {"num_examples": 10, "model": ""},
 "go": {"num_examples": 10, "model": ""},
 "php": {"num_examples": 10, "model": ""},
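The `load_languages()` call in `app.py` presumably just parses this file; a minimal sketch under that assumption, with the file path and dict shape taken from the diff:

```python
import json

# Assumed implementation of load_languages(): read per-language metadata
# (example count, model checkpoint, optional hosted demo URL).
def load_languages():
    with open("utils/languages.json") as f:
        return json.load(f)

languages = load_languages()
print(languages["python"]["model"])        # bigcode/santacoder
print(languages["python"]["gradio_demo"])  # https://loubnabnl-santa-demo.hf.space
```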
utils/table_contents.md
CHANGED
@@ -6,4 +6,4 @@

 3 - Demos for code generation

-If you trained a model on The Stack, let us know so we can feature it! π
+If you trained a model on The Stack, let us know in the [Community tab](https://huggingface.co/spaces/loubnabnl/the-stack-bot/discussions) so we can feature it! π