hf-model-comparisons / my_app.aiconfig.json
{
  "name": "Hugging Face LLM Comparisons",
  "schema_version": "latest",
  "metadata": {
    "parameters": {
      "CoLA_ex_prompt": "Is the sentence grammatical or ungrammatical?\n\n\"This building is than that one.\"",
      "SST_2_ex_prompt": "Is the movie review positive, negative, or neutral?\n\n\"The movie is funny, smart, visually inventive, and most of all, alive.\"",
      "WNLI_ex_prompt": "Sentence B replaces sentence A's ambiguous pronoun with one of the nouns - is this the correct noun?\n\n\"A) Lily spoke to Donna, breaking her concentration.\nB) Lily spoke to Donna, breaking Lily's concentration.\""
    },
    "models": {},
    "default_model": null,
    "model_parsers": null
  },
"description": "In this notebook, we compare the individual performance of HF hosted LLMs () on a few example questions from the GLUE benchmarks (https://gluebenchmark.com/tasks).\n\nExample questions taken from \"What is the GLUE Benchmark\" medium post - https://angelina-yang.medium.com/what-is-the-glue-benchmark-for-nlu-systems-61127b3cab3f",
"prompts": [
{
"name": "mistral_7b_instruct_v0.1",
"input": "{{SST_2_ex_prompt}}",
"metadata": {
"model": {
"name": "Text Generation",
"settings": {
"model": "mistralai/Mistral-7B-Instruct-v0.1"
}
},
"tags": null,
"parameters": {}
},
"outputs": [
]
},
{
"name": "google_flan_t5_sm",
"input": "{{SST_2_ex_prompt}}",
"metadata": {
"model": {
"name": "Conversational",
"settings": {
"model": "google/flan-t5-small",
"max_new_tokens": 250,
"stream": false
}
},
"tags": null,
"parameters": {}
},
"outputs": [
]
},
{
"name": "tinyllama-1_1B",
"input": "<|system|>\nYou are to answer the following question by the user</s>\n<|user|>\n{{SST_2_ex_prompt}}</s>\n<|assistant|>",
"metadata": {
"model": {
"name": "Conversational",
"settings": {
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
}
},
"tags": null,
"parameters": {}
},
"outputs": [
]
}
]
}
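
For reference, a config like this is typically driven from Python with the python-aiconfig package. The sketch below is a minimal example and not part of the stored file: it assumes python-aiconfig is installed, that the Hugging Face model parsers named in this config ("Text Generation", "Conversational") have been registered (e.g. via the aiconfig-extension-hugging-face package), and that Hugging Face inference credentials are available in the environment. The prompt names and the {{SST_2_ex_prompt}} parameter come from the config above.

```python
import asyncio

# Assumes the python-aiconfig package. The Hugging Face model parsers this
# config references ("Text Generation", "Conversational") must be registered
# separately, e.g. via aiconfig-extension-hugging-face.
from aiconfig import AIConfigRuntime

async def main():
    # Load the serialized config shown above.
    config = AIConfigRuntime.load("my_app.aiconfig.json")

    # Run one prompt as-is; {{SST_2_ex_prompt}} resolves from
    # metadata.parameters in the config.
    await config.run("mistral_7b_instruct_v0.1")
    print(config.get_output_text("mistral_7b_instruct_v0.1"))

    # Override the template parameter to ask a different GLUE-style question,
    # here swapping in the CoLA example defined in the same config.
    params = {
        "SST_2_ex_prompt": "Is the sentence grammatical or ungrammatical?"
        '\n\n"This building is than that one."'
    }
    await config.run("google_flan_t5_sm", params)
    print(config.get_output_text("google_flan_t5_sm"))

asyncio.run(main())
```

Because all three prompts share the same {{SST_2_ex_prompt}} template variable, running each prompt name in turn compares the models on an identical question, which is the point of the notebook.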