Modify path to relative path
Browse files- test_pretrained.ipynb +15 -15
test_pretrained.ipynb
CHANGED
|
@@ -48,7 +48,7 @@
|
|
| 48 |
" hugging_face_path = snapshot_download(\n",
|
| 49 |
" repo_id=\"USC-Applied-NLP-Group/SQL-Generation\",\n",
|
| 50 |
" repo_type=\"model\", \n",
|
| 51 |
-
" allow_patterns=[\"src/*\", \"train-data/*\", \"deepseek-coder-1.3b-instruct/*\"], \n",
|
| 52 |
" )\n",
|
| 53 |
" sys.path.append(hugging_face_path)\n",
|
| 54 |
" current_path = hugging_face_path"
|
|
@@ -166,7 +166,7 @@
|
|
| 166 |
},
|
| 167 |
{
|
| 168 |
"cell_type": "code",
|
| 169 |
-
"execution_count":
|
| 170 |
"metadata": {},
|
| 171 |
"outputs": [
|
| 172 |
{
|
|
@@ -179,7 +179,7 @@
|
|
| 179 |
],
|
| 180 |
"source": [
|
| 181 |
"# Create connection to sqlite3 database\n",
|
| 182 |
-
"connection = sql.connect('
|
| 183 |
"cursor = connection.cursor()\n",
|
| 184 |
"\n",
|
| 185 |
"# Execute query from model output and print result\n",
|
|
@@ -304,7 +304,7 @@
|
|
| 304 |
},
|
| 305 |
{
|
| 306 |
"cell_type": "code",
|
| 307 |
-
"execution_count":
|
| 308 |
"metadata": {},
|
| 309 |
"outputs": [
|
| 310 |
{
|
|
@@ -325,7 +325,7 @@
|
|
| 325 |
}
|
| 326 |
],
|
| 327 |
"source": [
|
| 328 |
-
"less_than_90_df = pd.read_csv(\"
|
| 329 |
"run_evaluation(less_than_90_df, \"Less than 90\")\n",
|
| 330 |
"print(\"Dataset length: \" + str(len(less_than_90_df)))"
|
| 331 |
]
|
|
@@ -339,7 +339,7 @@
|
|
| 339 |
},
|
| 340 |
{
|
| 341 |
"cell_type": "code",
|
| 342 |
-
"execution_count":
|
| 343 |
"metadata": {},
|
| 344 |
"outputs": [
|
| 345 |
{
|
|
@@ -372,7 +372,7 @@
|
|
| 372 |
}
|
| 373 |
],
|
| 374 |
"source": [
|
| 375 |
-
"game_queries = pd.read_csv(\"
|
| 376 |
"run_evaluation(game_queries, \"Queries from game\")\n",
|
| 377 |
"print(\"Dataset length: \" + str(len(game_queries)))"
|
| 378 |
]
|
|
@@ -386,7 +386,7 @@
|
|
| 386 |
},
|
| 387 |
{
|
| 388 |
"cell_type": "code",
|
| 389 |
-
"execution_count":
|
| 390 |
"metadata": {},
|
| 391 |
"outputs": [
|
| 392 |
{
|
|
@@ -406,7 +406,7 @@
|
|
| 406 |
}
|
| 407 |
],
|
| 408 |
"source": [
|
| 409 |
-
"other_stats_queries = pd.read_csv(\"
|
| 410 |
"run_evaluation(other_stats_queries, \"Queries from other stats\")\n",
|
| 411 |
"print(\"Dataset length: \" + str(len(other_stats_queries)))"
|
| 412 |
]
|
|
@@ -420,7 +420,7 @@
|
|
| 420 |
},
|
| 421 |
{
|
| 422 |
"cell_type": "code",
|
| 423 |
-
"execution_count":
|
| 424 |
"metadata": {},
|
| 425 |
"outputs": [
|
| 426 |
{
|
|
@@ -438,7 +438,7 @@
|
|
| 438 |
}
|
| 439 |
],
|
| 440 |
"source": [
|
| 441 |
-
"team_queries = pd.read_csv(\"
|
| 442 |
"run_evaluation(team_queries, \"Queries from team\")\n",
|
| 443 |
"print(\"Dataset length: \" + str(len(team_queries)))"
|
| 444 |
]
|
|
@@ -452,7 +452,7 @@
|
|
| 452 |
},
|
| 453 |
{
|
| 454 |
"cell_type": "code",
|
| 455 |
-
"execution_count":
|
| 456 |
"metadata": {},
|
| 457 |
"outputs": [
|
| 458 |
{
|
|
@@ -472,7 +472,7 @@
|
|
| 472 |
}
|
| 473 |
],
|
| 474 |
"source": [
|
| 475 |
-
"join_queries = pd.read_csv(\"
|
| 476 |
"run_evaluation(join_queries, \"Queries with join\")\n",
|
| 477 |
"print(\"Dataset length: \" + str(len(join_queries)))"
|
| 478 |
]
|
|
@@ -486,7 +486,7 @@
|
|
| 486 |
},
|
| 487 |
{
|
| 488 |
"cell_type": "code",
|
| 489 |
-
"execution_count":
|
| 490 |
"metadata": {},
|
| 491 |
"outputs": [
|
| 492 |
{
|
|
@@ -520,7 +520,7 @@
|
|
| 520 |
}
|
| 521 |
],
|
| 522 |
"source": [
|
| 523 |
-
"no_join_queries = pd.read_csv(\"
|
| 524 |
"run_evaluation(no_join_queries, \"Queries without join\")\n",
|
| 525 |
"print(\"Dataset length: \" + str(len(no_join_queries)))"
|
| 526 |
]
|
|
|
|
| 48 |
" hugging_face_path = snapshot_download(\n",
|
| 49 |
" repo_id=\"USC-Applied-NLP-Group/SQL-Generation\",\n",
|
| 50 |
" repo_type=\"model\", \n",
|
| 51 |
+
" allow_patterns=[\"src/*\", \"train-data/*\", \"deepseek-coder-1.3b-instruct/*\", \"nba-data/*\"], \n",
|
| 52 |
" )\n",
|
| 53 |
" sys.path.append(hugging_face_path)\n",
|
| 54 |
" current_path = hugging_face_path"
|
|
|
|
| 166 |
},
|
| 167 |
{
|
| 168 |
"cell_type": "code",
|
| 169 |
+
"execution_count": null,
|
| 170 |
"metadata": {},
|
| 171 |
"outputs": [
|
| 172 |
{
|
|
|
|
| 179 |
],
|
| 180 |
"source": [
|
| 181 |
"# Create connection to sqlite3 database\n",
|
| 182 |
+
"connection = sql.connect(get_path('nba-data/nba.sqlite'))\n",
|
| 183 |
"cursor = connection.cursor()\n",
|
| 184 |
"\n",
|
| 185 |
"# Execute query from model output and print result\n",
|
|
|
|
| 304 |
},
|
| 305 |
{
|
| 306 |
"cell_type": "code",
|
| 307 |
+
"execution_count": null,
|
| 308 |
"metadata": {},
|
| 309 |
"outputs": [
|
| 310 |
{
|
|
|
|
| 325 |
}
|
| 326 |
],
|
| 327 |
"source": [
|
| 328 |
+
"less_than_90_df = pd.read_csv(get_path(\"train-data/less_than_90.tsv\"), sep='\\t')\n",
|
| 329 |
"run_evaluation(less_than_90_df, \"Less than 90\")\n",
|
| 330 |
"print(\"Dataset length: \" + str(len(less_than_90_df)))"
|
| 331 |
]
|
|
|
|
| 339 |
},
|
| 340 |
{
|
| 341 |
"cell_type": "code",
|
| 342 |
+
"execution_count": null,
|
| 343 |
"metadata": {},
|
| 344 |
"outputs": [
|
| 345 |
{
|
|
|
|
| 372 |
}
|
| 373 |
],
|
| 374 |
"source": [
|
| 375 |
+
"game_queries = pd.read_csv(get_path(\"train-data/queries_from_game.tsv\"), sep='\\t')\n",
|
| 376 |
"run_evaluation(game_queries, \"Queries from game\")\n",
|
| 377 |
"print(\"Dataset length: \" + str(len(game_queries)))"
|
| 378 |
]
|
|
|
|
| 386 |
},
|
| 387 |
{
|
| 388 |
"cell_type": "code",
|
| 389 |
+
"execution_count": null,
|
| 390 |
"metadata": {},
|
| 391 |
"outputs": [
|
| 392 |
{
|
|
|
|
| 406 |
}
|
| 407 |
],
|
| 408 |
"source": [
|
| 409 |
+
"other_stats_queries = pd.read_csv(get_path(\"train-data/queries_from_other_stats.tsv\"), sep='\\t')\n",
|
| 410 |
"run_evaluation(other_stats_queries, \"Queries from other stats\")\n",
|
| 411 |
"print(\"Dataset length: \" + str(len(other_stats_queries)))"
|
| 412 |
]
|
|
|
|
| 420 |
},
|
| 421 |
{
|
| 422 |
"cell_type": "code",
|
| 423 |
+
"execution_count": null,
|
| 424 |
"metadata": {},
|
| 425 |
"outputs": [
|
| 426 |
{
|
|
|
|
| 438 |
}
|
| 439 |
],
|
| 440 |
"source": [
|
| 441 |
+
"team_queries = pd.read_csv(get_path(\"train-data/queries_from_team.tsv\"), sep='\\t')\n",
|
| 442 |
"run_evaluation(team_queries, \"Queries from team\")\n",
|
| 443 |
"print(\"Dataset length: \" + str(len(team_queries)))"
|
| 444 |
]
|
|
|
|
| 452 |
},
|
| 453 |
{
|
| 454 |
"cell_type": "code",
|
| 455 |
+
"execution_count": null,
|
| 456 |
"metadata": {},
|
| 457 |
"outputs": [
|
| 458 |
{
|
|
|
|
| 472 |
}
|
| 473 |
],
|
| 474 |
"source": [
|
| 475 |
+
"join_queries = pd.read_csv(get_path(\"train-data/with_join.tsv\"), sep='\\t')\n",
|
| 476 |
"run_evaluation(join_queries, \"Queries with join\")\n",
|
| 477 |
"print(\"Dataset length: \" + str(len(join_queries)))"
|
| 478 |
]
|
|
|
|
| 486 |
},
|
| 487 |
{
|
| 488 |
"cell_type": "code",
|
| 489 |
+
"execution_count": null,
|
| 490 |
"metadata": {},
|
| 491 |
"outputs": [
|
| 492 |
{
|
|
|
|
| 520 |
}
|
| 521 |
],
|
| 522 |
"source": [
|
| 523 |
+
"no_join_queries = pd.read_csv(get_path(\"train-data/without_join.tsv\"), sep='\\t')\n",
|
| 524 |
"run_evaluation(no_join_queries, \"Queries without join\")\n",
|
| 525 |
"print(\"Dataset length: \" + str(len(no_join_queries)))"
|
| 526 |
]
|