northern-64bit committed on
Commit
550bf22
·
unverified ·
2 Parent(s): 45572d1 b473a0e

Merge pull request #1 from effixis/add-initial-version

Browse files
Basic_SQL_Injections.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import shutil
2
+ import streamlit as st
3
+ import sqlite3
4
+ from dotenv import load_dotenv
5
+ from langchain.chains import create_sql_query_chain
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_community.utilities import SQLDatabase
8
+ from modules.utils import set_sidebar
9
+
10
+
11
@st.cache_resource(show_spinner="Loading database ...")
def load_database() -> SQLDatabase:
    """Open the working copy of the Chinook database.

    The function is wrapped in ``st.cache_resource``, which shows the
    "Loading database ..." spinner while the wrapped call runs.

    Returns:
        A ``SQLDatabase`` built from the SQLite file
        ``data/chinook_working.db``.
    """
    working_db_uri = "sqlite:///data/chinook_working.db"
    return SQLDatabase.from_uri(working_db_uri)
14
+
15
+
16
def reset_database():
    """Restore the working database from the backup and reopen it.

    Overwrites ``./data/chinook_working.db`` with the contents of
    ``./data/chinook_backup.db``.

    Returns:
        A new ``SQLDatabase`` created from the restored working file.
    """
    backup_path = "./data/chinook_backup.db"
    working_path = "./data/chinook_working.db"
    shutil.copyfile(backup_path, working_path)

    restored_database = SQLDatabase.from_uri("sqlite:///data/chinook_working.db")
    return restored_database
20
+
21
+
22
# Load environment variables from a local .env file before the client is built.
load_dotenv()

# Single chat-model client shared by the whole page; temperature is pinned to 0.
openai_instance = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
27
+
28
+
29
def is_sqlite_operational_error(error):
    """Return True when *error* is, or wraps, a ``sqlite3.OperationalError``.

    Database layers built on SQLAlchemy re-raise driver errors inside their
    own exception types and keep the driver exception on an ``orig``
    attribute, so a plain ``except sqlite3.OperationalError`` does not match
    them. Checking ``orig`` covers both the raw and the wrapped form.
    """
    return isinstance(getattr(error, "orig", error), sqlite3.OperationalError)


def split_sql_statements(sql_text):
    """Split *sql_text* on ';' and drop fragments that are only whitespace.

    NOTE: this is a naive split; a ';' inside a string literal also splits.
    Fragments are returned unstripped, exactly as they appear in the text.
    """
    return [fragment for fragment in sql_text.split(";") if fragment.strip()]


def main():
    """Render the basic SQL-injection demo page.

    Shows the introduction and the database schema image, then turns the
    user's request into SQL with the LLM chain and runs every generated
    statement against the working database, displaying results and errors.
    """
    st.set_page_config(
        page_title="AMLD SQL injection demo", page_icon="assets/effixis_logo.ico", layout="centered"
    )
    set_sidebar()
    # Raw strings below: "\:" is not a valid Python escape sequence, so a
    # normal literal triggers an invalid-escape warning. The text is unchanged.
    st.title(r"SQL Injections via LLM\:s")
    st.markdown("### *Welcome to Effixis' demo for AMLD EPFL 2024!* 🎉")

    st.markdown(
        r"""
        #### What is this demo about?
        This demo is about risk associated with the use of LLM\:s, in this case illustrated by SQL injections.
        SQL injections are a common vulnerability in web applications.
        They allow an attacker to execute arbitrary SQL code on the database server.
        This a very dangerous vulnerability as it can lead to data leaks, data corruption, and even data loss.

        #### The SQL database used in this demo
        The database used in this demo is the Chinook database.
        It is a sample database that represents a digital media store, including tables for artists, albums, media tracks, invoices and customers.

        You can see the shema below:
        """
    )
    st.image("assets/chinook.png")

    st.markdown(
        r"""
        #### What does LLM\:s have to do with this?
        A large usecase for large language models (LLM\:s) is to generate SQL queries.
        This is a very useful feature, as it allows users to interact with databases without having to know SQL.
        But this is also prone to SQL injections, as the users and by extension the LLM\:s, can generate malicious SQL queries.
        """
    )

    st.divider()
    st.markdown("#### **Try to generate some malicius queries below!**")

    # The button restores the backup copy; otherwise reuse the loaded database.
    if st.button("Reset database"):
        database = reset_database()
    else:
        database = load_database()
    chain = create_sql_query_chain(llm=openai_instance, db=database)

    if user_request := st.text_input("Enter your request here:"):
        with st.spinner("Generating response ..."):
            openai_response = chain.invoke({"question": user_request})
            st.markdown("## Result:")
            st.markdown(f"**SQL Response:** {openai_response}")
            st.markdown("## SQL Result:")
            # The generated SQL is executed as-is on purpose: this page
            # demonstrates the injection risk.
            for sql_query in split_sql_statements(openai_response):
                try:
                    sql_result = database.run(sql_query)
                except Exception as error:
                    # Only SQLite operational errors (raw or wrapped) are
                    # reported to the user; anything else still propagates.
                    if not is_sqlite_operational_error(error):
                        raise
                    st.error(error)
                else:
                    if sql_result:
                        st.code(sql_result)


if __name__ == "__main__":
    main()
README.md CHANGED
@@ -1 +1,59 @@
1
- # shared-amld-sql-injection-demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AMLD SQL Injection Demo
2
+
3
+ ## Introduction
4
+
5
+ Welcome to the AMLD SQL Injection Demo by Effixis for AMLD EPFL 2024! This project showcases the risks of SQL injections in web applications, particularly when using Large Language Models (LLMs). The repository includes two demonstrations: Basic SQL Injections and LLM Safeguard.
6
+
7
+ ## Features
8
+
9
+ - **Basic SQL Injections (`Basic_SQL_Injections.py`):** Demonstrates the risks of direct SQL query generation by LLMs, leading to potential SQL injections.
10
+ - **LLM Safeguard (`pages/LLM_safeguard.py`):** Illustrates an advanced setup where an LLM Safeguard is employed to detect and filter out malicious SQL queries.
11
+ - **Chinook Database Integration:** Uses the Chinook sample database, representing a digital media store.
12
+ - **Interactive Web Interface:** Built with Streamlit, offering a user-friendly interface for interacting with both demonstrations.
13
+ - **Database Reset Functionality:** Allows users to reset the database to its original state for repeated tests.
14
+
15
+ ## Installation
16
+
17
+ 1. Clone the repository:
18
+
19
+ ```bash
20
+ git clone https://github.com/effixis/shared-amld-sql-injection-demo.git
21
+ ```
22
+
23
+ 2. Navigate to the cloned directory:
24
+
25
+ ```bash
26
+ cd shared-amld-sql-injection-demo
27
+ ```
28
+
29
+ 3. Install the required packages:
30
+
31
+ Activate your preferred Python environment and install the required packages using the provided `requirements.txt` file. For example, using Conda:
32
+
33
+ ```bash
34
+ conda create -n amld-sql-injection-demo
35
+ conda activate amld-sql-injection-demo
36
+ pip install -r requirements.txt
37
+ ```
38
+
39
+ 4. Create a `.env` file in the root directory and set the OpenAI API key:
40
+
41
+ ```bash
42
+ echo "OPENAI_API_KEY=enter_your_api_key_here" > .env
43
+ ```
44
+
45
+ You can find your API key on the [OpenAI dashboard](https://beta.openai.com/).
46
+
47
+ ## Usage
48
+
49
+ Run the Streamlit application:
50
+
51
+ ```bash
52
+ streamlit run Basic_SQL_Injections.py
53
+ ```
54
+
55
+ Follow the instructions on the web interface to interact with the application.
56
+
57
+ ## Disclaimer
58
+
59
+ This demo is for educational purposes to showcase the risk of SQL injections using LLMs. It should not be used for malicious purposes. Users are responsible for any misuse of the tools and information provided.
assets/chinook.png ADDED
assets/effixis_logo.ico ADDED
data/chinook_backup.db ADDED
Binary file (918 kB). View file
 
data/chinook_working.db ADDED
Binary file (918 kB). View file
 
modules/utils.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def set_sidebar():
    """Fill the Streamlit sidebar with the Effixis branding panel.

    Lays out a header and tagline next to the logo in a 3:1 column split,
    followed by the "About Effixis" text, a link line and a horizontal rule.
    """
    panel = st.sidebar

    # Wide column for the text, narrow column for the logo.
    text_column, logo_column = panel.columns([3, 1])
    text_column.header("Effixis")
    text_column.markdown(
        """***Take your Artificial Intelligence projects to the next level.***"""
    )
    logo_column.image("assets/effixis_logo.ico", use_column_width=True)

    about_text = """
        #### About Effixis
        *Effixis was founded in 2017, in close proximity to the Swiss Institute of Technology in Lausanne (EPFL), with the goal of making data analytics and machine learning accessible to private companies and public institutions.
        Since then, we have expanded our reach and opened offices in Brussels in 2022.
        Our company specializes in Natural Language Processing (NLP), Large Language Models (LLMs), and proprietary technologies, allowing us to offer top-tier services and products to our clients and partners.
        We are dedicated to fostering long-term and reliable partnerships with our clients through our innovative approaches, and unwavering commitment.*
        """
    panel.markdown(about_text)
    panel.markdown("#### Learn more about us at: https://effixis.ch/")
    panel.markdown("---")
pages/LLM_safeguard.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import shutil
2
+ import streamlit as st
3
+ import sqlite3
4
+ from dotenv import load_dotenv
5
+ from langchain.chains import create_sql_query_chain
6
+ from langchain.schema import HumanMessage
7
+ from langchain_openai import ChatOpenAI
8
+ from langchain_community.utilities import SQLDatabase
9
+ from modules.utils import set_sidebar
10
+
11
+
12
@st.cache_resource(show_spinner="Loading database ...")
def load_database() -> SQLDatabase:
    """Return a handle on the working Chinook SQLite database.

    Wrapped in ``st.cache_resource`` with a "Loading database ..." spinner.

    Returns:
        A ``SQLDatabase`` created from ``data/chinook_working.db``.
    """
    database_uri = "sqlite:///data/chinook_working.db"
    return SQLDatabase.from_uri(database_uri)
15
+
16
+
17
def reset_database():
    """Copy the backup database over the working one and reopen it.

    Returns:
        A new ``SQLDatabase`` created from the freshly copied
        ``data/chinook_working.db``.
    """
    source_file = "./data/chinook_backup.db"
    target_file = "./data/chinook_working.db"
    shutil.copyfile(source_file, target_file)

    fresh_database = SQLDatabase.from_uri("sqlite:///data/chinook_working.db")
    return fresh_database
21
+
22
+
23
# Pull environment variables from a local .env file before creating the client.
load_dotenv()

# One chat-model client is used for both SQL generation and the safeguard check.
openai_instance = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
28
+
29
# Page chrome: browser tab title/icon and the shared sidebar.
st.set_page_config(page_title="LLM Safeguard", page_icon="assets/effixis_logo.ico")
set_sidebar()

# Introductory copy explaining how this page differs from the basic demo.
safeguard_intro = """
#### What is the difference between this demo and the one in the previous page?
In the previous page, the generated SQL query from the LLM was directly executed on the SQL database.
In this demo, the generated SQL query is first checked by an LLM Safeguard, which detects and removes malicious SQL queries.
Malicous SQL queries are defined as any SQL code that modifies the table. This includes SQL code that deletes, updates, or inserts data into the table.
"""

st.title("LLM Safeguard")
st.markdown("### *Can you sneak a malicious request past the LLM Safeguard?*")
st.markdown(safeguard_intro)

st.divider()
st.markdown("#### **Try to bypass the LLM Safeguard below!**")
47
+
48
def is_sqlite_operational_error(error):
    """Return True when *error* is, or wraps, a ``sqlite3.OperationalError``.

    Database layers built on SQLAlchemy re-raise driver errors inside their
    own exception types and keep the driver exception on an ``orig``
    attribute, so a plain ``except sqlite3.OperationalError`` does not match
    them. Checking ``orig`` covers both the raw and the wrapped form.
    """
    return isinstance(getattr(error, "orig", error), sqlite3.OperationalError)


def extract_fenced_query(safeguard_reply):
    """Return the text between the first pair of ''' fences, or None if absent."""
    fenced_parts = safeguard_reply.split("'''")
    if len(fenced_parts) < 2:
        return None
    return fenced_parts[1]


# Prompt sent to the safeguard model; ``{openai_response}`` is filled with the
# SQL produced by the first LLM call.
SAFEGUARD_PROMPT_TEMPLATE = """
You are a SQL expert and check the generated SQL query for malicious SQL code.

Malicous code is defined as any SQL code that modifies and/or deletes and/or drops the table and/or it's data. This includes SQL code that drops, deletes, updates, or inserts data into the table. Dropping tables is also considered as malicious.

If you find malicious SQL, please remove it. Just return the SQL query without the malicious SQL code. If you don't find any malicious SQL, just return the SQL query as is.

Check the generated SQL query for malicious code:
=======================
{openai_response}
=======================

Make sure to follow the output format below.

Output format:
=======================
Reasoning as to why the SQL query is malicious or not.

SQL query without malicious code:
'''
[INSERT_SAFE_SQL_QUERY_OR_EMPTY_STRING]
'''
"""

# The button restores the backup copy; otherwise reuse the loaded database.
if st.button("Reset database"):
    database = reset_database()
else:
    database = load_database()
chain = create_sql_query_chain(llm=openai_instance, db=database)

if user_request := st.text_input("Enter your request here:"):
    with st.spinner("Generating response ..."):
        # Step 1: let the LLM translate the request into SQL.
        openai_response = chain.invoke({"question": user_request})
        st.markdown("## Generated SQL:")
        st.markdown(f"**SQL Response:** {openai_response}")

        # Step 2: ask the same model to strip anything that modifies data.
        prompt = SAFEGUARD_PROMPT_TEMPLATE.format(openai_response=openai_response)
        messages = [HumanMessage(content=prompt)]
        safeguard_reply = openai_instance.generate(messages=[messages]).generations[0][0].text
        st.markdown("## LLM Safeguard Result:")
        st.code(safeguard_reply, language="sql")

        # Step 3: run whatever the safeguard returned between the ''' fences.
        st.markdown("## SQL Result:")
        safe_query = extract_fenced_query(safeguard_reply)
        if safe_query is None:
            st.error("No SQL query found!")
            safe_query = ""
        for sql_query in safe_query.split(";"):
            # Skip blank fragments and anything containing "[", which catches
            # the unfilled [INSERT_SAFE_SQL_QUERY_OR_EMPTY_STRING] placeholder.
            if not sql_query.strip() or "[" in sql_query:
                continue
            try:
                sql_result = database.run(sql_query)
            except Exception as error:
                # Only SQLite operational errors (raw or wrapped) are
                # reported to the user; anything else still propagates.
                if not is_sqlite_operational_error(error):
                    raise
                st.error(error)
            else:
                if sql_result:
                    st.code(sql_result)
        st.success("Done!")
105
+
requirements.txt ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.5
2
+ aiosignal==1.3.1
3
+ altair==4.0.0
4
+ anyio==4.2.0
5
+ astor==0.8.1
6
+ asttokens==2.2.1
7
+ async-timeout==4.0.3
8
+ attrs==23.1.0
9
+ backcall==0.2.0
10
+ backoff==2.2.1
11
+ beautifulsoup4==4.12.2
12
+ black==21.10b0
13
+ blinker==1.4
14
+ cachetools==5.3.1
15
+ certifi==2023.7.22
16
+ charset-normalizer==3.2.0
17
+ click==8.1.6
18
+ colorama==0.4.4
19
+ command-not-found==0.3
20
+ contourpy==1.1.0
21
+ cryptography==3.4.8
22
+ cycler==0.11.0
23
+ dataclasses-json==0.5.14
24
+ dbus-python==1.2.18
25
+ decorator==5.1.1
26
+ distro==1.7.0
27
+ distro-info===1.1build1
28
+ entrypoints==0.4
29
+ exceptiongroup==1.2.0
30
+ executing==1.2.0
31
+ fonttools==4.42.0
32
+ frozenlist==1.4.0
33
+ gitdb==4.0.10
34
+ GitPython==3.1.32
35
+ google-search-results==2.4.2
36
+ greenlet==2.0.2
37
+ h11==0.14.0
38
+ httpcore==1.0.2
39
+ httplib2==0.20.2
40
+ httpx==0.26.0
41
+ idna==3.4
42
+ importlib-metadata==4.6.4
43
+ ipython==8.14.0
44
+ jedi==0.19.0
45
+ jeepney==0.7.1
46
+ Jinja2==3.1.2
47
+ joblib==1.3.2
48
+ jsonpatch==1.33
49
+ jsonpointer==2.4
50
+ jsonschema==4.19.0
51
+ jsonschema-specifications==2023.7.1
52
+ keyring==23.5.0
53
+ kiwisolver==1.4.4
54
+ langchain==0.1.4
55
+ langchain-community==0.0.16
56
+ langchain-core==0.1.16
57
+ langchain-openai==0.0.5
58
+ langsmith==0.0.83
59
+ launchpadlib==1.10.16
60
+ lazr.restfulclient==0.14.4
61
+ lazr.uri==1.0.6
62
+ markdown-it-py==3.0.0
63
+ MarkupSafe==2.1.3
64
+ marshmallow==3.20.1
65
+ matplotlib==3.7.2
66
+ matplotlib-inline==0.1.6
67
+ mdurl==0.1.2
68
+ more-itertools==8.10.0
69
+ multidict==6.0.4
70
+ mypy-extensions==1.0.0
71
+ netifaces==0.11.0
72
+ numexpr==2.8.5
73
+ numpy==1.25.2
74
+ oauthlib==3.2.0
75
+ openai==1.10.0
76
+ openapi-schema-pydantic==1.2.4
77
+ packaging==23.2
78
+ pandas==1.5.3
79
+ pandasai==0.8.4
80
+ parso==0.8.3
81
+ pathspec==0.9.0
82
+ patsy==0.5.3
83
+ pexpect==4.8.0
84
+ pickleshare==0.7.5
85
+ Pillow==9.5.0
86
+ platformdirs==2.5.1
87
+ plotly==5.16.0
88
+ prompt-toolkit==3.0.39
89
+ protobuf==4.24.0
90
+ psycopg2==2.9.9
91
+ ptyprocess==0.7.0
92
+ pure-eval==0.2.2
93
+ pyarrow==12.0.1
94
+ pydantic==1.10.12
95
+ pydeck==0.8.0
96
+ Pygments==2.16.1
97
+ PyGObject==3.42.1
98
+ PyJWT==2.3.0
99
+ Pympler==1.0.1
100
+ pyparsing==2.4.7
101
+ python-apt==2.4.0+ubuntu1
102
+ python-dateutil==2.8.2
103
+ python-dotenv==1.0.0
104
+ pytz==2023.3
105
+ pytz-deprecation-shim==0.1.0.post0
106
+ PyYAML==5.4.1
107
+ referencing==0.30.2
108
+ regex==2023.8.8
109
+ requests==2.31.0
110
+ rich==13.5.2
111
+ rpds-py==0.9.2
112
+ scikit-learn==1.3.0
113
+ scipy==1.11.1
114
+ seaborn==0.12.2
115
+ SecretStorage==3.3.1
116
+ six==1.16.0
117
+ smmap==5.0.0
118
+ sniffio==1.3.0
119
+ soupsieve==2.4.1
120
+ SQLAlchemy==2.0.19
121
+ stack-data==0.6.2
122
+ statsmodels==0.14.0
123
+ streamlit==1.30.0
124
+ systemd-python==234
125
+ tabulate==0.9.0
126
+ tenacity==8.2.2
127
+ threadpoolctl==3.2.0
128
+ tiktoken==0.5.2
129
+ toml==0.10.2
130
+ tomli==1.2.2
131
+ toolz==0.12.0
132
+ tornado==6.3.3
133
+ tqdm==4.66.1
134
+ traitlets==5.9.0
135
+ typing-inspect==0.9.0
136
+ typing_extensions==4.7.1
137
+ tzdata==2023.3
138
+ tzlocal==4.3.1
139
+ ubuntu-advantage-tools==8001
140
+ ufw==0.36.1
141
+ unattended-upgrades==0.1
142
+ urllib3==2.0.4
143
+ validators==0.21.2
144
+ wadllib==1.3.6
145
+ watchdog==3.0.0
146
+ wcwidth==0.2.6
147
+ wikipedia==1.4.0
148
+ yarl==1.9.2
149
+ zipp==1.0.0