abdullahmubeen10 committed
Commit 1c03780 · verified · 1 Parent(s): 547ffff

Upload 5 files

.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
+ [theme]
+ base="light"
+ primaryColor="#29B4E8"
Demo.py ADDED
@@ -0,0 +1,109 @@
+ import streamlit as st
+ import sparknlp
+
+ from sparknlp.base import *
+ from sparknlp.annotator import *
+ from pyspark.ml import Pipeline
+
+ # Page configuration
+ st.set_page_config(
+     layout="wide",
+     initial_sidebar_state="auto"
+ )
+
+ # CSS for styling
+ st.markdown("""
+     <style>
+         .main-title {
+             font-size: 36px;
+             color: #4A90E2;
+             font-weight: bold;
+             text-align: center;
+         }
+         .section {
+             background-color: #f9f9f9;
+             padding: 10px;
+             border-radius: 10px;
+             margin-top: 10px;
+         }
+         .section p, .section ul {
+             color: #666666;
+         }
+     </style>
+ """, unsafe_allow_html=True)
+
+ # Cache the Spark session so it is started only once per app instance
+ @st.cache_resource
+ def init_spark():
+     return sparknlp.start()
+
+ # Cache the pipeline so the pretrained T5 model is loaded only once
+ @st.cache_resource
+ def create_pipeline():
+     documentAssembler = DocumentAssembler() \
+         .setInputCol("text") \
+         .setOutputCol("documents")
+
+     t5 = T5Transformer.pretrained("t5_grammar_error_corrector") \
+         .setTask("gec:") \
+         .setInputCols(["documents"]) \
+         .setMaxOutputLength(200) \
+         .setOutputCol("corrections")
+
+     pipeline = Pipeline().setStages([documentAssembler, t5])
+     return pipeline
+
+ def fit_data(pipeline, data):
+     df = spark.createDataFrame([[data]]).toDF("text")
+     result = pipeline.fit(df).transform(df)
+     return result.select('corrections.result').collect()
+
+ # Sidebar content
+ model = st.sidebar.selectbox(
+     "Choose the pretrained model",
+     ['t5_grammar_error_corrector'],
+     help="For more info about the models visit: https://sparknlp.org/models"
+ )
+
+ # Set up the page layout
+ title = "Correct Sentences Grammar"
+ sub_title = "This demo uses a text-to-text model fine-tuned to correct grammatical errors when the task is set to “gec:”. It is based on Prithiviraj Damodaran’s Gramformer model."
+
+ st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
+ st.markdown(f'<div style="text-align: center; color: #666666;">{sub_title}</div>', unsafe_allow_html=True)
+
+ # Reference notebook link in sidebar
+ link = """
+     <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/T5_LINGUISTIC.ipynb#scrollTo=QAZ3vOX_SW7B">
+         <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
+     </a>
+ """
+ st.sidebar.markdown('Reference notebook:')
+ st.sidebar.markdown(link, unsafe_allow_html=True)
+
+ # Define the examples
+ examples = [
+     "She don't knows nothing about what's happening in the office.",
+     "They was playing soccer yesterday when it start raining heavily.",
+     "This car are more faster than that one, but it costed less money.",
+     "I seen him go to the store, but he don't buy nothing from there.",
+     "We was going to the park but it start raining before we could leave."
+ ]
+
+ # Text selection and analysis
+ selected_text = st.selectbox("Select an example", examples)
+ custom_input = st.text_input("Try it with your own sentence!")
+
+ text_to_analyze = custom_input if custom_input else selected_text
+
+ st.write('Text to be corrected:')
+ HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
+ st.markdown(HTML_WRAPPER.format(text_to_analyze), unsafe_allow_html=True)
+
+ # Initialize Spark and create pipeline
+ spark = init_spark()
+ pipeline = create_pipeline()
+ output = fit_data(pipeline, text_to_analyze)
+
+ # Display transformed sentence
+ st.write("Predicted Sentence:")
+ output_text = "".join(output[0][0])
+ st.markdown(f'<div class="scroll">{output_text}</div>', unsafe_allow_html=True)
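Note: the app above builds a one-row DataFrame and re-runs `pipeline.fit().transform()` for every request. Outside Streamlit, a lighter way to exercise the same stages on single sentences is Spark NLP's LightPipeline. The sketch below is a minimal illustration and is not part of this commit; it assumes the same pretrained model and column names used in Demo.py, and that `annotate()` keys follow the annotators' output column names.

```python
# Minimal sketch (not part of this commit): single-sentence inference with
# Spark NLP's LightPipeline, reusing the same stages as Demo.py.
import sparknlp
from sparknlp.base import DocumentAssembler, LightPipeline
from sparknlp.annotator import T5Transformer
from pyspark.ml import Pipeline

spark = sparknlp.start()

document_assembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("documents")

t5 = T5Transformer.pretrained("t5_grammar_error_corrector") \
    .setTask("gec:") \
    .setInputCols(["documents"]) \
    .setMaxOutputLength(200) \
    .setOutputCol("corrections")

# Fit on an empty DataFrame so the pretrained stages are materialized once,
# then wrap the fitted model for fast in-memory inference on plain strings.
empty_df = spark.createDataFrame([[""]]).toDF("text")
model = Pipeline().setStages([document_assembler, t5]).fit(empty_df)
light = LightPipeline(model)

result = light.annotate("She don't knows nothing about what's happening in the office.")
print(result["corrections"])  # result keys mirror the output column names
```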
Dockerfile ADDED
@@ -0,0 +1,72 @@
+ # Download base image ubuntu 18.04
+ FROM ubuntu:18.04
+
+ # Set environment variables
+ ENV NB_USER jovyan
+ ENV NB_UID 1000
+ ENV HOME /home/${NB_USER}
+ ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
+
+ # Install required packages
+ RUN apt-get update && apt-get install -y \
+     tar \
+     wget \
+     bash \
+     rsync \
+     gcc \
+     libfreetype6-dev \
+     libhdf5-serial-dev \
+     libpng-dev \
+     libzmq3-dev \
+     python3 \
+     python3-dev \
+     python3-pip \
+     unzip \
+     pkg-config \
+     software-properties-common \
+     graphviz \
+     openjdk-8-jdk \
+     ant \
+     ca-certificates-java \
+     && apt-get clean \
+     && update-ca-certificates -f
+
+ # Install Python 3.8 and pip
+ RUN add-apt-repository ppa:deadsnakes/ppa \
+     && apt-get update \
+     && apt-get install -y python3.8 python3-pip \
+     && apt-get clean
+
+ # Set up JAVA_HOME
+ RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> /etc/profile \
+     && echo "export PATH=\$JAVA_HOME/bin:\$PATH" >> /etc/profile
+
+ # Create a new user named "jovyan" with user ID 1000
+ RUN useradd -m -u ${NB_UID} ${NB_USER}
+
+ # Switch to the "jovyan" user
+ USER ${NB_USER}
+
+ # Set home and path variables for the user
+ ENV HOME=/home/${NB_USER} \
+     PATH=/home/${NB_USER}/.local/bin:$PATH
+
+ # Set up PySpark to use Python 3.8 for both driver and workers
+ ENV PYSPARK_PYTHON=/usr/bin/python3.8
+ ENV PYSPARK_DRIVER_PYTHON=/usr/bin/python3.8
+
+ # Set the working directory to the user's home directory
+ WORKDIR ${HOME}
+
+ # Upgrade pip and install Python dependencies
+ RUN python3.8 -m pip install --upgrade pip
+ COPY requirements.txt /tmp/requirements.txt
+ RUN python3.8 -m pip install -r /tmp/requirements.txt
+
+ # Copy the application code into the container at /home/jovyan
+ COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
+
+ # Expose port for Streamlit
+ EXPOSE 7860
+
+ # Define the entry point for the container
+ ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
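Note: the image depends on JAVA_HOME, PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON being picked up at runtime. A small sanity-check script like the hypothetical one below (not part of this commit) can confirm the environment inside the container before Streamlit starts.

```python
# check_env.py: hypothetical sanity check, not part of this commit.
# Prints the environment variables the Dockerfile sets for PySpark and Java.
import os
import sys

for var in ("JAVA_HOME", "PYSPARK_PYTHON", "PYSPARK_DRIVER_PYTHON"):
    print(f"{var} = {os.environ.get(var, '<not set>')}")

# The interpreter running this script should match PYSPARK_DRIVER_PYTHON.
print("Python executable:", sys.executable)
print("Python version:", sys.version.split()[0])
```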
pages/Workflow & Model Overview.py ADDED
@@ -0,0 +1,168 @@
+ import streamlit as st
+
+ # Custom CSS for better styling
+ st.markdown("""
+     <style>
+         .main-title {
+             font-size: 36px;
+             color: #4A90E2;
+             font-weight: bold;
+             text-align: center;
+         }
+         .sub-title {
+             font-size: 24px;
+             color: #4A90E2;
+             margin-top: 20px;
+         }
+         .section {
+             background-color: #f9f9f9;
+             padding: 15px;
+             border-radius: 10px;
+             margin-top: 20px;
+         }
+         .section h2 {
+             font-size: 22px;
+             color: #4A90E2;
+         }
+         .section p, .section ul {
+             color: #666666;
+         }
+         .link {
+             color: #4A90E2;
+             text-decoration: none;
+         }
+     </style>
+ """, unsafe_allow_html=True)
+
+ # Title
+ st.markdown('<div class="main-title">Correct Sentences Grammar</div>', unsafe_allow_html=True)
+
+ # Introduction Section
+ st.markdown("""
+ <div class="section">
+     <p>Ensuring correct grammar in sentences is essential for clear and effective communication. Whether writing an email, an academic paper, or a casual message, proper grammar ensures that your message is understood as intended.</p>
+     <p>This page demonstrates how to implement a grammar correction pipeline using advanced NLP models. We utilize the T5 Transformer model, fine-tuned for grammar error correction, to automatically correct sentences and enhance their grammatical accuracy.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # T5 Transformer Overview
+ st.markdown('<div class="sub-title">Understanding the T5 Transformer for Grammar Correction</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <p>The T5 (Text-To-Text Transfer Transformer) model, developed by Google, is a versatile tool for various NLP tasks, including grammar correction. By processing input sentences and applying the appropriate grammar corrections, T5 generates outputs that maintain the original meaning while correcting errors.</p>
+     <p>This is particularly useful for applications in writing assistance, automated editing, and educational tools, where grammatical accuracy is crucial.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # Performance Section
+ st.markdown('<div class="sub-title">Performance and Use Cases</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <p>The T5 model has shown strong performance in grammar correction tasks. It consistently produces accurate and contextually appropriate corrections, making it a valuable tool for improving written communication across various settings.</p>
+     <p>This capability is beneficial for students, professionals, and anyone who needs to ensure their writing is grammatically correct. The T5 model’s efficiency in correcting errors makes it a powerful asset for enhancing the quality of written content.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # Implementation Section
+ st.markdown('<div class="sub-title">Implementing Grammar Correction</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <p>The following example demonstrates how to implement a grammar correction pipeline using Spark NLP. The pipeline includes a document assembler and the T5 model for performing grammar corrections.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ st.code('''
+ import sparknlp
+ from sparknlp.base import *
+ from sparknlp.annotator import *
+ from pyspark.ml import Pipeline
+
+ # Initialize Spark NLP
+ spark = sparknlp.start()
+
+ # Define the pipeline stages
+ documentAssembler = DocumentAssembler()\\
+     .setInputCol("text")\\
+     .setOutputCol("documents")
+
+ t5 = T5Transformer.pretrained("t5_grammar_error_corrector")\\
+     .setTask("gec:")\\
+     .setInputCols(["documents"])\\
+     .setMaxOutputLength(200)\\
+     .setOutputCol("corrections")
+
+ pipeline = Pipeline().setStages([documentAssembler, t5])
+
+ # Input data example
+ data = spark.createDataFrame([["She don't knows nothing about what's happening in the office."]]).toDF("text")
+
+ # Apply the pipeline for grammar correction
+ result = pipeline.fit(data).transform(data)
+ result.select("corrections.result").show(truncate=False)
+ ''', language='python')
+
+ # Example Output
+ st.text("""
+ +-----------------------------------------------------------------+
+ |corrections.result                                                |
+ +-----------------------------------------------------------------+
+ |[She doesn't know anything about what's happening in the office.]|
+ +-----------------------------------------------------------------+
+ """)
+
+ # Model Info Section
+ st.markdown('<div class="sub-title">Choosing the Right T5 Model for Grammar Correction</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <p>For correcting grammar errors, we use the model: "t5_grammar_error_corrector". This model is fine-tuned to detect and correct various types of grammatical errors in English sentences.</p>
+     <p>Explore other T5 models tailored for different NLP tasks on the <a class="link" href="https://sparknlp.org/models?annotator=T5Transformer" target="_blank">Spark NLP Models Hub</a> to find the best fit for your specific needs.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # References Section
+ st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <ul>
+         <li><a class="link" href="https://ai.googleblog.com/2020/02/exploring-transfer-learning-with-t5.html" target="_blank">Google AI Blog</a>: Exploring Transfer Learning with T5</li>
+         <li><a class="link" href="https://sparknlp.org/models?annotator=T5Transformer" target="_blank">Spark NLP Model Hub</a>: Explore T5 models</li>
+         <li>Model used for Grammar Correction: <a class="link" href="https://sparknlp.org/2022/11/28/t5_grammar_error_corrector_en.html" target="_blank">t5_grammar_error_corrector</a></li>
+         <li><a class="link" href="https://github.com/google-research/text-to-text-transfer-transformer" target="_blank">GitHub</a>: T5 Transformer repository</li>
+         <li><a class="link" href="https://arxiv.org/abs/1910.10683" target="_blank">T5 Paper</a>: Detailed insights from the developers</li>
+     </ul>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # Community & Support Section
+ st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <ul>
+         <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
+         <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
+         <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
+         <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
+         <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
+     </ul>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # Quick Links Section
+ st.markdown('<div class="sub-title">Quick Links</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <ul>
+         <li><a class="link" href="https://sparknlp.org/docs/en/quickstart" target="_blank">Getting Started</a></li>
+         <li><a class="link" href="https://nlp.johnsnowlabs.com/models" target="_blank">Pretrained Models</a></li>
+         <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples/python/annotation/text/english" target="_blank">Example Notebooks</a></li>
+         <li><a class="link" href="https://sparknlp.org/docs/en/install" target="_blank">Installation Guide</a></li>
+     </ul>
+ </div>
+ """, unsafe_allow_html=True)
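Note: in the overview example above, `pipeline.fit(data)` is the step that downloads the pretrained T5 stage. Persisting the fitted PipelineModel avoids repeating that download on later runs. A minimal sketch, assuming the `pipeline` and `data` objects from the code shown on this page and a writable local path (the path below is hypothetical):

```python
# Minimal sketch (assumes `pipeline` and `data` from the example above).
from pyspark.ml import PipelineModel

# Fit once; this is the step that fetches the pretrained T5 weights.
model = pipeline.fit(data)
model.write().overwrite().save("/tmp/t5_gec_pipeline_model")  # hypothetical path

# Later runs can reload the fitted model and skip the download.
reloaded = PipelineModel.load("/tmp/t5_gec_pipeline_model")
reloaded.transform(data).select("corrections.result").show(truncate=False)
```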
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ streamlit
+ st-annotated-text
+ streamlit-tags
+ pandas
+ numpy
+ spark-nlp
+ pyspark
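Note: the requirements are unpinned, so the resolved versions depend on when the image is built. A small helper like the hypothetical sketch below (not part of this commit) can record which versions a working build actually installed, which is useful if the entries are pinned later.

```python
# Hypothetical helper, not part of this commit: report the versions that pip
# resolved for the unpinned entries in requirements.txt (requires Python 3.8+).
from importlib.metadata import version, PackageNotFoundError

PACKAGES = ["streamlit", "st-annotated-text", "streamlit-tags",
            "pandas", "numpy", "spark-nlp", "pyspark"]

for name in PACKAGES:
    try:
        print(f"{name}=={version(name)}")
    except PackageNotFoundError:
        print(f"{name} is not installed")
```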