abdullahmubeen10 committed on
Commit
20270d1
·
verified ·
1 Parent(s): af62891

Upload 5 files

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base="light"
3
+ primaryColor="#29B4E8"
Demo.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import sparknlp
3
+
4
+ from sparknlp.base import *
5
+ from sparknlp.annotator import *
6
+ from pyspark.ml import Pipeline
7
+
8
+ # Page configuration
9
# Page configuration: wide layout, sidebar state "auto".
_page_options = {
    "layout": "wide",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**_page_options)

# Stylesheet for the page title, the grey info sections and the bordered
# ".scroll" boxes that hold the input and predicted text.
_page_css = """
<style>
.main-title {
font-size: 36px;
color: #4A90E2;
font-weight: bold;
text-align: center;
}
.section {
background-color: #f9f9f9;
padding: 10px;
border-radius: 10px;
margin-top: 10px;
}
.section p, .section ul {
color: #666666;
}
.scroll {
overflow-x: auto;
border: 1px solid #e6e9ef;
border-radius: 0.25rem;
padding: 1rem;
margin-bottom: 2.5rem;
white-space: pre-wrap;
}
</style>
"""
st.markdown(_page_css, unsafe_allow_html=True)
42
+
43
@st.cache_resource
def init_spark():
    """Start Spark NLP and return the session created by ``sparknlp.start()``.

    Decorated with ``st.cache_resource`` so the returned object is cached by
    Streamlit rather than re-created on each call.
    """
    session = sparknlp.start()
    return session
46
+
47
@st.cache_resource
def create_pipeline(model, task, max_output_length=200):
    """Build the two-stage Spark ML pipeline used for style transfer.

    Args:
        model: Name of the pretrained model handed to
            ``T5Transformer.pretrained``.
        task: Task prefix handed to ``setTask``
            (for example ``"transfer Casual to Formal:"``).
        max_output_length: Value handed to ``setMaxOutputLength``. Defaults
            to 200, the value that used to be hard-coded, so existing
            two-argument calls behave as before.

    Returns:
        An unfitted ``Pipeline`` with two stages: a ``DocumentAssembler``
        reading ``text`` and writing ``documents``, followed by the T5
        transformer reading ``documents`` and writing ``transfers``.
    """
    document_assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("documents")
    )

    t5 = (
        T5Transformer.pretrained(model)
        .setTask(task)
        .setInputCols(["documents"])
        .setMaxOutputLength(max_output_length)
        .setOutputCol("transfers")
    )

    return Pipeline().setStages([document_assembler, t5])
61
+
62
def fit_data(pipeline, data):
    """Run ``pipeline`` on a single text and return the collected output.

    Args:
        pipeline: Pipeline whose stages read a ``text`` column and write a
            ``transfers`` column.
        data: The sentence to transform.

    Returns:
        The list returned by ``collect()`` on the ``transfers.result``
        column of the transformed one-row DataFrame.
    """
    # Resolve the session here instead of reading a module-level ``spark``
    # that is only assigned later in the script. init_spark() is wrapped in
    # st.cache_resource, so this returns the cached session.
    session = init_spark()
    df = session.createDataFrame([[data]]).toDF("text")
    fitted = pipeline.fit(df)
    return fitted.transform(df).select('transfers.result').collect()
66
+
67
+ # Sidebar setup
68
# Sidebar: model picker. The chosen name is reused below as the key into
# `examples` and `task_descriptions`.
_model_choices = [
    't5_informal_to_formal_styletransfer',
    't5_formal_to_informal_styletransfer',
]
model = st.sidebar.selectbox(
    "Choose the Pretrained Model",
    _model_choices,
    help="Select the model you want to use for style transfer.",
)

# Sidebar: "Open In Colab" badge linking to the reference notebook.
_colab_badge_html = """
<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/T5_LINGUISTIC.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
</a>
"""
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(_colab_badge_html, unsafe_allow_html=True)
84
+
85
# Model names, used as keys in both lookup tables below.
_INFORMAL_TO_FORMAL = "t5_informal_to_formal_styletransfer"
_FORMAL_TO_INFORMAL = "t5_formal_to_informal_styletransfer"

# Example sentences offered in the "Select an example" box, per model.
examples = {
    _INFORMAL_TO_FORMAL: [
        "Who gives a crap about that anyway? It's not like it matters!",
        "Hiya, how ya doing? I haven't seen ya in forever!",
        "btw - ur face looks really familiar, have we met before?",
        "I looooooooooooooove going to the movies! It's my absolute favorite thing to do!",
        "Hey, what's up? Wanna grab a bite to eat later?",
        "Nah, I'm good. Don't feel like going out tonight.",
        "Yo, that was totally awesome! Can't believe we pulled it off!",
        "Check this out, it's totally epic! You've gotta see it!",
        "I'm so stoked for the weekend, can't wait to just chill!",
        "Dude, that party was lit! Had the best time ever!",
    ],
    _FORMAL_TO_INFORMAL: [
        "Please leave the room now, as your presence is no longer required.",
        "Thank you very much, sir! Your kindness is greatly appreciated.",
        "It's a pleasure to meet you, and I look forward to our collaboration.",
        "I appreciate your assistance with this matter. It was very helpful.",
        "She understood the complex instructions very quickly and efficiently.",
        "He contracted a fever after returning from his overseas trip.",
        "He investigated his accountant thoroughly before making any decisions.",
        "Kindly refrain from making any noise during the presentation.",
        "She expressed her gratitude for the opportunity to work on this project.",
        "He was extremely punctual and arrived precisely at the scheduled time.",
    ],
}

# Task prefix given to the T5 transformer for each model.
task_descriptions = {
    _INFORMAL_TO_FORMAL: "transfer Casual to Formal:",
    _FORMAL_TO_INFORMAL: "transfer Formal to Casual:",
}
116
+
117
+ # Set up the page layout
118
import html

# Set up the page layout
title = "T5 for Informal to Formal Style Transfer"
sub_title = "Effortlessly Transform Sentences and Explore Different Writing Styles"

st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
st.markdown(f'<div style="text-align: center; color: #666666;">{sub_title}</div>', unsafe_allow_html=True)

# Text selection: a sentence typed by the user takes precedence over the
# example picked from the list for the selected model.
selected_text = st.selectbox("Select an example", examples[model])
custom_input = st.text_input("Try it with your own sentence!")

text_to_analyze = custom_input if custom_input else selected_text

st.write('Text to analyze:')
# The text may be user-typed and is rendered with unsafe_allow_html=True, so
# escape it: otherwise "<", ">" and "&" would be interpreted as markup.
st.markdown(f'<div class="scroll">{html.escape(text_to_analyze)}</div>', unsafe_allow_html=True)

# Initialize Spark and create pipeline
spark = init_spark()
pipeline = create_pipeline(model, task_descriptions[model])
output = fit_data(pipeline, text_to_analyze)

# Display transformed sentence
st.write("Predicted Sentence:")
# fit_data returns collected rows; the first field of the first row holds the
# generated strings. Guard against an empty result instead of raising
# IndexError.
generated = output[0][0] if output else []
output_text = "".join(generated)
# Upper-case only the first character. str.title() would re-case every word
# and break contractions ("It's" -> "It'S"), misrepresenting the prediction.
output_text = output_text[:1].upper() + output_text[1:]
st.markdown(f'<div class="scroll">{html.escape(output_text)}</div>', unsafe_allow_html=True)
Dockerfile ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Download base image ubuntu 18.04
FROM ubuntu:18.04

# Set environment variables (key=value form; the space-separated ENV form is
# legacy syntax). HOME gets its own instruction because ${NB_USER} is only
# substituted from values set by earlier instructions.
ENV NB_USER=jovyan \
    NB_UID=1000 \
    JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
ENV HOME=/home/${NB_USER}

# Install required packages. The apt package lists are removed in the same
# layer so they are not stored in the image.
RUN apt-get update && apt-get install -y \
    tar \
    wget \
    bash \
    rsync \
    gcc \
    libfreetype6-dev \
    libhdf5-serial-dev \
    libpng-dev \
    libzmq3-dev \
    python3 \
    python3-dev \
    python3-pip \
    unzip \
    pkg-config \
    software-properties-common \
    graphviz \
    openjdk-8-jdk \
    ant \
    ca-certificates-java \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && update-ca-certificates -f

# Install Python 3.8 and pip
RUN add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y python3.8 python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Set up JAVA_HOME for login shells as well
RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> /etc/profile \
    && echo "export PATH=\$JAVA_HOME/bin:\$PATH" >> /etc/profile

# Create a new user named "jovyan" with user ID 1000
RUN useradd -m -u ${NB_UID} ${NB_USER}

# Switch to the "jovyan" user
USER ${NB_USER}

# Put the user's local bin directory on PATH (HOME is already set above)
ENV PATH=/home/${NB_USER}/.local/bin:$PATH

# Set up PySpark to use Python 3.8 for both driver and workers
ENV PYSPARK_PYTHON=/usr/bin/python3.8 \
    PYSPARK_DRIVER_PYTHON=/usr/bin/python3.8

# Set the working directory to the user's home directory
WORKDIR ${HOME}

# Upgrade pip and install Python dependencies. --no-cache-dir keeps pip's
# download cache out of the image.
RUN python3.8 -m pip install --no-cache-dir --upgrade pip
COPY requirements.txt /tmp/requirements.txt
RUN python3.8 -m pip install --no-cache-dir -r /tmp/requirements.txt

# Copy the application code into the container at /home/jovyan
COPY --chown=${NB_USER}:${NB_USER} . ${HOME}

# Expose port for Streamlit
EXPOSE 7860

# Define the entry point for the container
ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
pages/Workflow & Model Overview.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from sparknlp.base import *
3
+ from sparknlp.annotator import *
4
+ from pyspark.ml import Pipeline
5
+
6
+ # Page configuration
7
# Page configuration: wide layout, sidebar state "auto".
_page_options = {
    "layout": "wide",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**_page_options)

# Stylesheet for the title, sub-titles, grey content sections and links
# used by the HTML fragments on this page.
_page_css = """
<style>
.main-title {
font-size: 36px;
color: #4A90E2;
font-weight: bold;
text-align: center;
}
.sub-title {
font-size: 24px;
color: #4A90E2;
margin-top: 20px;
}
.section {
background-color: #f9f9f9;
padding: 15px;
border-radius: 10px;
margin-top: 20px;
}
.section h2 {
font-size: 22px;
color: #4A90E2;
}
.section p, .section ul {
color: #666666;
}
.link {
color: #4A90E2;
text-decoration: none;
}
</style>
"""
st.markdown(_page_css, unsafe_allow_html=True)
45
+
46
+ # Title
47
# Opening part of the page, rendered top to bottom: title, introduction,
# T5 overview, performance/use cases and the lead-in to the code example.
_intro_fragments = (
    # Title
    '<div class="main-title">Switch Between Informal and Formal Style</div>',
    # Introduction Section
    """
<div class="section">
<p>Switching between informal and formal styles is a crucial skill in effective communication. Informal style is often used in casual conversations, while formal style is reserved for professional or official contexts. Understanding how to adapt your language to different audiences can greatly enhance the clarity and impact of your message.</p>
<p>On this page, we explore how to implement a pipeline that automatically switches between informal and formal styles using advanced NLP models. We utilize the T5 Transformer model, fine-tuned for style transfer, to seamlessly convert sentences between these two styles.</p>
</div>
""",
    # T5 Transformer Overview
    '<div class="sub-title">Understanding the T5 Transformer for Style Transfer</div>',
    """
<div class="section">
<p>The T5 (Text-To-Text Transfer Transformer) model, developed by Google, is a versatile tool for a wide range of NLP tasks. For style transfer, T5 can be fine-tuned to convert text between different styles, such as informal to formal and vice versa.</p>
<p>By processing input sentences and applying the appropriate style transfer, T5 generates outputs that adjust the tone while preserving the original meaning. This is especially useful for applications in writing assistance, automated editing, and communication training.</p>
</div>
""",
    # Performance Section
    '<div class="sub-title">Performance and Use Cases</div>',
    """
<div class="section">
<p>The T5 model has demonstrated strong performance in text transformation tasks, including style transfer between informal and formal language. It consistently produces accurate and contextually appropriate results, making it a valuable tool for enhancing communication in various settings.</p>
<p>This capability is beneficial for writers, editors, and professionals who need to adapt text to different audiences or contexts. The T5 model’s ability to perform these transformations efficiently makes it a powerful asset for improving written communication.</p>
</div>
""",
    # Implementation Section
    '<div class="sub-title">Implementing Informal-Formal Style Switching</div>',
    """
<div class="section">
<p>The following example demonstrates how to implement a style transfer pipeline using Spark NLP to switch between informal and formal styles. The pipeline includes a document assembler and the T5 model for performing the transformations in both directions.</p>
</div>
""",
)
for _fragment in _intro_fragments:
    st.markdown(_fragment, unsafe_allow_html=True)
85
+
86
# Code sample displayed to the reader. It starts with `import sparknlp`
# because the snippet calls sparknlp.start(); without that import a reader who
# copies the example gets a NameError on the first statement.
st.code('''
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline

# Initialize Spark NLP
spark = sparknlp.start()

# Define the pipeline stages
document_assembler = DocumentAssembler()\\
.setInputCol("text")\\
.setOutputCol("documents")

# Informal to Formal transformation
t5_informal_to_formal = T5Transformer()\\
.pretrained("t5_informal_to_formal_styletransfer")\\
.setTask("transfer Casual to Formal:")\\
.setInputCols(["documents"])\\
.setOutputCol("formal")

# Formal to Informal transformation
t5_formal_to_informal = T5Transformer()\\
.pretrained("t5_formal_to_informal_styletransfer")\\
.setTask("transfer Formal to Casual:")\\
.setInputCols(["documents"])\\
.setOutputCol("informal")

pipeline_informal_to_formal = Pipeline().setStages([document_assembler, t5_informal_to_formal])
pipeline_formal_to_informal = Pipeline().setStages([document_assembler, t5_formal_to_informal])

# Input data examples
data_informal = spark.createDataFrame([["Hey, what’s up? Wanna hang out later?"]]).toDF("text")
data_formal = spark.createDataFrame([["I would like to inquire about your availability for a meeting."]]).toDF("text")

# Apply the pipeline for informal to formal
result_informal_to_formal = pipeline_informal_to_formal.fit(data_informal).transform(data_informal)
result_informal_to_formal.select("formal.result").show(truncate=False)

# Apply the pipeline for formal to informal
result_formal_to_informal = pipeline_formal_to_informal.fit(data_formal).transform(data_formal)
result_formal_to_informal.select("informal.result").show(truncate=False)
''', language='python')
128
+
129
+ # Example Output
130
# Example output shown under the code sample, as preformatted text.
_example_output = """
+---------------------------------------------------------------+
|formal.result |
+---------------------------------------------------------------+
|[I would like to know if you are available to meet later.] |
+---------------------------------------------------------------+

+---------------------------------------------------------------+
|informal.result |
+---------------------------------------------------------------+
|[Hey, wanna hang out later?] |
+---------------------------------------------------------------+
"""
st.text(_example_output)

# Model info: heading followed by the section naming the two models and
# linking to the Spark NLP Models Hub.
_model_info_heading = '<div class="sub-title">Choosing the Right T5 Model for Style Transfer</div>'
_model_info_body = """
<div class="section">
<p>For switching between informal and formal styles, we use the models: "t5_informal_to_formal_styletransfer" for informal-to-formal conversion and "t5_formal_to_informal_styletransfer" for formal-to-informal conversion.</p>
<p>Explore other T5 models tailored for different style transfer tasks on the <a class="link" href="https://sparknlp.org/models?annotator=T5Transformer" target="_blank">Spark NLP Models Hub</a> to find the best fit for your specific needs.</p>
</div>
"""
st.markdown(_model_info_heading, unsafe_allow_html=True)
st.markdown(_model_info_body, unsafe_allow_html=True)
153
+
154
+ # References Section
155
# Closing part of the page: each heading is followed by its list of links.
_link_sections = (
    # References Section
    (
        '<div class="sub-title">References</div>',
        """
<div class="section">
<ul>
<li><a class="link" href="https://ai.googleblog.com/2020/02/exploring-transfer-learning-with-t5.html" target="_blank">Google AI Blog</a>: Exploring Transfer Learning with T5</li>
<li><a class="link" href="https://sparknlp.org/models?annotator=T5Transformer" target="_blank">Spark NLP Model Hub</a>: Explore T5 models</li>
<li>Model used for Informal to Formal: <a class="link" href="https://sparknlp.org/2022/05/31/t5_informal_to_formal_styletransfer_en_3_0.html" target="_blank">t5_informal_to_formal_styletransfer</a></li>
<li>Model used for Formal to Informal: <a class="link" href="https://sparknlp.org/2022/05/31/t5_formal_to_informal_styletransfer_en_3_0.html" target="_blank">t5_formal_to_informal_styletransfer</a></li>
<li><a class="link" href="https://github.com/google-research/text-to-text-transfer-transformer" target="_blank">GitHub</a>: T5 Transformer repository</li>
<li><a class="link" href="https://arxiv.org/abs/1910.10683" target="_blank">T5 Paper</a>: Detailed insights from the developers</li>
</ul>
</div>
""",
    ),
    # Community & Support Section
    (
        '<div class="sub-title">Community & Support</div>',
        """
<div class="section">
<ul>
<li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
<li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
<li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
<li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
<li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
</ul>
</div>
""",
    ),
    # Quick Links Section
    (
        '<div class="sub-title">Quick Links</div>',
        """
<div class="section">
<ul>
<li><a class="link" href="https://sparknlp.org/docs/en/quickstart" target="_blank">Getting Started</a></li>
<li><a class="link" href="https://nlp.johnsnowlabs.com/models" target="_blank">Pretrained Models</a></li>
<li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples/python/annotation/text/english" target="_blank">Example Notebooks</a></li>
<li><a class="link" href="https://sparknlp.org/docs/en/install" target="_blank">Installation Guide</a></li>
</ul>
</div>
""",
    ),
)
for _heading_html, _links_html in _link_sections:
    st.markdown(_heading_html, unsafe_allow_html=True)
    st.markdown(_links_html, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ st-annotated-text
3
+ streamlit-tags
4
+ pandas
5
+ numpy
6
+ spark-nlp
7
+ pyspark