Spaces:
Build error
Build error
Commit
·
b473578
1
Parent(s):
1fda8b6
Update app.py
Browse files
app.py
CHANGED
@@ -17,13 +17,13 @@ pragformer_private = transformers.AutoModel.from_pretrained("Pragformer/PragForm
|
|
17 |
pragformer_reduction = transformers.AutoModel.from_pretrained("Pragformer/PragFormer_reduction", trust_remote_code=True)
|
18 |
|
19 |
|
20 |
-
#Event Listeners
|
21 |
with_omp_str = 'Should contain a parallel work-sharing loop construct'
|
22 |
without_omp_str = 'Should not contain a parallel work-sharing loop construct'
|
23 |
name_file = ['bash', 'c', 'c#', 'c++','css', 'haskell', 'java', 'javascript', 'lua', 'objective-c', 'perl', 'php', 'python','r','ruby', 'scala', 'sql', 'swift', 'vb.net']
|
24 |
-
|
25 |
tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')
|
26 |
|
|
|
27 |
with open('c_data.json', 'r') as f:
|
28 |
data = json.load(f)
|
29 |
|
@@ -87,49 +87,61 @@ def is_reduction(code_txt):
|
|
87 |
return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain reduction with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
|
88 |
|
89 |
|
90 |
-
def
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
return torch.nn.Softmax(dim=1)(torch.tensor(probas)).numpy()
|
111 |
-
|
112 |
-
|
113 |
-
def lime_explain(code_txt):
|
114 |
-
class_names = ['Without OpenMP', 'With OpenMP']
|
115 |
-
SAMPLES = 35
|
116 |
-
exp = []
|
117 |
|
118 |
-
|
119 |
-
return gr.update(visible=False)
|
120 |
|
121 |
-
|
122 |
-
exp = explainer.explain_instance(code_txt, predictor, num_features=20, num_samples=SAMPLES)
|
123 |
|
124 |
-
return gr.update(visible=True, value=exp.as_pyplot_figure())
|
125 |
|
|
|
|
|
126 |
|
127 |
-
|
128 |
-
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
|
|
|
|
|
|
|
|
131 |
return gr.update(visible=True)
|
132 |
-
|
133 |
|
134 |
def activate_c(lang_pred):
|
135 |
langs = lang_pred.split('\n')
|
@@ -149,7 +161,7 @@ def activate_button(lang_pred):
|
|
149 |
return gr.update(visible=False)
|
150 |
else:
|
151 |
return gr.update(visible=True)
|
152 |
-
|
153 |
|
154 |
def lang_predict(code_txt):
|
155 |
res = {}
|
@@ -168,13 +180,14 @@ def lang_predict(code_txt):
|
|
168 |
|
169 |
# Define GUI
|
170 |
|
|
|
171 |
with gr.Blocks() as pragformer_gui:
|
172 |
|
173 |
gr.Markdown(
|
174 |
"""
|
175 |
# PragFormer Pragma Classification
|
176 |
-
|
177 |
-
Pragformer is a tool that analyzes C code to determine whether it would benefit from being placed in a
|
178 |
the use of data-sharing attribute clauses (e.g. private and reduction) to improve performance. It also provides explainability through the use of LIME.
|
179 |
""")
|
180 |
|
@@ -208,7 +221,9 @@ with gr.Blocks() as pragformer_gui:
|
|
208 |
reduction = gr.Textbox(label="Data-sharing attribute clause- reduction", visible=False)
|
209 |
|
210 |
explain_title = gr.Markdown("## LIME Explainability", visible=False)
|
211 |
-
|
|
|
|
|
212 |
|
213 |
|
214 |
code_in.change(fn=lang_predict, inputs=code_in, outputs=[lang_pred])
|
@@ -218,7 +233,9 @@ with gr.Blocks() as pragformer_gui:
|
|
218 |
submit_btn.click(fn=predict, inputs=code_in, outputs=[label_out, confidence_out])
|
219 |
submit_btn.click(fn=is_private, inputs=code_in, outputs=private)
|
220 |
submit_btn.click(fn=is_reduction, inputs=code_in, outputs=reduction)
|
221 |
-
submit_btn.click(fn=
|
|
|
|
|
222 |
submit_btn.click(fn=lime_title, inputs=code_in, outputs=explain_title)
|
223 |
sample_btn.click(fn=fill_code, inputs=drop, outputs=[pragma, code_in])
|
224 |
|
@@ -255,12 +272,11 @@ with gr.Blocks() as pragformer_gui:
|
|
255 |
We train several transformer models, named PragFormer, for these tasks, and show that they outperform statistically-trained baselines and automatic S2S parallelization
|
256 |
compilers in both classifying the overall need for an OpenMP directive and the introduction of private and reduction clauses.
|
257 |
|
258 |
-

|
261 |
|
262 |
|
263 |
|
264 |
-
|
265 |
pragformer_gui.launch()
|
266 |
|
|
|
17 |
pragformer_reduction = transformers.AutoModel.from_pretrained("Pragformer/PragFormer_reduction", trust_remote_code=True)
|
18 |
|
19 |
|
20 |
+
# Event Listeners
|
21 |
with_omp_str = 'Should contain a parallel work-sharing loop construct'
|
22 |
without_omp_str = 'Should not contain a parallel work-sharing loop construct'
|
23 |
name_file = ['bash', 'c', 'c#', 'c++','css', 'haskell', 'java', 'javascript', 'lua', 'objective-c', 'perl', 'php', 'python','r','ruby', 'scala', 'sql', 'swift', 'vb.net']
|
|
|
24 |
tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')
|
25 |
|
26 |
+
|
27 |
with open('c_data.json', 'r') as f:
|
28 |
data = json.load(f)
|
29 |
|
|
|
87 |
return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain reduction with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
|
88 |
|
89 |
|
90 |
+
def get_predictor(model):
    """Build a probability function around ``model`` for use by LIME.

    Args:
        model: Callable invoked as ``model(input_ids, attention_mask)`` that
            returns a 2-D tensor with one row of scores per input text.

    Returns:
        A function taking a batch of strings and returning a NumPy array with
        one row per text, obtained by applying softmax over dim 1 to the
        model output.
    """

    def predictor(texts):
        # ``tokenizer`` is the module-level tokenizer. Every text is padded or
        # truncated to exactly 150 tokens so the batch forms one rectangular
        # tensor. ``padding='max_length'`` replaces the deprecated
        # ``pad_to_max_length=True`` flag.
        encoded = tokenizer(
            texts,
            max_length=150,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )

        # The whole batch goes through the model in a single forward pass, so
        # every row of the output is used for the returned probabilities (the
        # previous loop only kept the rows of the last batch it iterated).
        # Gradients are not needed for inference.
        with torch.no_grad():
            scores = model(encoded['input_ids'], encoded['attention_mask'])

        return torch.nn.Softmax(dim=1)(scores).cpu().numpy()

    return predictor
|
|
|
114 |
|
|
|
115 |
|
116 |
+
def get_lime_explain(filename, num_samples=10):
    """Create a Gradio callback that produces a LIME explanation file.

    Args:
        filename: Selects the model to explain. ``'Loop'`` uses
            ``pragformer``, ``'Private'`` uses ``pragformer_private`` and any
            other value falls back to ``pragformer_reduction``. The lowercased
            value is also the prefix of the saved HTML file name.
        num_samples: Number of perturbed samples LIME generates per
            explanation. Defaults to 10, the value previously hard-coded
            (a comment in the original code mentioned 40 as an alternative).

    Returns:
        A function ``lime_explain(code_txt)`` that writes
        ``<filename.lower()>_explanation.html`` and returns a ``gr.update``
        making the target component visible with that file as its value.
    """

    def lime_explain(code_txt):
        if filename == 'Loop':
            model = pragformer
            class_names = ['Without OpenMP', 'With OpenMP']
        elif filename == 'Private':
            model = pragformer_private
            class_names = ['Without Private', 'With Private']
        else:
            model = pragformer_reduction
            class_names = ['Without Reduction', 'With Reduction']

        # NOTE(review): the path depends only on ``filename``, so every call
        # for the same model writes to the same file in the working directory.
        output_path = f'{filename.lower()}_explanation.html'

        # Tokens are split on whitespace so code punctuation stays attached to
        # the surrounding token.
        explainer = LimeTextExplainer(class_names=class_names, split_expression=r"\s+")
        explanation = explainer.explain_instance(
            code_txt,
            get_predictor(model),
            num_features=20,
            num_samples=num_samples,
        )
        explanation.save_to_file(output_path)

        return gr.update(visible=True, value=output_path)

    return lime_explain
|
140 |
+
|
141 |
+
|
142 |
+
def lime_title(code_txt):
    """Return a ``gr.update`` that makes the target component visible.

    ``code_txt`` is accepted because the callback is wired with an input
    component, but its value is not read.
    """
    show_component = gr.update(visible=True)
    return show_component
|
144 |
+
|
145 |
|
146 |
def activate_c(lang_pred):
|
147 |
langs = lang_pred.split('\n')
|
|
|
161 |
return gr.update(visible=False)
|
162 |
else:
|
163 |
return gr.update(visible=True)
|
164 |
+
|
165 |
|
166 |
def lang_predict(code_txt):
|
167 |
res = {}
|
|
|
180 |
|
181 |
# Define GUI
|
182 |
|
183 |
+
|
184 |
with gr.Blocks() as pragformer_gui:
|
185 |
|
186 |
gr.Markdown(
|
187 |
"""
|
188 |
# PragFormer Pragma Classification
|
189 |
+
|
190 |
+
Pragformer is a tool that analyzes C code to determine whether it would benefit from being placed in a work-sharing loop construct and, if necessary, suggests
|
191 |
the use of data-sharing attribute clauses (e.g. private and reduction) to improve performance. It also provides explainability through the use of LIME.
|
192 |
""")
|
193 |
|
|
|
221 |
reduction = gr.Textbox(label="Data-sharing attribute clause- reduction", visible=False)
|
222 |
|
223 |
explain_title = gr.Markdown("## LIME Explainability", visible=False)
|
224 |
+
loop_explanation = gr.File(label='Work-sharing loop construct prediction explanation', interactive=False, visible=False)
|
225 |
+
private_explanation = gr.File(label='Data-sharing attribute private prediction explanation', interactive=False, visible=False)
|
226 |
+
reduction_explanation = gr.File(label='Data-sharing attribute reduction prediction explanation', interactive=False, visible=False)
|
227 |
|
228 |
|
229 |
code_in.change(fn=lang_predict, inputs=code_in, outputs=[lang_pred])
|
|
|
233 |
submit_btn.click(fn=predict, inputs=code_in, outputs=[label_out, confidence_out])
|
234 |
submit_btn.click(fn=is_private, inputs=code_in, outputs=private)
|
235 |
submit_btn.click(fn=is_reduction, inputs=code_in, outputs=reduction)
|
236 |
+
submit_btn.click(fn=get_lime_explain('Loop'), inputs=code_in, outputs=loop_explanation)
|
237 |
+
submit_btn.click(fn=get_lime_explain('Private'), inputs=code_in, outputs=private_explanation)
|
238 |
+
submit_btn.click(fn=get_lime_explain('Reduction'), inputs=code_in, outputs=reduction_explanation)
|
239 |
submit_btn.click(fn=lime_title, inputs=code_in, outputs=explain_title)
|
240 |
sample_btn.click(fn=fill_code, inputs=drop, outputs=[pragma, code_in])
|
241 |
|
|
|
272 |
We train several transformer models, named PragFormer, for these tasks, and show that they outperform statistically-trained baselines and automatic S2S parallelization
|
273 |
compilers in both classifying the overall need for an OpenMP directive and the introduction of private and reduction clauses.
|
274 |
|
275 |
+

|
276 |
+
|
277 |
""")
|
278 |
|
279 |
|
280 |
|
|
|
281 |
pragformer_gui.launch()
|
282 |
|