Spaces:

Pragformer
/

PragFormer-demo

Build error

App Files Files Community

Pragformer committed on Jan 9, 2023

Commit

b473578

1 Parent(s): 1fda8b6

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -46

app.py CHANGED Viewed

@@ -17,13 +17,13 @@ pragformer_private = transformers.AutoModel.from_pretrained("Pragformer/PragForm
 pragformer_reduction = transformers.AutoModel.from_pretrained("Pragformer/PragFormer_reduction", trust_remote_code=True)
-#Event Listeners
 with_omp_str = 'Should contain a parallel work-sharing loop construct'
 without_omp_str = 'Should not contain a parallel work-sharing loop construct'
 name_file = ['bash', 'c', 'c#', 'c++','css', 'haskell', 'java', 'javascript', 'lua', 'objective-c', 'perl', 'php', 'python','r','ruby', 'scala', 'sql', 'swift', 'vb.net']
 tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')
 with open('c_data.json', 'r') as f:
     data = json.load(f)
@@ -87,49 +87,61 @@ def is_reduction(code_txt):
   return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain reduction with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
-def predictor(texts):
-    tokenized = tokenizer.batch_encode_plus(
-        texts,
-        max_length = 150,
-        pad_to_max_length = True,
-        truncation = True
-    )
-    test_seq = torch.tensor(tokenized['input_ids'])
-    test_mask = torch.tensor(tokenized['attention_mask'])
-    test_y = torch.tensor([1]*len(texts))
-    test_data = TensorDataset(test_seq, test_mask, test_y)
-    test_sampler = SequentialSampler(test_seq)
-    test_dataloader = DataLoader(test_data, sampler = test_sampler, batch_size = len(texts))
-    total_probas = []
-    for step, batch in enumerate(test_dataloader):
-        sent_id, mask, labels = batch
-        outputs = pragformer(sent_id, mask)
-        probas = outputs.detach().numpy()
-        total_probas.extend(probas)
-    return torch.nn.Softmax(dim=1)(torch.tensor(probas)).numpy()
-def lime_explain(code_txt):
-  class_names = ['Without OpenMP', 'With OpenMP']
-  SAMPLES = 35
-  exp = []
-  if predict(code_txt)[0] == without_omp_str:
-      return gr.update(visible=False)
-  explainer = LimeTextExplainer(class_names=class_names, split_expression=r"\s+")
-  exp = explainer.explain_instance(code_txt, predictor, num_features=20, num_samples=SAMPLES)
-  return gr.update(visible=True, value=exp.as_pyplot_figure())
-def lime_title(code_txt):
-  if predict(code_txt)[0] == without_omp_str:
-      return gr.update(visible=False)
   return gr.update(visible=True)
 def activate_c(lang_pred):
   langs = lang_pred.split('\n')
@@ -149,7 +161,7 @@ def activate_button(lang_pred):
     return gr.update(visible=False)
   else:
     return gr.update(visible=True)
 def lang_predict(code_txt):
     res = {}
@@ -168,13 +180,14 @@ def lang_predict(code_txt):
 # Define GUI
 with gr.Blocks() as pragformer_gui:
     gr.Markdown(
         """
         # PragFormer Pragma Classification
-        Pragformer is a tool that analyzes C code to determine whether it would benefit from being placed in a parallel for construct and, if necessary, suggests
         the use of data-sharing attribute clauses (e.g. private and reduction) to improve performance. It also provides explainability through the use of LIME.
         """)
@@ -208,7 +221,9 @@ with gr.Blocks() as pragformer_gui:
             reduction = gr.Textbox(label="Data-sharing attribute clause- reduction", visible=False)
         explain_title = gr.Markdown("## LIME Explainability", visible=False)
-        explanation = gr.Plot(visible=False)
     code_in.change(fn=lang_predict, inputs=code_in, outputs=[lang_pred])
@@ -218,7 +233,9 @@ with gr.Blocks() as pragformer_gui:
     submit_btn.click(fn=predict, inputs=code_in, outputs=[label_out, confidence_out])
     submit_btn.click(fn=is_private, inputs=code_in, outputs=private)
     submit_btn.click(fn=is_reduction, inputs=code_in, outputs=reduction)
-    submit_btn.click(fn=lime_explain, inputs=code_in, outputs=explanation)
     submit_btn.click(fn=lime_title, inputs=code_in, outputs=explain_title)
     sample_btn.click(fn=fill_code, inputs=drop, outputs=[pragma, code_in])
@@ -255,12 +272,11 @@ with gr.Blocks() as pragformer_gui:
     We train several transformer models, named PragFormer, for these tasks, and show that they outperform statistically-trained baselines and automatic S2S parallelization
     compilers in both classifying the overall need for an OpenMP directive and the introduction of private and reduction clauses.
-    ![](https://user-images.githubusercontent.com/104314626/165228036-d7fadd8d-768a-4e94-bd57-0a77e1330082.png)
     """)
 pragformer_gui.launch()

 pragformer_reduction = transformers.AutoModel.from_pretrained("Pragformer/PragFormer_reduction", trust_remote_code=True)
+# Event Listeners
 with_omp_str = 'Should contain a parallel work-sharing loop construct'
 without_omp_str = 'Should not contain a parallel work-sharing loop construct'
 name_file = ['bash', 'c', 'c#', 'c++','css', 'haskell', 'java', 'javascript', 'lua', 'objective-c', 'perl', 'php', 'python','r','ruby', 'scala', 'sql', 'swift', 'vb.net']
 tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')
 with open('c_data.json', 'r') as f:
     data = json.load(f)
   return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain reduction with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
+def get_predictor(model):
+  """Build a LIME-compatible classifier function around ``model``.
+
+  Args:
+    model: Callable invoked as ``model(input_ids, attention_mask)``; a
+      softmax over dim 1 is applied to whatever it returns.
+
+  Returns:
+    A ``predictor(texts)`` function that tokenizes ``texts`` with the
+    module-level ``tokenizer`` and returns the softmax probabilities as a
+    NumPy array.
+  """
+  def predictor(texts):
+    # Pad/truncate every sample to the same 150-token window so the whole
+    # batch can be stacked into one rectangular tensor.
+    tokenized = tokenizer.batch_encode_plus(
+      texts,
+      max_length=150,
+      padding='max_length',  # replaces the deprecated pad_to_max_length=True
+      truncation=True,
+    )
+    input_ids = torch.tensor(tokenized['input_ids'])
+    attention_mask = torch.tensor(tokenized['attention_mask'])
+    # The previous version wrapped these tensors in a DataLoader whose batch
+    # size was len(texts), i.e. exactly one batch; a single direct forward
+    # pass is equivalent. Gradients are not needed to produce probabilities.
+    with torch.no_grad():
+      logits = model(input_ids, attention_mask)
+    return torch.nn.Softmax(dim=1)(logits).numpy()
+  return predictor
+def get_lime_explain(filename, num_samples=10):
+  """Create the handler that produces a LIME report for one classifier.
+
+  Args:
+    filename: 'Loop' or 'Private' select the matching model; any other
+      value falls back to the reduction model. The lower-cased value is
+      also the prefix of the generated HTML file name.
+    num_samples: Value forwarded to ``explain_instance`` as ``num_samples``.
+
+  Returns:
+    A ``lime_explain(code_txt)`` function that saves the explanation to
+    ``<filename.lower()>_explanation.html`` and returns a Gradio update
+    that is visible and carries that path as its value.
+  """
+  def lime_explain(code_txt):
+    # Model and class labels per supported classifier; anything else uses
+    # the reduction model, matching the former if/elif/else chain.
+    targets = {
+      'Loop': (pragformer, ['Without OpenMP', 'With OpenMP']),
+      'Private': (pragformer_private, ['Without Private', 'With Private']),
+    }
+    model, class_names = targets.get(
+      filename, (pragformer_reduction, ['Without Reduction', 'With Reduction']))
+    explainer = LimeTextExplainer(class_names=class_names, split_expression=r"\s+")
+    exp = explainer.explain_instance(code_txt, get_predictor(model), num_features=20, num_samples=num_samples)
+    # NOTE(review): the output path is fixed per classifier, so overlapping
+    # requests write to the same file - confirm this is acceptable.
+    report_path = f'{filename.lower()}_explanation.html'
+    exp.save_to_file(report_path)
+    return gr.update(visible=True, value=report_path)
+  return lime_explain
+def lime_title(code_txt):
+  """Return a Gradio update that makes the bound component visible; ``code_txt`` is unused."""
   return gr.update(visible=True)
 def activate_c(lang_pred):
   langs = lang_pred.split('\n')
     return gr.update(visible=False)
   else:
     return gr.update(visible=True)
 def lang_predict(code_txt):
     res = {}
 # Define GUI
 with gr.Blocks() as pragformer_gui:
     gr.Markdown(
         """
         # PragFormer Pragma Classification
+        Pragformer is a tool that analyzes C code to determine whether it would benefit from being placed in a work-sharing loop construct and, if necessary, suggests
         the use of data-sharing attribute clauses (e.g. private and reduction) to improve performance. It also provides explainability through the use of LIME.
         """)
             reduction = gr.Textbox(label="Data-sharing attribute clause- reduction", visible=False)
         explain_title = gr.Markdown("## LIME Explainability", visible=False)
+        loop_explanation = gr.File(label='Work-sharing loop construct prediction explanation', interactive=False, visible=False)
+        private_explanation = gr.File(label='Data-sharing attribute private prediction explanation', interactive=False, visible=False)
+        reduction_explanation = gr.File(label='Data-sharing attribute reduction prediction explanation', interactive=False, visible=False)
     code_in.change(fn=lang_predict, inputs=code_in, outputs=[lang_pred])
     submit_btn.click(fn=predict, inputs=code_in, outputs=[label_out, confidence_out])
     submit_btn.click(fn=is_private, inputs=code_in, outputs=private)
     submit_btn.click(fn=is_reduction, inputs=code_in, outputs=reduction)
+    submit_btn.click(fn=get_lime_explain('Loop'), inputs=code_in, outputs=loop_explanation)
+    submit_btn.click(fn=get_lime_explain('Private'), inputs=code_in, outputs=private_explanation)
+    submit_btn.click(fn=get_lime_explain('Reduction'), inputs=code_in, outputs=reduction_explanation)
     submit_btn.click(fn=lime_title, inputs=code_in, outputs=explain_title)
     sample_btn.click(fn=fill_code, inputs=drop, outputs=[pragma, code_in])
     We train several transformer models, named PragFormer, for these tasks, and show that they outperform statistically-trained baselines and automatic S2S parallelization
     compilers in both classifying the overall need for an OpenMP directive and the introduction of private and reduction clauses.
+    ![](https://user-images.githubusercontent.com/48416212/211221896-b4f50ec7-7d6e-47eb-b418-903cf9b31060.png)
     """)
 pragformer_gui.launch()