jorgemarcc committed on
Commit
cadb900
·
verified ·
1 Parent(s): 795c1ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -15
app.py CHANGED
@@ -1,10 +1,7 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- [Martinez-Gil2025] Martinez-Gil, J. (2025).
4
- Augmenting the Interpretability of GraphCodeBERT for Code Similarity Tasks.
5
- International Journal of Software Engineering and Knowledge Engineering, 35(05), 657-678.
6
-
7
- @author: Jorge Martinez-Gil
8
  """
9
 
10
  import numpy as np
@@ -16,11 +13,11 @@ import gradio as gr
16
  from io import BytesIO
17
  from PIL import Image
18
 
19
- # Load GraphCodeBERT model
20
- tokenizer = RobertaTokenizer.from_pretrained("microsoft/graphcodebert-base")
21
- model = RobertaModel.from_pretrained("microsoft/graphcodebert-base")
22
 
23
- # Define sorting algorithms as strings
24
  sorting_algorithms = {
25
  "Bubble_Sort": """
26
  def bubble_sort(arr):
@@ -61,10 +58,8 @@ def merge_sort(arr):
61
  mid = len(arr) // 2
62
  L = arr[:mid]
63
  R = arr[mid:]
64
-
65
  merge_sort(L)
66
  merge_sort(R)
67
-
68
  i = j = k = 0
69
  while i < len(L) and j < len(R):
70
  if L[i] < R[j]:
@@ -74,12 +69,10 @@ def merge_sort(arr):
74
  arr[k] = R[j]
75
  j += 1
76
  k += 1
77
-
78
  while i < len(L):
79
  arr[k] = L[i]
80
  i += 1
81
  k += 1
82
-
83
  while j < len(R):
84
  arr[k] = R[j]
85
  j += 1
@@ -97,7 +90,6 @@ def partition(arr, low, high):
97
  arr[i], arr[j] = arr[j], arr[i]
98
  arr[i+1], arr[high] = arr[high], arr[i+1]
99
  return (i + 1)
100
-
101
  def quick_sort(arr, low, high):
102
  if low < high:
103
  pi = partition(arr, low, high)
@@ -116,7 +108,7 @@ def get_token_embeddings(code):
116
  tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'].squeeze())
117
  return token_embeddings, tokens
118
 
119
- # Compare two algorithms and return PCA scatter plot
120
  def compare_algorithms(algo1_name, algo2_name):
121
  code1 = sorting_algorithms[algo1_name]
122
  code2 = sorting_algorithms[algo2_name]
@@ -155,3 +147,4 @@ interface = gr.Interface(
155
  if __name__ == "__main__":
156
  interface.launch()
157
 
 
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ Martinez-Gil, J. (2025). Augmenting the Interpretability of GraphCodeBERT for Code Similarity Tasks.
4
+ International Journal of Software Engineering and Knowledge Engineering, 35(05), 657–678.
 
 
 
5
  """
6
 
7
  import numpy as np
 
13
  from io import BytesIO
14
  from PIL import Image
15
 
16
+ # Load GraphCodeBERT from Hugging Face (with cache)
17
+ tokenizer = RobertaTokenizer.from_pretrained("microsoft/graphcodebert-base", cache_dir="models/")
18
+ model = RobertaModel.from_pretrained("microsoft/graphcodebert-base", cache_dir="models/")
19
 
20
+ # Define sorting algorithms
21
  sorting_algorithms = {
22
  "Bubble_Sort": """
23
  def bubble_sort(arr):
 
58
  mid = len(arr) // 2
59
  L = arr[:mid]
60
  R = arr[mid:]
 
61
  merge_sort(L)
62
  merge_sort(R)
 
63
  i = j = k = 0
64
  while i < len(L) and j < len(R):
65
  if L[i] < R[j]:
 
69
  arr[k] = R[j]
70
  j += 1
71
  k += 1
 
72
  while i < len(L):
73
  arr[k] = L[i]
74
  i += 1
75
  k += 1
 
76
  while j < len(R):
77
  arr[k] = R[j]
78
  j += 1
 
90
  arr[i], arr[j] = arr[j], arr[i]
91
  arr[i+1], arr[high] = arr[high], arr[i+1]
92
  return (i + 1)
 
93
  def quick_sort(arr, low, high):
94
  if low < high:
95
  pi = partition(arr, low, high)
 
108
  tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'].squeeze())
109
  return token_embeddings, tokens
110
 
111
+ # Plot comparison between two algorithms
112
  def compare_algorithms(algo1_name, algo2_name):
113
  code1 = sorting_algorithms[algo1_name]
114
  code2 = sorting_algorithms[algo2_name]
 
147
  if __name__ == "__main__":
148
  interface.launch()
149
 
150
+