jorgemarcc committed on
Commit
5b7503b
·
verified ·
1 Parent(s): ea88e6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -75
app.py CHANGED
@@ -17,70 +17,21 @@ from PIL import Image
17
  tokenizer = RobertaTokenizer.from_pretrained("microsoft/graphcodebert-base", cache_dir="models/")
18
  model = RobertaModel.from_pretrained("microsoft/graphcodebert-base", cache_dir="models/")
19
 
20
- # Define sorting algorithms
21
- sorting_algorithms = {
22
- "Bubble_Sort": """
23
- def bubble_sort(arr):
24
  n = len(arr)
25
  for i in range(n):
26
  for j in range(0, n-i-1):
27
  if arr[j] > arr[j+1]:
28
  arr[j], arr[j+1] = arr[j+1], arr[j]
29
- return arr
30
- """,
31
 
32
- "Selection_Sort": """
33
- def selection_sort(arr):
34
- for i in range(len(arr)):
35
- min_idx = i
36
- for j in range(i+1, len(arr)):
37
- if arr[j] < arr[min_idx]:
38
- min_idx = j
39
- arr[i], arr[min_idx] = arr[min_idx], arr[i]
40
- return arr
41
- """,
42
-
43
- "Insertion_Sort": """
44
- def insertion_sort(arr):
45
- for i in range(1, len(arr)):
46
- key = arr[i]
47
- j = i-1
48
- while j >= 0 and key < arr[j]:
49
- arr[j + 1] = arr[j]
50
- j -= 1
51
- arr[j + 1] = key
52
- return arr
53
- """,
54
-
55
- "Merge_Sort": """
56
- def merge_sort(arr):
57
- if len(arr) > 1:
58
- mid = len(arr) // 2
59
- L = arr[:mid]
60
- R = arr[mid:]
61
- merge_sort(L)
62
- merge_sort(R)
63
- i = j = k = 0
64
- while i < len(L) and j < len(R):
65
- if L[i] < R[j]:
66
- arr[k] = L[i]
67
- i += 1
68
- else:
69
- arr[k] = R[j]
70
- j += 1
71
- k += 1
72
- while i < len(L):
73
- arr[k] = L[i]
74
- i += 1
75
- k += 1
76
- while j < len(R):
77
- arr[k] = R[j]
78
- j += 1
79
- k += 1
80
- return arr
81
- """,
82
 
83
- "Quick_Sort": """
84
  def partition(arr, low, high):
85
  i = (low - 1)
86
  pivot = arr[high]
@@ -89,15 +40,7 @@ def partition(arr, low, high):
89
  i += 1
90
  arr[i], arr[j] = arr[j], arr[i]
91
  arr[i+1], arr[high] = arr[high], arr[i+1]
92
- return (i + 1)
93
- def quick_sort(arr, low, high):
94
- if low < high:
95
- pi = partition(arr, low, high)
96
- quick_sort(arr, low, pi - 1)
97
- quick_sort(arr, pi + 1, high)
98
- return arr
99
- """
100
- }
101
 
102
  # Get token embeddings for a code snippet
103
  def get_token_embeddings(code):
@@ -109,10 +52,7 @@ def get_token_embeddings(code):
109
  return token_embeddings, tokens
110
 
111
  # Plot comparison between two algorithms
112
- def compare_algorithms(algo1_name, algo2_name):
113
- code1 = sorting_algorithms[algo1_name]
114
- code2 = sorting_algorithms[algo2_name]
115
-
116
  emb1, tokens1 = get_token_embeddings(code1)
117
  emb2, tokens2 = get_token_embeddings(code2)
118
 
@@ -121,8 +61,8 @@ def compare_algorithms(algo1_name, algo2_name):
121
  coords = pca.fit_transform(combined)
122
 
123
  plt.figure(figsize=(6, 5), dpi=150)
124
- plt.scatter(coords[:len(tokens1), 0], coords[:len(tokens1), 1], color='red', label=algo1_name, s=20)
125
- plt.scatter(coords[len(tokens1):, 0], coords[len(tokens1):, 1], color='blue', label=algo2_name, s=20)
126
  plt.legend()
127
  plt.xticks([]); plt.yticks([]); plt.grid(False)
128
 
@@ -136,15 +76,16 @@ def compare_algorithms(algo1_name, algo2_name):
136
  interface = gr.Interface(
137
  fn=compare_algorithms,
138
  inputs=[
139
- gr.Dropdown(choices=list(sorting_algorithms.keys()), label="Algorithm 1"),
140
- gr.Dropdown(choices=list(sorting_algorithms.keys()), label="Algorithm 2")
141
  ],
142
  outputs=gr.Image(type="pil", label="Token Embedding PCA"),
143
  title="GraphCodeBERT Token Embedding Comparison",
144
- description="Visual comparison of token-level embeddings from GraphCodeBERT for classical sorting algorithms."
145
  )
146
 
147
  if __name__ == "__main__":
148
  interface.launch()
149
 
150
 
 
 
17
  tokenizer = RobertaTokenizer.from_pretrained("microsoft/graphcodebert-base", cache_dir="models/")
18
  model = RobertaModel.from_pretrained("microsoft/graphcodebert-base", cache_dir="models/")
19
 
20
+ # Default sorting algorithm code snippets
21
+ default_code_1 = """def bubble_sort(arr):
 
 
22
  n = len(arr)
23
  for i in range(n):
24
  for j in range(0, n-i-1):
25
  if arr[j] > arr[j+1]:
26
  arr[j], arr[j+1] = arr[j+1], arr[j]
27
+ return arr"""
 
28
 
29
+ default_code_2 = """def quick_sort(arr, low, high):
30
+ if low < high:
31
+ pi = partition(arr, low, high)
32
+ quick_sort(arr, low, pi - 1)
33
+ quick_sort(arr, pi + 1, high)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
 
35
  def partition(arr, low, high):
36
  i = (low - 1)
37
  pivot = arr[high]
 
40
  i += 1
41
  arr[i], arr[j] = arr[j], arr[i]
42
  arr[i+1], arr[high] = arr[high], arr[i+1]
43
+ return (i + 1)"""
 
 
 
 
 
 
 
 
44
 
45
  # Get token embeddings for a code snippet
46
  def get_token_embeddings(code):
 
52
  return token_embeddings, tokens
53
 
54
  # Plot comparison between two algorithms
55
+ def compare_algorithms(code1, code2):
 
 
 
56
  emb1, tokens1 = get_token_embeddings(code1)
57
  emb2, tokens2 = get_token_embeddings(code2)
58
 
 
61
  coords = pca.fit_transform(combined)
62
 
63
  plt.figure(figsize=(6, 5), dpi=150)
64
+ plt.scatter(coords[:len(tokens1), 0], coords[:len(tokens1), 1], color='red', label="Code 1", s=20)
65
+ plt.scatter(coords[len(tokens1):, 0], coords[len(tokens1):, 1], color='blue', label="Code 2", s=20)
66
  plt.legend()
67
  plt.xticks([]); plt.yticks([]); plt.grid(False)
68
 
 
76
  interface = gr.Interface(
77
  fn=compare_algorithms,
78
  inputs=[
79
+ gr.Textbox(lines=15, label="Code 1", value=default_code_1, language="python"),
80
+ gr.Textbox(lines=15, label="Code 2", value=default_code_2, language="python")
81
  ],
82
  outputs=gr.Image(type="pil", label="Token Embedding PCA"),
83
  title="GraphCodeBERT Token Embedding Comparison",
84
+ description="Edit or paste two Python code snippets. This tool compares their token-level embeddings using GraphCodeBERT and PCA."
85
  )
86
 
87
  if __name__ == "__main__":
88
  interface.launch()
89
 
90
 
91
+