nouf-sst committed on
Commit 17cde60 · 1 Parent(s): d570802

Create app.py

Files changed (1)
  1. app.py +619 -0
app.py ADDED
@@ -0,0 +1,619 @@
+ import gradio as gr
+ import re
+ import json
+ import nltk
+ import stanza
+ from transformers import AutoTokenizer, AutoModelForTokenClassification, TokenClassificationPipeline
+ from sentence_transformers import CrossEncoder
+ from autocorrect import Speller
+ from transformers import BertTokenizer, BertForSequenceClassification
+ import torch
+ from torch.nn.utils.rnn import pad_sequence
+ import numpy as np
+ from stanza.server import CoreNLPClient
+
+ # ********************* Setting up Stanford CoreNLP *********************
+
+ # Download the Stanford CoreNLP package with Stanza's installation command
+ # This'll take several minutes, depending on the network speed
+ corenlp_dir = './corenlp'
+ stanza.install_corenlp(dir=corenlp_dir)
+
+ # Set the CORENLP_HOME environment variable to point to the installation location
+ import os
+ os.environ["CORENLP_HOME"] = corenlp_dir
+
+ # Construct a CoreNLPClient with some basic annotators, a memory allocation of 4GB, and port number 9001
+ client = CoreNLPClient(
+     annotators=['tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'parse'],
+     memory='4G',
+     endpoint='http://localhost:9001',
+     be_quiet=True)
+ print(client)
+
+ # Start the background server and wait for some time
+ # Note that in practice this is totally optional, as by default the server will be started when the first annotation is performed
+ client.start()
+ #import time; time.sleep(10)
+
+ # ************************************************************************
+
+
+ # ***************************** TGRL Parsing *****************************
+
+ def parse_tgrl(file_obj):
+
+     with open(file_obj.name, 'r') as f:
+         tgrl_text = f.read()
+         tgrl_text = tgrl_text.replace('\t', '')
+         tgrl_text = tgrl_text.replace('\n', '')
+
+     return tgrl_text
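+
+ # Illustrative (hypothetical) TGRL fragment in the shape the regexes in
+ # extract_elements() below expect:
+ #   actor customer { name = "Online Customer" }
+ #   goal placeOrder { name = "Place order online" }
+ #   task fillForm { name = "Fill in order form" }
+ #   placeOrder decomposedBy fillForm, payOnline;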
+
+ def extract_elements(tgrl_text):
+
+     # Extract actors
+     actors = re.findall("(?:.*?actor\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s-]*)(?:\")", tgrl_text)
+     # Extract goals
+     goals = re.findall("(?:.*?goal\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s]*)(?:\")", tgrl_text)
+     # Extract softGoals
+     softGoals = re.findall("(?:.*?softGoal\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s]*)(?:\")", tgrl_text)
+     # Extract tasks
+     tasks = re.findall("(?:.*?task\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s]*)(?:\")", tgrl_text)
+     # Extract resources
+     resources = re.findall("(?:.*?resource\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s]*)(?:\")", tgrl_text)
+
+     elements = {
+         "actors": actors,
+         "goals": goals,
+         "softGoals": softGoals,
+         "tasks": tasks,
+         "resources": resources
+     }
+
+     # get elements per actor
+     elements_per_actor = {}
+
+     for goal in goals:
+         corresponding_actor = tgrl_text.rfind('actor', 0, tgrl_text.index(goal))
+         corresponding_actor = re.split(' |{', tgrl_text[corresponding_actor:])[1]
+         if corresponding_actor not in elements_per_actor:
+             elements_per_actor[corresponding_actor] = []
+         elements_per_actor[corresponding_actor].append(goal)
+
+     for softGoal in softGoals:
+         corresponding_actor = tgrl_text.rfind('actor', 0, tgrl_text.index(softGoal))
+         corresponding_actor = re.split(' |{', tgrl_text[corresponding_actor:])[1]
+         if corresponding_actor not in elements_per_actor:
+             elements_per_actor[corresponding_actor] = []
+         elements_per_actor[corresponding_actor].append(softGoal)
+
+     for task in tasks:
+         corresponding_actor = tgrl_text.rfind('actor', 0, tgrl_text.index(task))
+         corresponding_actor = re.split(' |{', tgrl_text[corresponding_actor:])[1]
+         if corresponding_actor not in elements_per_actor:
+             elements_per_actor[corresponding_actor] = []
+         elements_per_actor[corresponding_actor].append(task)
+
+     # get decomposed elements
+
+     new_lines = tgrl_text
+     decomposed_elements = {}
+
+     main_elements = re.findall("\w+(?=\s+decomposedBy)", new_lines)
+
+     for main_element in main_elements:
+
+         sub_elements = []
+
+         sub_element = re.findall(main_element+"(?: decomposedBy )([A-Za-z\s]*)", new_lines)[0]
+         sub_elements.append(sub_element)
+         new_lines = new_lines.replace(sub_element+', ', '')
+
+         temp = main_element + " decomposedBy "
+         for idx, sub_element in enumerate(sub_elements):
+             if idx+1 == len(sub_elements):
+                 temp = temp + sub_element + ";"
+             else:
+                 temp = temp + sub_element + ", "
+
+         while temp not in tgrl_text:
+
+             sub_element = re.findall(main_element+"(?: decomposedBy )([A-Za-z\s]*)", new_lines)[0]
+             sub_elements.append(sub_element)
+             new_lines = new_lines.replace(sub_element+', ', '')
+
+             temp = main_element + " decomposedBy "
+             for idx, sub_element in enumerate(sub_elements):
+                 if idx+1 == len(sub_elements):
+                     temp = temp + sub_element + ";"
+                 else:
+                     temp = temp + sub_element + ", "
+
+         decomposed_elements[main_element] = sub_elements
+
+     # Replace elements IDs with names
+     new_decomposed_elements = {}
+
+     for key, _ in decomposed_elements.items():
+
+         new_key = re.findall("(?:"+key+" {\s*name\s=\s\")([A-Za-z\s]*)", tgrl_text)[0]
+         new_values = []
+
+         for element in decomposed_elements[key]:
+             new_value = re.findall("(?:"+element+" {\s*name\s=\s\")([A-Za-z\s]*)", tgrl_text)[0]
+             new_values.append(new_value)
+
+         new_decomposed_elements[new_key] = new_values
+
+     return elements, elements_per_actor, new_decomposed_elements
+
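+ # For a fragment like the hypothetical one above, extract_elements() would return roughly:
+ #   elements                -> {"actors": [...], "goals": [...], "softGoals": [...], "tasks": [...], "resources": [...]}
+ #   elements_per_actor      -> {actor ID -> names of the goals/softgoals/tasks owned by that actor}
+ #   new_decomposed_elements -> {parent element name -> list of sub-element names}
+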
+ # ************************************************************************
+
+ # ************************* Bad Smells Detection *************************
+
+ # ########### Long Elements ###########
+ def get_long_elements(elements):  # Using RegEx
+
+     long_elements = []
+
+     for key, value in elements.items():
+         for i in range(0, len(elements[key])):
+             if len(re.findall(r'\w+', elements[key][i])) > 4:
+                 long_elements.append(elements[key][i])
+
+     if long_elements:
+         long_elements = "\n".join(long_elements)
+         return "Long elements:\n" + long_elements
+     else:
+         return "Long elements:\nNone."
+ # #####################################
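+
+ # Example (hypothetical): an element such as "Process customer orders within two working days"
+ # has more than four words, so get_long_elements() above would report it as long.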
+
+ # ######### Complex Sentences #########
+
+ def get_verb_phrases(t):
+     verb_phrases = []
+     num_children = len(t)
+     num_VP = sum(1 if t[i].label() == "VP" else 0 for i in range(0, num_children))
+
+     if t.label() != "VP":
+         for i in range(0, num_children):
+             if t[i].height() > 2:
+                 verb_phrases.extend(get_verb_phrases(t[i]))
+     elif t.label() == "VP" and num_VP > 1:
+         for i in range(0, num_children):
+             if t[i].label() == "VP":
+                 if t[i].height() > 2:
+                     verb_phrases.extend(get_verb_phrases(t[i]))
+     else:
+         verb_phrases.append(' '.join(t.leaves()))
+
+     return verb_phrases
+
+ def get_pos(t):
+     vp_pos = []
+     sub_conj_pos = []
+     num_children = len(t)
+     children = [t[i].label() for i in range(0, num_children)]
+
+     flag = re.search(r"(S|SBAR|SBARQ|SINV|SQ)", ' '.join(children))
+
+     if "VP" in children and not flag:
+         for i in range(0, num_children):
+             if t[i].label() == "VP":
+                 vp_pos.append(t[i].treeposition())
+     elif "VP" not in children and not flag:
+         for i in range(0, num_children):
+             if t[i].height() > 2:
+                 temp1, temp2 = get_pos(t[i])
+                 vp_pos.extend(temp1)
+                 sub_conj_pos.extend(temp2)
+     # Comment out this "else" part if you want to include subordinating conjunctions
+     else:
+         for i in range(0, num_children):
+             if t[i].label() in ["S", "SBAR", "SBARQ", "SINV", "SQ"]:
+                 temp1, temp2 = get_pos(t[i])
+                 vp_pos.extend(temp1)
+                 sub_conj_pos.extend(temp2)
+             else:
+                 sub_conj_pos.append(t[i].treeposition())
+
+     return (vp_pos, sub_conj_pos)
+
+ # get all clauses
+ def get_clause_list(sent):
+
+     parser = client.annotate(sent, properties={"annotators": "parse", "outputFormat": "json"})
+     sent_tree = nltk.tree.ParentedTree.fromstring(parser["sentences"][0]["parse"])
+     #print(sent_tree)
+     clause_level_list = ["S", "SBAR", "SBARQ", "SINV", "SQ"]
+     clause_list = []
+     sub_trees = []
+     #sent_tree.pretty_print()
+
+     # break the tree into subtrees of clauses using
+     # clause levels "S","SBAR","SBARQ","SINV","SQ"
+     for sub_tree in reversed(list(sent_tree.subtrees())):
+         if sub_tree.label() in clause_level_list:
+             if sub_tree.parent().label() in clause_level_list:
+                 continue
+
+             if (len(sub_tree) == 1 and sub_tree.label() == "S" and sub_tree[0].label() == "VP"
+                     and not sub_tree.parent().label() in clause_level_list):
+                 continue
+
+             sub_trees.append(sub_tree)
+             del sent_tree[sub_tree.treeposition()]
+
+     #print(sub_trees)
+
+     # for each clause level subtree, extract relevant simple sentence
+     for t in sub_trees:
+         # get verb phrases from the new modified tree
+         verb_phrases = get_verb_phrases(t)
+         #print(verb_phrases)
+
+         # get tree without verb phrases (mainly subject)
+         # remove subordinating conjunctions
+         vp_pos, sub_conj_pos = get_pos(t)
+         for i in vp_pos:
+             del t[i]
+         for i in sub_conj_pos:
+             del t[i]
+
+         subject_phrase = ' '.join(t.leaves())
+
+         # update the clause_list
+         for i in verb_phrases:
+             clause_list.append(subject_phrase + " " + i)
+
+     return clause_list
+
+ def get_complex_sentences(elements):
+
+     complex_sentences = []
+
+     for key, value in elements.items():
+         for i in range(0, len(elements[key])):
+             if len(get_clause_list(re.sub(r"(\.|,|\?|\(|\)|\[|\])", " ", elements[key][i]))) > 1:
+                 complex_sentences.append(elements[key][i])
+
+     if complex_sentences:
+         complex_sentences = "\n".join(complex_sentences)
+         return "Complex sentences:\n" + complex_sentences
+     else:
+         return "Complex sentences:\nNone."
+ # #################################
+
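+ # Example (hypothetical): "The customer places an order and the system sends a confirmation"
+ # would typically be split into two clauses by get_clause_list(), so
+ # get_complex_sentences() would report it as complex.
+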
+ # ########## Punctuations #########
+ def get_punctuations(elements):
+
+     punctuations = []
+
+     for key, value in elements.items():
+         for i in range(0, len(elements[key])):
+             if len(re.findall("[^\s\w\d-]", elements[key][i])) > 0:
+                 punctuations.append(elements[key][i])
+
+     if punctuations:
+         punctuations = "\n".join(punctuations)
+         return "Punctuations:\n" + punctuations
+     else:
+         return "Punctuations:\nNone."
+ # #################################
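+
+ # Example (hypothetical): "Log in / register" contains "/", which matches [^\s\w\d-],
+ # so get_punctuations() above would flag it.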
+
+ # ########## Incorrect Actor Syntax ##########
+ def find_non_NPs(sentences):
+
+     model_name = "QCRI/bert-base-multilingual-cased-pos-english"
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForTokenClassification.from_pretrained(model_name)
+
+     pipeline = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
+
+     outputs = pipeline(sentences)
+
+     Non_NPs = []
+
+     for idx, output in enumerate(outputs):
+         if not output[0]['entity'].startswith('N'):
+             Non_NPs.append(sentences[idx])
+
+     return Non_NPs
+
+ def check_actor_syntax(actors):
+
+     incorrect_actor_syntax = find_non_NPs(actors)
+
+     if incorrect_actor_syntax:
+         incorrect_actor_syntax = "\n".join(incorrect_actor_syntax)
+         return "Incorrect Actors Syntax:\n" + incorrect_actor_syntax
+     else:
+         return "All actors are syntactically correct."
+ # ############################################
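+
+ # Note: find_non_NPs() only inspects the POS tag of the first token (output[0]), so an
+ # actor name is judged by its first word. A name like "Online Customer" passes, while
+ # one starting with a verb, e.g. "Manage inventory", would likely be flagged.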
+
+ # ########## Incorrect Goal Syntax ###########
+ def check_goal_syntax(goals):
+
+     incorrect_goal_syntax = find_non_NPs(goals)
+
+     if incorrect_goal_syntax:
+         incorrect_goal_syntax = "\n".join(incorrect_goal_syntax)
+         return "Incorrect Goals Syntax:\n" + incorrect_goal_syntax
+     else:
+         return "All goals are syntactically correct."
+ # ############################################
+
+ # ########## Incorrect Softgoal Syntax ###########
+ def check_softgoal_syntax(softgoals):
+
+     incorrect_softgoal_syntax = find_non_NPs(softgoals)
+
+     if incorrect_softgoal_syntax:
+         incorrect_softgoal_syntax = "\n".join(incorrect_softgoal_syntax)
+         return "Incorrect Softgoals Syntax:\n" + incorrect_softgoal_syntax
+     else:
+         return "All softgoals are syntactically correct."
+ # ############################################
+
+ # ########## Incorrect Task Syntax ###########
+ def find_non_VPs(sentences):
+
+     model_name = "QCRI/bert-base-multilingual-cased-pos-english"
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForTokenClassification.from_pretrained(model_name)
+
+     pipeline = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
+
+     outputs = pipeline(sentences)
+
+     Non_VPs = []
+
+     for idx, output in enumerate(outputs):
+         if not output[0]['entity'].startswith('V'):
+             Non_VPs.append(sentences[idx])
+
+     return Non_VPs
+
+ def check_task_syntax(tasks):
+
+     incorrect_task_syntax = find_non_VPs(tasks)
+
+     if incorrect_task_syntax:
+         incorrect_task_syntax = "\n".join(incorrect_task_syntax)
+         return "Incorrect Tasks Syntax:\n" + incorrect_task_syntax
+     else:
+         return "All tasks are syntactically correct."
+ # ############################################
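+
+ # Note: analogously to find_non_NPs(), find_non_VPs() checks only the first token, so a
+ # task is expected to start with a verb. "Fill in order form" passes, while a noun-led
+ # phrase such as "Order form filling" would likely be flagged.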
+
+ # ########## Similarity ###########
+ def get_similar_elements(elements_per_actor):
+
+     # Load the pre-trained model
+     model = CrossEncoder('cross-encoder/stsb-roberta-base')
+
+     # Prepare sentence pair array
+     sentence_pairs = []
+
+     for key, value in elements_per_actor.items():
+
+         for i in range(len(elements_per_actor[key])):
+             for j in range(i+1, len(elements_per_actor[key])):
+                 sentence_pairs.append([elements_per_actor[key][i], elements_per_actor[key][j]])
+
+     # Predict semantic similarity
+     semantic_similarity_scores = model.predict(sentence_pairs, show_progress_bar=True)
+
+     similar_elements = []
+     for index, value in enumerate(sentence_pairs):
+         if semantic_similarity_scores[index] > 0.5:
+             similar_elements.append(value)
+             #semantic_similarity["pair_"+str(index+1)] = [value,semantic_similarity_scores[index]]
+
+     if similar_elements:
+         similar_elements = [' and '.join(ele) for ele in similar_elements]
+         similar_elements = "\n".join(similar_elements)
+         return "The following elements are semantically similar:\n" + similar_elements
+     else:
+         return "There are no similar elements."
+ # #################################
+
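+ # Note: the cross-encoder scores every pair of elements owned by the same actor and any
+ # pair scoring above 0.5 is reported; e.g. (hypothetically) "Place order online" and
+ # "Submit order via the website" would likely exceed that threshold.
+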
+ # ########## Misspelling ###########
+ def get_misspelled_words(sentence):
+
+     spell = Speller(only_replacements=True)
+
+     misspelled = []
+
+     for word in sentence.split():
+         correct_word = spell(word)
+         if word != correct_word:
+             misspelled.append([word, correct_word])
+
+     return misspelled
+
+ def check_spelling(elements):
+
+     spelling_mistakes = []
+     spelling_mistakes_string = ""
+
+     for key, value in elements.items():
+         for i in range(0, len(elements[key])):
+             if get_misspelled_words(elements[key][i]):
+                 spelling_mistakes.append([elements[key][i], get_misspelled_words(elements[key][i])])
+
+     for idx, element in enumerate(spelling_mistakes):
+         for spelling_mistake in element[1]:
+             temp = ' should be written as '.join(spelling_mistake)
+             spelling_mistakes_string = spelling_mistakes_string + "\n" + element[0] + ": " + temp
+
+     return spelling_mistakes_string
+ # ##################################
+
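+ # Example (hypothetical): an element "Send confirmation emial" would typically be reported
+ # as "Send confirmation emial: emial should be written as email".
+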
+ # ########## NLI ###########
+ def do_nli(premise, hypothesis, model, tokenizer):
+
+     # Tokenization
+     token_ids = []
+     seg_ids = []
+     mask_ids = []
+
+     premise_id = tokenizer.encode(premise, add_special_tokens=False)
+     hypothesis_id = tokenizer.encode(hypothesis, add_special_tokens=False)
+     pair_token_ids = [tokenizer.cls_token_id] + premise_id + [tokenizer.sep_token_id] + hypothesis_id + [tokenizer.sep_token_id]
+     premise_len = len(premise_id)
+     hypothesis_len = len(hypothesis_id)
+
+     segment_ids = torch.tensor([0] * (premise_len + 2) + [1] * (hypothesis_len + 1))  # sentence 0 and sentence 1
+     attention_mask_ids = torch.tensor([1] * (premise_len + hypothesis_len + 3))  # mask padded values
+
+     token_ids.append(torch.tensor(pair_token_ids))
+     seg_ids.append(segment_ids)
+     mask_ids.append(attention_mask_ids)
+
+     # Forward pass
+     token_ids = pad_sequence(token_ids, batch_first=True)
+     mask_ids = pad_sequence(mask_ids, batch_first=True)
+     seg_ids = pad_sequence(seg_ids, batch_first=True)
+
+     with torch.no_grad():
+         output = model(token_ids,
+                        token_type_ids=seg_ids,
+                        attention_mask=mask_ids)
+
+     # Output prediction
+     result = ""
+     prediction = np.argmax(output.logits.cpu().numpy()).flatten().item()
+     if prediction == 0:
+         result = "Entailment"
+         #print("Entailment")
+     elif prediction == 1:
+         result = "Contradiction"
+         #print("Contradiction")
+     elif prediction == 2:
+         result = "Neutral"
+         #print("Neutral")
+
+     return result
+
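+ # Minimal usage sketch (assuming the fine-tuned model keeps the label order used above,
+ # i.e. 0 = Entailment, 1 = Contradiction, 2 = Neutral):
+ #   result = do_nli("Place order online", "Fill in order form", model, tokenizer)
+ #   # result is one of "Entailment", "Contradiction" or "Neutral"
+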
+ # Entailment
+ def check_entailment(decomposed_elements):
+
+     model = BertForSequenceClassification.from_pretrained("nouf-sst/bert-base-MultiNLI", use_auth_token="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")
+     tokenizer = BertTokenizer.from_pretrained("nouf-sst/bert-base-MultiNLI", use_auth_token="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", do_lower_case=True)
+
+     sentence_pairs = []
+     non_matching_elements = []
+
+     for key, value in decomposed_elements.items():
+         #print(key, value)
+         for i in decomposed_elements[key]:
+             #print(key, i)
+             sentence_pairs.append([key, i])
+
+     for sentence_pair in sentence_pairs:
+         result = do_nli(sentence_pair[0], sentence_pair[1], model, tokenizer)
+         print(result)
+         if result != "Entailment":
+             non_matching_elements.append(sentence_pair)
+
+     if non_matching_elements:
+         non_matching_elements = [' and '.join(ele) for ele in non_matching_elements]
+         non_matching_elements = "\n".join(non_matching_elements)
+         return "The following elements are mismatched:\n" + non_matching_elements
+     else:
+         return "There are no mismatched elements."
+
+ # Contradiction
+ def check_contradiction(elements_per_actor):
+
+     model = BertForSequenceClassification.from_pretrained("nouf-sst/bert-base-MultiNLI", use_auth_token="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")
+     tokenizer = BertTokenizer.from_pretrained("nouf-sst/bert-base-MultiNLI", use_auth_token="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", do_lower_case=True)
+
+     sentence_pairs = []
+     contradicting_elements = []
+
+     for key, value in elements_per_actor.items():
+
+         for i in range(len(elements_per_actor[key])):
+             for j in range(i+1, len(elements_per_actor[key])):
+                 sentence_pairs.append([elements_per_actor[key][i], elements_per_actor[key][j]])
+
+     #print(sentence_pairs)
+     # Check contradiction
+     for sentence_pair in sentence_pairs:
+         result = do_nli(sentence_pair[0], sentence_pair[1], model, tokenizer)
+         #print(result)
+         if result == "Contradiction":
+             contradicting_elements.append(sentence_pair)
+
+     if contradicting_elements:
+         contradicting_elements = [' and '.join(ele) for ele in contradicting_elements]
+         contradicting_elements = "\n".join(contradicting_elements)
+         return "The following elements are contradicting:\n" + contradicting_elements
+     else:
+         return "There are no contradicting elements."
+ # ##########################
+
+ # ************************* User Interface *************************
+
+ def identify_bad_smells(tgrl_file, selected_bad_smells):
+
+     output = ""
+
+     tgrl_text = parse_tgrl(tgrl_file)
+
+     elements, elements_per_actor, decomposed_elements = extract_elements(tgrl_text)
+
+     if 'Size' in selected_bad_smells:
+         output = output + get_long_elements(elements) + "\n\n"
+
+     if 'Complexity' in selected_bad_smells:
+         output = output + get_complex_sentences(elements) + "\n\n"
+
+     if 'Punctuations' in selected_bad_smells:
+         output = output + get_punctuations(elements) + "\n\n"
+
+     if 'Actors Syntax' in selected_bad_smells:
+         output = output + check_actor_syntax(elements['actors']) + "\n\n"
+
+     if 'Goals Syntax' in selected_bad_smells:
+         output = output + check_goal_syntax(elements['goals']) + "\n\n"
+
+     if 'Softgoals Syntax' in selected_bad_smells:
+         output = output + check_softgoal_syntax(elements['softGoals']) + "\n\n"
+
+     if 'Tasks Syntax' in selected_bad_smells:
+         output = output + check_task_syntax(elements['tasks']) + "\n\n"
+
+     if 'Similar Elements' in selected_bad_smells:
+         output = output + get_similar_elements(elements_per_actor) + "\n\n"
+
+     if 'Spelling Mistakes' in selected_bad_smells:
+         output = output + check_spelling(elements) + "\n\n"
+
+     if 'Goal-Subgoal Mismatch' in selected_bad_smells:
+         output = output + check_entailment(decomposed_elements) + "\n\n"
+
+     if 'Contradicting Elements' in selected_bad_smells:
+         output = output + check_contradiction(elements_per_actor) + "\n\n"
+
+     return output
+
+
+ interface = gr.Interface(fn=identify_bad_smells,
+                          inputs=[gr.File(label="TGRL File"),
+                                  gr.CheckboxGroup(["Size", "Complexity", "Punctuations", "Actors Syntax", "Goals Syntax", "Softgoals Syntax", "Tasks Syntax", "Similar Elements", "Spelling Mistakes", "Goal-Subgoal Mismatch", "Contradicting Elements"],
+                                                   label="Which bad smells do you want to detect?")],
+                          outputs=["text"],
+                          title="TGRL Bad Smells Detection",
+                          description="Upload your .xgrl file and we will find the bad smells for you!")
+
+ interface.launch(inline=False)
+ #interface.launch()
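+
+ # Note: launch(inline=False) starts the Gradio server (typically port 7860 unless
+ # overridden); on Hugging Face Spaces, running app.py is enough for the hosted app to come up.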