File size: 2,639 Bytes
af22fb8
962932f
 
af22fb8
 
1e73176
 
af22fb8
11a7529
 
1b18ff1
0dd7013
1b18ff1
af22fb8
 
b9a5fb3
e3cd87e
b9a5fb3
34b9836
 
 
af22fb8
 
b9a5fb3
1b18ff1
b9a5fb3
af22fb8
 
 
34b9836
09eccc9
af22fb8
 
34b9836
 
af22fb8
 
 
34b9836
 
e6e676f
af22fb8
 
34b9836
 
 
 
 
 
 
e6e676f
34b9836
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from transformers import T5ForConditionalGeneration, AutoTokenizer
import gradio as gr

# Load the model and tokenizer once at import time; both prediction
# functions below read these module-level globals.
model_name = "ejschwartz/hext5"  # Replace with your desired model
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Maximum number of input tokens passed to the tokenizer; longer inputs
# are truncated (see `truncation=True` in the predict functions).
max_input_length = 4096

# Shared example shown in both Gradio tabs: a Hex-Rays-style decompiled
# C function with placeholder tokens such as <func>.
examples = [
   "void __fastcall __noreturn <func>(int a1, int a2, char a3, __int64 a4, __int64 a5)\n{\n  __int64 v5; // rdi\n  int v6; // ebx\n  const char *v9; // rsi\n  char *v10; // r12\n  char *v11; // r13\n  char *v12; // rax\n  char v13[42]; // [rsp+Eh] [rbp-2Ah] BYREF\n\n  v5 = (unsigned int)(a1 - 1);\n  v6 = status;\n  if ( (unsigned int)v5 <= 3 )\n  {\n    v9 = (&off_413A60)[v5];\n    if ( a2 < 0 )\n    {\n      v13[0] = a3;\n      v11 = v13;\n      v10 = &asc_412691[-a2];\n      v13[1] = 0;\n    }\n    else\n    {\n      v10 = \"--\";\n      v11 = *(char **)(a4 + 32LL * a2);\n    }\n    v12 = dcgettext(0LL, v9, 5);\n    error(v6, 0, v12, v10, v11, a5);\n    abort();\n  }\n  abort();\n}\n"
]

# predict summary
# predict summary
def predict_summary(code):
    """Generate a natural-language summary of a decompiled code snippet.

    Args:
        code: Decompiled source text (e.g. Hex-Rays output) to summarize.

    Returns:
        The model's decoded summary string with special tokens removed.
    """
    # The 'summarize: ' prefix selects the summarization task for this
    # multi-task T5 model. `input` renamed to `encoded` (was shadowing the
    # builtin); redundant `global` declarations dropped — the function only
    # reads the module-level model/tokenizer, never rebinds them.
    encoded = tokenizer('summarize: '+code, return_tensors='pt',
                        max_length=max_input_length, truncation=True)
    generated = model.generate(**encoded, max_new_tokens=256)[0]
    return tokenizer.decode(generated, skip_special_tokens=True)

# predict identifier (func name)
# predict identifier (func name)
def predict_identifier(code):
    """Predict identifier (function/variable) names for a code snippet.

    Args:
        code: Decompiled source containing placeholder tokens, e.g.
            "unsigned __int8 *__cdecl <func>(int *<var_0>,...){ return <func_1>(1);}"

    Returns:
        The decoded model output. Unlike predict_summary, special tokens
        are NOT stripped — presumably so the sentinel-to-name mapping stays
        visible in the output; TODO confirm this asymmetry is intentional.
    """
    # The 'identifier_predict: ' prefix selects the identifier-prediction
    # task. `input` renamed to `encoded` (was shadowing the builtin);
    # redundant `global` declarations dropped — globals are only read here.
    encoded = tokenizer('identifier_predict: '+code, return_tensors='pt',
                        max_length=max_input_length, truncation=True)
    # NOTE(review): max_new_tokens=10000 is effectively "unbounded" and far
    # above typical T5 decoder limits — consider lowering; kept as-is to
    # preserve behavior.
    generated = model.generate(**encoded, max_new_tokens=10000)[0]
    return tokenizer.decode(generated)

# Create the Gradio interface for predicting identifiers
identifier_iface = gr.Interface(
    fn=predict_identifier, 
    inputs="text", 
    outputs="text",
    title="Predict Identifiers",
    description="Enter a code snippet and see the model generate function identifiers.",
    examples=examples
)

# Create the Gradio interface for predicting summaries
summary_iface = gr.Interface(
    fn=predict_summary, 
    inputs="text", 
    outputs="text",
    title="Predict Summary",
    description="Enter a code snippet and see the model generate a summary.",
    examples=examples
)

# Combine the interfaces into a single tabbed interface
# (one tab per task; tab_names order matches interface_list order)
combined_iface = gr.TabbedInterface(
    interface_list=[identifier_iface, summary_iface],
    tab_names=["Predict Identifiers", "Predict Summary"]
)

# Launch the combined interface
# NOTE(review): runs unconditionally at import time — no
# `if __name__ == "__main__":` guard, which is conventional for
# Hugging Face Spaces app.py files but blocks importing this module.
combined_iface.launch()