from transformers import T5ForConditionalGeneration, AutoTokenizer
import gradio as gr
# Load the model and tokenizer
model_name = "ejschwartz/hext5" # Replace with your desired model
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
max_input_length = 4096
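
# hext5 is a multi-task model: the task is selected by prefixing the input
# with a task name ("summarize: " or "identifier_predict: ", as used below).

# Example input: Hex-Rays-style decompiled pseudocode in which the function
# name has been replaced by the <func> placeholder.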
examples = [
    "void __fastcall __noreturn <func>(int a1, int a2, char a3, __int64 a4, __int64 a5)\n{\n __int64 v5; // rdi\n int v6; // ebx\n const char *v9; // rsi\n char *v10; // r12\n char *v11; // r13\n char *v12; // rax\n char v13[42]; // [rsp+Eh] [rbp-2Ah] BYREF\n\n v5 = (unsigned int)(a1 - 1);\n v6 = status;\n if ( (unsigned int)v5 <= 3 )\n {\n v9 = (&off_413A60)[v5];\n if ( a2 < 0 )\n {\n v13[0] = a3;\n v11 = v13;\n v10 = &asc_412691[-a2];\n v13[1] = 0;\n }\n else\n {\n v10 = \"--\";\n v11 = *(char **)(a4 + 32LL * a2);\n }\n v12 = dcgettext(0LL, v9, 5);\n error(v6, 0, v12, v10, v11, a5);\n abort();\n }\n abort();\n}\n"
]
# Predict a natural-language summary of the decompiled code
def predict_summary(code):
    # The "summarize: " prefix selects the summarization task
    inputs = tokenizer("summarize: " + code, return_tensors="pt",
                       max_length=max_input_length, truncation=True)
    output = model.generate(**inputs, max_new_tokens=256)[0]
    return tokenizer.decode(output, skip_special_tokens=True)
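
# Illustrative usage (actual output depends on the model):
#   print(predict_summary(examples[0]))
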
# Predict identifiers (function and variable names) for the placeholders
def predict_identifier(code):
    """
    code should be like: "unsigned __int8 *__cdecl <func>(int *<var_0>,...){ return <func_1>(1);}"
    """
    # The "identifier_predict: " prefix selects the identifier-prediction task
    inputs = tokenizer("identifier_predict: " + code, return_tensors="pt",
                       max_length=max_input_length, truncation=True)
    # Large generation budget so every identifier in the snippet can be named
    output = model.generate(**inputs, max_new_tokens=10000)[0]
    # Keep special tokens: the raw output pairs each placeholder
    # with its predicted name
    return tokenizer.decode(output)
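
# Illustrative usage (the decoded output is expected to retain special tokens
# that pair placeholders such as <func> with the predicted names):
#   print(predict_identifier('unsigned __int8 *__cdecl <func>(int *<var_0>) { return <func_1>(1); }'))
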
# Create the Gradio interface for predicting identifiers
identifier_iface = gr.Interface(
    fn=predict_identifier,
    inputs="text",
    outputs="text",
    title="Predict Identifiers",
    description="Enter a code snippet and see the model generate function identifiers.",
    examples=examples
)
# Create the Gradio interface for predicting summaries
summary_iface = gr.Interface(
    fn=predict_summary,
    inputs="text",
    outputs="text",
    title="Predict Summary",
    description="Enter a code snippet and see the model generate a summary.",
    examples=examples
)
# Combine the interfaces into a single tabbed interface
combined_iface = gr.TabbedInterface(
    interface_list=[identifier_iface, summary_iface],
    tab_names=["Predict Identifiers", "Predict Summary"]
)
# Launch the combined interface
combined_iface.launch()