# Hugging Face Space: AI-generated-text detector (Gradio app)
# (scrape artifacts from the Space's web page removed)
import nltk
# Download the Punkt sentence-tokenizer models needed by sent_tokenize below.
nltk.download('punkt')
import pandas as pd
import gradio as gr
from nltk import sent_tokenize
from transformers import pipeline
# Sentence-level AI-text detector; emits LABEL_0 (human) / LABEL_1 with a score.
# NOTE(review): label semantics inferred from predict_doc's mapping — confirm
# against the model card for yikang0131/argugpt-detector-sent.
detector = pipeline(task='text-classification', model='yikang0131/argugpt-detector-sent')
def predict_doc(doc):
    """Classify every sentence of *doc* as human- or machine-written.

    Parameters
    ----------
    doc : str
        The essay text; split into sentences with NLTK's ``sent_tokenize``.

    Returns
    -------
    tuple
        ``(highlights, df, csv_path)`` where *highlights* is a list of
        ``(sentence, 'Human'|'Machine')`` pairs for ``gr.HighlightedText``,
        *df* is a DataFrame with sentence/label/score columns, and
        *csv_path* is the path of a CSV file holding the same table.
    """
    import tempfile  # local import: stdlib, used only for the output file

    sents = sent_tokenize(doc)
    data = {'sentence': [], 'label': [], 'score': []}
    res = []
    for sent in sents:
        label, score = predict_one_sent(sent)
        data['sentence'].append(sent)
        data['score'].append(score)
        # LABEL_0 is the human class; anything else is treated as machine.
        verdict = 'Human' if label == 'LABEL_0' else 'Machine'
        res.append((sent, verdict))
        data['label'].append(verdict)
    df = pd.DataFrame(data)
    # Use a unique temp file per call: a fixed 'result.csv' would be
    # clobbered by concurrent Gradio requests. index=False keeps the CSV
    # free of a spurious row-index column.
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.csv', delete=False, newline=''
    ) as tmp:
        df.to_csv(tmp, index=False)
        csv_path = tmp.name
    return res, df, csv_path
def predict_one_sent(sent):
    """Run the detector on a single sentence.

    Returns a ``(label, score)`` pair taken from the top prediction,
    e.g. ``('LABEL_0', 0.98)``.
    """
    top = detector(sent)[0]
    return top['label'], top['score']
# Wire the predictor into a Gradio UI: one textbox in, three views out
# (highlighted sentences, a score table, and a downloadable CSV).
iface = gr.Interface(
    fn=predict_doc,
    inputs=[
        gr.Textbox(
            label='Essay input',
            info="Please enter essay in the textbox",
            lines=5
        )
    ],
    outputs=[
        # Color each sentence by its predicted class.
        gr.HighlightedText(
            label='Labeled Result',
            show_legend=True
        ).style(color_map={'Machine': 'red', 'Human': 'green'}),
        gr.DataFrame(
            label='Table with Probability Score',
            max_rows=10
        ),
        gr.File(
            label='CSV file storing data with all sentences'
        )
    ],
    theme=gr.themes.Base()
)
# Fixed: the original source ended with a stray " |" scrape artifact here,
# which made the line a syntax error.
iface.launch()