|
from io import StringIO |
|
from typing import Optional |
|
|
|
import gradio as gr |
|
import pandas as pd |
|
|
|
from utils.similarity import batch_cos_sim |
|
|
|
|
|
def read_data(filepath: str) -> Optional[pd.DataFrame]:
    """Load a tabular file into a DataFrame, dispatching on its extension.

    Args:
        filepath: Path to a ``.csv``, ``.xls`` or ``.xlsx`` file. The
            extension is matched case-insensitively.

    Returns:
        The parsed DataFrame.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    lowered = filepath.lower()
    if lowered.endswith(('.xlsx', '.xls')):
        # Excel workbooks are not text; they have to go through read_excel
        # rather than the CSV parser.
        return pd.read_excel(filepath)
    if lowered.endswith('.csv'):
        return pd.read_csv(filepath)
    raise ValueError('File type not supported')
|
|
|
|
|
def process(model_name: str,
            text: str,
            file=None,
            ):
    """Build a DataFrame from the input, run ``batch_cos_sim``, save as CSV.

    An uploaded file takes precedence over pasted text.

    Args:
        model_name: Model identifier forwarded to ``batch_cos_sim``.
        text: CSV-formatted text; used only when no file is supplied.
        file: Uploaded file, given either as an object exposing its path
            through ``.name`` or as the path string itself.

    Returns:
        A ``(preview, path)`` tuple: the string rendering of the resulting
        DataFrame and the path of the CSV file it was written to.

    Raises:
        Exception: If neither a file nor any non-blank text is provided.
    """
    if file:
        # Fall back to the value itself when it has no ``.name`` attribute,
        # i.e. when the upload arrives as a bare path string.
        df = read_data(getattr(file, 'name', file))
    elif text and text.strip():
        df = pd.read_csv(StringIO(text))
    else:
        # Whitespace-only text lands here too, instead of surfacing as a
        # parser error from read_csv.
        raise Exception('No input provided')

    df = batch_cos_sim(df, model_name)

    # NOTE(review): fixed file name in the working directory; every call
    # overwrites the previous result.
    path = 'output.csv'
    # utf-8-sig prepends a BOM - presumably so spreadsheet applications
    # detect the encoding; confirm before changing.
    df.to_csv(path, index=False, encoding='utf-8-sig')
    return str(df), path
|
|
|
|
|
# Single-line free-text field pre-filled with a default model name.
# NOTE(review): the gr.Interface inputs in this file use model_name_option,
# not this component - confirm whether it is still needed.
model_name_input = gr.components.Textbox(
    type='text',
    lines=1,
    value='paraphrase-multilingual-MiniLM-L12-v2',
)
|
|
|
# Dropdown for choosing the model name; the first choice is preselected.
model_name_option = gr.components.Dropdown(
    choices=[
        'paraphrase-multilingual-MiniLM-L12-v2',
        'paraphrase-multilingual-mpnet-base-v2',
        'cyclone/simcse-chinese-roberta-wwm-ext',
    ],
    value='paraphrase-multilingual-MiniLM-L12-v2',
    label='Model Name',
)
|
|
|
# Ten-line text area pre-filled with a 'prompt,response' header row.
text_input = gr.components.Textbox(
    type='text',
    lines=10,
    value='prompt,response\n',
)
|
|
|
# Text box labelled 'Output'; used as the first output of the interface.
text_output = gr.components.Textbox(
    type='text',
    label='Output',
)
|
|
|
# Single-file component labelled 'Output File'; used as the second output
# of the interface.
# NOTE(review): the '' and '.' entries in file_types look unintentional -
# confirm before removing.
file_output = gr.components.File(
    file_types=['', '.', '.csv', '.xls', '.xlsx'],
    file_count='single',
    label='Output File',
)
|
|
|
# Wire the UI: (model dropdown, text box, file upload) -> process ->
# (text output, output file).
app = gr.Interface(
    outputs=[text_output, file_output],
    inputs=[model_name_option, text_input, 'file'],
    fn=process,
)

# Launched unconditionally, so the app starts whenever this module runs.
app.launch()
|
|