Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
# Browser-tab title/icon and the main page heading.
st.set_page_config(
    page_title="Turkish Review Analysis - via AG",
    page_icon="📖",
)
st.header("📖Review Analysis for Your File - TR")

# The Hugging Face access key is entered in the sidebar as a masked field.
with st.sidebar:
    hf_key = st.text_input(
        "HuggingFace Access Key",
        key="hf_key",
        type="password",
    )
# Model ids for the hotel-review classifiers, keyed by architecture name.
MODEL_HOTEL = {
    "albert": "anilguven/albert_tr_turkish_hotel_reviews",
    "distilbert": "anilguven/distilbert_tr_turkish_hotel_reviews",
    "bert": "anilguven/bert_tr_turkish_hotel_reviews",
    "electra": "anilguven/electra_tr_turkish_hotel_reviews",
}

# Model ids for the movie-review classifiers, keyed by the same architecture names.
MODEL_MOVIE = {
    "albert": "anilguven/albert_tr_turkish_movie_reviews",
    "distilbert": "anilguven/distilbert_tr_turkish_movie_reviews",
    "bert": "anilguven/bert_tr_turkish_movie_reviews",
    "electra": "anilguven/electra_tr_turkish_movie_reviews",
}

# Architectures offered in the "Model" selector. Derived from the table keys
# so the selector cannot drift out of sync with the available models.
MODELS = list(MODEL_HOTEL)

# Task labels offered in the "Task" selector.
MODEL_TASK = ["Movie review analysis", "Hotel review analysis"]
# Use a pipeline as a high-level helper | |
from transformers import pipeline | |
# Create a mapping from formatted model names to their original identifiers | |
def format_model_name(model_key):
    """Return the display name for ``model_key``.

    The pieces of ``model_key`` are concatenated with no separator, so a
    plain string key such as ``"albert"`` comes back unchanged. No
    re-casing of any kind is applied.

    Args:
        model_key: A string, or an iterable of strings to be joined.

    Returns:
        The concatenated name as a single string.
    """
    return "".join(model_key)
# Lookup from display name to model key, built from the hotel model table.
formatted_names_to_identifiers = {
    format_model_name(model_key): model_key for model_key in MODEL_HOTEL
}
# Usage instructions shown inside the "About this app" expander.
about_text = """
1-Upload your file as txt or csv file. Each file contains one sample in the each row.\n
2-Choose your task (movie or hotel review)
3-Choose your model according to your task analysis (negative or positive).\n
4-And model predict your text files. \n
5-Download your test results.
"""
with st.expander("About this app"):
    st.write(about_text)

# Empty text element between the expander and the uploader.
st.text("")
uploaded_file = st.file_uploader(
    "Upload a csv or txt file",
    type=["csv", "txt"],
    help="Scanned documents are not supported yet!",
)

# Everything below needs both a file and an access key. Tell the user when
# the key is the missing piece instead of halting without any feedback.
if not hf_key:
    st.info("Please add your HuggingFace Access Key to continue.")
if not uploaded_file or not hf_key:
    st.stop()
def convert_df(df):
    """Serialize ``df`` to CSV and return it as UTF-8 encoded bytes.

    The CSV is produced by ``df.to_csv()`` with default options, so the
    index is written as the first (unnamed) column. The result is not
    cached; the conversion runs each time this function is called.

    Args:
        df: The pandas DataFrame to serialize.

    Returns:
        The CSV text encoded as UTF-8 ``bytes``.
    """
    return df.to_csv().encode("utf-8")
# Read the upload into `datas`, one sample per line. csv and txt files are
# handled identically: the raw bytes are decoded as UTF-8 (undecodable bytes
# are replaced) and split on "\n".
datas = []
try:
    if not uploaded_file.name.lower().endswith((".csv", ".txt")):
        raise NotImplementedError(
            f"File type {uploaded_file.name.split('.')[-1]} not supported"
        )
    text = uploaded_file.read().decode("utf-8", errors="replace")
except Exception:
    st.error("Error reading file. Make sure the file is not corrupted or encrypted")
    st.stop()

# Drop blank lines (for example the empty string produced by a trailing
# newline) so they are not treated as samples. Kept lines are left untouched,
# including any trailing "\r" from CRLF files.
datas = [line for line in text.split("\n") if line.strip()]
with st.expander("Show Datas"):
    st.text(datas)
# Pick the model table for the chosen task, then the architecture within it.
task_name: str = st.selectbox("Task", options=MODEL_TASK)
model_select = MODEL_MOVIE if task_name == "Movie review analysis" else MODEL_HOTEL
model_name: str = st.selectbox("Model", options=MODELS)
selected_model = model_select[model_name]

# No access-key check is needed here: the script has already stopped right
# after the file uploader when the key is empty.
access_token = hf_key


@st.cache_resource
def _load_pipeline(model_id, token):
    """Build the text-classification pipeline for ``model_id``.

    Streamlit re-executes the whole script on every widget interaction.
    Caching the pipeline as a resource avoids re-creating the model on each
    rerun; a new pipeline is only built for a new (model_id, token) pair.
    """
    return pipeline("text-classification", model=model_id, token=token)


pipe = _load_pipeline(selected_model, access_token)

# Show which model id is in use.
model_display_name = selected_model
st.write(f"Model being used: `{model_display_name}`")
# Per-sample outputs, filled in only when the button is pressed.
results = []      # "text, label, score\n" lines for the txt download
labels = []       # "Negative" / "Positive" per sample
accuracies = []   # model score * 100, as a string, per sample
values = []       # the sample text per sample
txt = ""

if st.button("Submit for File Analysis"):
    for data in datas:
        # Strip only line terminators. Lines come from split("\n"), so
        # slicing off the last character would remove real text whenever
        # the line has no trailing "\r".
        review = data.rstrip("\r\n")
        if not review.strip():
            # Blank lines are not samples; skip them.
            continue

        # truncation=True keeps over-long reviews within the model's
        # maximum input length instead of failing on them.
        result = pipe(review, truncation=True)[0]
        label = "Negative" if result["label"] == "LABEL_0" else "Positive"
        score = str(result["score"] * 100)

        results.append(f"{review}, {label}, {score}\n")
        labels.append(label)
        accuracies.append(score)
        values.append(review)

    # Assemble the txt payload once instead of growing a string per sample.
    txt = "".join(results)

    st.text("All files evaluated. You'll download result file.")

    lowered_name = uploaded_file.name.lower()
    base_name = lowered_name[:-4]  # drop the 4-character ".txt" / ".csv" suffix
    if lowered_name.endswith(".txt"):
        with st.expander("Show Results"):
            st.write(results)
        st.download_button("Download Result File", txt, base_name + "_results.txt")
    elif lowered_name.endswith(".csv"):
        dataframe = pd.DataFrame(
            {"text": values, "label": labels, "accuracy": accuracies}
        )
        with st.expander("Show Results"):
            st.write(dataframe)
        csv = convert_df(dataframe)
        st.download_button(
            label="Download as CSV",
            data=csv,
            file_name=base_name + "_results.csv",
            mime="text/csv",
        )
    else:
        raise NotImplementedError("File type not supported")