Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,9 @@ from sklearn.cluster import KMeans
|
|
11 |
from sklearn.decomposition import PCA
|
12 |
import json
|
13 |
import math
|
|
|
|
|
|
|
14 |
|
15 |
def sorting(df):
|
16 |
df.index = list(map(float, df.index))
|
@@ -539,6 +542,19 @@ def sample_size_calculator(confidence_level, p, E):
|
|
539 |
|
540 |
return n
|
541 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
542 |
def upload_and_select_dataframe():
|
543 |
st.sidebar.title("File Upload")
|
544 |
uploaded_files = st.sidebar.file_uploader("Choose CSV or Excel files", type=["csv", "xlsx", "xls", "xlsb"], accept_multiple_files=True)
|
@@ -581,7 +597,6 @@ df = upload_and_select_dataframe()
|
|
581 |
|
582 |
try:
|
583 |
try:
|
584 |
-
d = df.head()
|
585 |
st.subheader("Data preview")
|
586 |
st.dataframe(df.head())
|
587 |
|
@@ -925,27 +940,19 @@ try:
|
|
925 |
st.success("Z-Test analysis completed successfully.")
|
926 |
|
927 |
elif main_option == "Coding":
|
928 |
-
"""
|
929 |
selected_list = st.sidebar.multiselect(
|
930 |
'Select the desired "Open Question" column.',
|
931 |
cols,
|
932 |
default=[]
|
933 |
)
|
934 |
df["id"] = df.index
|
935 |
-
api_key = "sk-e2a7f307b1ad4c649f9fa1f6ebd582e8"
|
936 |
prompt_user = st.text_input("Write a brief description of the selected column question.")
|
937 |
if st.button("Submit"):
|
938 |
|
939 |
-
df2 =
|
940 |
-
dataframe = df[["id"]+selected_list],
|
941 |
-
id_column = "id",
|
942 |
-
text_column = selected_list[0],
|
943 |
-
api_key = api_key)
|
944 |
|
945 |
st.subheader("Categorized data")
|
946 |
st.dataframe(df2)
|
947 |
-
"""
|
948 |
-
st.info("This section of the program is under development.")
|
949 |
|
950 |
elif main_option == "Machine Learning":
|
951 |
st.info("This section of the program is under development.")
|
|
|
11 |
from sklearn.decomposition import PCA
|
12 |
import json
|
13 |
import math
|
14 |
+
from transformers import pipeline
|
15 |
+
|
16 |
+
# Zero-shot classification pipeline, created once at module level and used by
# categorize_sentences() below.
# NOTE(review): this runs whenever the module body executes; presumably
# Streamlit re-executes the script on each interaction, which would rebuild
# the pipeline every time — confirm, and consider caching the loaded model.
classifier = pipeline('zero-shot-classification', model='MoritzLaurer/deberta-v3-large-zeroshot-v1.1-all-33')
|
17 |
|
18 |
def sorting(df):
|
19 |
df.index = list(map(float, df.index))
|
|
|
542 |
|
543 |
return n
|
544 |
|
545 |
+
import pandas as pd
|
546 |
+
|
547 |
+
def categorize_sentences(prompt, df, Text_name):
    """Label every row of one text column using the zero-shot classifier.

    Args:
        prompt: Text handed to the classifier as its only candidate label.
        df: DataFrame holding the text column. It is modified in place: a
            ``labels`` column is added (or overwritten).
        Text_name: Name of the text column, or a list/tuple of column names
            (e.g. a multiselect result), in which case the first entry is
            used.

    Returns:
        The same ``df`` object, now carrying the ``labels`` column.

    Raises:
        ValueError: If ``Text_name`` is an empty list or tuple.
    """
    # The "Coding" branch passes the multiselect result, which is a list.
    # Indexing with a list yields a DataFrame (no ``tolist``), so reduce it
    # to a single column name before selecting.
    if isinstance(Text_name, (list, tuple)):
        if not Text_name:
            raise ValueError("Select a text column before categorizing.")
        Text_name = Text_name[0]

    # NOTE(review): with a single candidate label the top-ranked label is
    # presumably always ``prompt`` itself, so every classified row would get
    # the same value — confirm whether several candidate labels were intended.
    candidate_labels = [prompt]

    # Missing cells get no label instead of being sent to the model; other
    # values are converted to text so non-string cells are classified too.
    labels = [
        None
        if pd.isna(text)
        else classifier(str(text), candidate_labels=candidate_labels)['labels'][0]
        for text in df[Text_name].tolist()
    ]

    df['labels'] = labels
    return df
|
557 |
+
|
558 |
def upload_and_select_dataframe():
|
559 |
st.sidebar.title("File Upload")
|
560 |
uploaded_files = st.sidebar.file_uploader("Choose CSV or Excel files", type=["csv", "xlsx", "xls", "xlsb"], accept_multiple_files=True)
|
|
|
597 |
|
598 |
try:
|
599 |
try:
|
|
|
600 |
st.subheader("Data preview")
|
601 |
st.dataframe(df.head())
|
602 |
|
|
|
940 |
st.success("Z-Test analysis completed successfully.")
|
941 |
|
942 |
elif main_option == "Coding":
|
|
|
943 |
selected_list = st.sidebar.multiselect(
|
944 |
'Select the desired "Open Question" column.',
|
945 |
cols,
|
946 |
default=[]
|
947 |
)
|
948 |
df["id"] = df.index
|
|
|
949 |
prompt_user = st.text_input("Write a brief description of the selected column question.")
|
950 |
if st.button("Submit"):
|
951 |
|
952 |
+
df2 = categorize_sentences(prompt_user, df, selected_list)
|
|
|
|
|
|
|
|
|
953 |
|
954 |
st.subheader("Categorized data")
|
955 |
st.dataframe(df2)
|
|
|
|
|
956 |
|
957 |
elif main_option == "Machine Learning":
|
958 |
st.info("This section of the program is under development.")
|