AMKhakbaz committed on
Commit
9a77817
·
verified ·
1 Parent(s): acf1ebc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -4
app.py CHANGED
@@ -539,6 +539,61 @@ def upload_and_select_dataframe():
539
  st.sidebar.info("Please upload some files.")
540
  return None
541
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  empty_col1, main_col, empty_col2 = st.columns([1.6, 2.8, 1.6])
543
 
544
  with main_col:
@@ -550,8 +605,6 @@ with main_col:
550
  with col2:
551
  st.title("Chortke")
552
 
553
- import streamlit as st
554
-
555
  st.markdown('[Click to register a suggestion or comment](https://docs.google.com/forms/d/e/1FAIpQLScLyP7bBbqMfGdspjL7Ij64UZ6v2KjqjKNbm8gwEsgWsFs_Qg/viewform?usp=header)')
556
 
557
  df = upload_and_select_dataframe()
@@ -888,8 +941,21 @@ try:
888
  result = analyze_z_test(uploaded_file)
889
  if result:
890
  st.success("Z-Test analysis completed successfully.")
891
-
892
- elif main_option in ["Machine Learning", "Coding"]:
 
 
 
 
 
 
 
 
 
 
 
 
 
893
  st.info("This section of the program is under development.")
894
 
895
  except Exception as e:
 
539
  st.sidebar.info("Please upload some files.")
540
  return None
541
 
542
def categorize_responses(df, api_key, prompt=None):
    """
    Categorize free-text responses in a DataFrame using OpenAI's completion API.

    Each row's response is sent to the model on its own, prefixed with a
    generic categorization instruction (and the caller's description of the
    survey question, when given).

    Args:
        df (pd.DataFrame): Input DataFrame with columns 'id' and 'response'.
        api_key (str): OpenAI API key for accessing GPT-based models.
        prompt (str, optional): A question or explanation of the survey
            question for better categorization.

    Returns:
        pd.DataFrame: DataFrame with 'id' and 'category' columns, one row per
        input row. A row whose response is missing/blank, or whose API call
        failed, gets the category "Unknown".

    Raises:
        KeyError: If `df` lacks the 'id' or 'response' column.
    """
    import logging

    # Fail fast with a readable message instead of an opaque KeyError raised
    # halfway through (possibly after paid API calls were already made).
    missing = [col for col in ("id", "response") if col not in df.columns]
    if missing:
        raise KeyError(
            "categorize_responses requires columns 'id' and 'response'; "
            f"missing: {missing}"
        )

    logger = logging.getLogger(__name__)

    # Set OpenAI API key
    openai.api_key = api_key

    # Combine the question prompt with instructions for categorization
    base_prompt = "Categorize the following responses into conceptual categories."
    if prompt:
        base_prompt = f"{prompt}\n\n{base_prompt}"

    categories = []
    for response in df["response"]:
        # Nothing to categorize: do not spend an API call on NaN/None/blank
        # text (it would otherwise be sent as the literal string "nan").
        if pd.isna(response) or not str(response).strip():
            categories.append("Unknown")
            continue

        full_prompt = f"{base_prompt}\n\nResponse: {response}\n\nCategory:"

        try:
            # NOTE(review): `openai.Completion` with engine "text-davinci-003"
            # is the legacy completions interface -- confirm it is still
            # available for the installed `openai` package and account.
            completion = openai.Completion.create(
                engine="text-davinci-003",
                prompt=full_prompt,
                max_tokens=50,  # a category label is short
                temperature=0.7,
            )
            category = completion.choices[0].text.strip()
        except Exception:
            # Best effort: one failed call must not abort the whole batch.
            logger.exception("Error processing response: %s", response)
            category = "Unknown"

        categories.append(category)

    # Pair every input id with its category (row order is preserved).
    return pd.DataFrame({
        "id": df["id"],
        "category": categories,
    })
596
+
597
  empty_col1, main_col, empty_col2 = st.columns([1.6, 2.8, 1.6])
598
 
599
  with main_col:
 
605
  with col2:
606
  st.title("Chortke")
607
 
 
 
608
  st.markdown('[Click to register a suggestion or comment](https://docs.google.com/forms/d/e/1FAIpQLScLyP7bBbqMfGdspjL7Ij64UZ6v2KjqjKNbm8gwEsgWsFs_Qg/viewform?usp=header)')
609
 
610
  df = upload_and_select_dataframe()
 
941
  result = analyze_z_test(uploaded_file)
942
  if result:
943
  st.success("Z-Test analysis completed successfully.")
944
+
945
+ elif main_option == "Coding":
946
+ selected_list = st.sidebar.multiselect(
947
+ 'Select the desired "Open Question" column.',
948
+ cols,
949
+ default=[]
950
+ )
951
+ api_key = "sk-1a0127fbc52a4e50a93bd5cd18af3a85"
952
+ prompt_user = st.text_input("Write a brief description of the selected column question.")
953
+ if st.button("Submit"):
954
+ df2 = categorize_responses(df[selected_list], api_key, prompt=prompt_user)
955
+ st.subheader("Categorized data")
956
+ st.dataframe(df2)
957
+
958
+ elif main_option == "Machine Learning":
959
  st.info("This section of the program is under development.")
960
 
961
  except Exception as e: