DrishtiSharma committed on
Commit
981ebc4
·
verified ·
1 Parent(s): 6a4cef3

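Update app.py: cache dataset loading, show dataset metadata, add a preview row-count slider and a CSV download button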

Browse files
Files changed (1) hide show
  1. app.py +44 -11
app.py CHANGED
@@ -32,10 +32,27 @@ def initialize_llm(model_choice):
32
  model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=0, horizontal=True)
33
  llm = initialize_llm(model_choice)
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  def load_dataset_into_session():
36
  input_option = st.radio(
37
  "Select Dataset Input:",
38
- ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"], index=2, horizontal=True
39
  )
40
 
41
  # Option 1: Load dataset from the repo directory
@@ -43,7 +60,7 @@ def load_dataset_into_session():
43
  file_path = "./source/test.csv"
44
  if st.button("Load Dataset"):
45
  try:
46
- st.session_state.df = pd.read_csv(file_path)
47
  st.success(f"File loaded successfully from '{file_path}'!")
48
  except Exception as e:
49
  st.error(f"Error loading dataset from the repo directory: {e}")
@@ -55,11 +72,7 @@ def load_dataset_into_session():
55
  )
56
  if st.button("Load Dataset"):
57
  try:
58
- dataset = load_dataset(dataset_name, name="all", split="train", trust_remote_code=True, uniform_split=True)
59
- if hasattr(dataset, "to_pandas"):
60
- st.session_state.df = dataset.to_pandas()
61
- else:
62
- st.session_state.df = pd.DataFrame(dataset)
63
  st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
64
  except Exception as e:
65
  st.error(f"Error loading Hugging Face dataset: {e}")
@@ -69,7 +82,7 @@ def load_dataset_into_session():
69
  uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
70
  if uploaded_file:
71
  try:
72
- st.session_state.df = pd.read_csv(uploaded_file)
73
  st.success("File uploaded successfully!")
74
  except Exception as e:
75
  st.error(f"Error reading uploaded file: {e}")
@@ -79,12 +92,22 @@ load_dataset_into_session()
79
 
80
  if "df" in st.session_state and llm:
81
  df = st.session_state.df
 
 
 
 
 
 
 
 
82
  st.write("### Dataset Preview")
83
- st.dataframe(df.head(10))
 
84
 
85
  # Create SmartDataFrame
86
  chat_df = SmartDataframe(df, config={"llm": llm})
87
 
 
88
  st.write("### Chat with Your Patent Data")
89
  user_query = st.text_input("Enter your question about the patent data (e.g., 'Predict if the patent will be accepted.'):")
90
 
@@ -95,6 +118,7 @@ if "df" in st.session_state and llm:
95
  except Exception as e:
96
  st.error(f"Error: {e}")
97
 
 
98
  st.write("### Generate and View Graphs")
99
  plot_query = st.text_input("Enter a query to generate a graph (e.g., 'Plot the number of patents by filing year.'):")
100
 
@@ -112,7 +136,16 @@ if "df" in st.session_state and llm:
112
  except Exception as e:
113
  st.error(f"Error: {e}")
114
 
115
- # Instructions
 
 
 
 
 
 
 
 
 
116
  with st.sidebar:
117
  st.header("Instructions:")
118
  st.markdown(
@@ -123,10 +156,10 @@ with st.sidebar:
123
  " - Example: 'What is the primary classification of this patent?'\n"
124
  " - Example: 'Summarize the abstract of this patent.'\n"
125
  "4. Enter a query to generate and view graphs based on patent attributes.\n"
 
126
  )
127
  st.markdown("---")
128
  st.header("References:")
129
  st.markdown(
130
  "1. [Chat With Your CSV File With PandasAI - Prince Krampah](https://medium.com/aimonks/chat-with-your-csv-file-with-pandasai-22232a13c7b7)"
131
  )
132
-
 
32
  model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=0, horizontal=True)
33
  llm = initialize_llm(model_choice)
34
 
35
+ # Cache dataset loading
36
+ @st.cache_data
37
+ def load_repo_dataset(file_path):
38
+ return pd.read_csv(file_path)
39
+
40
+ @st.cache_data
41
+ def load_huggingface_dataset(dataset_name):
42
+ dataset = load_dataset(dataset_name, name="all", split="train", trust_remote_code=True, uniform_split=True)
43
+ if hasattr(dataset, "to_pandas"):
44
+ return dataset.to_pandas()
45
+ return pd.DataFrame(dataset)
46
+
47
+ @st.cache_data
48
+ def load_uploaded_csv(uploaded_file):
49
+ return pd.read_csv(uploaded_file)
50
+
51
+ # Dataset selection logic
52
  def load_dataset_into_session():
53
  input_option = st.radio(
54
  "Select Dataset Input:",
55
+ ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"], index=1, horizontal=True
56
  )
57
 
58
  # Option 1: Load dataset from the repo directory
 
60
  file_path = "./source/test.csv"
61
  if st.button("Load Dataset"):
62
  try:
63
+ st.session_state.df = load_repo_dataset(file_path)
64
  st.success(f"File loaded successfully from '{file_path}'!")
65
  except Exception as e:
66
  st.error(f"Error loading dataset from the repo directory: {e}")
 
72
  )
73
  if st.button("Load Dataset"):
74
  try:
75
+ st.session_state.df = load_huggingface_dataset(dataset_name)
 
 
 
 
76
  st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
77
  except Exception as e:
78
  st.error(f"Error loading Hugging Face dataset: {e}")
 
82
  uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
83
  if uploaded_file:
84
  try:
85
+ st.session_state.df = load_uploaded_csv(uploaded_file)
86
  st.success("File uploaded successfully!")
87
  except Exception as e:
88
  st.error(f"Error reading uploaded file: {e}")
 
92
 
93
  if "df" in st.session_state and llm:
94
  df = st.session_state.df
95
+
96
+ # Display dataset metadata
97
+ st.write("### Dataset Metadata")
98
+ st.text(f"Number of Rows: {df.shape[0]}")
99
+ st.text(f"Number of Columns: {df.shape[1]}")
100
+ st.text(f"Column Names: {', '.join(df.columns)}")
101
+
102
+ # Display dataset preview
103
  st.write("### Dataset Preview")
104
+ num_rows = st.slider("Select number of rows to display:", min_value=5, max_value=50, value=10)
105
+ st.dataframe(df.head(num_rows))
106
 
107
  # Create SmartDataFrame
108
  chat_df = SmartDataframe(df, config={"llm": llm})
109
 
110
+ # Chat functionality
111
  st.write("### Chat with Your Patent Data")
112
  user_query = st.text_input("Enter your question about the patent data (e.g., 'Predict if the patent will be accepted.'):")
113
 
 
118
  except Exception as e:
119
  st.error(f"Error: {e}")
120
 
121
+ # Plot generation functionality
122
  st.write("### Generate and View Graphs")
123
  plot_query = st.text_input("Enter a query to generate a graph (e.g., 'Plot the number of patents by filing year.'):")
124
 
 
136
  except Exception as e:
137
  st.error(f"Error: {e}")
138
 
139
+ # Download processed dataset
140
+ st.write("### Download Processed Dataset")
141
+ st.download_button(
142
+ label="Download Dataset as CSV",
143
+ data=df.to_csv(index=False),
144
+ file_name="processed_dataset.csv",
145
+ mime="text/csv"
146
+ )
147
+
148
+ # Sidebar instructions
149
  with st.sidebar:
150
  st.header("Instructions:")
151
  st.markdown(
 
156
  " - Example: 'What is the primary classification of this patent?'\n"
157
  " - Example: 'Summarize the abstract of this patent.'\n"
158
  "4. Enter a query to generate and view graphs based on patent attributes.\n"
159
+ "5. Download the processed dataset as a CSV file."
160
  )
161
  st.markdown("---")
162
  st.header("References:")
163
  st.markdown(
164
  "1. [Chat With Your CSV File With PandasAI - Prince Krampah](https://medium.com/aimonks/chat-with-your-csv-file-with-pandasai-22232a13c7b7)"
165
  )