DrishtiSharma committed on
Commit
cfbd391
·
verified ·
1 Parent(s): 8ae5bcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -1
app.py CHANGED
@@ -1,3 +1,130 @@
 
 
 
 
1
  from langchain.agents.agent_types import AgentType
2
  from langchain_experimental.agents.agent_toolkits import create_csv_agent
3
- from langchain_openai import ChatOpenAI, OpenAI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from datasets import load_dataset
4
+ import time
5
  from langchain.agents.agent_types import AgentType
6
  from langchain_experimental.agents.agent_toolkits import create_csv_agent
7
+ from langchain_openai import ChatOpenAI
8
+ import ast
9
+
10
+ # Streamlit App Title and Description
11
+ st.title("Patent Data Analysis with LangChain")
12
+ st.write("""This app allows you to analyze patent-related datasets interactively using LangChain agents. You can upload datasets, load from Hugging Face, or use a repository directory dataset.""")
13
+
14
+ # Dataset loading without caching to support progress bar
15
def load_huggingface_dataset(dataset_name):
    """Load the "sample" train split of a Hugging Face dataset as a DataFrame.

    Drives a Streamlit progress bar during the download. On failure the bar
    is reset to 0 and the original exception is re-raised unchanged.

    Args:
        dataset_name: Hugging Face dataset identifier (e.g. "HUPD/hupd").

    Returns:
        pandas.DataFrame holding the loaded split.

    Raises:
        Exception: whatever ``load_dataset`` / conversion raised.
    """
    progress_bar = st.progress(0)
    try:
        progress_bar.progress(10)
        # NOTE(review): config name "sample" is hard-coded — confirm every
        # dataset used with this app exposes a "sample" configuration.
        dataset = load_dataset(
            dataset_name, name="sample", split="train", trust_remote_code=True
        )
        progress_bar.progress(50)
        if hasattr(dataset, "to_pandas"):
            df = dataset.to_pandas()
        else:
            df = pd.DataFrame(dataset)
        progress_bar.progress(100)  # final update to 100%
        return df
    except Exception:
        progress_bar.progress(0)  # reset progress bar on failure
        raise  # bare raise keeps the original traceback intact
32
+
33
def load_uploaded_csv(uploaded_file):
    """Read an uploaded CSV file into a DataFrame with a progress bar.

    Args:
        uploaded_file: file-like object from ``st.file_uploader``.

    Returns:
        pandas.DataFrame parsed from the upload.

    Raises:
        Exception: whatever ``pd.read_csv`` raised; the bar is reset first.
    """
    progress_bar = st.progress(0)
    try:
        progress_bar.progress(10)
        time.sleep(1)  # artificial delay so the progress bar is visible
        progress_bar.progress(50)
        df = pd.read_csv(uploaded_file)
        progress_bar.progress(100)  # final update
        return df
    except Exception:
        progress_bar.progress(0)  # reset progress bar on failure
        raise  # bare raise keeps the original traceback intact
47
+
48
+ # Dataset selection logic
49
def load_dataset_into_session():
    """Render the dataset-source picker and store the result in ``st.session_state.df``.

    Three sources are offered: a CSV bundled in the repo, a Hugging Face
    dataset (default selection), or a user-uploaded CSV. Errors surface as
    Streamlit error messages rather than exceptions.
    """
    choice = st.radio(
        "Select Dataset Input:",
        ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"],
        index=1,
        horizontal=True,
    )

    if choice == "Use Repo Directory Dataset":
        # Source 1: CSV shipped inside the repository.
        repo_csv = "./source/test.csv"
        if st.button("Load Dataset"):
            try:
                with st.spinner("Loading dataset from the repo directory..."):
                    st.session_state.df = pd.read_csv(repo_csv)
                st.success(f"File loaded successfully from '{repo_csv}'!")
            except Exception as err:
                st.error(f"Error loading dataset from the repo directory: {err}")

    elif choice == "Use Hugging Face Dataset":
        # Source 2: remote dataset fetched from the Hugging Face hub.
        hf_name = st.text_input("Enter Hugging Face Dataset Name:", value="HUPD/hupd")
        if st.button("Load Dataset"):
            try:
                st.session_state.df = load_huggingface_dataset(hf_name)
                st.success(f"Hugging Face Dataset '{hf_name}' loaded successfully!")
            except Exception as err:
                st.error(f"Error loading Hugging Face dataset: {err}")

    elif choice == "Upload CSV File":
        # Source 3: CSV supplied through the browser.
        upload = st.file_uploader("Upload a CSV File:", type=["csv"])
        if upload:
            try:
                st.session_state.df = load_uploaded_csv(upload)
                st.success("File uploaded successfully!")
            except Exception as err:
                st.error(f"Error reading uploaded file: {err}")
87
+
88
# Render the dataset picker and populate st.session_state.df
load_dataset_into_session()

if "df" in st.session_state:
    df = st.session_state.df

    # Summary of the loaded frame
    st.write("### Dataset Metadata")
    st.text(f"Number of Rows: {df.shape[0]}")
    st.text(f"Number of Columns: {df.shape[1]}")
    st.text(f"Column Names: {', '.join(df.columns)}")

    # Interactive preview of the leading rows
    st.write("### Dataset Preview")
    num_rows = st.slider("Select number of rows to display:", min_value=5, max_value=50, value=10)
    st.dataframe(df.head(num_rows))

    # LangChain CSV agent setup — the agent reads the data from disk,
    # so the frame is persisted to a local CSV first.
    st.header("Run Queries on Patent Data")

    with st.spinner("Setting up LangChain CSV Agent..."):
        df.to_csv("patent_data.csv", index=False)

    csv_agent = create_csv_agent(
        ChatOpenAI(temperature=0, model="gpt-4", api_key=st.secrets["api_key"]),
        path=["patent_data.csv"],
        verbose=True,
        agent_type=AgentType.OPENAI_FUNCTIONS,
        allow_dangerous_code=True,
    )

    # Natural-language query box and execution
    query = st.text_area("Enter your natural language query:", "How many patents are related to AI?")

    if st.button("Run Query"):
        with st.spinner("Running query..."):
            try:
                answer = csv_agent.invoke(query)
                st.success("Query executed successfully!")
                st.write("### Query Result:")
                st.write(answer)
            except Exception as e:
                st.error(f"Error executing query: {e}")