DrishtiSharma committed on
Commit 4ef520b · verified · 1 Parent(s): f57eb57

Delete interim/incorrect.py

Files changed (1)
  1. interim/incorrect.py +0 -149
interim/incorrect.py DELETED
@@ -1,149 +0,0 @@
- import os
- import streamlit as st
- import pandas as pd
- from datasets import load_dataset
- import time
- from langchain.agents.agent_types import AgentType
- from langchain_experimental.agents.agent_toolkits import create_csv_agent
- from langchain_openai import ChatOpenAI
- import ast
-
- # Streamlit App Title and Description
- st.title("Patent Data Analysis with LangChain")
- st.write("""This app allows you to analyze patent-related datasets interactively using LangChain agents. You can upload datasets, load from Hugging Face, or use a repository directory dataset.""")
-
- # Dataset loading without caching to support progress bar
- def load_huggingface_dataset(dataset_name):
-     # Initialize progress bar
-     progress_bar = st.progress(0)
-     try:
-         # Incrementally update progress
-         progress_bar.progress(10)
-         dataset = load_dataset(dataset_name, name="sample", split="train", trust_remote_code=True, uniform_split=True)
-         progress_bar.progress(50)
-         if hasattr(dataset, "to_pandas"):
-             df = dataset.to_pandas()
-         else:
-             df = pd.DataFrame(dataset)
-         progress_bar.progress(100)  # Final update to 100%
-         return df
-     except Exception as e:
-         progress_bar.progress(0)  # Reset progress bar on failure
-         raise e
-
- def load_uploaded_csv(uploaded_file):
-     # Initialize progress bar
-     progress_bar = st.progress(0)
-     try:
-         # Simulate progress
-         progress_bar.progress(10)
-         time.sleep(1)  # Simulate file processing delay
-         progress_bar.progress(50)
-         df = pd.read_csv(uploaded_file)
-         progress_bar.progress(100)  # Final update
-         return df
-     except Exception as e:
-         progress_bar.progress(0)  # Reset progress bar on failure
-         raise e
-
- # Dataset selection logic
- def load_dataset_into_session():
-     input_option = st.radio(
-         "Select Dataset Input:",
-         ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"], index=1, horizontal=True
-     )
-
-     # Option 1: Load dataset from the repo directory
-     if input_option == "Use Repo Directory Dataset":
-         file_path = "./source/test.csv"
-         if st.button("Load Dataset"):
-             try:
-                 with st.spinner("Loading dataset from the repo directory..."):
-                     st.session_state.df = pd.read_csv(file_path)
-                 st.success(f"File loaded successfully from '{file_path}'!")
-             except Exception as e:
-                 st.error(f"Error loading dataset from the repo directory: {e}")
-
-     # Option 2: Load dataset from Hugging Face
-     elif input_option == "Use Hugging Face Dataset":
-         dataset_name = st.text_input(
-             "Enter Hugging Face Dataset Name:", value="HUPD/hupd"
-         )
-         if st.button("Load Dataset"):
-             try:
-                 st.session_state.df = load_huggingface_dataset(dataset_name)
-                 st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
-             except Exception as e:
-                 st.error(f"Error loading Hugging Face dataset: {e}")
-
-     # Option 3: Upload CSV File
-     elif input_option == "Upload CSV File":
-         uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
-         if uploaded_file:
-             try:
-                 st.session_state.df = load_uploaded_csv(uploaded_file)
-                 st.success("File uploaded successfully!")
-             except Exception as e:
-                 st.error(f"Error reading uploaded file: {e}")
-
- # Load dataset into session
- load_dataset_into_session()
-
- if "df" in st.session_state:
-     df = st.session_state.df
-
-     # Display dataset metadata
-     st.write("### Dataset Metadata")
-     st.text(f"Number of Rows: {df.shape[0]}")
-     st.text(f"Number of Columns: {df.shape[1]}")
-     st.text(f"Column Names: {', '.join(df.columns)}")
-
-     # Display dataset preview
-     st.write("### Dataset Preview")
-     num_rows = st.slider("Select number of rows to display:", min_value=5, max_value=50, value=10)
-     st.dataframe(df.head(num_rows))
-
-     # Define LangChain CSV Agent
-     st.header("Run Queries on Patent Data")
-
-     with st.spinner("Setting up LangChain CSV Agent..."):
-         df.to_csv("patent_data.csv", index=False)
-
-         csv_agent = create_csv_agent(
-             ChatOpenAI(temperature=0, model="gpt-4", api_key=os.getenv("OPENAI_API_KEY")),
-             path=["patent_data.csv"],
-             verbose=True,
-             agent_type=AgentType.OPENAI_FUNCTIONS,
-             allow_dangerous_code=True
-         )
-
-     # Query Input and Execution
-     query = st.text_area("Enter your natural language query:", "How many patents are related to AI?")
-
-     if st.button("Run Query"):
-         with st.spinner("Running query..."):
-             try:
-                 # Split query execution into smaller chunks if needed
-                 max_rows = 1000
-                 total_rows = len(df)
-                 results = []
-
-                 for start in range(0, total_rows, max_rows):
-                     chunk = df.iloc[start:start + max_rows]
-                     chunk.to_csv("chunk_data.csv", index=False)
-                     partial_agent = create_csv_agent(
-                         ChatOpenAI(temperature=0, model="gpt-4", api_key=os.getenv("OPENAI_API_KEY")),
-                         path=["chunk_data.csv"],
-                         verbose=True,
-                         agent_type=AgentType.OPENAI_FUNCTIONS,
-                         allow_dangerous_code=True
-                     )
-                     result = partial_agent.invoke(query)
-                     results.append(result)
-
-                 st.success("Query executed successfully!")
-                 st.write("### Query Result:")
-                 st.write("\n".join(results))
-
-             except Exception as e:
-                 st.error(f"Error executing query: {e}")
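For reference, one concrete defect in the deleted script: create_csv_agent returns a LangChain AgentExecutor, and AgentExecutor.invoke() returns a dict (typically with "input" and "output" keys), so the final "\n".join(results) attempts to join dicts and raises a TypeError. A minimal, self-contained sketch of how the result text could be extracted, assuming that result shape (hypothetical; not part of this commit):

    # invoke() results are dicts, so join the "output" strings rather than the dicts.
    responses = [
        {"input": "How many patents are related to AI?", "output": "about 1,200"},  # example shape only
        {"input": "How many patents are related to AI?", "output": "about 300"},
    ]
    texts = [r.get("output", str(r)) for r in responses]
    print("\n".join(texts))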