Create app.py
app.py ADDED
@@ -0,0 +1,181 @@
# LlamaIndex (gpt_index) readers/indices and LangChain agent tooling
from gpt_index import GPTListIndex, SimpleWebPageReader, BeautifulSoupWebReader, GPTSimpleVectorIndex, LLMPredictor
from IPython.display import Markdown, display
from langchain.agents import load_tools, Tool, initialize_agent, ZeroShotAgent, AgentExecutor
from langchain.llms import OpenAI
from langchain import LLMChain, PromptTemplate

# UI, data handling, embeddings, clustering and plotting
import gradio as gr
import pandas as pd
import openai
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from openai.embeddings_utils import get_embedding

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import datetime
from datetime import datetime, date, time, timedelta

# Load the cached results from the previous run; the six sections are
# delimited by the literal token 'SEPERATOR' and seed the UI placeholders.
with open('lastradartext.txt', 'r') as file:
    data_old = file.read()
value1, value2, value3, value4, value5, value6 = data_old.split('SEPERATOR')

def getstuff(openapikey):
    # Ask each consulting firm's homepage for its top technologies.
    mainlistofanswers = []
    for each in ['www.mckinsey.com', 'www.bcg.com', 'www.bain.com', 'www.accenture.com']:
        print(each)
        Input_URL = "https://" + each
        documents = SimpleWebPageReader(html_to_text=True).load_data([Input_URL])
        index = GPTSimpleVectorIndex(documents)
        print('Came here 0')

        # Wrap the per-site index query as a LangChain tool.
        def querying_db(query: str):
            response = index.query(query)
            return response

        tools = [
            Tool(
                name="QueryingDB",
                func=querying_db,
                description="This function takes a query string as input and returns the most relevant answer from the documentation as output"
            )]
        llm = OpenAI(temperature=0, openai_api_key=openapikey)
        print('Came here 1')
        query_string = "what are the top technologies mentioned?"

        agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
        result = agent.run(query_string)
        mainlistofanswers.append(result)

    print('Came here 2')
    # Strip boilerplate phrasing from each answer and split it into individual
    # technologies, remembering which company each one came from.
    newlistoftech = []
    newlistofcompanies = []
    for i in range(len(mainlistofanswers)):
        each = mainlistofanswers[i]
        each = each.replace("The top technologies mentioned are ", "").replace("The technologies mentioned are ", "")
        each = each.replace(":", "").replace(" and ", " ").replace("and ", " ").replace(" and", " ").replace(" the ", " ").replace("the ", " ").replace(" the", " ").strip()
        for item in each.split(","):
            newlistoftech.append(item.strip())
            newlistofcompanies.append(i)
    tech_df = pd.DataFrame()
    tech_df['tech'] = newlistoftech
    tech_df['company'] = newlistofcompanies
    print('Came here 3')

    # Embed each technology with OpenAI's ada-002 embeddings.
    embedding_model = "text-embedding-ada-002"
    embedding_encoding = "cl100k_base"  # this is the encoding for text-embedding-ada-002
    max_tokens = 8000  # the maximum for text-embedding-ada-002 is 8191

    tech_df["embedding"] = tech_df['tech'].apply(lambda x: get_embedding(x, engine=embedding_model))

    print('Came here 4')
    dateforfilesave = datetime.today().strftime("%d-%m-%Y")

    # Convert the embeddings to a 2-D array of floats
    matrix = np.array(tech_df['embedding'].to_list())

    # Create a t-SNE model and project the embeddings down to two dimensions
    tsne = TSNE(n_components=2, perplexity=15, random_state=42, init='random', learning_rate=200)
    vis_dims = tsne.fit_transform(matrix)

    # Cluster the embeddings with k-means
    n_clusters = 5
    kmeans = KMeans(n_clusters=n_clusters, init="k-means++", random_state=42)
    kmeans.fit(matrix)
    labels = kmeans.labels_
    tech_df["Cluster"] = labels
    print('Came here 5')

    # Scatter-plot the t-SNE projection, coloured by cluster, and label each point
    colors = ["red", "darkorange", "darkgrey", "blue", "darkgreen"]
    x = [x for x, y in vis_dims]
    y = [y for x, y in vis_dims]
    color_indices = tech_df['Cluster'].values

    colormap = matplotlib.colors.ListedColormap(colors)
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.scatter(x, y, c=color_indices, cmap=colormap, alpha=1, s=100)

    for i, txt in enumerate(tech_df['tech'].tolist()):
        ax.annotate(txt, (x[i], y[i]), fontsize=14)

    plt.title("Top Technologies as of " + dateforfilesave, fontsize=20)
    plt.axis('off')
    plt.savefig('lasttechradar.png', bbox_inches='tight')
    print('Came here 6')
    # Ask the completion model for a short descriptive paragraph about the full list.
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt='I will give you a list of top technologies. Write a paragraph on it.\n\nTechnologies:' + ",".join(tech_df['tech'].tolist()),
        temperature=0,
        max_tokens=1024,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    print(response["choices"][0]["text"].replace("\n", ""))
    desc_tmp = response["choices"][0]["text"].replace("\n", "")
    print('Came here 7')
    # Ask the model for a short theme for each cluster and collect the text.
    clusterstextlist = []
    for i in range(n_clusters):
        print(f"Cluster {i} Theme:", end=" ")

        reviews = "\n".join(tech_df[tech_df['Cluster'] == i]['tech'].tolist())
        response = openai.Completion.create(
            engine="text-davinci-003",
            prompt=f'What do the following technologies have in common?\n\nTechnologies:\n"""\n{reviews}\n"""\n\nTheme:',
            temperature=0,
            max_tokens=64,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        print(response["choices"][0]["text"].replace("\n", ""))

        print(reviews)
        clusterstextlist.append("Cluster " + str(i) + "\nTheme:" + response["choices"][0]["text"].replace("\n", "") + '\n' + reviews + '\n' + "-" * 10 + '\n\n')

    # Cache everything to disk so the UI has placeholders on the next start,
    # then return the image path and the six text outputs.
    textlist = [mainlistofanswers[0], "SEPERATOR", mainlistofanswers[1], "SEPERATOR", mainlistofanswers[2], "SEPERATOR", mainlistofanswers[3], "SEPERATOR", desc_tmp, "SEPERATOR", "".join(clusterstextlist)]
    with open('lastradartext.txt', 'w') as f:
        for line in textlist:
            f.write(f"{line}\n")
    print('Came here 8')
    with open('lastradartext.txt', 'r') as file:
        data_old = file.read()
    value1, value2, value3, value4, value5, value6 = data_old.split('SEPERATOR')
    return 'lasttechradar.png', mainlistofanswers[0], mainlistofanswers[1], mainlistofanswers[2], mainlistofanswers[3], desc_tmp, "".join(clusterstextlist)

with gr.Blocks() as demo:
    gr.Markdown("<h1><center>ChatGPT Technology Radar</center></h1>")
    gr.Markdown(
        """What are the top technologies as of now? Let us query top consulting websites & use ChatGPT to understand. This demonstrates 'Chain of Thought' thinking using ChatGPT. It also shows how to get real-time data and marry it with ChatGPT capabilities.\n LangChain & GPT-Index are both used."""
    )

    with gr.Row() as row:
        textboxopenapi = gr.Textbox(placeholder="Enter OpenAI API Key...", lines=1, label='OpenAI API Key')
        btn = gr.Button("Refresh")
    with gr.Row() as row:
        with gr.Column():
            output_image = gr.components.Image(label="Tech Radar", value='lasttechradar.png')
        with gr.Column():
            outputMck = gr.Textbox(placeholder=value1, lines=1, label='McKinsey View')
            outputBcg = gr.Textbox(placeholder=value2, lines=1, label='BCG View')
            outputBain = gr.Textbox(placeholder=value3, lines=1, label='Bain View')
            outputAcc = gr.Textbox(placeholder=value4, lines=1, label='Accenture View')
    with gr.Row() as row:
        with gr.Column():
            outputdesc = gr.Textbox(placeholder=value5, lines=1, label='Description')
        with gr.Column():
            outputclusters = gr.Textbox(placeholder=value6, lines=1, label='Clusters')

    # The Refresh button runs the full pipeline and fills all seven outputs.
    btn.click(getstuff, inputs=[textboxopenapi], outputs=[output_image, outputMck, outputBcg, outputBain, outputAcc, outputdesc, outputclusters])

demo.launch(debug=True)
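
Note that app.py reads lastradartext.txt and points the Tech Radar image at lasttechradar.png at import time, before the first Refresh click, so it assumes both files already exist in the Space repository. A minimal seeding sketch under that assumption (the filename seed_placeholders.py and the placeholder texts are illustrative and not part of this commit):

# seed_placeholders.py -- hypothetical one-off helper, not part of app.py.
# Creates the two files app.py expects at import time: lastradartext.txt,
# whose six sections are delimited by the literal token 'SEPERATOR', and
# lasttechradar.png, the image shown before the first refresh.
import matplotlib
matplotlib.use("Agg")  # headless backend for a server environment
import matplotlib.pyplot as plt

sections = [
    "No McKinsey answer yet",
    "No BCG answer yet",
    "No Bain answer yet",
    "No Accenture answer yet",
    "No description yet",
    "No clusters yet",
]
with open("lastradartext.txt", "w") as f:
    f.write("SEPERATOR".join(sections))

fig, ax = plt.subplots(figsize=(12, 8))
ax.text(0.5, 0.5, "Tech radar not generated yet", ha="center", va="center", fontsize=20)
ax.axis("off")
fig.savefig("lasttechradar.png", bbox_inches="tight")

The code also assumes a pre-1.0 openai SDK (openai.embeddings_utils and openai.Completion.create were removed in later releases) along with gpt_index, langchain, gradio, pandas, scikit-learn, matplotlib and numpy being available in the Space environment; exact package names and pins in requirements.txt are not part of this commit.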