hra committed on
Commit
3f1766c
·
1 Parent(s): b3275aa

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +181 -0
app.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gpt_index import GPTListIndex, SimpleWebPageReader, BeautifulSoupWebReader, GPTSimpleVectorIndex,LLMPredictor
2
+ from IPython.display import Markdown, display
3
+ from langchain.agents import load_tools, Tool, initialize_agent
4
+ from langchain.llms import OpenAI
5
+ from langchain.agents import ZeroShotAgent, Tool, AgentExecutor
6
+ from langchain.agents import initialize_agent, Tool
7
+ from langchain import LLMChain
8
+ from langchain import PromptTemplate
9
+ import gradio as gr
10
+ import pandas as pd
11
+ import openai
12
+ from sklearn.manifold import TSNE
13
+ from sklearn.cluster import KMeans
14
+ from openai.embeddings_utils import get_embedding
15
+
16
+ import numpy as np
17
+ import matplotlib.pyplot as plt
18
+ import matplotlib
19
+ import datetime
20
+ from datetime import datetime, date, time, timedelta
21
+
22
def load_last_radar_text(path='lastradartext.txt', separator='SEPERATOR', expected_parts=6):
    """Read the cached radar text and split it into its sections.

    The cache file holds the sections of the previous run joined by
    ``separator``. A missing file (e.g. a fresh deployment that has never
    been refreshed) or a file with too few sections must not stop the app
    from starting, so absent sections are returned as empty strings.

    Args:
        path: Location of the cache file.
        separator: Marker string placed between sections in the file.
        expected_parts: Number of sections the caller wants back.

    Returns:
        A ``(raw_text, sections)`` tuple. ``sections`` always has exactly
        ``expected_parts`` whitespace-stripped strings; if the file contains
        more separators than expected, the surplus stays in the last section.
    """
    try:
        with open(path, 'r') as file:
            raw_text = file.read()
    except FileNotFoundError:
        raw_text = ''

    # maxsplit caps the result at `expected_parts`, padding covers the rest.
    sections = [part.strip() for part in raw_text.split(separator, expected_parts - 1)]
    sections += [''] * (expected_parts - len(sections))
    return raw_text, sections


# Previous run's output, shown in the UI until the user presses "Refresh".
data_old, _last_sections = load_last_radar_text()
value1, value2, value3, value4, value5, value6 = _last_sections
25
+
26
def _ask_site_for_technologies(site, openapikey):
    """Index one website's landing page and ask an agent for its top technologies."""
    print(site)
    documents = SimpleWebPageReader(html_to_text=True).load_data(["https://" + site])
    index = GPTSimpleVectorIndex(documents)
    print('Came here 0')

    def querying_db(query: str):
        # Hand the agent plain text; index.query presumably returns a
        # response object rather than a str (confirm against gpt_index docs).
        return str(index.query(query))

    tools = [
        Tool(
            name="QueryingDB",
            func=querying_db,
            description="This function takes a query string as input and returns the most relevant answer from the documentation as output",
        )
    ]
    llm = OpenAI(temperature=0, openai_api_key=openapikey)
    print('Came here 1')
    agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
    return agent.run("what are the top technologies mentioned?")


def _clean_technology_answer(answer):
    """Split one agent answer into a list of non-empty technology names."""
    import re  # local import: `re` is not among the file's visible imports

    text = answer.replace("The top technologies mentioned are ", "")
    text = text.replace("The technologies mentioned are ", "")
    text = text.replace(":", "")
    # Remove "and"/"the" only as whole words; plain substring replacement
    # would also eat the tail of words such as "Demand" or "breathe".
    text = re.sub(r"\b(?:and|the)\b", " ", text)

    names = []
    for item in text.split(","):
        name = " ".join(item.split()).rstrip(".")
        if name:  # empty strings cannot be embedded, so drop them here
            names.append(name)
    return names


def _complete(prompt, max_tokens):
    """Run a deterministic text-davinci-003 completion and return it on one line."""
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=0,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return response["choices"][0]["text"].replace("\n", "")


def _save_radar_plot(tech_df, vis_dims, title, path):
    """Draw the labelled 2-D scatter of technologies and save it to ``path``."""
    colors = ["red", "darkorange", "darkgrey", "blue", "darkgreen"]
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        ax.scatter(
            [point[0] for point in vis_dims],
            [point[1] for point in vis_dims],
            c=[colors[label] for label in tech_df['Cluster']],
            alpha=1,
            s=100,
        )
        for (x, y), txt in zip(vis_dims, tech_df['tech']):
            ax.annotate(txt, (x, y), fontsize=14)
        ax.set_title(title, fontsize=20)
        ax.axis('off')
        fig.savefig(path, bbox_inches='tight')
    finally:
        # Each refresh creates a new figure; close it so they do not pile up.
        plt.close(fig)


def getstuff(openapikey):
    """Rebuild the technology radar from four consulting websites.

    For each site an agent is asked which technologies are mentioned; the
    answers are split into technology names, embedded, projected to 2-D with
    t-SNE, clustered with k-means, plotted to ``lasttechradar.png`` and
    summarised with completions. The text results are also written to
    ``lastradartext.txt`` (sections separated by ``SEPERATOR``).

    Args:
        openapikey: OpenAI API key entered by the user.

    Returns:
        A 7-tuple: image path, the four per-site answers (McKinsey, BCG,
        Bain, Accenture order), the overall description, and the
        concatenated per-cluster text.

    Raises:
        ValueError: If no API key is given or no technology names could be
            extracted from the answers.
    """
    import os  # local import: `os` is not among the file's visible imports

    if not openapikey or not openapikey.strip():
        raise ValueError("An OpenAI API key is required to refresh the technology radar.")
    openapikey = openapikey.strip()
    # The embedding/completion calls below go through the `openai` module
    # directly, so the key must be set there and not only on the agent's LLM.
    openai.api_key = openapikey
    # Presumably gpt_index reads the key from the environment when building
    # the index (confirm against gpt_index docs).
    os.environ["OPENAI_API_KEY"] = openapikey

    sites = ['www.mckinsey.com', 'www.bcg.com', 'www.bain.com', 'www.accenture.com']
    mainlistofanswers = [_ask_site_for_technologies(site, openapikey) for site in sites]

    print('Came here 2')
    newlistoftech = []
    newlistofcompanies = []
    for company_index, answer in enumerate(mainlistofanswers):
        for name in _clean_technology_answer(answer):
            newlistoftech.append(name)
            newlistofcompanies.append(company_index)
    if not newlistoftech:
        raise ValueError("No technologies could be extracted from the website answers.")

    tech_df = pd.DataFrame({'tech': newlistoftech, 'company': newlistofcompanies})
    print('Came here 3')
    embedding_model = "text-embedding-ada-002"
    tech_df["embedding"] = tech_df['tech'].apply(lambda x: get_embedding(x, engine=embedding_model))

    print('Came here 4')
    dateforfilesave = datetime.today().strftime("%d-%m-%Y")

    matrix = np.array(tech_df['embedding'].to_list())
    n_samples = len(matrix)

    # t-SNE requires perplexity < n_samples; keep 15 when there is enough data.
    if n_samples > 1:
        tsne = TSNE(
            n_components=2,
            perplexity=min(15, n_samples - 1),
            random_state=42,
            init='random',
            learning_rate=200,
        )
        vis_dims = tsne.fit_transform(matrix)
    else:
        vis_dims = np.zeros((n_samples, 2))

    # k-means cannot form more clusters than there are samples.
    n_clusters = min(5, n_samples)
    kmeans = KMeans(n_clusters=n_clusters, init="k-means++", random_state=42)
    kmeans.fit(matrix)
    tech_df["Cluster"] = kmeans.labels_
    print('Came here 5')

    _save_radar_plot(tech_df, vis_dims, "Top Technologies as of " + dateforfilesave, 'lasttechradar.png')
    print('Came here 6')

    desc_tmp = _complete(
        f'I will give you top technologies list. Write a paragraph on it.\n\nTechnologies:' + ",".join(tech_df['tech'].tolist()),
        1024,
    )
    print(desc_tmp)
    print('Came here 7')

    clusterstextlist = []
    for i in range(n_clusters):
        print(f"Cluster {i} Theme:", end=" ")
        reviews = "\n".join(tech_df[tech_df['Cluster'] == i]['tech'].tolist())
        # NOTE(review): the prompt still says "Customer reviews" although the
        # items are technology names; left unchanged to keep model output stable.
        theme = _complete(
            f'What do the following technologies have in common?\n\nCustomer reviews:\n"""\n{reviews}\n"""\n\nTheme:',
            64,
        )
        print(theme)
        print(reviews)
        clusterstextlist.append("Cluster " + str(i) + "\nTheme:" + theme + '\n' + reviews + '\n' + "-" * 10 + '\n\n')
    clusters_text = "".join(clusterstextlist)

    # Persist the sections, one per line, with a SEPERATOR line between them.
    sections = [*mainlistofanswers, desc_tmp, clusters_text]
    textlist = []
    for section in sections:
        if textlist:
            textlist.append("SEPERATOR")
        textlist.append(section)
    with open('lastradartext.txt', 'w') as f:
        f.writelines(f"{line}\n" for line in textlist)
    print('Came here 8')

    return ('lasttechradar.png', *mainlistofanswers, desc_tmp, clusters_text)
153
+
154
import os  # stdlib; used only for the start-up existence check below

# The radar image is produced by a refresh; on a first start it may not exist
# yet, so only pre-load it when the file is actually there.
_initial_radar_image = 'lasttechradar.png' if os.path.exists('lasttechradar.png') else None

# Page layout: key input + refresh button, then the radar image next to the
# four per-site answers, then the overall description and the cluster text.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>ChatGPT Technology Radar</center></h1>")
    gr.Markdown(
        """What are the top technologies as of now? Let us query top consulting websites & use ChatGPT to understand. This demonstrates 'Chain of Thought' thinking using ChatGPT. It also shows how to get real time data and marry it with ChatGPT capabilities.\n LangChain & GPT-Index are both used."""
    )

    with gr.Row():
        # type="password" keeps the secret key from being displayed in clear text.
        textboxopenapi = gr.Textbox(
            placeholder="Enter OpenAI API Key...",
            lines=1,
            label='OpenAI API Key',
            type="password",
        )
        btn = gr.Button("Refresh")
    with gr.Row():
        with gr.Column():
            output_image = gr.components.Image(label="Tech Radar", value=_initial_radar_image)
        with gr.Column():
            # Placeholders show the cached results of the previous refresh.
            outputMck = gr.Textbox(placeholder=value1, lines=1, label='McKinsey View')
            outputBcg = gr.Textbox(placeholder=value2, lines=1, label='BCG View')
            outputBain = gr.Textbox(placeholder=value3, lines=1, label='Bain View')
            outputAcc = gr.Textbox(placeholder=value4, lines=1, label='Accenture View')
    with gr.Row():
        with gr.Column():
            outputdesc = gr.Textbox(placeholder=value5, lines=1, label='Description')
        with gr.Column():
            outputclusters = gr.Textbox(placeholder=value6, lines=1, label='Clusters')

    # The output order must match the tuple returned by getstuff.
    btn.click(
        getstuff,
        inputs=[textboxopenapi],
        outputs=[output_image, outputMck, outputBcg, outputBain, outputAcc, outputdesc, outputclusters],
    )


demo.launch(debug=True)