Create app.py
app.py ADDED
@@ -0,0 +1,181 @@
# LlamaIndex (gpt_index) readers/indices and LangChain agent tooling
from gpt_index import GPTListIndex, SimpleWebPageReader, BeautifulSoupWebReader, GPTSimpleVectorIndex, LLMPredictor
from IPython.display import Markdown, display
from langchain.agents import load_tools, Tool, initialize_agent, ZeroShotAgent, AgentExecutor
from langchain.llms import OpenAI
from langchain import LLMChain, PromptTemplate

# UI, data handling, embeddings, clustering and plotting
import gradio as gr
import pandas as pd
import openai
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from openai.embeddings_utils import get_embedding

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import datetime
from datetime import datetime, date, time, timedelta

# Load the cached results from the previous run; the six sections are
# delimited by the literal token 'SEPERATOR' and seed the UI placeholders.
with open('lastradartext.txt', 'r') as file:
    data_old = file.read()
value1, value2, value3, value4, value5, value6 = data_old.split('SEPERATOR')

def getstuff(openapikey):
    # Ask each consulting firm's homepage for its top technologies.
    mainlistofanswers = []
    for each in ['www.mckinsey.com', 'www.bcg.com', 'www.bain.com', 'www.accenture.com']:
        print(each)
        Input_URL = "https://" + each
        documents = SimpleWebPageReader(html_to_text=True).load_data([Input_URL])
        index = GPTSimpleVectorIndex(documents)
        print('Came here 0')

        # Wrap the per-site index query as a LangChain tool.
        def querying_db(query: str):
            response = index.query(query)
            return response

        tools = [
            Tool(
                name="QueryingDB",
                func=querying_db,
                description="This function takes a query string as input and returns the most relevant answer from the documentation as output"
            )]
        llm = OpenAI(temperature=0, openai_api_key=openapikey)
        print('Came here 1')
        query_string = "what are the top technologies mentioned?"

        agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
        result = agent.run(query_string)
        mainlistofanswers.append(result)

    print('Came here 2')
    # Strip boilerplate phrasing from each answer and split it into individual
    # technologies, remembering which company each one came from.
    newlistoftech = []
    newlistofcompanies = []
    for i in range(len(mainlistofanswers)):
        each = mainlistofanswers[i]
        each = each.replace("The top technologies mentioned are ", "").replace("The technologies mentioned are ", "")
        each = each.replace(":", "").replace(" and ", " ").replace("and ", " ").replace(" and", " ").replace(" the ", " ").replace("the ", " ").replace(" the", " ").strip()
        for item in each.split(","):
            newlistoftech.append(item.strip())
            newlistofcompanies.append(i)
    tech_df = pd.DataFrame()
    tech_df['tech'] = newlistoftech
    tech_df['company'] = newlistofcompanies
    print('Came here 3')

    # Embed each technology with OpenAI's ada-002 embeddings.
    embedding_model = "text-embedding-ada-002"
    embedding_encoding = "cl100k_base"  # this is the encoding for text-embedding-ada-002
    max_tokens = 8000  # the maximum for text-embedding-ada-002 is 8191

    tech_df["embedding"] = tech_df['tech'].apply(lambda x: get_embedding(x, engine=embedding_model))

    print('Came here 4')
    dateforfilesave = datetime.today().strftime("%d-%m-%Y")

    # Convert the embeddings to a 2-D array of floats
    matrix = np.array(tech_df['embedding'].to_list())

    # Create a t-SNE model and project the embeddings down to two dimensions
    tsne = TSNE(n_components=2, perplexity=15, random_state=42, init='random', learning_rate=200)
    vis_dims = tsne.fit_transform(matrix)

    # Cluster the embeddings with k-means
    n_clusters = 5
    kmeans = KMeans(n_clusters=n_clusters, init="k-means++", random_state=42)
    kmeans.fit(matrix)
    labels = kmeans.labels_
    tech_df["Cluster"] = labels
    print('Came here 5')

    # Scatter-plot the t-SNE projection, coloured by cluster, and label each point
    colors = ["red", "darkorange", "darkgrey", "blue", "darkgreen"]
    x = [x for x, y in vis_dims]
    y = [y for x, y in vis_dims]
    color_indices = tech_df['Cluster'].values

    colormap = matplotlib.colors.ListedColormap(colors)
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.scatter(x, y, c=color_indices, cmap=colormap, alpha=1, s=100)

    for i, txt in enumerate(tech_df['tech'].tolist()):
        ax.annotate(txt, (x[i], y[i]), fontsize=14)

    plt.title("Top Technologies as of " + dateforfilesave, fontsize=20)
    plt.axis('off')
    plt.savefig('lasttechradar.png', bbox_inches='tight')
    print('Came here 6')
    # Ask the completion model for a short descriptive paragraph about the full list.
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt='I will give you a list of top technologies. Write a paragraph on it.\n\nTechnologies:' + ",".join(tech_df['tech'].tolist()),
        temperature=0,
        max_tokens=1024,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    print(response["choices"][0]["text"].replace("\n", ""))
    desc_tmp = response["choices"][0]["text"].replace("\n", "")
    print('Came here 7')
    # Ask the model for a short theme for each cluster and collect the text.
    clusterstextlist = []
    for i in range(n_clusters):
        print(f"Cluster {i} Theme:", end=" ")

        reviews = "\n".join(tech_df[tech_df['Cluster'] == i]['tech'].tolist())
        response = openai.Completion.create(
            engine="text-davinci-003",
            prompt=f'What do the following technologies have in common?\n\nTechnologies:\n"""\n{reviews}\n"""\n\nTheme:',
            temperature=0,
            max_tokens=64,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        print(response["choices"][0]["text"].replace("\n", ""))

        print(reviews)
        clusterstextlist.append("Cluster " + str(i) + "\nTheme:" + response["choices"][0]["text"].replace("\n", "") + '\n' + reviews + '\n' + "-" * 10 + '\n\n')

    # Cache everything to disk so the UI has placeholders on the next start,
    # then return the image path and the six text outputs.
    textlist = [mainlistofanswers[0], "SEPERATOR", mainlistofanswers[1], "SEPERATOR", mainlistofanswers[2], "SEPERATOR", mainlistofanswers[3], "SEPERATOR", desc_tmp, "SEPERATOR", "".join(clusterstextlist)]
    with open('lastradartext.txt', 'w') as f:
        for line in textlist:
            f.write(f"{line}\n")
    print('Came here 8')
    with open('lastradartext.txt', 'r') as file:
        data_old = file.read()
    value1, value2, value3, value4, value5, value6 = data_old.split('SEPERATOR')
    return 'lasttechradar.png', mainlistofanswers[0], mainlistofanswers[1], mainlistofanswers[2], mainlistofanswers[3], desc_tmp, "".join(clusterstextlist)

with gr.Blocks() as demo:
    gr.Markdown("<h1><center>ChatGPT Technology Radar</center></h1>")
    gr.Markdown(
        """What are the top technologies as of now? Let us query top consulting websites & use ChatGPT to understand. This demonstrates 'Chain of Thought' thinking using ChatGPT. It also shows how to get real-time data and marry it with ChatGPT capabilities.\n LangChain & GPT-Index are both used."""
    )

    with gr.Row() as row:
        textboxopenapi = gr.Textbox(placeholder="Enter OpenAI API Key...", lines=1, label='OpenAI API Key')
        btn = gr.Button("Refresh")
    with gr.Row() as row:
        with gr.Column():
            output_image = gr.components.Image(label="Tech Radar", value='lasttechradar.png')
        with gr.Column():
            outputMck = gr.Textbox(placeholder=value1, lines=1, label='McKinsey View')
            outputBcg = gr.Textbox(placeholder=value2, lines=1, label='BCG View')
            outputBain = gr.Textbox(placeholder=value3, lines=1, label='Bain View')
            outputAcc = gr.Textbox(placeholder=value4, lines=1, label='Accenture View')
    with gr.Row() as row:
        with gr.Column():
            outputdesc = gr.Textbox(placeholder=value5, lines=1, label='Description')
        with gr.Column():
            outputclusters = gr.Textbox(placeholder=value6, lines=1, label='Clusters')

    # The Refresh button runs the full pipeline and fills all seven outputs.
    btn.click(getstuff, inputs=[textboxopenapi], outputs=[output_image, outputMck, outputBcg, outputBain, outputAcc, outputdesc, outputclusters])

demo.launch(debug=True)
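
Note that app.py reads lastradartext.txt and points the Tech Radar image at lasttechradar.png at import time, before the first Refresh click, so it assumes both files already exist in the Space repository. A minimal seeding sketch under that assumption (the filename seed_placeholders.py and the placeholder texts are illustrative and not part of this commit):

# seed_placeholders.py -- hypothetical one-off helper, not part of app.py.
# Creates the two files app.py expects at import time: lastradartext.txt,
# whose six sections are delimited by the literal token 'SEPERATOR', and
# lasttechradar.png, the image shown before the first refresh.
import matplotlib
matplotlib.use("Agg")  # headless backend for a server environment
import matplotlib.pyplot as plt

sections = [
    "No McKinsey answer yet",
    "No BCG answer yet",
    "No Bain answer yet",
    "No Accenture answer yet",
    "No description yet",
    "No clusters yet",
]
with open("lastradartext.txt", "w") as f:
    f.write("SEPERATOR".join(sections))

fig, ax = plt.subplots(figsize=(12, 8))
ax.text(0.5, 0.5, "Tech radar not generated yet", ha="center", va="center", fontsize=20)
ax.axis("off")
fig.savefig("lasttechradar.png", bbox_inches="tight")

The code also assumes a pre-1.0 openai SDK (openai.embeddings_utils and openai.Completion.create were removed in later releases) along with gpt_index, langchain, gradio, pandas, scikit-learn, matplotlib and numpy being available in the Space environment; exact package names and pins in requirements.txt are not part of this commit.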