hanoch.rahimi@gmail committed on
Commit
a280e4d
·
1 Parent(s): eab6925

added openai summarization and visual design

Browse files
Files changed (4) hide show
  1. app.py +21 -11
  2. requirements.txt +1 -0
  3. semsearch.pyproj +1 -0
  4. utils.py +33 -0
app.py CHANGED
@@ -10,6 +10,7 @@ import streamlit as st
10
  from transformers import AutoTokenizer
11
  from sentence_transformers import SentenceTransformer
12
 
 
13
 
14
  PINECONE_KEY = st.secrets["PINECONE_API_KEY"] # app.pinecone.io
15
  OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"] # app.pinecone.io
@@ -48,23 +49,22 @@ retriever, tokenizer = init_models()
48
  def card(name, description, score, data_type, region, country):
49
  return st.markdown(f"""
50
  <div class="container-fluid">
51
- <div class="row align-items-start">
52
  <div class="col-md-8 col-sm-8">
53
- <b>{name}</b>
54
- <br>
55
- <span style="color: #808080;">
56
- <small>{description}</small>
57
- [<b>Score: </b>{score}]
58
  </span>
59
  </div>
60
  <div class="col-md-1 col-sm-1">
61
- <small>{data_type}</metadata>
62
  </div>
63
  <div class="col-md-1 col-sm-1">
64
- <small>{region}</metadata>
65
  </div>
66
  <div class="col-md-1 col-sm-1">
67
- <small>{country}</metadata>
 
68
  </div>
69
  </div>
70
  </div>
@@ -126,7 +126,13 @@ def run_query(query, prompt, scrape_boost, top_k , regions, countries):
126
  if 'type' in match['metadata'] and match['metadata']['type']=='description-webcontent':
127
  score = score * scrape_boost
128
  answer = {'score': score}
129
- answer["name"] = match["metadata"]['company_name'].strip('_description')
 
 
 
 
 
 
130
  answer["description"] = match["metadata"]['description'] if "description" in match['metadata'] else ""
131
  answer["metadata"] = match["metadata"]
132
  results.append(answer)
@@ -150,9 +156,13 @@ def run_query(query, prompt, scrape_boost, top_k , regions, countries):
150
 
151
  sorted_result = sorted(results, key=lambda x: x['score'], reverse=True)
152
 
 
 
 
 
153
  for r in sorted_result:
154
  company_name = r["name"]
155
- description = r["description"].replace(company_name, f"<mark>{company_name}</mark>")
156
  score = round(r["score"], 4)
157
  data_type = r["metadata"]["type"] if "type" in r["metadata"] else ""
158
  region = r["metadata"]["region"]
 
10
  from transformers import AutoTokenizer
11
  from sentence_transformers import SentenceTransformer
12
 
13
+ from utils import get_companies_data
14
 
15
  PINECONE_KEY = st.secrets["PINECONE_API_KEY"] # app.pinecone.io
16
  OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"] # app.pinecone.io
 
49
  def card(name, description, score, data_type, region, country):
50
  return st.markdown(f"""
51
  <div class="container-fluid">
52
+ <div class="row align-items-start" style="padding-bottom:10px;">
53
  <div class="col-md-8 col-sm-8">
54
+ <b>{name}.</b>
55
+ <span style="">
56
+ {description}
 
 
57
  </span>
58
  </div>
59
  <div class="col-md-1 col-sm-1">
60
+ <span>{region}</span>
61
  </div>
62
  <div class="col-md-1 col-sm-1">
63
+ <span>{country}</span>
64
  </div>
65
  <div class="col-md-1 col-sm-1">
66
+ <span>{data_type}</span>
67
+ <span>[Score: {score}</span>
68
  </div>
69
  </div>
70
  </div>
 
126
  if 'type' in match['metadata'] and match['metadata']['type']=='description-webcontent':
127
  score = score * scrape_boost
128
  answer = {'score': score}
129
+ if match['id'].endswith("_description"):
130
+ answer['id'] = match['id'][:-12]
131
+ elif match['id'].endswith("_webcontent"):
132
+ answer['id'] = match['id'][:-11]
133
+ else:
134
+ answer['id'] = match['id']
135
+ answer["name"] = match["metadata"]['company_name']
136
  answer["description"] = match["metadata"]['description'] if "description" in match['metadata'] else ""
137
  answer["metadata"] = match["metadata"]
138
  results.append(answer)
 
156
 
157
  sorted_result = sorted(results, key=lambda x: x['score'], reverse=True)
158
 
159
+
160
+ st.markdown("<h2>Related companies</h2>", unsafe_allow_html=True)
161
+ #df = get_companies_data([r['id'] for r in results])
162
+
163
  for r in sorted_result:
164
  company_name = r["name"]
165
+ description = r["description"] #.replace(company_name, f"<mark>{company_name}</mark>")
166
  score = round(r["score"], 4)
167
  data_type = r["metadata"]["type"] if "type" in r["metadata"] else ""
168
  region = r["metadata"]["region"]
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
  langchain
2
  openai
3
  pinecone-client
 
4
  sentence_transformers
5
  transformers
 
1
  langchain
2
  openai
3
  pinecone-client
4
+ psycopg2-binary==2.8.6
5
  sentence_transformers
6
  transformers
semsearch.pyproj CHANGED
@@ -35,6 +35,7 @@
35
  </ItemGroup>
36
  <ItemGroup>
37
  <Compile Include="app.py" />
 
38
  </ItemGroup>
39
  <ItemGroup>
40
  <Folder Include=".streamlit" />
 
35
  </ItemGroup>
36
  <ItemGroup>
37
  <Compile Include="app.py" />
38
+ <Compile Include="utils.py" />
39
  </ItemGroup>
40
  <ItemGroup>
41
  <Folder Include=".streamlit" />
utils.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import psycopg2
3
+ from psycopg2 import extras
4
+ import streamlit as st
5
+
6
def create_connection():
    """Open a new psycopg2 connection to the ``raized-central`` database.

    Host, user and password are read from the Streamlit secrets
    ``RAIZED_DB_HOST``, ``RAIZED_DB_USER`` and ``RAIZED_DB_PASSWORD``;
    the database name and the port (5432) are fixed in code.

    Returns:
        The connection object returned by ``psycopg2.connect``. The caller
        owns it and is responsible for closing it.
    """
    secrets = st.secrets
    # Secrets are looked up in the same order as before (host, user,
    # password) so a missing key surfaces identically.
    connect_kwargs = {
        "host": secrets["RAIZED_DB_HOST"],
        "user": secrets["RAIZED_DB_USER"],
        "password": secrets["RAIZED_DB_PASSWORD"],
        "database": 'raized-central',
        "port": 5432,
    }
    return psycopg2.connect(**connect_kwargs)
18
+
19
def get_companies_data(company_ids=None):
    """Fetch rows for the given companies as a pandas DataFrame.

    Queries ``central.v_companies_latest_mat_fe`` for every row whose
    ``company_id`` is in ``company_ids``.

    Args:
        company_ids: Iterable of company ids to look up. ``None`` or an
            empty iterable yields an empty DataFrame without touching the
            database.

    Returns:
        A ``pandas.DataFrame`` with the columns ``company_id``,
        ``company_name``, ``description_long``, ``country_name``,
        ``region``, ``mapped_cat``, ``website_url`` and
        ``next_funding_tag_inv``; one row per matching record.
    """
    # Must mirror the SELECT list below; used to shape the empty result.
    empty_columns = [
        "company_id",
        "company_name",
        "description_long",
        "country_name",
        "region",
        "mapped_cat",
        "website_url",
        "next_funding_tag_inv",
    ]

    ids = tuple(company_ids) if company_ids is not None else ()
    if not ids:
        # psycopg2 adapts an empty tuple to "()", and "IN ()" is a syntax
        # error in PostgreSQL, so short-circuit instead of querying.
        return pd.DataFrame(columns=empty_columns)

    cmd = '''
        SELECT company_id, company_name, description_long, country_name, region, mapped_cat, website_url, next_funding_tag_inv
        FROM central.v_companies_latest_mat_fe
        WHERE company_id in %(company_ids)s
    '''
    params = {"company_ids": ids}

    con = create_connection()
    try:
        # "with con" only commits/rolls back the transaction in psycopg2;
        # the connection itself is released in the finally clause below.
        with con, con.cursor(cursor_factory=extras.NamedTupleCursor) as cur:
            cur.execute(cmd, params)
            column_names = [desc[0] for desc in cur.description]
            data = cur.fetchall()
    finally:
        con.close()

    # Create a DataFrame from the results and column names
    return pd.DataFrame(data, columns=column_names)