That1BrainCell committed on
Commit
b174994
·
verified ·
1 Parent(s): 3c9f97a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -9
app.py CHANGED
@@ -6,12 +6,21 @@ import numpy as np
6
  from io import StringIO
7
  import sys
8
  import time
 
9
 
10
  # File Imports
11
  from embedding import get_embeddings # Ensure this file/module is available
12
  from preprocess import filtering # Ensure this file/module is available
13
  from search import *
14
 
 
 
 
 
 
 
 
 
15
  # Cosine Similarity Function
16
  def cosine_similarity(vec1, vec2):
17
  vec1 = np.array(vec1)
@@ -42,16 +51,21 @@ class StreamCapture:
42
 
43
  # Main Function
44
  def score(main_product, main_url, product_count, link_count, search, logger, log_area):
 
 
 
45
  data = {}
46
  similar_products = extract_similar_products(main_product)[:product_count]
 
47
 
 
48
  if search == 'All':
49
 
50
  def process_product(product, search_function, main_product):
51
  search_result = search_function(product)
52
  return filtering(search_result, main_product, product, link_count)
53
-
54
-
55
  search_functions = {
56
  'google': search_google,
57
  'duckduckgo': search_duckduckgo,
@@ -91,16 +105,47 @@ def score(main_product, main_url, product_count, link_count, search, logger, log
91
  elif search == 'wikipedia':
92
  data[product] = filtering(search_wikipedia(product), main_product, product, link_count)
93
 
 
 
94
  logger.write("\n\nFiltered Links ------------------>\n")
95
  logger.write(str(data) + "\n")
96
  log_area.text(logger.getvalue())
97
 
 
 
 
98
  logger.write("\n\nCreating Main product Embeddings ---------->\n")
99
- main_result, main_embedding = get_embeddings(main_url,tag_option)
100
- log_area.text(logger.getvalue())
101
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  print("main",main_embedding)
103
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  cosine_sim_scores = []
105
 
106
  logger.write("\n\nCreating Similar product Embeddings ---------->\n")
@@ -116,9 +161,15 @@ def score(main_product, main_url, product_count, link_count, search, logger, log
116
  cosine_sim_scores.append((product,'No Product links Found Increase Number of Links or Change Search Source',None,None))
117
 
118
  else:
119
- for link in data[product][:link_count]:
 
 
 
 
 
 
 
120
 
121
- similar_result, similar_embedding = get_embeddings(link,tag_option)
122
  log_area.text(logger.getvalue())
123
 
124
  print(similar_embedding)
@@ -126,10 +177,24 @@ def score(main_product, main_url, product_count, link_count, search, logger, log
126
  score = cosine_similarity(main_embedding[i], similar_embedding[i])
127
  cosine_sim_scores.append((product, link, i, score))
128
  log_area.text(logger.getvalue())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  logger.write("--------------- DONE -----------------\n")
131
  log_area.text(logger.getvalue())
132
- return cosine_sim_scores, main_result
133
 
134
  # Streamlit Interface
135
  st.title("Check Infringement")
@@ -155,7 +220,7 @@ if st.button('Check for Infringement'):
155
 
156
  with st.spinner('Processing...'):
157
  with StreamCapture() as logger:
158
- cosine_sim_scores, main_result = score(main_product, main_url,product_count, link_count, search_method, logger, log_output)
159
 
160
  st.success('Processing complete!')
161
 
 
6
  from io import StringIO
7
  import sys
8
  import time
9
+ from pymongo import MongoClient
10
 
11
  # File Imports
12
  from embedding import get_embeddings # Ensure this file/module is available
13
  from preprocess import filtering # Ensure this file/module is available
14
  from search import *
15
 
16
+
17
+ # Mongo Connections
18
+ srv_connection_uri = "mongodb+srv://adityasm1410:<password>@<cluster-host>.mongodb.net/?retryWrites=true&w=majority&appName=Patseer"  # NOTE(review): the credential segment was garbled by the page's email-obfuscation filter; the original diff hard-codes a username/password in source — load the URI from an environment variable or secrets store instead.
19
+
20
+ client = MongoClient(srv_connection_uri)
21
+ db = client['embeddings']
22
+ collection = db['data']
23
+
24
  # Cosine Similarity Function
25
  def cosine_similarity(vec1, vec2):
26
  vec1 = np.array(vec1)
 
51
 
52
  # Main Function
53
  def score(main_product, main_url, product_count, link_count, search, logger, log_area):
54
+
55
+ existing_products_urls = set(collection.distinct('url'))
56
+
57
  data = {}
58
  similar_products = extract_similar_products(main_product)[:product_count]
59
+
60
 
61
+ # Normal Filtering + Embedding -----------------------------------------------
62
  if search == 'All':
63
 
64
  def process_product(product, search_function, main_product):
65
  search_result = search_function(product)
66
  return filtering(search_result, main_product, product, link_count)
67
+
68
+
69
  search_functions = {
70
  'google': search_google,
71
  'duckduckgo': search_duckduckgo,
 
105
  elif search == 'wikipedia':
106
  data[product] = filtering(search_wikipedia(product), main_product, product, link_count)
107
 
108
+
109
+ # Filtered Link -----------------------------------------
110
  logger.write("\n\nFiltered Links ------------------>\n")
111
  logger.write(str(data) + "\n")
112
  log_area.text(logger.getvalue())
113
 
114
+
115
+
116
+ # Main product Embeddings ---------------------------------
117
  logger.write("\n\nCreating Main product Embeddings ---------->\n")
 
 
118
 
119
+ # Check main product in MongoDB
120
+ if main_url in existing_products_urls:
121
+ saved_data = collection.find_one({'url': main_url})
122
+
123
+ if tag_option not in saved_data:
124
+ main_result , main_embedding = get_embeddings(main_url,tag_option)
125
+ else:
126
+ main_embedding = saved_data[tag_option]
127
+ else:
128
+ main_result , main_embedding = get_embeddings(main_url,tag_option)
129
+
130
+ log_area.text(logger.getvalue())
131
  print("main",main_embedding)
132
+
133
+ update_doc = {
134
+ '$set': {
135
+ 'product_name': main_product,
136
+ 'url': main_url,
137
+ tag_option: main_embedding
138
+ }
139
+ }
140
+
141
+ collection.update_one(
142
+ {'url': main_url},
143
+ update_doc,
144
+ upsert=True
145
+ )
146
+
147
+
148
+ #Similar Products Check
149
  cosine_sim_scores = []
150
 
151
  logger.write("\n\nCreating Similar product Embeddings ---------->\n")
 
161
  cosine_sim_scores.append((product,'No Product links Found Increase Number of Links or Change Search Source',None,None))
162
 
163
  else:
164
+ for link,present in data[product][:link_count]:
165
+
166
+ saved_data = collection.find_one({'url': link})
167
+
168
+ if present and (tag_option in saved_data):
169
+ similar_embedding = saved_data[tag_option]
170
+ else:
171
+ similar_result, similar_embedding = get_embeddings(link,tag_option)
172
 
 
173
  log_area.text(logger.getvalue())
174
 
175
  print(similar_embedding)
 
177
  score = cosine_similarity(main_embedding[i], similar_embedding[i])
178
  cosine_sim_scores.append((product, link, i, score))
179
  log_area.text(logger.getvalue())
180
+
181
+ update_doc = {
182
+ '$set': {
183
+ 'product_name': product,
184
+ 'url': link,
185
+ tag_option: similar_embedding
186
+ }
187
+ }
188
+
189
+ collection.update_one(
190
+ {'url': link},
191
+ update_doc,
192
+ upsert=True
193
+ )
194
 
195
  logger.write("--------------- DONE -----------------\n")
196
  log_area.text(logger.getvalue())
197
+ return cosine_sim_scores
198
 
199
  # Streamlit Interface
200
  st.title("Check Infringement")
 
220
 
221
  with st.spinner('Processing...'):
222
  with StreamCapture() as logger:
223
+ cosine_sim_scores = score(main_product, main_url,product_count, link_count, search_method, logger, log_output)
224
 
225
  st.success('Processing complete!')
226