broadfield-dev committed on
Commit
9a3bd4a
·
verified ·
1 Parent(s): 105a52f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -26
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  import subprocess
3
- from flask import Flask, render_template, request, Response, jsonify
4
  from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
5
  import logging
6
  import time
@@ -24,28 +24,37 @@ def load_feeds_in_background():
24
 
25
  @app.route('/')
26
  def index():
27
- # Show existing articles immediately, even if empty
28
- stored_docs = vector_db.similarity_search("news", k=1000) # Try to retrieve all available articles
29
- logger.info(f"Found {len(stored_docs)} documents in vector DB")
30
- # Use a set to ensure unique articles by title, link, and description hash
31
- unique_articles = {}
32
- for doc in stored_docs:
33
- title = doc.metadata["title"]
34
- link = doc.metadata["link"]
35
- description = doc.metadata["original_description"]
36
- desc_hash = hashlib.md5(description.encode()).hexdigest()
37
- key = f"{title}|{link}|{desc_hash}"
38
- if key not in unique_articles:
39
- unique_articles[key] = {
40
- "title": title,
41
- "link": link,
42
- "description": description,
43
- "category": doc.metadata["category"],
44
- "published": doc.metadata["published"],
45
- "image": doc.metadata.get("image", "svg"),
46
- }
47
- enriched_articles = list(unique_articles.values())
48
- logger.info(f"Enriched {len(enriched_articles)} unique articles for display")
 
 
 
 
 
 
 
 
 
49
 
50
  # Start loading new feeds in the background
51
  subprocess.Popen(["python", "rss_processor.py", "load_feeds"])
@@ -57,14 +66,49 @@ def index():
57
  categorized_articles[cat] = []
58
  categorized_articles[cat].append(article)
59
 
60
- return render_template("index.html", categorized_articles=categorized_articles, loading_new_feeds=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  @app.route('/check_feeds', methods=['GET'])
63
  def check_feeds():
64
  try:
65
  # Check if vector DB has new or updated documents
66
- docs = vector_db.similarity_search("news", k=1)
67
- if docs:
68
  logger.info("Feeds loaded successfully in vector DB")
69
  return jsonify({"status": "loaded"})
70
  return jsonify({"status": "loading"}), 202
 
1
  import os
2
  import subprocess
3
+ from flask import Flask, render_template, request
4
  from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
5
  import logging
6
  import time
 
24
 
25
  @app.route('/')
26
  def index():
27
+ # Show all existing articles immediately, even if empty
28
+ try:
29
+ # Get all documents from Chroma DB
30
+ all_docs = vector_db.get(include=['documents', 'metadatas'])
31
+ stored_docs = [
32
+ Document(page_content=doc['documents'], metadata=doc['metadatas'])
33
+ for doc in all_docs['documents']
34
+ ]
35
+ logger.info(f"Found {len(stored_docs)} documents in vector DB")
36
+ # Use a set to ensure unique articles by title, link, and full description hash
37
+ unique_articles = {}
38
+ for doc in stored_docs:
39
+ title = doc.metadata["title"]
40
+ link = doc.metadata["link"]
41
+ description = doc.metadata["original_description"]
42
+ desc_hash = hashlib.md5(description.encode()).hexdigest()
43
+ key = f"{title}|{link}|{desc_hash}"
44
+ if key not in unique_articles:
45
+ unique_articles[key] = {
46
+ "title": title,
47
+ "link": link,
48
+ "description": description,
49
+ "category": doc.metadata["category"],
50
+ "published": doc.metadata["published"],
51
+ "image": doc.metadata.get("image", "svg"),
52
+ }
53
+ enriched_articles = list(unique_articles.values())
54
+ logger.info(f"Enriched {len(enriched_articles)} unique articles for display")
55
+ except Exception as e:
56
+ logger.error(f"Error retrieving documents from vector DB: {e}")
57
+ enriched_articles = [] # Fallback if DB is empty or inaccessible
58
 
59
  # Start loading new feeds in the background
60
  subprocess.Popen(["python", "rss_processor.py", "load_feeds"])
 
66
  categorized_articles[cat] = []
67
  categorized_articles[cat].append(article)
68
 
69
+ return render_template("index.html", categorized_articles=categorized_articles, loading_new_feeds=True, has_articles=bool(enriched_articles))
70
+
71
@app.route('/search', methods=['POST'])
def search():
    """Render the index page with articles matching the submitted search query.

    Reads the ``search`` field from the POSTed form, runs a similarity search
    (top 10 hits) against ``vector_db``, de-duplicates the hits and groups
    them by category for the ``index.html`` template.

    A missing or blank query, or a failure while searching or reading
    document metadata, renders the page with no articles instead of raising.
    """
    # Treat whitespace-only input the same as a missing query so a blank
    # search is never sent to the vector store.
    query = (request.form.get('search') or '').strip()
    if not query:
        return render_template("index.html", categorized_articles={}, loading_new_feeds=True, has_articles=False)

    logger.info(f"Processing search query: {query}")
    unique_search_articles = {}
    try:
        results = vector_db.similarity_search(query, k=10)
        for doc in results:
            title = doc.metadata["title"]
            link = doc.metadata["link"]
            description = doc.metadata["original_description"]
            # Key on title, link and a hash of the description so the same
            # article stored more than once is shown only once.
            desc_hash = hashlib.md5(description.encode()).hexdigest()
            key = f"{title}|{link}|{desc_hash}"
            if key not in unique_search_articles:
                unique_search_articles[key] = {
                    "title": title,
                    "link": link,
                    "description": description,
                    "category": doc.metadata["category"],
                    "published": doc.metadata["published"],
                    "image": doc.metadata.get("image", "svg"),
                }
    except Exception as e:
        # Same fallback as index(): a vector DB error or a document with
        # missing metadata degrades to an empty result page, not a 500.
        logger.error(f"Error processing search query: {e}")
        unique_search_articles = {}

    enriched_articles = list(unique_search_articles.values())
    logger.info(f"Search returned {len(enriched_articles)} unique results")

    # Group the de-duplicated articles by category for the template.
    categorized_articles = {}
    for article in enriched_articles:
        categorized_articles.setdefault(article["category"], []).append(article)

    return render_template("index.html", categorized_articles=categorized_articles, loading_new_feeds=True, has_articles=bool(enriched_articles))
105
 
106
  @app.route('/check_feeds', methods=['GET'])
107
  def check_feeds():
108
  try:
109
  # Check if vector DB has new or updated documents
110
+ all_docs = vector_db.get(include=['documents', 'metadatas'])
111
+ if all_docs['documents']:
112
  logger.info("Feeds loaded successfully in vector DB")
113
  return jsonify({"status": "loaded"})
114
  return jsonify({"status": "loading"}), 202