# app.py — Flask front-end for an RSS article aggregator backed by a
# Chroma vector DB (articles are fetched/stored by rss_processor).
import os
import threading
from flask import Flask, render_template, request, jsonify, Response
from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
import logging
import json
# Flask application instance; routes below are registered on it.
app = Flask(__name__)
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Global flag to track background loading; written by the daemon thread
# in load_feeds_in_background() and polled via the /check_loading route.
loading_complete = False
def load_feeds_in_background():
    """Fetch RSS feeds, store the articles, and flip `loading_complete`.

    Runs as a daemon-thread target, so its return value is discarded; it
    communicates only through side effects (the vector DB written by
    process_and_store_articles, and the module-level flag polled by the
    /check_loading route).
    """
    global loading_complete
    try:
        logger.info("Starting background RSS feed fetch")
        articles = fetch_rss_feeds()
        logger.info("Fetched %d articles", len(articles))
        process_and_store_articles(articles)
        logger.info("Background feed processing complete")
    except Exception as e:
        # Best-effort background job: log the failure and carry on so the
        # frontend's loading indicator can still be cleared.
        logger.error("Error in background feed loading: %s", e)
    finally:
        # Always mark the fetch as finished, success or failure; the
        # frontend polls /check_loading and would otherwise spin forever.
        loading_complete = True
@app.route('/')
def index():
    """Render the home page.

    Kicks off a fresh background feed fetch on every page load, then
    immediately renders whatever articles are already stored in the
    vector DB, grouped by category with the 10 most recent per category.
    `loading=True` tells the template to poll /check_loading for the
    outcome of the new fetch.
    """
    global loading_complete
    loading_complete = False  # Reset so /check_loading reflects the new fetch
    # Daemon thread: dies with the process, never blocks the response.
    threading.Thread(target=load_feeds_in_background, daemon=True).start()
    try:
        # Retrieve all articles currently stored in the Chroma DB.
        all_docs = vector_db.get(include=['documents', 'metadatas'])
        if not all_docs.get('metadatas'):
            logger.info("No articles in DB yet")
            return render_template("index.html", categorized_articles={},
                                   has_articles=False, loading=True)

        # De-duplicate on (title, link) and flatten metadata for the template.
        enriched_articles = []
        seen_keys = set()
        for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
            if not meta:
                continue
            title = meta.get("title", "No Title")
            link = meta.get("link", "")
            key = f"{title}|{link}"
            if key in seen_keys:
                continue
            seen_keys.add(key)
            enriched_articles.append({
                "title": title,
                "link": link,
                "description": meta.get("original_description", "No Description"),
                "category": meta.get("category", "Uncategorized"),
                "published": meta.get("published", "Unknown Date"),
                "image": meta.get("image", "svg"),
            })

        # Sort newest-first once. String comparison assumes the stored
        # dates are ISO-like — TODO confirm the format in rss_processor.
        enriched_articles.sort(key=lambda a: a["published"], reverse=True)

        # Group by category. The input is already globally sorted, so each
        # category list stays newest-first and a simple slice keeps the
        # 10 most recent — no need to re-sort per category.
        categorized_articles = {}
        for article in enriched_articles:
            categorized_articles.setdefault(article["category"], []).append(article)
        for cat in categorized_articles:
            categorized_articles[cat] = categorized_articles[cat][:10]

        logger.info("Displaying articles: %d total",
                    sum(len(v) for v in categorized_articles.values()))
        return render_template("index.html", categorized_articles=categorized_articles,
                               has_articles=True, loading=True)
    except Exception as e:
        logger.error("Error retrieving articles: %s", e)
        return render_template("index.html", categorized_articles={},
                               has_articles=False, loading=True)
@app.route('/search', methods=['POST'])
def search():
    """Handle the search form: similarity-search the vector DB for the
    submitted query and render the matches grouped by category."""
    query = request.form.get('search')
    if not query:
        # Empty or missing query: render the page with no results.
        return render_template("index.html", categorized_articles={},
                               has_articles=False, loading=False)
    try:
        logger.info("Searching for: %s", query)
        results = vector_db.similarity_search(query, k=10)

        # De-duplicate on (title, link); same article shape as index().
        enriched_articles = []
        seen_keys = set()
        for doc in results:
            meta = doc.metadata
            title = meta.get("title", "No Title")
            link = meta.get("link", "")
            key = f"{title}|{link}"
            if key in seen_keys:
                continue
            seen_keys.add(key)
            enriched_articles.append({
                "title": title,
                "link": link,
                "description": meta.get("original_description", "No Description"),
                "category": meta.get("category", "Uncategorized"),
                "published": meta.get("published", "Unknown Date"),
                "image": meta.get("image", "svg"),
            })

        # Group results by category for the template.
        categorized_articles = {}
        for article in enriched_articles:
            categorized_articles.setdefault(article["category"], []).append(article)

        return render_template("index.html", categorized_articles=categorized_articles,
                               has_articles=bool(enriched_articles), loading=False)
    except Exception as e:
        logger.error("Search error: %s", e)
        return render_template("index.html", categorized_articles={},
                               has_articles=False, loading=False)
@app.route('/check_loading')
def check_loading():
    """Polling endpoint for the frontend.

    Returns 200 with {"status": "complete"} once the background feed
    fetch has finished, otherwise 202 with {"status": "loading"}.
    (Read-only access to the module-level flag needs no `global`.)
    """
    if loading_complete:
        return jsonify({"status": "complete"})
    return jsonify({"status": "loading"}), 202
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 — presumably the Hugging Face
    # Spaces convention; confirm against the deployment config.
    app.run(host="0.0.0.0", port=7860)