Spaces:

Dunevhhhh
/

Test2

Build error

App Files Files Community

Dunevhhhh committed on Feb 19

Commit

f256f51

verified ·

1 Parent(s): 69174a5

Create filter_news.py

Browse files

Files changed (1) hide show

filter_news.py +47 -0

filter_news.py ADDED Viewed

	@@ -0,0 +1,47 @@

+# filter_news.py (erweitert)
+import re
+from config import CATEGORY_FILTERS
+import logging
+logger = logging.getLogger(__name__)
class NewsFilter:
    """Filter articles against word lists and rank them by category keywords.

    Articles are subscriptable records (for example dicts) that are read
    through the keys ``title``, ``description`` and ``category``. An article
    is dropped when its text contains a blacklisted word, unless it also
    contains a whitelisted word. The remaining articles are ordered by how
    many of the keywords configured for their category occur in their text.

    Matching is case-insensitive, whole-word and literal: every word is
    escaped before it is placed into a regular expression, so entries such
    as ``c++`` or ``a.b`` are matched as written.
    """

    def __init__(self):
        """Load the per-category keywords and the black/white word lists."""
        self.keyword_filters = CATEGORY_FILTERS
        self.blacklist = self.load_wordlist("blacklist.txt")
        self.whitelist = self.load_wordlist("whitelist.txt")

    def load_wordlist(self, filename):
        """Read ``config/<filename>`` into a list of lower-cased entries.

        Each non-blank line becomes one entry with surrounding whitespace
        removed. The file is decoded as UTF-8 so the result does not depend
        on the platform's default encoding.

        Args:
            filename: Name of the word-list file inside the ``config`` folder.

        Returns:
            The entries in file order, or an empty list if the file is absent.
        """
        path = f"config/{filename}"
        try:
            with open(path, encoding="utf-8") as f:
                return [line.strip().lower() for line in f if line.strip()]
        except FileNotFoundError:
            # Best effort: a missing list disables that list, but say so
            # instead of failing silently.
            logger.warning("Word list %s not found; treating it as empty", path)
            return []

    def filter_articles(self, articles):
        """Drop blacklisted articles (unless whitelisted) and rank the rest.

        Args:
            articles: Iterable of article records.

        Returns:
            A new list with the surviving articles, best keyword match first.
        """
        kept = [
            article
            for article in articles
            if not self.is_blacklisted(article) or self.is_whitelisted(article)
        ]
        return self.prioritize_articles(kept)

    def is_blacklisted(self, article):
        """Return True if the article text contains any blacklist entry."""
        return self._matches_any(self._article_text(article), self.blacklist)

    def is_whitelisted(self, article):
        """Return True if the article text contains any whitelist entry."""
        return self._matches_any(self._article_text(article), self.whitelist)

    def prioritize_articles(self, articles):
        """Sort articles by the number of matching category keywords.

        The score of an article is the count of keywords registered for its
        category in ``keyword_filters`` that occur in its text; each keyword
        counts at most once. Articles whose category is missing or has no
        keywords score 0.

        Args:
            articles: Iterable of article records.

        Returns:
            A new list ordered by descending score. Articles with equal
            scores keep their input order.
        """
        def keyword_hits(article):
            category = self._field(article, "category", None)
            keywords = self.keyword_filters.get(category, [])
            text = self._article_text(article)
            return sum(self._contains_word(text, keyword) for keyword in keywords)

        # reverse=True keeps the sort stable, so ties stay in input order.
        return sorted(articles, key=keyword_hits, reverse=True)

    @staticmethod
    def _field(article, key, default=""):
        """Return ``article[key]``, or *default* if the key is missing or None."""
        try:
            value = article[key]
        except KeyError:
            return default
        return default if value is None else value

    def _article_text(self, article):
        """Return the lower-cased title and description joined by a space."""
        title = self._field(article, "title")
        description = self._field(article, "description")
        return f"{title} {description}".lower()

    def _matches_any(self, text, words):
        """Return True if any entry of *words* occurs in *text* as a whole word."""
        return any(self._contains_word(text, word) for word in words)

    @staticmethod
    def _contains_word(text, word):
        """Return True if *word* occurs literally in lower-cased *text* as a whole word."""
        if not word:
            return False
        # Lookarounds instead of \b: identical for ordinary words, but they
        # also work when the entry starts or ends with a non-word character
        # (e.g. "c++"), where \b could never match.
        pattern = rf"(?<!\w){re.escape(word.lower())}(?!\w)"
        return re.search(pattern, text) is not None