Update app.py
app.py
CHANGED
@@ -59,7 +59,62 @@ FILE_EMOJIS = {
     "mp3": "🎵",
 }
 
+
+
 def get_high_info_terms(text: str) -> list:
+    # Expanded stop words
+    stop_words = set([
+        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
+        'by', 'from', 'up', 'about', 'into', 'over', 'after', 'is', 'are', 'was', 'were',
+        'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
+        'should', 'could', 'might', 'must', 'shall', 'can', 'may', 'this', 'that', 'these',
+        'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'what', 'which', 'who',
+        'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most',
+        'other', 'some', 'such', 'than', 'too', 'very', 'just', 'there', 'as', 'if', 'while'
+    ])
+
+    # Key phrases tailored to your interests
+    key_phrases = [
+        'artificial intelligence', 'machine learning', 'deep learning', 'neural networks',
+        'natural language processing', 'healthcare systems', 'clinical medicine',
+        'genomics', 'biological systems', 'cognitive science', 'data visualization',
+        'wellness technology', 'robotics', 'medical imaging', 'semantic understanding',
+        'transformers', 'large language models', 'empirical studies', 'scientific research',
+        'quantum mechanics', 'biomedical engineering', 'computational biology'
+    ]
+
+    # Preserve key phrases and remove them from the text
+    preserved_phrases = []
+    lower_text = text.lower()
+    for phrase in key_phrases:
+        if phrase in lower_text:
+            preserved_phrases.append(phrase)
+            text = text.replace(phrase, '')
+            break  # Stop after the first matching key phrase
+
+    # Extract words and filter high-info terms
+    words = re.findall(r'\b\w+(?:-\w+)*\b', text)
+    high_info_words = [
+        word.lower() for word in words
+        if len(word) > 3
+        and word.lower() not in stop_words
+        and not word.isdigit()
+        and any(c.isalpha() for c in word)
+    ]
+
+    # Combine preserved phrases and filtered words, ensuring uniqueness
+    unique_terms = []
+    seen = set()
+    for term in preserved_phrases + high_info_words:
+        if term not in seen:
+            seen.add(term)
+            unique_terms.append(term)
+
+    # Return only the top 5 terms
+    return unique_terms[:5]
+
+
+def get_high_info_terms_old(text: str) -> list:
     stop_words = set([
         'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
         'by', 'from', 'up', 'about', 'into', 'over', 'after', 'is', 'are', 'was', 'were',
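
For reference, a minimal usage sketch of the new get_high_info_terms follows. It is not part of the commit, and it assumes app.py already imports re at module level, since the added code calls re.findall. The sample input and output are illustrative only:

# Illustrative call (hypothetical sample text, not from the repository)
sample = ("New results in machine learning and natural language processing "
          "suggest transformers improve clinical documentation workflows.")
terms = get_high_info_terms(sample)
# Only the first matching key phrase is preserved (the loop breaks after one
# match), so 'natural language processing' is split into individual words:
# ['machine learning', 'results', 'natural', 'language', 'processing']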