Spaces:
Sleeping
Sleeping
File size: 1,538 Bytes
6ac6ea7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import gradio as gr
import requests
from bs4 import BeautifulSoup
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
# Fetch the NLTK data used by extract_keywords at import time.
# 'punkt' serves older NLTK releases; newer releases make word_tokenize load
# the 'punkt_tab' resource instead, so both are requested to keep the
# tokenizer working regardless of the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
def get_text_from_url(url):
    """Download a web page and return its visible text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text with markup removed, or an empty string when the
        page could not be fetched or parsed (invalid URL, network failure,
        timeout, or an HTTP error status).
    """
    try:
        response = requests.get(url, timeout=10)
        # Without this, a 404/500 error page would be mined for keywords
        # as if it were the real site content.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
    except Exception:
        # Deliberate best-effort: the caller reports "" as a fetch failure.
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        return ""

    # Script and style bodies are code, not page copy; drop them so their
    # identifiers do not show up as keywords.
    for tag in soup(['script', 'style', 'noscript']):
        tag.decompose()

    # A separator keeps words from adjacent elements from fusing together
    # (e.g. "<li>red</li><li>blue</li>" would otherwise yield "redblue").
    return soup.get_text(separator=' ')
def extract_keywords(text):
    """Return the set of distinct keywords found in *text*.

    The text is lower-cased and tokenized with NLTK; a token is kept when
    it is purely alphanumeric and is not an English stop word.
    """
    english_stopwords = set(stopwords.words('english'))
    return {
        token
        for token in word_tokenize(text.lower())
        if token.isalnum() and token not in english_stopwords
    }
def compare_keywords(url_a, url_b):
    """List keywords that appear on site B but not on site A.

    Both pages are fetched first; if either yields no text, an error
    message is returned instead. Otherwise the result is the keywords
    unique to B, sorted and joined one per line.
    """
    page_texts = [get_text_from_url(address) for address in (url_a, url_b)]
    if not all(page_texts):
        return "❌ Failed to fetch one or both websites. Please check URLs."

    own_terms, rival_terms = [extract_keywords(body) for body in page_texts]
    missing_terms = list(rival_terms - own_terms)
    missing_terms.sort()
    return "\n".join(missing_terms)
# Gradio form: two URL text boxes in, plain-text keyword list out.
interface = gr.Interface(
    title="🔍 Competitor Keyword Finder",
    description="Enter your website and a competitor's. This tool finds keywords the competitor uses but you don’t.",
    fn=compare_keywords,
    inputs=[
        gr.Textbox(label=caption)
        for caption in ("Your Website (A)", "Competitor Website (B)")
    ],
    outputs="text",
)

# Start the web server for the app.
interface.launch()
|