File size: 2,194 Bytes
d6afb45 7ee1b98 d6afb45 f586a70 56e3a34 d6afb45 b878468 d6afb45 df0b1b7 7313962 f586a70 4551e44 9fe62de f586a70 df0618d 07cfa54 f586a70 3381f0b 9fe62de 3381f0b c69bac0 817d95e 7313962 c69bac0 43954cf 9fe62de 4551e44 8eb0cc4 7313962 21a312e f586a70 289044f 4551e44 6005136 d6afb45 56e3a34 3381f0b f586a70 56e3a34 26f9624 792d4ad 4551e44 f586a70 4551e44 f586a70 176890c d6afb45 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import gradio as gr
import urllib.request
import requests
import bs4
import lxml
def find_all(url, q=None, num=None):
    """Fetch *url* and list the name of every tag found in the page.

    Args:
        url: Address of the page to download with ``urllib.request.urlopen``.
        q: Unused by this function.
        num: Unused by this function.

    Returns:
        A one-element list whose single item is the list of tag names in
        document order, e.g. ``[["html", "head", "title", "body"]]``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        ValueError: If *url* is not a usable URL.
    """
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as response:
        source = response.read()
    soup = bs4.BeautifulSoup(source, 'lxml')

    # Pages without a <title> make ``soup.title`` None; skip the title
    # diagnostics instead of crashing on attribute access.
    title = soup.title
    if title is not None:
        # title of the page
        print(title)
        # get attributes:
        print(title.name)
        # get values:
        print(title.string)
        # beginning navigation:
        print(title.parent.name)

    # Walk the tree once and reuse the result for both the log and the return.
    tag_names = [tag.name for tag in soup.find_all()]
    print(tag_names)
    return [tag_names]
def find_it(url, q=None, num=None):
    """Fetch *url* and describe every ``<q>`` element found in the page.

    Args:
        url: Address of the page to download with ``urllib.request.urlopen``.
        q: Tag name to search for (e.g. ``"p"``). It is also used as the key
            holding the element's string in each result record.
        num: Optional attribute name to read from each matched element.
            When empty or ``None`` no attribute is looked up and the
            ``"additional"`` field is ``""``.

    Returns:
        A tuple ``(out, out_l)``. ``out`` holds one single-item list per
        matched element, containing a dict with the keys *q*,
        ``"additional"``, ``"parent"``, ``"previous"``, ``"first-child"``
        and ``"content"``. ``out_l`` holds each matched element's
        ``.string`` value.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        ValueError: If *url* is not a usable URL.
    """
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as response:
        source = response.read()
    soup = bs4.BeautifulSoup(source, 'lxml')

    out = []
    out_l = []
    for p in soup.find_all(f'{q}'):
        # Only read an attribute when one was requested; the previous
        # ``num != ""`` test let the default None through and queried an
        # attribute literally named "None".
        z = p.get(f'{num}') if num else ""

        # ``previous_element`` is None when nothing was parsed before this
        # element, so guard before iterating it.
        previous = p.previous_element
        out.append([{
            q: p.string,
            "additional": z,
            "parent": p.parent.name,
            "previous": list(previous) if previous is not None else [],
            # Despite the key name, this lists the names of all direct
            # children (None for text nodes).
            "first-child": [b.name for b in p.children],
            "content": p,
        }])
        out_l.append(p.string)
        print(p.parent.name)

    # Log every hyperlink target on the page.
    for link in soup.find_all('a'):
        print(link.get('href'))
    return out, out_l
def find_it2(url):
    """Fetch *url* with ``requests`` and return the text of every link.

    Args:
        url: Address of the page to download.

    Returns:
        On success, a string starting with ``"URL Links:"`` followed by the
        text of each ``<a>`` element, one per line. If the request fails
        (connection error, invalid URL, timeout, or an HTTP error status),
        the caught ``requests.RequestException`` is printed and returned
        instead of being raised.
    """
    try:
        # ``requests.get`` rejects unknown keyword arguments, so only the
        # URL is passed; the timeout keeps a stalled server from hanging
        # the caller indefinitely.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(e)
        return e

    soup = bs4.BeautifulSoup(response.content, 'lxml')
    # The header is a prefix; the link texts are newline-separated.
    return 'URL Links:\n' + '\n'.join(a.text for a in soup.find_all('a'))
# Gradio UI: three text inputs, two action buttons, and three output widgets.
with gr.Blocks() as app:
    # Inputs are handed to the handlers in the order (url, q, num).
    with gr.Row():
        inp = gr.Textbox()
        q = gr.Textbox(value="p")
        num = gr.Textbox()

    # Action buttons.
    with gr.Row():
        all_btn = gr.Button("Load")
        find_btn = gr.Button("Find")

    # Output widgets: tag listing, per-element records, element strings.
    with gr.Row():
        rawp = gr.JSON()
        outp = gr.JSON()
        outl = gr.Textbox()

    # "Load" lists every tag on the page; "Find" describes the matching tags.
    all_btn.click(fn=find_all, inputs=[inp, q, num], outputs=[rawp])
    find_btn.click(fn=find_it, inputs=[inp, q, num], outputs=[outp, outl])

app.launch()
|