Learning Python: BeautifulSoup

domingo, 11 de agosto de 2024

from urllib.request import urlopen

from bs4 import BeautifulSoup

# The URL of the page to scrape
url = 'https://py4e-data.dr-chuck.net/comments_42.html'

# Fetch the HTML content from the URL. Using the response as a context
# manager closes the underlying HTTP connection as soon as the body has
# been read, instead of leaving it open until garbage collection.
with urlopen(url) as response:
    html = response.read()

# Parse the raw bytes with BeautifulSoup using Python's built-in parser
soup = BeautifulSoup(html, 'html.parser')

# Print the parsed document re-indented, one tag per line
print(soup.prettify())

Beautiful Soup documentation: https://beautiful-soup-4.readthedocs.io/en/latest/