import requests
from bs4 import BeautifulSoup

def crawl(url, max_depth=2):
    # Stop recursing once the depth budget is exhausted.
    if max_depth < 0:
        return
    try:
        response = requests.get(url, timeout=10)  # timeout so a slow host can't hang the crawl
        soup = BeautifulSoup(response.content, 'html.parser')
        # Collect the unique absolute links on the page.
        links = set()
        for link in soup.find_all('a'):
            href = link.get('href')
            if href and href.startswith('http'):
                links.add(href)
        print(f"Found {len(links)} links at {url}")
        # Recurse into each discovered link with one less level of depth.
        for link in links:
            crawl(link, max_depth - 1)
    except requests.RequestException as e:
        print(f"Error during requests to {url} : {e}")
# Usage example
start_url = "https://b.hatena.ne.jp/"  # starting URL
crawl(start_url, max_depth=2)  # crawl to depth 2
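
# A possible refinement (a minimal sketch, not part of the original code):
# crawl() above will re-fetch a URL that appears on several pages within the
# depth limit. Threading a shared `visited` set through the recursion avoids
# that; `crawl_dedup` is a hypothetical name for this variant.
def crawl_dedup(url, max_depth=2, visited=None):
    if visited is None:
        visited = set()  # created once, at the top-level call
    # Skip exhausted depth budgets and URLs we have already fetched.
    if max_depth < 0 or url in visited:
        return
    visited.add(url)
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')
        links = {a.get('href') for a in soup.find_all('a')
                 if a.get('href') and a.get('href').startswith('http')}
        print(f"Found {len(links)} links at {url}")
        for link in links:
            crawl_dedup(link, max_depth - 1, visited)
    except requests.RequestException as e:
        print(f"Error during requests to {url} : {e}")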