To download all FDA guidance documents listed on the given website, you can modify the previous Python script to handle pagination and extract the links to the individual guidance documents. Here's the updated code:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin


def download_pdf(url, filename):
    # Fetch the PDF and write it to disk.
    response = requests.get(url)
    with open(filename, 'wb') as file:
        file.write(response.content)


def get_all_pages(url):
    # Follow the "next page" link until no more result pages remain,
    # collecting the parsed HTML of each page along the way.
    pages = []
    while url:
        print(f'Processing page: {url}')
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        pages.append(soup)
        next_page = soup.find('a', {'title': 'Go to next page'})
        # Pagination links are relative, so resolve them against the
        # current page's URL.
        url = urljoin(url, next_page['href']) if next_page else None
    return pages


def main():
    base_url = 'https://www.fda.gov'
    starting_url = f'{base_url}/regulatory-information/search-fda-guidance-documents'
    all_pages = get_all_pages(starting_url)

    output_directory = 'guidance_documents'
    os.makedirs(output_directory, exist_ok=True)

    for soup in all_pages:
        # Skip the header row, then pull the title and PDF link out of
        # each table row.
        for row in soup.find_all('tr')[1:]:
            columns = row.find_all('td')
            title = columns[0].text.strip()
            pdf_url = urljoin(base_url, columns[1].find('a')['href'])
            filename = os.path.join(output_directory, title + '.pdf')
            print(f'Downloading {filename}')
            download_pdf(pdf_url, filename)

    print('All FDA guidance documents downloaded.')


if __name__ == '__main__':
    main()
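Two practical caveats: guidance titles often contain characters such as slashes or colons that are not valid in filenames, and some PDFs are large enough that streaming the download is gentler on memory. Here is a minimal sketch of both adjustments; the sanitize_filename helper is a hypothetical addition, not part of the original script:

import re
import requests


def sanitize_filename(title):
    # Hypothetical helper: replace characters that are invalid on most
    # filesystems with underscores, and trim trailing whitespace/dots.
    return re.sub(r'[\\/*?:"<>|]', '_', title).strip().rstrip('.')


def download_pdf(url, filename):
    # Stream the response so large PDFs are written in chunks instead of
    # being held entirely in memory; raise early on HTTP errors.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

With these in place, the loop in main() would build the filename from sanitize_filename(title) + '.pdf' instead of the raw title, and the rest of the script stays the same.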