import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import os
# Destination directory for downloaded images. expanduser() resolves the
# leading '~' because open() and os.makedirs() do NOT expand it themselves —
# without this, a literal './~/Downloads/...' directory would be created.
SAVE_FOLDER_PATH = os.path.expanduser('~/Downloads/mingpao_images/')
def download_images(url):
    """Download every image referenced by <img> tags on *url* into SAVE_FOLDER_PATH.

    Parameters
    ----------
    url : str
        Address of the page to scrape.

    Individual image failures are reported and skipped so one bad link
    does not abort the rest of the page.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # fail loudly on a bad page instead of parsing an error body
    soup = BeautifulSoup(response.content, 'html.parser')
    # '~' is not expanded by open(); expanduser is idempotent on absolute paths.
    save_dir = os.path.expanduser(SAVE_FOLDER_PATH)
    for image in soup.find_all('img'):
        src = image.get('src')  # .get(): some <img> tags have no src attribute
        if not src:
            continue
        src_url = urljoin('https://life.mingpao.com', src)
        image_name = src_url.split('/')[-1]
        if not image_name:  # URL ended with '/': no usable filename
            continue
        try:
            img_response = requests.get(src_url, timeout=30)
            img_response.raise_for_status()
        except requests.RequestException as exc:
            print(f'Skipping {src_url}: {exc}')
            continue
        with open(os.path.join(save_dir, image_name), 'wb') as f:
            f.write(img_response.content)
if __name__ == '__main__':
    # Archive pages are paginated via a 'page' query parameter; the original
    # SOURCE had this URL broken across two lines (a SyntaxError) — rejoined here.
    base_url = 'https://life.mingpao.com/general/archive2?tag=comic&page='
    total_pages = 50
    # expanduser: '~' is not expanded automatically by os.makedirs().
    save_dir = os.path.expanduser(SAVE_FOLDER_PATH)
    # exist_ok=True replaces the racy exists()-then-makedirs() pattern.
    os.makedirs(save_dir, exist_ok=True)
    for page in range(1, total_pages + 1):
        download_images(base_url + str(page))