Python爬虫电影天堂

2024-08-16 16:32:22 编程来源：ZONE.CI 全球网 0 阅读模式

Python爬虫技术可以帮助我们从互联网上获取所需的数据，比如电影天堂网站上的电影资源。下面我们将使用 requests 和 BeautifulSoup 编写一个Python爬虫，抓取电影天堂首页上各影片的名称和下载链接。

import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Fetch the Movie Heaven (dytt8) home page and parse it.
url = 'http://www.dytt8.net/'
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of scraping an error page.
response.raise_for_status()
# GBK is a backward-compatible superset of GB2312, so it decodes everything
# GB2312 does plus characters that strict GB2312 would turn into U+FFFD.
response.encoding = 'gbk'
soup = BeautifulSoup(response.text, 'html.parser')

# Collect links to movie pages from the home page.
# An href is kept when it contains 'html' and contains neither 'index' nor
# 'ftp'. Hrefs may be site-relative (e.g. '/html/...'), which requests.get()
# cannot fetch, so each one is resolved against the base URL. Repeated links
# are dropped (first occurrence wins) so no page is downloaded twice.
movie_page_links = []
seen_links = set()
for anchor in soup.find_all('a'):
    href = anchor.get('href')
    if not href or 'html' not in href or 'index' in href or 'ftp' in href:
        continue
    absolute_link = urljoin(url, href)
    if absolute_link not in seen_links:
        seen_links.add(absolute_link)
        movie_page_links.append(absolute_link)

# Visit every movie page and record (title, first download link).
# Scraping stays best-effort: a page that cannot be fetched or does not have
# the expected markup is skipped, but the reason is reported on stderr and
# only the anticipated errors are caught, so Ctrl-C and real bugs still
# surface instead of being silently swallowed.
movie_resources = []
for link in movie_page_links:
    try:
        # Timeout so one dead page cannot stall the whole run.
        temp_response = requests.get(link, timeout=10)
        temp_response.raise_for_status()
    except requests.RequestException as exc:
        print(f'skipping {link}: {exc}', file=sys.stderr)
        continue

    # GBK is a superset of GB2312 and avoids replacement characters.
    temp_response.encoding = 'gbk'
    temp_soup = BeautifulSoup(temp_response.text, 'html.parser')
    try:
        # [0] raises IndexError when the selector matches nothing.
        movie_name = temp_soup.select('.title_all h1 font')[0].text
        movie_download = temp_soup.select('.downurl tbody tr td a')[0].get('href')
    except IndexError:
        print(f'skipping {link}: title or download link not found',
              file=sys.stderr)
        continue

    movie_resources.append((movie_name, movie_download))

# Print each collected movie as "<title> <download link>", one per line.
for movie_name, movie_download in movie_resources:
    print(movie_name, movie_download)

python爬虫电影天堂