2019-02-25

基于 SCF 下载 mzitu 图片到 COS

仅供学习交流，严禁用于商业用途！！！

你想拥有吗？

mzitu: https://www.mzitu.com

创建一个 index.py 文件，并且将下面代码复制到文件里面

import requests, re, os
from contextlib import closing
from qcloud_cos import CosConfig
from qcloud_cos import CosS3Client
from qcloud_cos import CosClientError
from qcloud_cos import CosServiceError
from bs4 import BeautifulSoup

# Deployment settings, all taken from the function's environment variables.
BUCKET = os.getenv('bucket')
# Key prefix for every uploaded object; falls back to '/' when unset.
PREFIX = os.getenv('prefix', '/')

# Credentials and region for the COS SDK; 'token' is passed through as-is
# and is None when the variable is not set.
_COS_CONFIG = CosConfig(
    Region=os.getenv('region'),
    Secret_id=os.getenv('secret_id'),
    Secret_key=os.getenv('secret_key'),
    Token=os.getenv('token'),
)
# Single client instance shared by every call in this module.
COS = CosS3Client(_COS_CONFIG)

# Scratch directory used to stage a file before it is uploaded.
TMP = '/tmp/'

# Headers sent with image requests made by download().
HEADERS = {
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36',
    'Referer': 'https://www.xxx.com',
}

def download(downloadUrl, uploadPath):
    """Download one file over HTTP and upload it to the COS bucket.

    The response body is streamed to a temporary file under ``TMP`` (named
    after the basename of ``uploadPath``), uploaded to ``BUCKET`` under the
    key ``PREFIX + uploadPath``, and the temporary file is then removed.

    Args:
        downloadUrl: URL of the file to fetch; requested with ``HEADERS``.
        uploadPath: Object key relative to ``PREFIX``.

    Returns:
        None. A non-200 response is skipped silently. COS upload errors are
        printed and swallowed so that one failed file does not abort the
        caller's loop.
    """
    with closing(requests.get(downloadUrl, stream=True, headers=HEADERS)) as response:
        if response.status_code != 200:
            return

        filePath = TMP + os.path.basename(uploadPath)

        try:
            # Closing the file flushes it, so no per-chunk flush is needed.
            with open(filePath, 'wb') as file:
                for data in response.iter_content(1024):
                    file.write(data)

            COS.upload_file(
                Bucket=BUCKET,
                Key=PREFIX + uploadPath,
                LocalFilePath=filePath
            )
        except CosServiceError as e:
            print(e.get_resource_location())
        except CosClientError as e:
            # CosClientError has no get_resource_location(); calling it here
            # would raise AttributeError from inside the handler.
            print(e)
        finally:
            # Always clean up the staged file, even if the write or upload
            # failed, so the scratch directory does not fill up.
            if os.path.exists(filePath):
                os.remove(filePath)

def _is_same_album_page(album_url, candidate):
    """Return True if ``candidate`` is a page URL of the album at ``album_url``.

    Like the plain substring test it replaces, but rejects a match that is
    immediately followed by a digit, so that ``.../1234`` is not mistaken
    for a page of album ``.../123``.
    """
    if not candidate:
        return False
    start = candidate.find(album_url)
    if start < 0:
        return False
    end = start + len(album_url)
    return not candidate[end:end + 1].isdigit()


def main_handler(event, context):
    """SCF entry point: copy every image of one album into COS.

    Reads the album id from ``event['queryString']['id']``, fetches the
    album page, and follows the link wrapped around the main image from
    page to page, passing each image to ``download``. Images are stored
    under ``PREFIX + <album title> + '/'``.

    Args:
        event: Trigger payload; must contain a ``queryString`` mapping with
            a numeric ``id`` entry.
        context: Runtime context supplied by the platform (unused).

    Returns:
        False when the parameters are missing or invalid; True when the
        album is already fully present in the bucket or the crawl finished.
    """
    query = event.get('queryString') if isinstance(event, dict) else None
    if not isinstance(query, dict) or 'id' not in query:
        print('参数错误')
        return False

    try:
        album_id = int(query['id'])
    except (TypeError, ValueError):
        # A non-numeric id is a caller error, not a reason to crash.
        print('参数错误')
        return False

    url = 'https://www.xxx.com/%d' % album_id

    response = requests.get(url)
    html = BeautifulSoup(response.text, 'html.parser')
    title = html.select('h2.main-title')[0].text

    listing = COS.list_objects(
        Bucket=BUCKET,
        Prefix=PREFIX + title + '/',
        Delimiter='/',
        MaxKeys=200
    )

    if 'Contents' in listing:
        # The second-to-last pager link carries the total page count as text.
        # It must be converted to int: comparing len() with a str is never
        # equal, which made this skip-check a no-op.
        page_spans = html.select('.pagenavi a:nth-last-child(2) span')
        total_pages = page_spans[0].text.strip() if page_spans else ''
        if total_pages.isdigit() and len(listing['Contents']) == int(total_pages):
            print('已存在')
            return True

    next_url = url

    while True:
        response = requests.get(next_url)
        html = BeautifulSoup(response.text, 'html.parser')

        # The main image is wrapped in a link to the next page.
        data = html.select('.main-image a')[0]
        img_url = data.select('img')[0].attrs.get('src')

        download(img_url, title + '/' + os.path.basename(img_url))

        next_url = data.attrs.get('href')

        # Stop when the link is missing or leaves this album.
        if not _is_same_album_page(url, next_url):
            break

    print('完成')
    return True

安装相关依赖

由于我们程序是部署到SCF上面的，一些依赖可能会没有，所以我们需要将依赖安装到项目文件夹内

$ pip install requests cos-python-sdk-v5 beautifulsoup4 -t 项目路径

例如，项目位于 D:\projects\mzitu 时：

$ pip install requests cos-python-sdk-v5 beautifulsoup4 -t D:\projects\mzitu

压缩代码包

在项目目录内全选（Ctrl + A）所有文件并压缩成 ZIP（注意压缩的是目录内的文件，而不是目录本身）

创建一个SCF函数，并上传压缩的代码包

修改SCF参数

设置内存为 1536MB
设置超时时间为 60秒
添加环境变量

bucket=     # Bucket 名称，由 bucketname-appid 构成
region=     # 地域 https://cloud.tencent.com/document/product/436/6224
secret_id=  # SecretId
secret_key= # SecretKey
prefix=     # 上传前缀，以/结尾，默认/
token=      # 可选，代码会读取该变量并作为 Token 传给 COS 客户端；不需要时可不设置