download image from Baidu Image
Project description
BaiduImagesDownload
BaiduImagesDownload
是一个快速、简单的百度图片爬取工具
from BaiduImagesDownload import Crawler
net, num, urls = Crawler.get_images_url('二次元', 20)
Crawler.download_images(urls)
目录
安装
pip install BaiduImagesDownload
使用
基本
from BaiduImagesDownload import Crawler
# original为True代表优先下载原图
net, num, urls = Crawler.get_images_url('二次元', 20, original=True)
Crawler.download_images(urls)
下载设置
from BaiduImagesDownload import Crawler
# rule设置允许的图片格式,默认为('.png', '.jpg')
# timeout为超时时间,默认为60(s)
net, num, urls = Crawler.get_images_url('二次元', 20)
Crawler.download_images(urls, rule=('.png', '.jpg'), timeout=60)
文档
get_images_url
class Crawler:
@staticmethod
def get_images_url(word: str, num: int, original: bool = True,
timeout: int = __CONCURRENT_TIMEOUT) -> (bool, bool, list):
参数
word: str
: 搜索关键词
num: int
: 搜索数量
original: bool, optional
: 是否下载原图,默认为True
timeout: int, optional
: 请求 timeout, 默认为60(s)
返回
net: bool
: 网络连接是否成功,成功为 True,失败为 False
num: bool
: 图片数量是否满足,满足为 True,不足为 False
urls: list
: 获取的 urls,每项为一个 dict,其中有两个键 obj_url 和 from_url。obj_url 为对应图片的 url,from_url 为 Referer
download_images
class Crawler:
@staticmethod
def download_images(urls: list, rule: tuple = ('.png', '.jpg'),
path: str = 'download', timeout: int = __CONCURRENT_TIMEOUT,
concurrent: int = __CONCURRENT_NUM, command: bool = True) -> (int, int):
参数
urls: list
: 需要爬的图片列表,格式与 get_images_url 返回的相同
rule: tuple, optional
: 允许下载的格式,默认为('.png', '.jpg')
path: str, optional
: 图片下载的路径,默认为'download'
timeout: int, optional
: 请求 timeout, 默认为60(s)
concurrent: int, optional
: 并行下载的数量,默认为100
command: bool, optional
: 是否在控制台显示进度条,默认为True
返回
success: int
: 下载成功的数量
failed: int
: 下载失败的数量
日志
可以设置日志的等级以及输出,具体请查看logging
import logging
from BaiduImagesDownload import logger
# 设置日志的等级为DEBUG
# 默认为INFO
logger.setLevel(logging.DEBUG)
# 设置输出到文件
# 注意: FileHandler 不会自动展开 '~',需要先用 os.path.expanduser 转换为用户主目录
import os
file_handler = logging.FileHandler(os.path.expanduser('~/BaiduImagesDownload.log'))
file_handler.setFormatter(logging.Formatter(
'[%(asctime)s] [%(levelname)s] %(message)s')) # 设置输出格式
logger.addHandler(file_handler)
许可
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Close
Hashes for BaiduImagesDownload-2.0.0.tar.gz
Algorithm | Hash digest |
---|---|
SHA256 | d88d10030084c2c8fd4a8474d3c2da88b850fb911ad05ac2811cbded9204f75d |
MD5 | 9b67a21a90a930c303597f9c2aca4a26 |
BLAKE2b-256 | d289356b6d76558ceeac7add69a0669ee8cd4bdc795a7fd0965b3dd9983961ca |
Close
Hashes for BaiduImagesDownload-2.0.0-py3-none-any.whl
Algorithm | Hash digest |
---|---|
SHA256 | 21716ae126ccfb2015fc534102e0f4e4f8f459830bfdd068e338011618cc8533 |
MD5 | 25c83b8bfe1758a29719ccac0c47d218 |
BLAKE2b-256 | 101067fd1b741dc77093ab36ffb65a7ade99512d8715be0c2806ade90ff591d4 |