download image from Baidu Image
Project description
BaiduImagesDownload
BaiduImagesDownload 是一个快速、简单的百度图片爬取工具
from BaiduImagesDownload.crawler import Crawler
net, num, urls = Crawler.get_images_url('二次元', 20)
Crawler.download_images(urls)
目录
安装
pip install BaiduImagesDownload
使用
基本
from BaiduImagesDownload.crawler import Crawler
# original为True代表优先下载原图
net, num, urls = Crawler.get_images_url('二次元', 20, original=True)
Crawler.download_images(urls)
下载设置
from BaiduImagesDownload.crawler import Crawler
# rule设置允许的图片格式,默认为('.png', '.jpg')
# timeout为超时时间,默认为60(s)
net, num, urls = Crawler.get_images_url('二次元', 20)
Crawler.download_images(urls, rule=('.png', '.jpg'), timeout=60)
文档
get_images_url
class Crawler:
@staticmethod
def get_images_url(word: str, num: int, original: bool = True,
timeout: int = __CONCURRENT_TIMEOUT) -> (bool, bool, list):
参数
word: str
: 搜索关键词
num: int
: 搜索数量
original: bool, optional
: 是否下载原图,默认为True
timeout: int, optional
: 请求 timeout, 默认为60(s)
返回
net: bool
: 网络连接是否成功,成功为 True,失败为 False
num: bool
: 图片数量是否满足,满足为 True,不足为 False
urls: list
: 获取的 urls,每项为一个 dict,其中有两个键 obj_url 和 from_url。obj_url 为对应图片的 url,from_url 为 Referer
download_images
class Crawler:
@staticmethod
def download_images(urls: list, rule: tuple = ('.png', '.jpg'),
path: str = 'download', timeout: int = __CONCURRENT_TIMEOUT,
concurrent: int = __CONCURRENT_NUM) -> (int, int):
参数
urls: list
: 需要爬取的图片列表,格式与 get_images_url 返回的相同
rule: tuple, optional
: 允许下载的格式,默认为('.png', '.jpg')
path: str, optional
: 图片下载的路径,默认为'download'
timeout: int, optional
: 请求 timeout, 默认为60(s)
concurrent: int, optional
: 并行下载的数量,默认为100
返回
success: int
: 下载成功的数量
failed: int
: 下载失败的数量
日志
可以设置日志的等级以及输出,具体请查看logging
import logging
from BaiduImagesDownload.crawler import logger
# 设置日志的等级为DEBUG
# 默认为INFO
logger.setLevel(logging.DEBUG)
# 设置输出到文件
# 注意: FileHandler 不会自动展开 '~',需先用 os.path.expanduser 处理
import os
file_handler = logging.FileHandler(os.path.expanduser('~/BaiduImagesDownload.log'))
file_handler.setFormatter(logging.Formatter(
'[%(asctime)s] [%(levelname)s] %(message)s')) # 设置输出格式
logger.addHandler(file_handler)
许可
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Close
Hashes for BaiduImagesDownload-1.2.0.tar.gz
Algorithm | Hash digest | |
---|---|---|
SHA256 | 9a0c26f140a5ba9cf3a245524fe5ba24550cdf2394d0c68751daaa7a04d35a1c |
|
MD5 | c0fd2b61541573a180f392c72428ff80 |
|
BLAKE2b-256 | 78e0e726beb040db5a18d42f551aa6ea5f181ef21e70decffd9ada15c376efab |
Close
Hashes for BaiduImagesDownload-1.2.0-py3-none-any.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 5ea33bcca56aa4e86662b18b36263faaa0d2efd65ff8a00c0b13decd6c738f54 |
|
MD5 | 46235f302bba8362b3c8885cd11bd489 |
|
BLAKE2b-256 | 39a7e5952729686eafc1f0103337198d94bdf325df22e94d7cc3f3459d35f95c |