-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
74 lines (72 loc) · 2.94 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from requests import get
from json import loads
from time import sleep
from ua import randUA
from tqdm import tqdm
import os
# Number of articles requested per API page; a shorter page marks the last one.
PAGE_SIZE = 30

# Seconds any single HTTP request may stall before requests raises, so a dead
# connection cannot hang the crawler forever.
REQUEST_TIMEOUT = 30

# The HTML entity for a double quote, spelled in two pieces so that a tool
# which HTML-unescapes this file cannot collapse the literal into a bare quote.
# NOTE(review): this marker was reconstructed from an HTML-mangled copy of the
# script -- confirm against the upstream source.
HTML_QUOTE = "&" + "quot;"


def unescape_url(fragment):
    """Turn JSON-escaped slashes (and any stray backslashes) into '/'."""
    return fragment.replace("\\u002F", "/").replace("\\", "/")


def extract_image_urls(page):
    """Return the hdslb.com image URLs found in an article page.

    Only the text after the last ``__INITIAL_STATE__=`` marker is scanned.
    It is split on quote characters, and a piece is a candidate when it
    mentions ``hdslb.com`` and contains no literal '/' (i.e. its slashes are
    still escaped as ``\\u002F``).

    Args:
        page: Decoded page text.

    Returns:
        URLs starting with ``http``, in order of discovery. Duplicates are
        not removed.
    """
    state = page.split('__INITIAL_STATE__=')[-1]

    if HTML_QUOTE in state:
        # Strings are delimited by the HTML entity; URLs are taken as-is.
        urls = [
            unescape_url(piece)
            for piece in state.split(HTML_QUOTE)
            if 'hdslb.com' in piece and '/' not in piece
        ]
    else:
        candidates = [
            piece
            for piece in state.split('"')
            if 'hdslb.com' in piece and '/' not in piece
        ]
        # Protocol-relative URLs get an explicit scheme. A trailing backslash
        # is the first half of an escaped quote and is dropped; pieces without
        # one are kept whole instead of losing their last character.
        urls = [
            "https:" + unescape_url(piece[:-1] if piece.endswith('\\') else piece)
            for piece in candidates
            if 'http' not in piece
        ]
        # Pieces closed by an escaped quote; the final filter below keeps only
        # those that already carry a scheme.
        urls += [
            unescape_url(piece[:-1])
            for piece in candidates
            if piece.endswith('\\')
        ]

    return [url for url in urls if url.startswith('http')]


def image_filename(index, url):
    """Build the local file name ``<index><ext>`` for an image URL.

    The extension comes from the last path segment only, so a URL without
    one yields a bare number instead of a name containing host/path text.
    """
    last_segment = url.rsplit('/', 1)[-1]
    return str(index) + os.path.splitext(last_segment)[1]


def _fetch_article_ids(uid, page_number):
    """Fetch one page of the user's article list and return the IDs as strings.

    Prints the API error and terminates the program (after a 3 second pause)
    when the response's ``code`` field is non-zero.
    """
    api = (
        "https://api.bilibili.com/x/space/article?ps=" + str(PAGE_SIZE)
        + "&mid=" + uid + "&pn=" + str(page_number)
    )
    headers = {
        "User-Agent": randUA(),
        "Referer": "https://space.bilibili.com/" + uid,
    }
    result = loads(get(api, headers=headers, timeout=REQUEST_TIMEOUT).content)
    if result['code'] != 0:
        print("\033[1;31m错误!\n错误代码:" + str(result['code']) + "\n错误信息:" + str(result['message']) + "\033[0m\n程序将在3秒后退出...")
        sleep(3)
        # Same exit status as exit(), without relying on the site module.
        raise SystemExit
    # Tolerate a missing or null article list rather than raising KeyError.
    articles = (result.get('data') or {}).get('articles') or []
    return [str(article['id']) for article in articles]


def _download_image(url, headers, filepath, label):
    """Stream one image to ``filepath`` while showing a tqdm progress bar."""
    response = get(url, headers=headers, stream=True, timeout=REQUEST_TIMEOUT)
    try:
        total = int(response.headers.get('content-length', 0))
        with open(filepath, 'wb') as f, tqdm(
            desc=label,
            total=total,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for chunk in response.iter_content(chunk_size=1024):
                bar.update(f.write(chunk))
    finally:
        # A streamed response holds its connection until explicitly closed.
        response.close()


def _download_article(uid, article_id):
    """Download every image of one article into ``<uid>/<article_id>/``.

    An article whose directory already exists is skipped entirely.
    """
    article_dir = os.path.join(uid, article_id)
    # NOTE(review): because existing directories are skipped, an article whose
    # download was interrupted is never resumed, and the per-image "already
    # exists" check below cannot trigger on a fresh directory. Kept as-is
    # pending a decision on the intended resume behaviour.
    if os.path.exists(article_dir):
        return
    os.makedirs(article_dir)

    print("正在下载:CV" + article_id)
    url = "https://www.bilibili.com/read/cv" + article_id
    headers = {
        "User-Agent": randUA(),
        "Referer": url,
    }
    raw = get(url, headers=headers, timeout=REQUEST_TIMEOUT).content
    # Work on the un-escaped repr of the bytes: non-ASCII bytes come out as
    # single characters rather than decoded UTF-8, which is fine because only
    # the ASCII URL text is used afterwards.
    page = str(raw).encode('utf8').decode('unicode_escape')
    imgs = extract_image_urls(page)

    for index, img_url in enumerate(imgs, start=1):
        filepath = os.path.join(article_dir, image_filename(index, img_url))
        if os.path.exists(filepath):
            print("第" + str(index) + "张图已存在")
            continue
        _download_image(img_url, headers, filepath, "第" + str(index) + "张图")
    print("CV" + article_id + "下载完成,共下载图片:" + str(len(imgs)))


def main():
    """Ask for a user UID and download the images of all of their articles.

    Pages of the article list are requested until one comes back with fewer
    than ``PAGE_SIZE`` entries.
    """
    print("欢迎使用B站专栏图片爬虫\n" + "-" * 30)
    uid = input("请输入要爬取用户的UID:").strip()
    os.makedirs(uid, exist_ok=True)

    page_number = 0
    fetched = PAGE_SIZE
    while fetched == PAGE_SIZE:
        page_number += 1
        print("页数:" + str(page_number))
        article_ids = _fetch_article_ids(uid, page_number)
        for article_id in article_ids:
            _download_article(uid, article_id)
        fetched = len(article_ids)

    print("\033[1;32m全部图片下载完成!\033[0m\n程序将在3秒后退出...")
    sleep(3)
    raise SystemExit


if __name__ == "__main__":
    main()