-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathz_果果漫画网站爬取.py
71 lines (63 loc) · 2 KB
/
z_果果漫画网站爬取.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import requests
from bs4 import BeautifulSoup
import os
import time
from xpinyin import Pinyin
import ffmpy3
# Extract image links: undo the slash escaping found in the scraped URL list.
def kill(str):
    r"""Return *str* with every ``\/`` sequence replaced by a plain ``/``.

    The chapter page is presumably embedding its image URLs with escaped
    slashes (``https:\/\/host\/a.jpg``); this turns them back into usable
    URLs.

    Only a backslash that sits directly in front of a slash is removed.
    Every other character -- including ``+``, ``(`` and ``:`` -- passes
    through untouched, and input without any escaped slash is returned
    unchanged.

    Args:
        str: Text containing zero or more escaped URLs.

    Returns:
        The text with ``\/`` collapsed to ``/``.
    """
    # NOTE: the parameter name shadows the ``str`` builtin; it is kept so
    # that existing callers (positional or keyword) keep working.
    return str.replace("\\/", "/")
# Download one image belonging to a chapter.
def download(imgs, na, name):
    """Fetch the image at *imgs* and store it in the chapter's folder.

    The file is written to ``C:\\Users\\tkdg\\Desktop\\<name>\\<na>\\`` and is
    named after the current timestamp with a ``.jpg`` suffix. The response
    object and the last nine characters of the URL are printed afterwards.

    Args:
        imgs: URL of the image to download.
        na: Chapter name, used as the innermost folder name.
        name: Comic name, used as the parent folder name.
    """
    res = requests.get(imgs, timeout=(20, 20))
    ti = str(time.time()) + '.jpg'
    sn = imgs[-9:]  # tail of the URL; an alternative file name (see below)
    dir_name = 'C:\\Users\\tkdg\\Desktop\\{}\\{}\\'.format(name, na)
    # makedirs also creates a missing parent folder and tolerates the
    # directory already existing, so no separate existence check is needed.
    os.makedirs(dir_name, exist_ok=True)
    # The context manager closes the file even if the write fails.
    with open(dir_name + ti, 'wb') as file:  # ti or sn
        file.write(res.content)
    print(res)
    print(sn)
# Fetch a chapter's HTML and download every image it lists.
def chapters(urls, chapter, name):
    """Download all images of one chapter.

    The chapter page is requested, the text of its fourth ``<script>`` tag is
    read, the ``;var chapterImages = [`` marker is removed and everything up
    to the first ``]`` is treated as a comma-separated list of quoted image
    URLs. Each URL is passed to ``download``.

    Args:
        urls: Site-relative path of the chapter page (appended to the host).
        chapter: Chapter name, forwarded to ``download`` as the folder name.
        name: Comic name, forwarded to ``download`` as the parent folder.
    """
    url = 'https://guoguo.hxt9881.com{}'.format(urls)
    res = requests.get(url, timeout=(20, 20))
    soup = BeautifulSoup(res.text, 'html.parser')

    # Extract the image links from the inline script.
    # NOTE(review): the script index 3 is hard-coded; it depends on the
    # page layout staying the same.
    script = soup.find_all('script')[3].text
    script = script.replace(';var chapterImages = [', '')
    array_body = script.split(']')[0]
    imgs = kill(array_body.replace('"', '')).split(',')
    # print(imgs[3],type(imgs))

    for img_url in imgs:
        # "".split(",") yields [""], so an empty image array would otherwise
        # trigger a request for an empty URL.
        if not img_url.strip():
            continue
        download(img_url, chapter, name)
    # NOTE(review): assumed to be printed once per chapter, after the loop.
    print('finish !')
# Resolve the target comic's index page and download each of its chapters.
name = input("请你输入目标漫画:")

# Make sure the per-comic output folder exists before any chapter is saved.
dir_name = 'C:\\Users\\tkdg\\Desktop\\{}\\'.format(name)
if not os.path.exists(dir_name):
    os.mkdir(dir_name)

# The site addresses a comic by the pinyin of its title, joined with an
# empty separator.
slug = Pinyin().get_pinyin(name, '')
index_url = "https://guoguo.hxt9881.com/manhua/{}/".format(slug)
index_page = requests.get(index_url, timeout=(20, 20))
index_soup = BeautifulSoup(index_page.text, 'html.parser')

# Every <li> of the chapter list holds one chapter link and its title.
chapter_list = index_soup.find('ul', {'id': "chapter-list-1"})
for entry in chapter_list.find_all('li'):
    href = entry.find('a').get('href')
    title = entry.find('span', {'class': "list_con_zj"}).text.replace(' ', '')
    chapters(href, title, name)