diff --git a/.buildinfo b/.buildinfo index 042eb56..6896f8b 100644 --- a/.buildinfo +++ b/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 496c554a013a2b9a9fa47504ea006d82 +config: f744698d55f38bf171e6cd3e48afa688 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/_modules/index.html b/_modules/index.html index 7297fbf..7223d00 100644 --- a/_modules/index.html +++ b/_modules/index.html @@ -5,7 +5,7 @@
-
import requests
from lxml import etree
-from xhs.exception import DataFetchError, ErrorEnum, IPBlockError, SignError
+from xhs.exception import (DataFetchError, ErrorEnum, IPBlockError,
+ NeedVerifyError, SignError)
from .help import (cookie_jar_to_cookie_str, download_file,
get_imgs_url_from_note, get_search_id, get_valid_path_name,
@@ -200,12 +201,17 @@ xhs.core 源代码
return response
if data.get("success"):
return data.get("data", data.get("success"))
- elif data["code"] == ErrorEnum.IP_BLOCK.value.code:
- raise IPBlockError(ErrorEnum.IP_BLOCK.value.msg)
- elif data["code"] == ErrorEnum.SIGN_FAULT.value.code:
- raise SignError(ErrorEnum.SIGN_FAULT.value.msg)
+ elif response.status_code == 471:
+ # someday someone maybe will bypass captcha
+ raise NeedVerifyError(
+ f"出现验证码,请求失败,Verifytype: {response.headers['Verifytype']},Verifyuuid: {response.headers['Verifyuuid']}",
+ response=response)
+ elif data.get("code") == ErrorEnum.IP_BLOCK.value.code:
+ raise IPBlockError(ErrorEnum.IP_BLOCK.value.msg, response=response)
+ elif data.get("code") == ErrorEnum.SIGN_FAULT.value.code:
+ raise SignError(ErrorEnum.SIGN_FAULT.value.msg, response=response)
else:
- raise DataFetchError(data)
+ raise DataFetchError(data, response=response)
return dict_new
url = "https://www.xiaohongshu.com/explore/" + note_id
- res = self.session.get(url, headers={"user-agent": self.user_agent})
+ res = self.session.get(url, headers={"user-agent": self.user_agent, "referer": "https://www.xiaohongshu.com/"})
html = res.text
state = re.findall(r"window.__INITIAL_STATE__=({.*})</script>", html)[
0
@@ -912,8 +918,7 @@ xhs.core 源代码
headers = {
"Referer": "https://creator.xiaohongshu.com/"
}
- print(data)
- return self.post(uri, data, headers=headers, is_creator=True)
+ return self.post(uri, data, headers=headers)
)
res = response.json()
- print(res)
if res["data"]["hasFirstFrame"]:
image_id = res["data"]["firstFrameFileId"]
return image_id
diff --git a/_static/documentation_options.js b/_static/documentation_options.js
index 08d37ca..dcb085e 100644
--- a/_static/documentation_options.js
+++ b/_static/documentation_options.js
@@ -1,6 +1,6 @@
var DOCUMENTATION_OPTIONS = {
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
- VERSION: '0.2.10',
+ VERSION: '0.2.11',
LANGUAGE: 'zh_CN',
COLLAPSE_INDEX: false,
BUILDER: 'html',
diff --git a/basic.html b/basic.html
index f6948b5..242a59d 100644
--- a/basic.html
+++ b/basic.html
@@ -5,7 +5,7 @@
- 快速入门 — xhs 0.2.10 文档
+ 快速入门 — xhs 0.2.11 文档
diff --git a/crawl.html b/crawl.html
index 9139c1e..ec11ca1 100644
--- a/crawl.html
+++ b/crawl.html
@@ -5,7 +5,7 @@
- 主页爬取 — xhs 0.2.10 文档
+ 主页爬取 — xhs 0.2.11 文档
diff --git a/creator.html b/creator.html
index 0be72cb..c09f4e8 100644
--- a/creator.html
+++ b/creator.html
@@ -5,7 +5,7 @@
- 笔记发布 — xhs 0.2.10 文档
+ 笔记发布 — xhs 0.2.11 文档
diff --git a/genindex.html b/genindex.html
index 484a249..3cf2db0 100644
--- a/genindex.html
+++ b/genindex.html
@@ -5,7 +5,7 @@
- 索引 — xhs 0.2.10 文档
+ 索引 — xhs 0.2.11 文档
diff --git a/index.html b/index.html
index 896a2dd..45144f5 100644
--- a/index.html
+++ b/index.html
@@ -5,7 +5,7 @@
- 介绍 — xhs 0.2.10 文档
+ 介绍 — xhs 0.2.11 文档
diff --git a/search.html b/search.html
index 0631c4f..c81784e 100644
--- a/search.html
+++ b/search.html
@@ -5,7 +5,7 @@
- 搜索 — xhs 0.2.10 文档
+ 搜索 — xhs 0.2.11 文档
diff --git a/source/xhs.html b/source/xhs.html
index ce281c0..8a3c8cc 100644
--- a/source/xhs.html
+++ b/source/xhs.html
@@ -5,7 +5,7 @@
- Documentation — xhs 0.2.10 文档
+ Documentation — xhs 0.2.11 文档