class MongoBackup(BackupBase):
    """Back up article HTML into the ``liuli_backup`` MongoDB collection.

    Each document is keyed by ``doc_source`` / ``doc_source_name`` /
    ``doc_name``. The backup status itself is tracked through the helpers
    inherited from ``BackupBase`` (``is_backup`` / ``save_backup`` /
    ``delete_backup``).
    """

    def __init__(self, backup_config: dict):
        """Initialise the MongoDB backup backend.

        Args:
            backup_config (dict): Backup configuration. A falsy value is
                replaced by an empty dict before it reaches the base class.
        """
        super().__init__(backup_type="mongodb", backup_config=backup_config or {})
        # Collection that stores the fetched HTML content.
        self.liuli_backup_coll = self.mongo_base.get_collection(
            coll_name="liuli_backup"
        )

    def backup(self, backup_data: dict) -> bool:
        """Fetch a document and store its HTML in MongoDB.

        Args:
            backup_data (dict): Must contain ``doc_source``,
                ``doc_source_name``, ``doc_name`` and ``doc_link``.

        Returns:
            bool: True when the document is stored (now or previously),
            False when the lookup, the download or the write failed.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        # The following fields are mandatory.
        doc_source = backup_data["doc_source"]
        doc_source_name = backup_data["doc_source_name"]
        doc_name = backup_data["doc_name"]
        # Some HTML sources are large, so the page is fetched on demand.
        doc_link = backup_data["doc_link"]

        file_path = f"{doc_source}/{doc_source_name}/{doc_name}.html"
        filter_dict = {
            "doc_source": doc_source,
            "doc_source_name": doc_source_name,
            "doc_name": doc_name,
        }

        # A recorded status is trusted: the stored copy is assumed to exist.
        if self.is_backup(**filter_dict):
            LOGGER.info(f"Backup({self.backup_type}): {file_path} 已存在!")
            return True

        # The content may already be stored while the status record is
        # missing; check the collection before downloading again.
        db_find_res = mongodb_find(
            coll_conn=self.liuli_backup_coll,
            filter_dict=filter_dict,
            return_dict={"_id": 0},
        )
        if not db_find_res["status"]:
            # The lookup itself failed: do not record a status, so the
            # document is retried on the next run.
            LOGGER.error(
                f"Backup({self.backup_type}): {file_path} 上传失败!{db_find_res.get('info')}"
            )
            return False

        if db_find_res["info"]:
            # Content is present; only the status record needs re-syncing.
            LOGGER.info(f"Backup({self.backup_type}): {file_path} 已成功!")
        else:
            resp = send_get_request(url=doc_link)
            # NOTE(review): assumes the helper may return None on a failed
            # request -- confirm against src.common.remote.
            if resp is None:
                LOGGER.error(
                    f"Backup({self.backup_type}): {file_path} 上传失败!empty response from {doc_link}"
                )
                return False

            # Hard-coded for now (debugging): rewrite lazy-loaded image
            # attributes so they are served through the images.weserv.nl proxy.
            before_str = 'data-src="'
            after_str = 'src="https://images.weserv.nl/?url='
            content = resp.text.replace(before_str, after_str)
            update_data = {
                "$set": {**filter_dict, "ts": int(time.time()), "content": content}
            }
            db_update_res = mongodb_update_data(
                coll_conn=self.liuli_backup_coll,
                filter_dict=filter_dict,
                update_data=update_data,
            )
            if not db_update_res["status"]:
                LOGGER.error(
                    f"Backup({self.backup_type}): {file_path} 上传失败!{db_update_res['info']}"
                )
                return False
            LOGGER.info(f"Backup({self.backup_type}): {file_path} 上传成功!")

        # Record the status only once the content is known to be stored.
        self.save_backup(**filter_dict)
        return True

    def delete(self, doc_source: str, doc_source_name: str, doc_name: str) -> bool:
        """Delete a stored document and its backup status.

        Args:
            doc_source (str): Origin the article was collected from.
            doc_source_name (str): Name of the article source.
            doc_name (str): Article name.

        Returns:
            bool: True when the delete succeeded, False otherwise.
        """
        file_path = f"{doc_source}/{doc_source_name}/{doc_name}.html"
        filter_dict = {
            "doc_source": doc_source,
            "doc_source_name": doc_source_name,
            "doc_name": doc_name,
        }
        db_res = mongodb_delete_many_data(
            coll_conn=self.liuli_backup_coll,
            filter_dict=filter_dict,
        )
        if not db_res["status"]:
            # The original interpolated an undefined name ``e`` here, which
            # raised NameError instead of logging the failure.
            # NOTE(review): assumes the result carries an "info" entry like
            # the other mongodb_tools helpers -- confirm.
            LOGGER.error(
                f"Backup({self.backup_type}): {file_path} 删除失败!{db_res.get('info')}"
            )
            return False

        LOGGER.info(f"Backup({self.backup_type}): {file_path} 删除成功!")
        # Drop the status record for this article as well.
        self.delete_backup(**filter_dict)
        return True
import Config -from src.processor.rss import gen_rss_xml +from src.processor.rss import to_rss from src.schedule_task.wechat_task import send_doc, update_ads_tag, update_wechat_doc from src.utils import LOGGER @@ -29,7 +29,7 @@ def schedule_task(): # 文章分发 send_doc() # 生成 RSS - gen_rss_xml() + to_rss() def main(): diff --git a/src/processor/__init__.py b/src/processor/__init__.py index df0bd93..6533b7a 100644 --- a/src/processor/__init__.py +++ b/src/processor/__init__.py @@ -3,4 +3,5 @@ Description: 常用中间件 Changelog: all notable changes to this file will be documented """ +from .rss import to_rss from .utils import fetch_keyword_list, html_to_text_h2t, str_replace diff --git a/src/processor/rss/__init__.py b/src/processor/rss/__init__.py index 3266177..6da1fca 100644 --- a/src/processor/rss/__init__.py +++ b/src/processor/rss/__init__.py @@ -3,4 +3,4 @@ Description: 将文章生成RSS Changelog: all notable changes to this file will be documented """ -from .doc2rss import gen_rss_xml +from .doc2rss import to_rss diff --git a/src/processor/rss/doc2rss.py b/src/processor/rss/doc2rss.py index 6c8749c..955015e 100644 --- a/src/processor/rss/doc2rss.py +++ b/src/processor/rss/doc2rss.py @@ -18,11 +18,11 @@ from src.utils import LOGGER -def gen_rss_xml(wechat_list: list = None): - """为公众号文章生成RSS +def to_rss(wechat_list: list = None): + """为文章生成RSS Args: - wechat_list (list, optional): 公众号文章列表. + wechat_list (list, optional): 文章列表. 
""" wechat_list = wechat_list or Config.WECHAT_LIST mongo_base = MongodbManager.get_mongo_base(mongodb_config=Config.MONGODB_CONFIG) @@ -91,7 +91,7 @@ def gen_rss_xml(wechat_list: list = None): rss_db_data = { "doc_source_name": wechat_name, "rss_data": rss_data, - "updated_at": time.time(), + "updated_at": int(time.time()), } rss_db_res = mongodb_update_data( coll_conn=coll_rss_conn, @@ -111,4 +111,4 @@ def gen_rss_xml(wechat_list: list = None): if __name__ == "__main__": - gen_rss_xml() + to_rss() diff --git a/src/schedule_task/wechat_task.py b/src/schedule_task/wechat_task.py index 1a3f926..863ec00 100644 --- a/src/schedule_task/wechat_task.py +++ b/src/schedule_task/wechat_task.py @@ -88,7 +88,7 @@ def send_doc(): # 是否启用分发器 mongo_base = MongodbManager.get_mongo_base(mongodb_config=Config.MONGODB_CONFIG) coll = mongo_base.get_collection(coll_name="liuli_articles") - cur_ts = time.time() + cur_ts = int(time.time()) filter_dict = { # 时间范围,除第一次外后面其实可以去掉 "doc_ts": {"$gte": cur_ts - (2 * 24 * 60 * 60), "$lte": cur_ts}, diff --git a/src/sender/bark_sender.py b/src/sender/bark_sender.py index 76b7f7b..b735b72 100644 --- a/src/sender/bark_sender.py +++ b/src/sender/bark_sender.py @@ -51,7 +51,7 @@ def send(self, send_data) -> bool: { "send_type": self.send_type, "doc_id": doc_id, - "ts": time.time(), + "ts": int(time.time()), } ) # 下发成功 diff --git a/src/sender/ding_sender.py b/src/sender/ding_sender.py index 770425e..7359506 100644 --- a/src/sender/ding_sender.py +++ b/src/sender/ding_sender.py @@ -75,7 +75,7 @@ def send(self, send_data) -> bool: { "send_type": self.send_type, "doc_id": doc_id, - "ts": time.time(), + "ts": int(time.time()), } ) # 下发成功 diff --git a/src/sender/tg_sender.py b/src/sender/tg_sender.py index 0e4f536..9c5950d 100644 --- a/src/sender/tg_sender.py +++ b/src/sender/tg_sender.py @@ -79,7 +79,7 @@ def send(self, send_data) -> bool: { "send_type": self.send_type, "doc_id": doc_id, - "ts": time.time(), + "ts": int(time.time()), } ) # 下发成功 diff --git 
a/src/sender/wecom_sender.py b/src/sender/wecom_sender.py index d9d69ce..711802d 100644 --- a/src/sender/wecom_sender.py +++ b/src/sender/wecom_sender.py @@ -155,7 +155,7 @@ def send(self, send_data) -> bool: { "send_type": self.send_type, "doc_id": doc_id, - "ts": time.time(), + "ts": int(time.time()), } )