Skip to content

Commit

Permalink
Update: mongodb backup #33
Browse files Browse the repository at this point in the history
  • Loading branch information
howie6879 committed Jan 21, 2022
1 parent 4d7cb2a commit 131afa0
Show file tree
Hide file tree
Showing 12 changed files with 168 additions and 17 deletions.
6 changes: 3 additions & 3 deletions src/backup/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import time

from src.config import Config
from src.databases import MongodbManager
from src.databases import MongodbBase, MongodbManager
from src.utils import LOGGER


Expand All @@ -26,7 +26,7 @@ def __init__(self, backup_type: str, backup_config: dict):
self.backup_type = backup_type
self.backup_config = backup_config
# 初始化数据库
self.mongo_base = MongodbManager.get_mongo_base(
self.mongo_base: MongodbBase = MongodbManager.get_mongo_base(
mongodb_config=Config.MONGODB_CONFIG
)
# liuli_send_list 存储所有已经备份过的文章列表
Expand Down Expand Up @@ -70,7 +70,7 @@ def save_backup(self, doc_source: str, doc_source_name: str, doc_name: str) -> b
"doc_source_name": doc_source_name,
"doc_name": doc_name,
}
update_data = {"$set": {**filter_dict, **{"ts": time.time()}}}
update_data = {"$set": {**filter_dict, **{"ts": int(time.time())}}}
self.bak_coll.update_one(
filter=filter_dict, update=update_data, upsert=True
)
Expand Down
1 change: 0 additions & 1 deletion src/backup/github_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def backup(self, backup_data: dict) -> bool:
bool: 是否成功
"""
# 以下字段必须存在
doc_id = backup_data["doc_id"]
doc_source = backup_data["doc_source"]
doc_source_name = backup_data["doc_source_name"]
doc_name = backup_data["doc_name"]
Expand Down
151 changes: 151 additions & 0 deletions src/backup/mongodb_backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""
Created by howie.hu at 2022-01-19.
Description: 基于MongoDB做备份
- 命令:PIPENV_DOTENV_LOCATION=./pro.env pipenv run python src/backup/mongodb_backup.py
Changelog: all notable changes to this file will be documented
"""
import time

from src.backup.base import BackupBase
from src.common.remote import send_get_request
from src.databases.mongodb_tools import (
mongodb_delete_many_data,
mongodb_find,
mongodb_update_data,
)
from src.utils import LOGGER


class MongoBackup(BackupBase):
    """Back up article HTML into a MongoDB collection.

    Article content is written to the ``liuli_backup`` collection, keyed by
    ``doc_source`` / ``doc_source_name`` / ``doc_name``. The backup *state*
    (whether an article has already been handled) is tracked separately via
    the ``is_backup`` / ``save_backup`` / ``delete_backup`` methods inherited
    from ``BackupBase``.
    """

    def __init__(self, backup_config: dict):
        """Initialise the MongoDB backup handler.

        Args:
            backup_config (dict): Backup configuration. A falsy value
                (``None`` or ``{}``) is replaced by an empty dict.
        """
        super().__init__(backup_type="mongodb", backup_config=backup_config or {})
        self.liuli_backup_coll = self.mongo_base.get_collection(
            coll_name="liuli_backup"
        )

    def backup(self, backup_data: dict) -> bool:
        """Back up one article.

        Args:
            backup_data (dict): Article data. Must contain the keys
                ``doc_source``, ``doc_source_name``, ``doc_name`` and
                ``doc_link``.

        Returns:
            bool: True when the article is backed up after this call (either
            freshly stored or already present), False when the existence
            query, the download or the database write failed.

        Raises:
            KeyError: If a required key is missing from ``backup_data``.
        """
        # The following fields are required
        doc_source = backup_data["doc_source"]
        doc_source_name = backup_data["doc_source_name"]
        doc_name = backup_data["doc_name"]
        # Some HTML sources are large, so the content is fetched over the
        # network from this link instead of being passed in directly
        doc_link = backup_data["doc_link"]

        file_path = f"{doc_source}/{doc_source_name}/{doc_name}.html"

        # A recorded backup state is trusted: the content is assumed to be
        # present as long as nobody edits the collection by hand.
        if self.is_backup(
            doc_source=doc_source,
            doc_source_name=doc_source_name,
            doc_name=doc_name,
        ):
            LOGGER.info(f"Backup({self.backup_type}): {file_path} 已存在!")
            return True

        filter_dict = {
            "doc_source": doc_source,
            "doc_source_name": doc_source_name,
            "doc_name": doc_name,
        }
        # Content may already exist without a recorded state; check first so
        # the state can simply be re-synchronised in that case.
        db_find_res = mongodb_find(
            coll_conn=self.liuli_backup_coll,
            filter_dict=filter_dict,
            return_dict={"_id": 0},
        )
        if not db_find_res["status"]:
            # A failed query says nothing about whether the content exists, so
            # do not record the article as backed up; it will be retried.
            # NOTE(review): assumes a failed result carries the error text
            # under "info", as the update helper's result does — confirm.
            LOGGER.error(
                f"Backup({self.backup_type}): {file_path} 查询失败!{db_find_res.get('info')}"
            )
            return False

        if db_find_res["info"]:
            # Content is already stored; only the state record was missing.
            LOGGER.info(f"Backup({self.backup_type}): {file_path} 已成功!")
        else:
            # Query succeeded but returned nothing: fetch and store the page.
            resp = send_get_request(url=doc_link)
            if resp is None:
                # NOTE(review): send_get_request is defined elsewhere; this
                # guard presumes it may return None on failure — confirm.
                LOGGER.error(f"Backup({self.backup_type}): {file_path} 上传失败!请求失败")
                return False
            # Debugging aid, hard-coded for now: rewrite lazy-loaded image
            # attributes so they load through the images.weserv.nl proxy.
            before_str = 'data-src="'
            after_str = 'src="https://images.weserv.nl/?url='
            content = resp.text.replace(before_str, after_str)
            update_data = {
                "$set": {
                    **filter_dict,
                    **{"ts": int(time.time()), "content": content},
                }
            }
            db_update_res = mongodb_update_data(
                coll_conn=self.liuli_backup_coll,
                filter_dict=filter_dict,
                update_data=update_data,
            )
            if not db_update_res["status"]:
                # Leave the state unrecorded so a later run retries the write.
                LOGGER.error(
                    f"Backup({self.backup_type}): {file_path} 上传失败!{db_update_res['info']}"
                )
                return False
            LOGGER.info(f"Backup({self.backup_type}): {file_path} 上传成功!")

        # Record the backup state only once the content is known to exist.
        self.save_backup(
            doc_source=doc_source,
            doc_source_name=doc_source_name,
            doc_name=doc_name,
        )
        return True

    def delete(self, doc_source: str, doc_source_name: str, doc_name: str) -> bool:
        """Delete the stored backup of one article.

        Args:
            doc_source (str): Source the article was fetched from.
            doc_source_name (str): Name of the article source.
            doc_name (str): Article name.

        Returns:
            bool: True when the delete operation reported success, otherwise
            False.
        """
        file_path = f"{doc_source}/{doc_source_name}/{doc_name}.html"
        db_res = mongodb_delete_many_data(
            coll_conn=self.liuli_backup_coll,
            filter_dict={
                "doc_source": doc_source,
                "doc_source_name": doc_source_name,
                "doc_name": doc_name,
            },
        )
        if not db_res["status"]:
            # The original message interpolated an undefined name ``e`` here,
            # which raised NameError instead of logging the failure.
            # NOTE(review): assumes the error text is under "info" — confirm.
            LOGGER.error(
                f"Backup({self.backup_type}): {file_path} 删除失败!{db_res.get('info')}"
            )
            return False

        LOGGER.info(f"Backup({self.backup_type}): {file_path} 删除成功!")
        # Drop the recorded state so the article can be backed up again.
        self.delete_backup(
            doc_source=doc_source,
            doc_source_name=doc_source_name,
            doc_name=doc_name,
        )
        return True


if __name__ == "__main__":
    # Manual smoke test: back up a single known article using the default
    # (empty) backup configuration.
    backup_handler = MongoBackup({})
    backup_handler.backup(
        {
            "doc_source": "liuli_wechat",
            "doc_source_name": "老胡的储物柜",
            "doc_name": "打造一个干净且个性化的公众号阅读环境",
            "doc_link": "https://mp.weixin.qq.com/s/NKnTiLixjB9h8fSd7Gq8lw",
        }
    )
    # To remove the same article again, call:
    # backup_handler.delete(
    #     doc_source="liuli_wechat",
    #     doc_source_name="老胡的储物柜",
    #     doc_name="打造一个干净且个性化的公众号阅读环境",
    # )
4 changes: 2 additions & 2 deletions src/liuli_schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import schedule

from src.config.config import Config
from src.processor.rss import gen_rss_xml
from src.processor.rss import to_rss
from src.schedule_task.wechat_task import send_doc, update_ads_tag, update_wechat_doc
from src.utils import LOGGER

Expand All @@ -29,7 +29,7 @@ def schedule_task():
# 文章分发
send_doc()
# 生成 RSS
gen_rss_xml()
to_rss()


def main():
Expand Down
1 change: 1 addition & 0 deletions src/processor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
Description: 常用中间件
Changelog: all notable changes to this file will be documented
"""
from .rss import to_rss
from .utils import fetch_keyword_list, html_to_text_h2t, str_replace
2 changes: 1 addition & 1 deletion src/processor/rss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
Description: 将文章生成RSS
Changelog: all notable changes to this file will be documented
"""
from .doc2rss import gen_rss_xml
from .doc2rss import to_rss
10 changes: 5 additions & 5 deletions src/processor/rss/doc2rss.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
from src.utils import LOGGER


def gen_rss_xml(wechat_list: list = None):
"""为公众号文章生成RSS
def to_rss(wechat_list: list = None):
"""为文章生成RSS
Args:
wechat_list (list, optional): 公众号文章列表.
wechat_list (list, optional): 文章列表.
"""
wechat_list = wechat_list or Config.WECHAT_LIST
mongo_base = MongodbManager.get_mongo_base(mongodb_config=Config.MONGODB_CONFIG)
Expand Down Expand Up @@ -91,7 +91,7 @@ def gen_rss_xml(wechat_list: list = None):
rss_db_data = {
"doc_source_name": wechat_name,
"rss_data": rss_data,
"updated_at": time.time(),
"updated_at": int(time.time()),
}
rss_db_res = mongodb_update_data(
coll_conn=coll_rss_conn,
Expand All @@ -111,4 +111,4 @@ def gen_rss_xml(wechat_list: list = None):


if __name__ == "__main__":
gen_rss_xml()
to_rss()
2 changes: 1 addition & 1 deletion src/schedule_task/wechat_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def send_doc():
# 是否启用分发器
mongo_base = MongodbManager.get_mongo_base(mongodb_config=Config.MONGODB_CONFIG)
coll = mongo_base.get_collection(coll_name="liuli_articles")
cur_ts = time.time()
cur_ts = int(time.time())
filter_dict = {
# 时间范围,除第一次外后面其实可以去掉
"doc_ts": {"$gte": cur_ts - (2 * 24 * 60 * 60), "$lte": cur_ts},
Expand Down
2 changes: 1 addition & 1 deletion src/sender/bark_sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def send(self, send_data) -> bool:
{
"send_type": self.send_type,
"doc_id": doc_id,
"ts": time.time(),
"ts": int(time.time()),
}
)
# 下发成功
Expand Down
2 changes: 1 addition & 1 deletion src/sender/ding_sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def send(self, send_data) -> bool:
{
"send_type": self.send_type,
"doc_id": doc_id,
"ts": time.time(),
"ts": int(time.time()),
}
)
# 下发成功
Expand Down
2 changes: 1 addition & 1 deletion src/sender/tg_sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def send(self, send_data) -> bool:
{
"send_type": self.send_type,
"doc_id": doc_id,
"ts": time.time(),
"ts": int(time.time()),
}
)
# 下发成功
Expand Down
2 changes: 1 addition & 1 deletion src/sender/wecom_sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def send(self, send_data) -> bool:
{
"send_type": self.send_type,
"doc_id": doc_id,
"ts": time.time(),
"ts": int(time.time()),
}
)

Expand Down

0 comments on commit 131afa0

Please sign in to comment.