Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SingleFile support for saving transcript whole local #4

Merged
merged 6 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
### 安装

```
pip3 install -r requirements.txt
pip3 install .
```

### 运行
Expand All @@ -19,7 +19,7 @@ pip3 install -r requirements.txt

执行以下命令运行:
```
python3 vistopia/main.py --token [token] [subcommand]
python3 -m vistopia.main --token [token] [subcommand]
```

子命令目前支持:
Expand All @@ -29,6 +29,19 @@ python3 vistopia/main.py --token [token] [subcommand]
- `save-show`: 保存节目至本地,并添加封面和 ID3 信息
- `save-transcript`: 保存节目文稿至本地

### 使用 SingleFile 将文稿网页保存为纯本地文件

1. 下载 [SingleFile CLI](https://github.com/gildas-lormeau/single-file-cli/releases) 命令行程序
2. 浏览器登录网页版看理想,使用 cookie 导出工具(如:EditThisCookie)导出 json 格式的 cookies,并保存为本地文件
3. 运行 `save-transcript` 命令,并传入 `--single-file-exec-path` 和 `--cookie-file-path` 参数

命令格式:
```
python3 -m vistopia.main --token [token] save-transcript --id [id] \
--single-file-exec-path=/path/to/local/single-file \
--cookie-file-path=/path/to/vistopia.cookie
```

## 不足

目前不支持 API 签名。
Expand Down
33 changes: 25 additions & 8 deletions vistopia/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import logging
from logging import getLogger
from typing import Optional

import click
from tabulate import tabulate
Expand All @@ -14,8 +15,8 @@

class Context:
def __init__(self):
self.token: str = None
self.visitor: Visitor = None
self.token: Optional[str] = None
self.visitor: Optional[Visitor] = None


# def _print_table(list):
Expand Down Expand Up @@ -115,7 +116,7 @@ def show_content(ctx, **argv):
def save_show(ctx, **argv):
content_id = argv.pop("id")
episode_id = argv.pop("episode_id", None)
episodes = set(range_expand(episode_id) if episode_id else None)
episodes = set(range_expand(episode_id) if episode_id else [])

logger.debug(json.dumps(
ctx.obj.visitor.get_catalog(content_id), indent=2, ensure_ascii=False))
Expand All @@ -130,19 +131,35 @@ def save_show(ctx, **argv):
@main.command("save-transcript")
@click.option("--id", type=click.INT, required=True)
@click.option("--episode-id", help="Episode ID in the form '1-3,4,8'")
@click.option("--single-file-exec-path", type=click.Path(),
help="Path to the single-file CLI tool")
@click.option("--cookie-file-path", type=click.Path(),
help=(
"Path to the browser cookie file "
"(only needed in single-file mode)"))
@click.pass_context
def save_transcript(ctx, **argv):
content_id = argv.pop("id")
episode_id = argv.pop("episode_id", None)
episodes = set(range_expand(episode_id) if episode_id else None)
single_file_exec_path = argv.pop("single_file_exec_path")
cookie_file_path = argv.pop("cookie_file_path")
episodes = set(range_expand(episode_id) if episode_id else [])

logger.debug(json.dumps(
ctx.obj.visitor.get_catalog(content_id), indent=2, ensure_ascii=False))

ctx.obj.visitor.save_transcript(
content_id,
episodes=episodes
)
if single_file_exec_path and cookie_file_path:
ctx.obj.visitor.save_transcript_with_single_file(
content_id,
episodes=episodes,
single_file_exec_path=single_file_exec_path,
cookie_file_path=cookie_file_path
)
else:
ctx.obj.visitor.save_transcript(
content_id,
episodes=episodes
)


if __name__ == "__main__":
Expand Down
69 changes: 56 additions & 13 deletions vistopia/visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ def save_show(self, id: int,
for part in catalog["catalog"]:
for article in part["part"]:

if episodes is not None and \
int(article["sort_number"]) not in episodes:
if episodes and \
int(article["sort_number"]) not in episodes:
continue

fname = show_dir / "{}.mp3".format(article["title"])
Expand All @@ -95,8 +95,8 @@ def save_transcript(self, id: int, episodes: Optional[set] = None):
for part in catalog["catalog"]:
for article in part["part"]:

if episodes is not None and \
int(article["sort_number"]) not in episodes:
if episodes and \
int(article["sort_number"]) not in episodes:
continue

fname = show_dir / "{}.html".format(article["title"])
Expand All @@ -114,19 +114,62 @@ def save_transcript(self, id: int, episodes: Optional[set] = None):
with open(fname, "w") as f:
f.write(content)

def save_transcript_with_single_file(self, id: int,
                                     episodes: Optional[set] = None,
                                     single_file_exec_path: str = "",
                                     cookie_file_path: str = ""):
    """Save each article's transcript page as a standalone HTML file.

    Invokes the external single-file CLI once per article in the catalog
    and writes the result to ``<catalog title>/<article title>.html``.
    Articles whose output file already exists are skipped. A non-zero
    exit status from single-file is reported on stdout and the loop
    continues with the next article.

    Args:
        id: Content ID passed to ``self.get_catalog``.
        episodes: ``sort_number`` values to save; ``None`` or an empty
            set selects every article.
        single_file_exec_path: Path to the single-file executable.
        cookie_file_path: Cookie file handed to single-file through
            ``--browser-cookies-file``.

    Raises:
        OSError: If the single-file executable cannot be launched at all
            (for example, the path does not exist); ``subprocess.run``
            raises this before any exit status is available.
    """
    import subprocess
    from pathlib import Path

    logger.debug(f"save_transcript_with_single_file id {id}")

    catalog = self.get_catalog(id)
    show_dir = Path(catalog["title"])
    show_dir.mkdir(exist_ok=True)

    for part in catalog["catalog"]:
        for article in part["part"]:
            # An empty/None selection means "no filter": save everything.
            if episodes and int(article["sort_number"]) not in episodes:
                continue

            fname = show_dir / "{}.html".format(article["title"])
            if fname.exists():
                # Never overwrite a transcript saved by an earlier run.
                continue

            # format() rather than "+": string concatenation raises
            # TypeError if the API returns a numeric article_id.
            article_url = "https://www.vistopia.com.cn/article/{}".format(
                article["article_id"])

            # Argument list (no shell), so paths and titles containing
            # spaces or shell metacharacters are passed through verbatim.
            command = [
                single_file_exec_path,
                article_url,
                str(fname),
                "--browser-cookies-file=" + cookie_file_path,
            ]
            logger.debug(f"singlefile command {command}")

            try:
                subprocess.run(command, check=True)
            except subprocess.CalledProcessError as e:
                print(f"Failed to fetch page using single-file: {e}")
            else:
                print(f"Successfully fetched and saved to {fname}")

@staticmethod
def retag(fname, article_info, catalog_info, series_info):

from mutagen.easyid3 import EasyID3

track = EasyID3(fname)
track["title"] = article_info["title"]
track["album"] = series_info["title"]
track["artist"] = series_info["author"]
track["tracknumber"] = article_info["sort_number"]
# track["tracksort"] = article_info["sort_number"]
track["website"] = article_info["content_url"]
track.save()
from mutagen.id3 import ID3NoHeaderError

try:
track = EasyID3(fname)
except ID3NoHeaderError:
# No ID3 tag found, creating a new ID3 tag
track = EasyID3()

track['title'] = article_info['title']
track['album'] = series_info['title']
track['artist'] = series_info['author']
track['tracknumber'] = str(article_info['sort_number'])
track['website'] = article_info['content_url']

try:
track.save(fname)
except Exception as e:
print(f"Error saving ID3 tags: {e}")

@staticmethod
def retag_cover(fname, article_info, catalog_info, series_info):
Expand Down
Loading