Skip to content

Commit

Permalink
fix(route): CCDI.gov.cn route broken (#10879)
Browse files Browse the repository at this point in the history
* fix ccdi site

* fix doc

* upgrade rate limit countermeasure and sort the router alphabetically

* set anticrawler flag in docs
  • Loading branch information
bigfei authored Sep 24, 2022
1 parent e48187f commit de48ec5
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 56 deletions.
14 changes: 12 additions & 2 deletions docs/government.md
Original file line number Diff line number Diff line change
Expand Up @@ -1337,6 +1337,16 @@ pageClass: routes

## 中央纪委国家监委

### 审查调查
### 要闻

<Route author="LogicJake" example="/ccdi/scdc" path="/ccdi/scdc"/>
<Route author="bigfei" example="/gov/ccdi/yaowenn" path="/gov/ccdi/:path+" :paramsDesc="['路径,默认为 要闻']" anticrawler="1">

::: tip 提示

路径处填写对应页面 URL 中 `https://www.ccdi.gov.cn/` 后的字段。下面是一个例子。

若订阅 [审查调查 > 中管干部 > 执纪审查](https://www.ccdi.gov.cn/scdcn/zggb/zjsc/)，则将对应页面 URL <https://www.ccdi.gov.cn/scdcn/zggb/zjsc/> 中 `https://www.ccdi.gov.cn/` 后的字段 `scdcn/zggb/zjsc` 作为路径填入。此时路由为 [`/gov/ccdi/scdcn/zggb/zjsc`](https://rsshub.app/gov/ccdi/scdcn/zggb/zjsc)

:::

</Route>
3 changes: 0 additions & 3 deletions lib/router.js
Original file line number Diff line number Diff line change
Expand Up @@ -1801,9 +1801,6 @@ router.get('/cninfo/announcement/:column/:code/:orgId/:category?/:search?', lazy
// 金十数据
router.get('/jinshi/index', lazyloadRouteHandler('./routes/jinshi/index'));

// 中央纪委国家监委网站
router.get('/ccdi/scdc', lazyloadRouteHandler('./routes/ccdi/scdc'));

// 中华人民共和国农业农村部
router.get('/gov/moa/sjzxfb', lazyloadRouteHandler('./routes/gov/moa/sjzxfb'));
router.get('/gov/moa/:suburl(.*)', lazyloadRouteHandler('./routes/gov/moa/moa'));
Expand Down
51 changes: 0 additions & 51 deletions lib/routes/ccdi/scdc.js

This file was deleted.

31 changes: 31 additions & 0 deletions lib/v2/gov/ccdi/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/* eslint-disable no-await-in-loop */
const { rootUrl, parseNewsList, parseArticle } = require('./utils');

/**
 * Pick a pseudo-random integer from the half-open range [min, max).
 *
 * `min` is rounded up and `max` is rounded down before sampling, so the
 * result is always an integer: the lower bound is inclusive, the upper
 * bound is exclusive.
 *
 * @param {number} min - Lower bound (inclusive after rounding up).
 * @param {number} max - Upper bound (exclusive after rounding down).
 * @returns {number} An integer n with ceil(min) <= n < floor(max).
 */
const getRandomInt = (min, max) => {
    const lower = Math.ceil(min);
    const upper = Math.floor(max);
    const span = upper - lower;
    return Math.floor(Math.random() * span + lower);
};

module.exports = async (ctx) => {
const defaultPath = '/yaowenn/';

let pathname = ctx.path.replace(/(^\/ccdi|\/$)/g, '');
pathname = pathname === '' ? defaultPath : pathname.endsWith('/') ? pathname : pathname + '/';
const currentUrl = `${rootUrl}${pathname}`;

const { list, title } = await parseNewsList(currentUrl, '.list_news_dl li', ctx);
const items = [];

for (const item of list) {
items.push(await parseArticle(item, ctx));
// sleep randomly for anti rate limit on ccdi site
await new Promise((r) => setTimeout(r, getRandomInt(1000, 2500)));
}

ctx.state.data = {
title,
link: currentUrl,
item: items,
};
};
57 changes: 57 additions & 0 deletions lib/v2/gov/ccdi/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
const got = require('@/utils/got');
const timezone = require('@/utils/timezone');

const { CookieJar } = require('tough-cookie');
// Single cookie jar shared by every request this module makes (listing and
// article fetches both pass it to `got`).
const cookieJar = new CookieJar();

// Site name; used as the fallback article author and substituted into the feed title.
const owner = '中央纪委国家监委网站';
// Origin of the site; exported so the route handler can build listing URLs.
const rootUrl = 'https://www.ccdi.gov.cn';

/**
 * Fetch a listing page and extract its entries plus a feed title.
 *
 * @param {string} url - Absolute URL of the listing page; also used as the
 *   base when resolving each entry's (possibly relative) link.
 * @param {string} selector - CSS selector that matches one element per entry.
 * @param {object} ctx - Request context; `ctx.query.limit` optionally caps the
 *   number of entries (defaults to 8 when absent or not a number).
 * @returns {Promise<{list: Array<{title: string, link: string, pubDate: *}>, title: string}>}
 *   The extracted entries (pubDate is whatever `parseDate` returns) and the feed title.
 */
const parseNewsList = async (url, selector, ctx) => {
    const response = await got(url, { cookieJar });
    const $ = cheerio.load(response.data);

    // Always parse in base 10, and fall back to the default when the query
    // value is missing or non-numeric: slice(0, NaN) would otherwise silently
    // produce an empty feed.
    const requestedLimit = Number.parseInt(ctx.query.limit, 10);
    const limit = Number.isNaN(requestedLimit) ? 8 : requestedLimit;

    const list = $(selector)
        .slice(0, limit)
        .toArray()
        .map((element) => {
            const entry = $(element);
            const anchor = entry.find('a').first();
            return {
                title: anchor.text().trim(),
                link: new URL(anchor.attr('href'), url).href,
                pubDate: parseDate(entry.find('span').text(), 'YYYY-MM-DD'),
            };
        });

    // Replaces the first run of non-newline characters in the location text
    // with the site name.
    const title = $('.other_Location')
        .text()
        .replace(/(.+)/, owner);

    return { list, title };
};

/**
 * Fetch one article page and enrich its list entry with author, publish
 * date and HTML description. The result is memoised through
 * `ctx.cache.tryGet`, keyed by the article link.
 *
 * @param {{title: string, link: string, pubDate: *}} item - Entry produced by
 *   `parseNewsList`; it is mutated in place and returned.
 * @param {object} ctx - Request context providing `ctx.cache.tryGet`.
 * @returns {Promise<object>} Whatever `ctx.cache.tryGet` resolves to for this
 *   link (the enriched item when the loader runs).
 */
const parseArticle = async (item, ctx) =>
    ctx.cache.tryGet(item.link, async () => {
        const response = await got(item.link, { cookieJar });
        const $ = cheerio.load(response.data);

        const meta = $('.daty').text().trim();

        // NOTE(review): both lookups below use the same catch-all pattern, so
        // author and date are derived from the same text — confirm the
        // patterns are what was intended.
        // `||` rather than `??`: the pattern also matches an empty string, so
        // an empty capture must fall back as well, otherwise the defaults are
        // unreachable.
        const authorText = meta.match(/(.*)/s)?.[1].trim();
        item.author = authorText || owner;

        const dateText = meta.match(/(.*)/s)?.[1].trim();
        // presumably shifts the parsed date to UTC+8 — verify against the timezone helper
        item.pubDate = timezone(parseDate(dateText || item.pubDate), +8);

        // Change the img src from relative to absolute for a better compatibility
        const content = $('.content');
        content.find('img').each((_, el) => {
            const image = $(el);
            const src = image.attr('src');
            // Skip images without a src: new URL(undefined, base) would
            // otherwise yield a bogus ".../undefined" address.
            if (src) {
                image.attr('src', new URL(src, item.link).href);
            }
            // oldsrc is causing freshrss imageproxy not to work correctly
            image.removeAttr('oldsrc').removeAttr('alt');
        });

        item.description = content.html();
        return item;
    });

// Public surface of this helper module: the site origin plus the listing and
// article parsers used by the route handler.
module.exports = {
rootUrl,
parseNewsList,
parseArticle,
};
1 change: 1 addition & 0 deletions lib/v2/gov/maintainer.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module.exports = {
// ministry
'/ccdi/:path+': ['bigfei'],
'/cmse/xwzx/zhxw': ['nczitzk'],
'/cmse/xwzx/yzjz': ['nczitzk'],
'/cmse/gfgg': ['nczitzk'],
Expand Down
11 changes: 11 additions & 0 deletions lib/v2/gov/radar.js
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,17 @@ module.exports = {
},
],
},
// Radar entry for ccdi.gov.cn.
'ccdi.gov.cn': {
_name: '中央纪委国家监委',
// presumably keyed by subdomain (www.ccdi.gov.cn) — verify against the other entries
www: [
{
title: '要闻',
docs: 'https://docs.rsshub.app/government.html#zhong-yang-ji-wei-guo-jia-jian-wei-yao-wen',
// Wildcard source: applies to every path on the site.
source: ['/*'],
// Append everything that follows "ccdi.gov.cn/" in the page URL to /gov/ccdi/.
target: (params, url) => `/gov/ccdi/${new URL(url).href.match(/ccdi\.gov\.cn\/(.*)/)[1]}`,
},
],
},
'cmse.gov.cn': {
_name: '中国载人航天',
www: [
Expand Down
1 change: 1 addition & 0 deletions lib/v2/gov/router.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module.exports = function (router) {
// ministry
router.get(/ccdi(\/[\w/-]+)?/, require('./ccdi'));
router.get('/cmse/fxrw', require('./cmse/fxrw'));
router.get(/cmse(\/[\w/-]+)?/, require('./cmse'));
router.get(/cnnic(\/[\w/-]+)?/, require('./cnnic'));
Expand Down

0 comments on commit de48ec5

Please sign in to comment.