Skip to content

Commit

Permalink
fix: Adapt CNBC extractor to article redesign (#336)
Browse files Browse the repository at this point in the history
  • Loading branch information
toufic-m authored and adampash committed Mar 25, 2019
1 parent da9606a commit 3ed778b
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 2 deletions.
32 changes: 32 additions & 0 deletions fixtures/www.cnbc.com/1553160766510.html

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions src/extractors/custom/www.cnbc.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ export const WwwCnbcComExtractor = {
domain: 'www.cnbc.com',

title: {
selectors: ['h1.title'],
selectors: ['h1.title', 'h1.ArticleHeader-headline'],
},

author: {
Expand All @@ -18,7 +18,11 @@ export const WwwCnbcComExtractor = {
},

content: {
selectors: ['div#article_body.content', 'div.story'],
selectors: [
'div#article_body.content',
'div.story',
'div.ArticleBody-articleBody',
],

// Is there anything in the content you selected that needs to be transformed
// before it's consumable content? E.g., unusual lazy loaded images
Expand Down
50 changes: 50 additions & 0 deletions src/extractors/custom/www.cnbc.com/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,54 @@ describe('WwwCnbcComExtractor', () => {
);
});
});

describe('website redesign', () => {
  // Article URL handed to the parser alongside the saved fixture markup.
  const url =
    'https://www.cnbc.com/2019/03/18/heres-how-cybersecurity-vendors-drive-the-hacking-news-cycle.html';
  const fixturePath = './fixtures/www.cnbc.com/1553160766510.html';

  // Holds whatever Mercury.parse returns; each test awaits it before
  // inspecting individual fields.
  let result;

  beforeAll(() => {
    // Parse the fixture from disk with `fallback: false`.
    // NOTE(review): presumably this restricts extraction to the custom
    // extractor's own selectors -- confirm against the Mercury options.
    const html = fs.readFileSync(fixturePath);
    result = Mercury.parse(url, { html, fallback: false });
  });

  it('returns the title', async () => {
    // Relies on the title selectors declared in
    // ./src/extractors/custom/www.cnbc.com/index.js.
    const parsed = await result;
    const expectedTitle =
      'Desperate to get through to executives, some cybersecurity vendors are resorting to lies and blackmail';

    assert.equal(parsed.title, expectedTitle);
  });

  it('returns the content', async () => {
    // Relies on the content selectors (plus any clean/transform options)
    // declared in ./src/extractors/custom/www.cnbc.com/index.js.
    const parsed = await result;

    // An empty string stands in for missing content so cheerio can still
    // load it; the assertion below then fails on the text comparison.
    const $ = cheerio.load(parsed.content || '');
    const rootText = $('*')
      .first()
      .text();

    // NOTE(review): excerptContent(text, 13) appears to keep the leading
    // 13 words of the text -- confirm against the helper's definition.
    const first13 = excerptContent(rootText, 13);
    const expectedOpening =
      'The cybersecurity vendor marketplace is growing so crowded that some companies have been';

    assert.equal(first13, expectedOpening);
  });
});
});

0 comments on commit 3ed778b

Please sign in to comment.