-
Notifications
You must be signed in to change notification settings - Fork 451
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f81dc63
commit e66ad8b
Showing
4 changed files
with
125 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/**
 * Extractor configuration for pages served from www.lemonde.fr.
 *
 * Every field below carries a `selectors` list. Plain strings are CSS
 * selectors; a nested two-element array pairs a CSS selector with a second
 * string (presumably the attribute to read from the matched element —
 * confirm against the parser that consumes this object).
 */
export const WwwLemondeFrExtractor = {
  domain: 'www.lemonde.fr',

  // Headline element of the article.
  title: { selectors: ['h1.article__title'] },

  // Byline element.
  author: { selectors: ['.author__name'] },

  // Publication time comes from a <meta> tag rather than visible markup.
  date_published: {
    selectors: [['meta[name="og:article:published_time"]', 'value']],
  },

  // Short description / standfirst element.
  dek: { selectors: ['.article__desc'] },

  // Lead image URL also comes from a <meta> tag.
  lead_image_url: {
    selectors: [['meta[name="og:image"]', 'value']],
  },

  content: {
    selectors: ['.article__content'],

    // No transforms and no clean-up selectors are configured for this site.
    transforms: {},
    clean: [],
  },
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import assert from 'assert'; | ||
import URL from 'url'; | ||
import cheerio from 'cheerio'; | ||
|
||
import Mercury from 'mercury'; | ||
import getExtractor from 'extractors/get-extractor'; | ||
import { excerptContent } from 'utils/text'; | ||
|
||
const fs = require('fs'); | ||
|
||
// Exercises the www.lemonde.fr extractor against an HTML fixture read from disk.
describe('WwwLemondeFrExtractor', () => {
  describe('initial test case', () => {
    let url;
    let parsing;

    // Start a single parse up front; `parsing` holds whatever Mercury.parse
    // returns and each test below awaits it.
    beforeAll(() => {
      url =
        'https://www.lemonde.fr/economie/article/2019/05/07/dans-ses-previsions-economiques-bruxelles-confirme-la-montee-des-perils_5459325_3234.html';
      const fixture = fs.readFileSync(
        './fixtures/www.lemonde.fr/1557235525251.html'
      );
      parsing = Mercury.parse(url, { html: fixture, fallback: false });
    });

    it('is selected properly', () => {
      // The extractor chosen for the URL must declare the URL's hostname.
      const { domain } = getExtractor(url);
      const { hostname } = URL.parse(url);

      assert.equal(domain, hostname);
    });

    it('returns the title', async () => {
      const parsed = await parsing;
      const expected =
        'Les sombres perspectives économiques de la Commission européenne';

      assert.equal(parsed.title, expected);
    });

    it('returns the author', async () => {
      const parsed = await parsing;

      assert.equal(parsed.author, 'Cécile Ducourtieux');
    });

    it('returns the date_published', async () => {
      const parsed = await parsing;

      assert.equal(parsed.date_published, '2019-05-07T11:59:43.000Z');
    });

    it('returns the dek', async () => {
      const parsed = await parsing;
      const expected =
        'Elle abaisse ses prévisions pour 2019, avec un PIB à 1,4 % pour l’ensemble de l’UE, et à 1,2 % pour la zone euro.';

      assert.equal(parsed.dek, expected);
    });

    it('returns the lead_image_url', async () => {
      const parsed = await parsing;
      const expected =
        'https://img.lemde.fr/2019/05/07/316/0/3824/1912/1440/720/60/0/d105b14_dfjDE1I-caggQrT4gvHf2nZP.jpg';

      assert.equal(parsed.lead_image_url, expected);
    });

    it('returns the content', async () => {
      const parsed = await parsing;

      // Load the extracted markup (or an empty document when content is
      // falsy) and take the text of the first matched element.
      const $ = cheerio.load(parsed.content || '');
      const leadingText = $('*')
        .first()
        .text();

      // excerptContent is called with 13, matching the "first13" intent of
      // comparing only the opening of the article body.
      const first13 = excerptContent(leadingText, 13);

      assert.equal(
        first13,
        'Les dirigeants européens qui doivent se réunir, jeudi 9 mai à Sibiu (Roumanie),'
      );
    });
  });
});