Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: detect platforms #52

Merged
merged 2 commits into from
Dec 6, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/extractors/detect-by-html.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import {
MediumExtractor,
BloggerExtractor,
} from './custom/';

// Lookup table from a CSS selector to the custom extractor that should be
// used when that selector matches something in the page. detectByHtml walks
// the keys in the order they are written here and stops at the first match,
// so earlier entries take precedence over later ones.
const Detectors = {
'meta[name="al:ios:app_name"][value="Medium"]': MediumExtractor,
'meta[name="generator"][value="blogger"]': BloggerExtractor,
};

/**
 * Pick a custom extractor by probing the page for platform-specific markup.
 *
 * Each key of `Detectors` is a CSS selector; the extractor registered for the
 * first selector that matches at least one element is returned.
 *
 * @param {Function} $ - Query function for the loaded document; it is called
 *   with a selector string and the result's `length` is inspected.
 * @returns {Object|undefined} The matching extractor, or `undefined` when no
 *   selector matches or when `$` is not a usable query function.
 */
export default function detectByHtml($) {
  // `$` is not always a query function: callers may omit it, or may hand
  // over a failure object produced while loading the page. Calling it would
  // throw a TypeError, so report "nothing detected" and let the caller fall
  // back to its default extractor.
  if (typeof $ !== 'function') {
    return undefined;
  }

  const selector = Reflect.ownKeys(Detectors).find(s => $(s).length > 0);

  // When nothing matched, `selector` is undefined and so is the lookup.
  return Detectors[selector];
}
24 changes: 24 additions & 0 deletions src/extractors/detect-by-html.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import assert from 'assert';
import cheerio from 'cheerio';

import detectByHtml from './detect-by-html';

describe('detectByHtml', () => {
  // Parse an HTML fragment and run the detector against the resulting document.
  const detectIn = markup => detectByHtml(cheerio.load(markup));

  it('detects a medium post from the html', () => {
    const extractor = detectIn(
      '<head><meta name="al:ios:app_name" value="Medium" /></head>'
    );

    assert.equal(extractor.domain, 'medium.com');
  });

  it('returns nothing if no match is found', () => {
    // Loose equality on purpose: a miss yields undefined, which == null.
    assert.equal(detectIn('<div></div>'), null);
  });
});
6 changes: 4 additions & 2 deletions src/extractors/get-extractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ import URL from 'url';

import Extractors from './all';
import GenericExtractor from './generic';
import detectByHtml from './detect-by-html';

export default function getExtractor(url, parsedUrl) {
export default function getExtractor(url, parsedUrl, $) {
parsedUrl = parsedUrl || URL.parse(url);
const { hostname } = parsedUrl;
const baseDomain = hostname.split('.').slice(-2).join('.');

return Extractors[hostname] || Extractors[baseDomain] || GenericExtractor;
return Extractors[hostname] || Extractors[baseDomain] ||
detectByHtml($) || GenericExtractor;
}
13 changes: 12 additions & 1 deletion src/extractors/get-extractor.test.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import assert from 'assert';
import cheerio from 'cheerio';

import getExtractor from './get-extractor';

describe('getExtractor(url)', () => {
it('returns GenericExtractor if no custom extractor is found', () => {
const extractor = getExtractor('http://example.com');
const extractor = getExtractor('http://example.com', null, cheerio.load('<div />'));

assert.equal(extractor.domain, '*');
});
Expand All @@ -26,4 +27,14 @@ describe('getExtractor(url)', () => {

assert.equal(extractor.domain, 'wikipedia.org');
});

it('returns a custom extractor based on detectors', () => {
const html =
'<head><meta name="al:ios:app_name" value="Medium" /></head>';

const $ = cheerio.load(html);
const extractor = getExtractor('http://foo.com', null, $);

assert.equal(extractor.domain, 'medium.com');
});
});
6 changes: 3 additions & 3 deletions src/mercury.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ const Mercury = {
return Errors.badUrl;
}

const Extractor = getExtractor(url, parsedUrl);
// console.log(`Using extractor for ${Extractor.domain}`);

const $ = await Resource.create(url, html, parsedUrl);

const Extractor = getExtractor(url, parsedUrl, $);
// console.log(`Using extractor for ${Extractor.domain}`);

// If we found an error creating the resource, return that error
if ($.failed) {
return $;
Expand Down