Skip to content

Commit

Permalink
feat: Add extract method (#2750)
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 authored Dec 13, 2022
1 parent 91ab060 commit dec7cdc
Show file tree
Hide file tree
Showing 10 changed files with 388 additions and 7 deletions.
3 changes: 2 additions & 1 deletion .eslintrc.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
{
"plugins": ["jsdoc"],
"plugins": ["jsdoc", "expect-type"],
"extends": [
"eslint:recommended",
"plugin:jsdoc/recommended",
"plugin:n/recommended",
"plugin:unicorn/recommended",
"plugin:expect-type/recommended",
"prettier"
],
"env": { "node": true },
Expand Down
109 changes: 109 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
"benchmark": "^2.1.4",
"eslint": "^8.29.0",
"eslint-config-prettier": "^8.5.0",
"eslint-plugin-expect-type": "^0.2.1",
"eslint-plugin-jest": "^27.1.6",
"eslint-plugin-jsdoc": "^39.6.4",
"eslint-plugin-n": "^15.6.0",
Expand Down
120 changes: 120 additions & 0 deletions src/api/extract.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import * as fixtures from '../__fixtures__/fixtures.js';
import cheerio from '..';

/** Expected result shape when `red` and `sel` are each extracted as a single (first-match) value. */
interface RedSelObject {
red: string | undefined;
sel: string | undefined;
}

/** Expected result shape when `red` and `sel` are extracted with array-wrapped selectors (all matches). */
interface RedSelMultipleObject {
red: string[];
sel: string[];
}

// Exercises `extract` against the `eleven` fixture: plain selectors, descriptor
// objects, array-wrapped (all-matches) entries, custom `value` props/functions,
// and nested extraction maps.
// NOTE(review): the `// $ExpectType` comments look like eslint-plugin-expect-type
// assertions on the statement that follows them — keep each one directly
// attached to its statement; confirm against the lint configuration.
describe('$.extract', () => {
it('() : should extract values for selectors', () => {
// `$` is only used to wrap elements inside the custom `value` callbacks below;
// `$root` is the root of a second, separate load of the same fixture.
const $ = cheerio.load(fixtures.eleven);
const $root = cheerio.load(fixtures.eleven).root();
// An empty object should lead to an empty extraction.

// $ExpectType ExtractedMap<{}>
const emptyExtract = $root.extract({});
expect(emptyExtract).toStrictEqual({});
// Non-existent values should be undefined.

// $ExpectType ExtractedMap<{ foo: string; }>
const simpleExtract = $root.extract({ foo: 'bar' });
expect(simpleExtract).toStrictEqual({ foo: undefined });

// Existing values should be extracted.
expect<{ red: string | undefined }>(
$root.extract({ red: '.red' })
).toStrictEqual({
red: 'Four',
});
expect<RedSelObject>(
$root.extract({ red: '.red', sel: '.sel' })
).toStrictEqual({
red: 'Four',
sel: 'Three',
});
// Descriptors for extractions should be supported
expect<RedSelObject>(
$root.extract({
red: { selector: '.red' },
sel: { selector: '.sel' },
})
).toStrictEqual({ red: 'Four', sel: 'Three' });
// Should support extraction of multiple values.

// $ExpectType ExtractedMap<{ red: [string]; sel: [string]; }>
const multipleExtract = $root.extract({
red: ['.red'],
sel: ['.sel'],
});
expect<RedSelMultipleObject>(multipleExtract).toStrictEqual({
red: ['Four', 'Five', 'Nine'],
sel: ['Three', 'Nine', 'Eleven'],
});
// Should support custom `prop`s.
expect<RedSelObject>(
$root.extract({
red: { selector: '.red', value: 'outerHTML' },
sel: { selector: '.sel', value: 'tagName' },
})
).toStrictEqual({ red: '<li class="red">Four</li>', sel: 'LI' });
// Should support custom `prop`s for multiple values.
expect<{ red: string[] }>(
$root.extract({
red: [{ selector: '.red', value: 'outerHTML' }],
})
).toStrictEqual({
red: [
'<li class="red">Four</li>',
'<li class="red">Five</li>',
'<li class="red sel">Nine</li>',
],
});
// Should support custom extraction functions.
// The callback receives the matched element and the key being extracted.
expect<{ red: string | undefined }>(
$root.extract({
red: {
selector: '.red',
value: (el, key) => `${key}=${$(el).text()}`,
},
})
).toStrictEqual({ red: 'red=Four' });
// Should support custom extraction functions for multiple values.
expect<{ red: string[] }>(
$root.extract({
red: [
{
selector: '.red',
value: (el, key) => `${key}=${$(el).text()}`,
},
],
})
).toStrictEqual({ red: ['red=Four', 'red=Five', 'red=Nine'] });
// Should support extraction objects: `value` is itself a map whose selectors
// are expected to resolve within the element matched by `selector`.

// $ExpectType ExtractedMap<{ section: { selector: string; value: { red: string; sel: string; }; }; }>
const subExtractObject = $root.extract({
section: {
selector: 'ul:nth(1)',
value: {
red: '.red',
sel: '.blue',
},
},
});

expect<{ section: RedSelObject | undefined }>(
subExtractObject
).toStrictEqual({
section: {
red: 'Five',
sel: 'Seven',
},
});
});
});
92 changes: 92 additions & 0 deletions src/api/extract.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import type { AnyNode, Element } from 'domhandler';
import type { Cheerio } from '../cheerio.js';
import type { prop } from './attributes.js';

/**
 * Custom extraction callback used as a descriptor's `value`.
 *
 * @param el - The matched element to extract a value from.
 * @param key - The key of the map entry currently being extracted.
 * @param obj - The result object as built so far; keys processed earlier in
 *   the same `extract` call are already populated.
 * @returns The value to store under `key`.
 */
type ExtractDescriptorFn = (
el: Element,
key: string,
// TODO: This could be typed with ExtractedMap
obj: Record<string, unknown>
) => unknown;

/** Describes how a single map entry is located and what is read from it. */
interface ExtractDescriptor {
/** Selector used to find the element(s) to extract from. */
selector: string;
/**
 * What to read from a matched element: a property name passed to `prop`, a
 * custom callback, or a nested map extracted from the matched element.
 * Treated as `'textContent'` when omitted.
 */
value?: string | ExtractDescriptorFn | ExtractMap;
}

/**
 * A single map entry: a bare selector string, a descriptor, or either of those
 * wrapped in a one-element array to request every match instead of the first.
 */
type ExtractValue = string | ExtractDescriptor | [string | ExtractDescriptor];

/** Input to `extract`: maps each result key to what should be extracted for it. */
export interface ExtractMap {
[key: string]: ExtractValue;
}

/**
 * Result type for a single map entry `V`. Branches are checked in this order:
 *
 * - one-element array: an array of the non-nullable result of the wrapped entry;
 * - bare selector string: `string | undefined`;
 * - descriptor whose `value` is a nested map: the nested `ExtractedMap`, or `undefined`;
 * - descriptor whose `value` is a callback: the callback's return type, or `undefined`;
 * - any other descriptor: the return type of `prop`, or `undefined`.
 *
 * `M` is only threaded through the recursive call and does not otherwise
 * influence the result.
 */
type ExtractedValue<V extends ExtractValue, M extends ExtractMap> = V extends [
string | ExtractDescriptor
]
? NonNullable<ExtractedValue<V[0], M>>[]
: V extends string
? string | undefined
: V extends ExtractDescriptor
? V['value'] extends ExtractMap
? ExtractedMap<V['value']> | undefined
: V['value'] extends ExtractDescriptorFn
? ReturnType<V['value']> | undefined
: ReturnType<typeof prop> | undefined
: never;

/** Result type of `extract`: the same keys as `M`, each mapped to its extracted value type. */
export type ExtractedMap<M extends ExtractMap> = {
[key in keyof M]: ExtractedValue<M[key], M>;
};

/**
 * Normalizes a map entry into a descriptor with both fields populated.
 *
 * @param descr - A bare selector string, or a descriptor whose `value` may be
 *   omitted.
 * @returns A descriptor carrying the selector and the value to read; the value
 *   falls back to `'textContent'` when none was given.
 */
function getExtractDescr(
  descr: string | ExtractDescriptor
): Required<ExtractDescriptor> {
  // Descriptor objects pass through, with only a missing `value` filled in.
  if (typeof descr !== 'string') {
    const { selector, value } = descr;
    return { selector, value: value ?? 'textContent' };
  }

  // A bare string is shorthand for "text content of this selector".
  return { selector: descr, value: 'textContent' };
}

/**
 * Pulls several values out of the current selection in one call and returns
 * them as a plain object.
 *
 * Each entry of `map` is either a selector, a descriptor (`selector` plus an
 * optional `value`), or one of those wrapped in a one-element array. A
 * non-array entry yields the value for the first match, or `undefined` when
 * nothing matches; an array entry yields the values for all matches.
 *
 * @param map - An object whose keys become the keys of the result, and whose
 *   values describe what to extract for each key.
 * @returns An object holding the extracted value for every key of `map`.
 */
export function extract<M extends ExtractMap, T extends AnyNode>(
  this: Cheerio<T>,
  map: M
): ExtractedMap<M> {
  const result: Record<string, unknown> = {};

  for (const key in map) {
    const entry = map[key];
    // A one-element array means "collect every match" rather than the first.
    const collectAll = Array.isArray(entry);
    const { selector, value } = getExtractDescr(collectAll ? entry[0] : entry);

    // Turn the descriptor's `value` into a uniform per-element callback.
    let readValue: ExtractDescriptorFn;
    if (typeof value === 'function') {
      readValue = value;
    } else if (typeof value === 'string') {
      readValue = (el: Element) => this._make(el).prop(value);
    } else {
      // Nested map: run the extraction again on the matched element.
      readValue = (el: Element) => this._make(el).extract(value);
    }

    if (!collectAll) {
      const $first = this._findBySelector(selector, 1);
      result[key] =
        $first.length > 0 ? readValue($first[0], key, result) : undefined;
      continue;
    }

    const $matches = this._findBySelector(selector, Number.POSITIVE_INFINITY);
    result[key] = $matches.map((_, el) => readValue(el, key, result)).get();
  }

  return result as ExtractedMap<M>;
}
Loading

0 comments on commit dec7cdc

Please sign in to comment.