From 87670c1e9fe0a91aef86af920b23cf6a98cdf5d5 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Mon, 30 Dec 2019 11:02:18 +0100 Subject: [PATCH] feat: remove xss (#252) closes #251 --- README.md | 7 --- packages/metascraper-iframe/test/index.js | 4 +- .../__snapshots__/index.js.snap-shot | 2 +- .../__snapshots__/xss.js.snap-shot | 8 ---- packages/metascraper/package.json | 4 +- packages/metascraper/src/get-data.js | 24 +++++----- packages/metascraper/src/index.js | 9 +--- packages/metascraper/test/unit/interface.js | 30 ------------- packages/metascraper/test/unit/xss.js | 45 ------------------- 9 files changed, 15 insertions(+), 118 deletions(-) delete mode 100644 packages/metascraper/__snapshots__/xss.js.snap-shot delete mode 100644 packages/metascraper/test/unit/xss.js diff --git a/README.md b/README.md index e1d8a19da..0b176e347 100644 --- a/README.md +++ b/README.md @@ -249,13 +249,6 @@ Type: `String` The HTML markup for extracting the content. -##### escape - -Type: `Boolean`
-Default: `true` - -It sanetizes the ouptut to prevent xss attacks. - #### rules Type: `Array` diff --git a/packages/metascraper-iframe/test/index.js b/packages/metascraper-iframe/test/index.js index e299614d5..0c9d71648 100644 --- a/packages/metascraper-iframe/test/index.js +++ b/packages/metascraper-iframe/test/index.js @@ -44,7 +44,7 @@ describe('metascraper-iframe', () => { commonProviders.forEach(url => { it(url, async () => { const metascraper = createMetascraper([createMetascraperIframe()]) - const meta = await metascraper({ url, escape: false }) + const meta = await metascraper({ url }) should(meta.iframe).be.not.null() }) }) @@ -54,7 +54,7 @@ describe('metascraper-iframe', () => { const url = 'https://view.genial.ly/5dc53cfa759d2a0f4c7db5f4' const rules = [createMetascraperIframe()] const metascraper = createMetascraper(rules) - const meta = await metascraper({ url, html, escape: false }) + const meta = await metascraper({ url, html }) should(meta.iframe).be.not.null() }) }) diff --git a/packages/metascraper-soundcloud/__snapshots__/index.js.snap-shot b/packages/metascraper-soundcloud/__snapshots__/index.js.snap-shot index dc196566e..c7de48ffd 100644 --- a/packages/metascraper-soundcloud/__snapshots__/index.js.snap-shot +++ b/packages/metascraper-soundcloud/__snapshots__/index.js.snap-shot @@ -1,6 +1,6 @@ exports['song 1'] = { "author": "Beauty Brain", - "description": "Thanks for 5.000 likes on https://www.facebook.com/BeautyBrainMusic :D <3 <3 <3", + "description": "Thanks for 5.000 likes on https://www.facebook.com/BeautyBrainMusic :D <3 <3 <3", "date": "2014-01-27T16:19:55.000Z", "image": "https://i1.sndcdn.com/artworks-000069142357-nwttc6-t500x500.jpg", "lang": "en", diff --git a/packages/metascraper/__snapshots__/xss.js.snap-shot b/packages/metascraper/__snapshots__/xss.js.snap-shot deleted file mode 100644 index e82701a49..000000000 --- a/packages/metascraper/__snapshots__/xss.js.snap-shot +++ /dev/null @@ -1,8 +0,0 @@ -exports['escape when the value is not empty 1'] = { - "title": "<script src=‘http://127.0.0.1:8080/malware.js’></script>" -} - -exports['explicitily disable escape 1'] = { - "title": "" -} - diff --git a/packages/metascraper/package.json b/packages/metascraper/package.json index 3ebc39ed0..de3ba6ea2 100644 --- a/packages/metascraper/package.json +++ b/packages/metascraper/package.json @@ -61,9 +61,7 @@ "cheerio": "~1.0.0-rc.3", "cheerio-advanced-selectors": "~2.0.1", "lodash": "~4.17.15", - "map-values-deep": "~1.0.2", - "whoops": "~4.1.0", - "xss": "~1.0.6" + "whoops": "~4.1.0" }, "devDependencies": { "clear-module": "latest", diff --git a/packages/metascraper/src/get-data.js b/packages/metascraper/src/get-data.js index cf4df820c..05d198d4a 100644 --- a/packages/metascraper/src/get-data.js +++ b/packages/metascraper/src/get-data.js @@ -1,9 +1,7 @@ 'use strict' -const { isString, map, fromPairs } = require('lodash') -const mapValuesDeep = require('map-values-deep') +const { map, fromPairs } = require('lodash') const { has } = require('@metascraper/helpers') -const xss = require('xss') const truthyTest = () => true @@ -24,20 +22,18 @@ const getValue = async ({ htmlDom, url, rules, meta, ...props }) => { return value } -const escapeValue = (value, { escape }) => { - if (!has(value)) return null - if (!escape) return value - return mapValuesDeep(value, value => (isString(value) ? xss(value) : value)) -} +const normalizeValue = value => (has(value) ? value : null) -const getData = async ({ rules, htmlDom, url, escape, ...props }) => { +const getData = async ({ rules, htmlDom, url, ...props }) => { const data = await Promise.all( map(rules, async ([propName, innerRules]) => { - const value = escapeValue( - await getValue({ htmlDom, url, rules: innerRules, ...props }), - { escape } - ) - return [propName, value] + const value = await getValue({ + htmlDom, + url, + rules: innerRules, + ...props + }) + return [propName, normalizeValue(value)] }) ) diff --git a/packages/metascraper/src/index.js b/packages/metascraper/src/index.js index 665595cfb..1ef88a60d 100644 --- a/packages/metascraper/src/index.js +++ b/packages/metascraper/src/index.js @@ -11,13 +11,7 @@ const MetascraperError = whoops('MetascraperError') module.exports = rules => { const loadedRules = loadRules(rules) - return async ({ - url, - html, - rules: inlineRules, - escape = true, - ...props - } = {}) => { + return async ({ url, html, rules: inlineRules, ...props } = {}) => { if (!isUrl(url)) { throw new MetascraperError({ message: 'Need to provide a valid URL.', @@ -27,7 +21,6 @@ module.exports = rules => { return getData({ url, - escape, htmlDom: loadHTML(html), rules: mergeRules(inlineRules, loadedRules), ...props diff --git a/packages/metascraper/test/unit/interface.js b/packages/metascraper/test/unit/interface.js index bc66ab146..d0d4d6148 100644 --- a/packages/metascraper/test/unit/interface.js +++ b/packages/metascraper/test/unit/interface.js @@ -30,36 +30,6 @@ it('url is required', async () => { } }) -it('escape is enabled by default', async () => { - const html = ` - - - - - - metascraper - - - - - - - - - - - ` - const metascraper = createMetascraper([titleRules]) - const metadata = await metascraper({ - html, - url: 'http://127.0.0.1:8080' - }) - - should(metadata.title).be.equal( - '<script src=‘http://127.0.0.1:8080/malware.js’></script>' - ) -}) - it('load extra rules', async () => { const url = 'https://microlink.io' diff --git a/packages/metascraper/test/unit/xss.js b/packages/metascraper/test/unit/xss.js deleted file mode 100644 index f852a1801..000000000 --- a/packages/metascraper/test/unit/xss.js +++ /dev/null @@ -1,45 +0,0 @@ -'use strict' - -const metascraper = require('metascraper')([require('metascraper-title')()]) -const snapshot = require('snap-shot') - -const html = ` - - - - - - metascraper - - - - - - - - - - -` - -describe('xss', () => { - it('explicitily disable escape', async () => { - const metadata = await metascraper({ - html: html, - url: 'http://127.0.0.1:8080', - escape: false - }) - - snapshot(metadata) - }) - - it('escape when the value is not empty', async () => { - const metadata = await metascraper({ - html: html, - url: 'http://127.0.0.1:8080', - escape: true - }) - - snapshot(metadata) - }) -})