From 0c5834ed635833ef49fe10cc888025a5debebe21 Mon Sep 17 00:00:00 2001 From: Luke Karrys Date: Tue, 1 Nov 2022 09:48:41 -0700 Subject: [PATCH] fix: use hosted-git-info to parse registry urls (#5758) Previously this was using `new URL` which would fail on some urls that `hosted-git-info` is able to parse. But if we still get a url that can't be parsed, we now set it to be removed from the tree instead of erroring. Fixes: #5278 --- DEPENDENCIES.md | 2 + node_modules/hosted-git-info/lib/index.js | 11 ++- node_modules/hosted-git-info/lib/parse-url.js | 5 +- node_modules/hosted-git-info/lib/protocols.js | 9 --- node_modules/hosted-git-info/package.json | 2 +- package-lock.json | 9 ++- package.json | 2 +- workspaces/arborist/lib/arborist/reify.js | 26 ++++-- workspaces/arborist/package.json | 1 + workspaces/arborist/test/arborist/reify.js | 79 ++++++++++++++++++- 10 files changed, 119 insertions(+), 27 deletions(-) delete mode 100644 node_modules/hosted-git-info/lib/protocols.js diff --git a/DEPENDENCIES.md b/DEPENDENCIES.md index 861a2406fb250..97808f4a458be 100644 --- a/DEPENDENCIES.md +++ b/DEPENDENCIES.md @@ -157,6 +157,7 @@ graph LR; npm-registry-fetch-->proc-log; npmcli-arborist-->bin-links; npmcli-arborist-->cacache; + npmcli-arborist-->hosted-git-info; npmcli-arborist-->json-parse-even-better-errors; npmcli-arborist-->minify-registry-metadata; npmcli-arborist-->nopt; @@ -790,6 +791,7 @@ graph LR; npmcli-arborist-->cacache; npmcli-arborist-->chalk; npmcli-arborist-->common-ancestor-path; + npmcli-arborist-->hosted-git-info; npmcli-arborist-->isaacs-string-locale-compare["@isaacs/string-locale-compare"]; npmcli-arborist-->json-parse-even-better-errors; npmcli-arborist-->json-stringify-nice; diff --git a/node_modules/hosted-git-info/lib/index.js b/node_modules/hosted-git-info/lib/index.js index 65d3d5f37cb53..a7339c217e9a3 100644 --- a/node_modules/hosted-git-info/lib/index.js +++ b/node_modules/hosted-git-info/lib/index.js @@ -4,7 +4,6 @@ const LRU = require('lru-cache') const hosts = require('./hosts.js') const fromUrl = require('./from-url.js') const parseUrl = require('./parse-url.js') -const getProtocols = require('./protocols.js') const cache = new LRU({ max: 1000 }) @@ -22,7 +21,15 @@ class GitHost { } static #gitHosts = { byShortcut: {}, byDomain: {} } - static #protocols = getProtocols() + static #protocols = { + 'git+ssh:': { name: 'sshurl' }, + 'ssh:': { name: 'sshurl' }, + 'git+https:': { name: 'https', auth: true }, + 'git:': { auth: true }, + 'http:': { auth: true }, + 'https:': { auth: true }, + 'git+http:': { auth: true }, + } static addHost (name, host) { GitHost.#gitHosts[name] = host diff --git a/node_modules/hosted-git-info/lib/parse-url.js b/node_modules/hosted-git-info/lib/parse-url.js index 5f5ac4d37f383..7d5489c008ab4 100644 --- a/node_modules/hosted-git-info/lib/parse-url.js +++ b/node_modules/hosted-git-info/lib/parse-url.js @@ -1,5 +1,4 @@ const url = require('url') -const getProtocols = require('./protocols.js') const lastIndexOfBefore = (str, char, beforeChar) => { const startPosition = str.indexOf(beforeChar) @@ -73,7 +72,7 @@ const correctUrl = (giturl) => { return giturl } -module.exports = (giturl, protocols = getProtocols()) => { - const withProtocol = correctProtocol(giturl, protocols) +module.exports = (giturl, protocols) => { + const withProtocol = protocols ? correctProtocol(giturl, protocols) : giturl return safeUrl(withProtocol) || safeUrl(correctUrl(withProtocol)) } diff --git a/node_modules/hosted-git-info/lib/protocols.js b/node_modules/hosted-git-info/lib/protocols.js deleted file mode 100644 index 129988459a2b8..0000000000000 --- a/node_modules/hosted-git-info/lib/protocols.js +++ /dev/null @@ -1,9 +0,0 @@ -module.exports = () => ({ - 'git+ssh:': { name: 'sshurl' }, - 'ssh:': { name: 'sshurl' }, - 'git+https:': { name: 'https', auth: true }, - 'git:': { auth: true }, - 'http:': { auth: true }, - 'https:': { auth: true }, - 'git+http:': { auth: true }, -}) diff --git a/node_modules/hosted-git-info/package.json b/node_modules/hosted-git-info/package.json index 25c757b8c405b..612259948afe7 100644 --- a/node_modules/hosted-git-info/package.json +++ b/node_modules/hosted-git-info/package.json @@ -1,6 +1,6 @@ { "name": "hosted-git-info", - "version": "6.1.0", + "version": "6.1.1", "description": "Provides metadata and conversions from repository urls for GitHub, Bitbucket and GitLab", "main": "./lib/index.js", "repository": { diff --git a/package-lock.json b/package-lock.json index 01930b4f96eda..0f44f87fbd72a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -104,7 +104,7 @@ "fs-minipass": "^2.1.0", "glob": "^8.0.1", "graceful-fs": "^4.2.10", - "hosted-git-info": "^6.1.0", + "hosted-git-info": "^6.1.1", "ini": "^3.0.1", "init-package-json": "^4.0.1", "is-cidr": "^4.0.2", @@ -6018,9 +6018,9 @@ } }, "node_modules/hosted-git-info": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-6.1.0.tgz", - "integrity": "sha512-HGLEbnDnxaXOoVjyE4gR+zEzQ/jvdPBVbVvDiRedZsn7pKx45gic0G1HGZBZ94RyJz0e6pBMeInIh349TAvHCQ==", + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-6.1.1.tgz", + "integrity": "sha512-r0EI+HBMcXadMrugk0GCQ+6BQV39PiWAZVfq7oIckeGiN7sjRGyQxPdft3nQekFTCQbYxLBH+/axZMeH8UX6+w==", "inBundle": true, "dependencies": { "lru-cache": "^7.5.1" @@ -14050,6 +14050,7 @@ "bin-links": "^4.0.1", "cacache": "^17.0.1", "common-ancestor-path": "^1.0.1", + "hosted-git-info": "^6.1.1", "json-parse-even-better-errors": "^3.0.0", "json-stringify-nice": "^1.1.4", "minimatch": "^5.1.0", diff --git a/package.json b/package.json index ad8a0ee507a47..c86f062aa7faf 100644 --- a/package.json +++ b/package.json @@ -73,7 +73,7 @@ "fs-minipass": "^2.1.0", "glob": "^8.0.1", "graceful-fs": "^4.2.10", - "hosted-git-info": "^6.1.0", + "hosted-git-info": "^6.1.1", "ini": "^3.0.1", "init-package-json": "^4.0.1", "is-cidr": "^4.0.2", diff --git a/workspaces/arborist/lib/arborist/reify.js b/workspaces/arborist/lib/arborist/reify.js index 03b5c378d5052..08b38d002b6a9 100644 --- a/workspaces/arborist/lib/arborist/reify.js +++ b/workspaces/arborist/lib/arborist/reify.js @@ -9,6 +9,7 @@ const semver = require('semver') const debug = require('../debug.js') const walkUp = require('walk-up-path') const log = require('proc-log') +const hgi = require('hosted-git-info') const { dirname, resolve, relative } = require('path') const { depth: dfwalk } = require('treeverse') @@ -640,10 +641,15 @@ module.exports = cls => class Reifier extends cls { // and no 'bundled: true' setting. // Do the best with what we have, or else remove it from the tree // entirely, since we can't possibly reify it. - const res = node.resolved ? `${node.name}@${this[_registryResolved](node.resolved)}` - : node.packageName && node.version - ? `${node.packageName}@${node.version}` - : null + let res = null + if (node.resolved) { + const registryResolved = this[_registryResolved](node.resolved) + if (registryResolved) { + res = `${node.name}@${registryResolved}` + } + } else if (node.packageName && node.version) { + res = `${node.packageName}@${node.version}` + } // no idea what this thing is. remove it from the tree. if (!res) { @@ -721,12 +727,20 @@ module.exports = cls => class Reifier extends cls { // ${REGISTRY} or something. This has to be threaded through the // Shrinkwrap and Node classes carefully, so for now, just treat // the default reg as the magical animal that it has been. - const resolvedURL = new URL(resolved) + const resolvedURL = hgi.parseUrl(resolved) + + if (!resolvedURL) { + // if we could not parse the url at all then returning nothing + // here means it will get removed from the tree in the next step + return + } + if ((this.options.replaceRegistryHost === resolvedURL.hostname) || this.options.replaceRegistryHost === 'always') { // this.registry always has a trailing slash - resolved = `${this.registry.slice(0, -1)}${resolvedURL.pathname}${resolvedURL.searchParams}` + return `${this.registry.slice(0, -1)}${resolvedURL.pathname}${resolvedURL.searchParams}` } + return resolved } diff --git a/workspaces/arborist/package.json b/workspaces/arborist/package.json index eb468e253b3c1..eb5b60a1e83be 100644 --- a/workspaces/arborist/package.json +++ b/workspaces/arborist/package.json @@ -16,6 +16,7 @@ "bin-links": "^4.0.1", "cacache": "^17.0.1", "common-ancestor-path": "^1.0.1", + "hosted-git-info": "^6.1.1", "json-parse-even-better-errors": "^3.0.0", "json-stringify-nice": "^1.1.4", "minimatch": "^5.1.0", diff --git a/workspaces/arborist/test/arborist/reify.js b/workspaces/arborist/test/arborist/reify.js index 0c68bdd4dd748..ea7c3f0c66e52 100644 --- a/workspaces/arborist/test/arborist/reify.js +++ b/workspaces/arborist/test/arborist/reify.js @@ -2936,7 +2936,20 @@ t.test('installLinks', (t) => { }) t.only('should preserve exact ranges, missing actual tree', async (t) => { - const Arborist = require('../../lib/index.js') + const Pacote = require('pacote') + const Arborist = t.mock('../../lib/arborist', { + pacote: { + ...Pacote, + extract: async (...args) => { + if (args[0].startsWith('gitssh')) { + // we just want to test that this url is handled properly + // but its not a real git url we can clone so return early + return true + } + return Pacote.extract(...args) + }, + }, + }) const abbrev = resolve(__dirname, '../fixtures/registry-mocks/content/abbrev/-/abbrev-1.1.1.tgz') const abbrevTGZ = fs.readFileSync(abbrev) @@ -2973,6 +2986,40 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { }, }) + const gitSshPackument = JSON.stringify({ + _id: 'gitssh', + _rev: 'lkjadflkjasdf', + name: 'gitssh', + 'dist-tags': { latest: '1.1.1' }, + versions: { + '1.1.1': { + name: 'gitssh', + version: '1.1.1', + dist: { + // this is a url that `new URL()` cant parse + // https://github.com/npm/cli/issues/5278 + tarball: 'git+ssh://git@github.com:a/b/c.git#lkjadflkjasdf', + }, + }, + }, + }) + + const notAUrlPackument = JSON.stringify({ + _id: 'notaurl', + _rev: 'lkjadflkjasdf', + name: 'notaurl', + 'dist-tags': { latest: '1.1.1' }, + versions: { + '1.1.1': { + name: 'notaurl', + version: '1.1.1', + dist: { + tarball: 'hey been trying to break this test', + }, + }, + }, + }) + t.only('host should not be replaced replaceRegistryHost=never', async (t) => { const testdir = t.testdir({ project: { @@ -2981,6 +3028,8 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { version: '1.0.0', dependencies: { abbrev: '1.1.1', + gitssh: '1.1.1', + notaurl: '1.1.1', }, }), }, @@ -2994,6 +3043,14 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { .get('/abbrev/-/abbrev-1.1.1.tgz') .reply(200, abbrevTGZ) + tnock(t, 'https://registry.github.com') + .get('/gitssh') + .reply(200, gitSshPackument) + + tnock(t, 'https://registry.github.com') + .get('/notaurl') + .reply(200, notAUrlPackument) + const arb = new Arborist({ path: resolve(testdir, 'project'), registry: 'https://registry.github.com', @@ -3011,6 +3068,8 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { version: '1.0.0', dependencies: { abbrev: '1.1.1', + gitssh: '1.1.1', + notaurl: '1.1.1', }, }), }, @@ -3020,10 +3079,18 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { .get('/abbrev') .reply(200, abbrevPackument) + tnock(t, 'https://registry.github.com') + .get('/gitssh') + .reply(200, gitSshPackument) + tnock(t, 'https://registry.github.com') .get('/abbrev/-/abbrev-1.1.1.tgz') .reply(200, abbrevTGZ) + tnock(t, 'https://registry.github.com') + .get('/notaurl') + .reply(200, notAUrlPackument) + const arb = new Arborist({ path: resolve(testdir, 'project'), registry: 'https://registry.github.com', @@ -3041,6 +3108,8 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { version: '1.0.0', dependencies: { abbrev: '1.1.1', + gitssh: '1.1.1', + notaurl: '1.1.1', }, }), }, @@ -3050,10 +3119,18 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { .get('/abbrev') .reply(200, abbrevPackument2) + tnock(t, 'https://registry.github.com') + .get('/gitssh') + .reply(200, gitSshPackument) + tnock(t, 'https://registry.github.com') .get('/abbrev/-/abbrev-1.1.1.tgz') .reply(200, abbrevTGZ) + tnock(t, 'https://registry.github.com') + .get('/notaurl') + .reply(200, notAUrlPackument) + const arb = new Arborist({ path: resolve(testdir, 'project'), registry: 'https://registry.github.com',