diff --git a/deps.edn b/deps.edn index 550d82b..81252fd 100644 --- a/deps.edn +++ b/deps.edn @@ -24,6 +24,7 @@ clj-xml-validation/clj-xml-validation {:mvn/version "1.0.2"} tolitius/xml-in {:mvn/version "0.1.1"} hato/hato {:mvn/version "0.9.0"} + dev.weavejester/medley {:mvn/version "1.7.0"} miikka/clj-base62 {:mvn/version "0.1.1"} com.github.pmonks/clj-spdx {:mvn/version "1.0.91"} com.github.pmonks/rencg {:mvn/version "1.0.34"}} diff --git a/src/lice_comb/deps.clj b/src/lice_comb/deps.clj index 148e154..b91600f 100644 --- a/src/lice_comb/deps.clj +++ b/src/lice_comb/deps.clj @@ -53,8 +53,8 @@ (when ga [(symbol (first (s/split (str ga) #"\$"))) info])) -(defmulti dep->expressions - "Attempt to detect the SPDX license expression(s) (a set) in a tools.deps +(defmulti dep->expressions-info + "Attempt to detect the SPDX license expression(s) (a map) in a tools.deps style dep (a MapEntry or two-element sequence of `[groupId/artifactId dep-info]`). @@ -63,7 +63,7 @@ {:arglists '([[ga info]])} (fn [[_ info]] (:deps/manifest info))) -(defmethod dep->expressions :mvn +(defmethod dep->expressions-info :mvn [dep] (when dep (let [[ga info] (normalise-dep dep) @@ -73,38 +73,48 @@ ; override (let [pom-uri (lcmvn/pom-uri-for-gav group-id artifact-id version) expressions ;(check-fallbacks ga - (if-let [expressions (lcmvn/pom->expressions pom-uri)] + (if-let [expressions (lcmvn/pom->expressions-info pom-uri)] expressions - (apply lcimd/union (mapcat lcf/zip->expressions (:paths info))))];)] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too + (into {} (pmap #(lcimd/prepend-source (lcf/zip->expressions-info %) dep) (:paths info))))];)] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too expressions))));) -(defmethod dep->expressions :deps +(defmethod dep->expressions-info :deps [dep] (when dep (let [[ga info] (normalise-dep dep) version (:git/sha info)] ; (if-let [override (check-overrides ga version)] ; override -; (check-fallbacks ga - (lcf/dir->expressions (:deps/root info)))));)) +; (check-fallbacks ga + (lcf/dir->expressions-info (:deps/root info)))));)) -(defmethod dep->expressions nil +(defmethod dep->expressions-info nil [_]) -(defmethod dep->expressions :default +(defmethod dep->expressions-info :default [dep] (throw (ex-info (str "Unexpected manifest type '" (:deps/manifest (second dep)) "' for dependency " dep) {:dep dep}))) +(defn dep->expressions + "Attempt to detect the SPDX license expression(s) (a set) in a tools.deps + style dep (a MapEntry or two-element sequence of + `[groupId/artifactId dep-info]`). + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." + [dep] + (some-> (dep->expressions-info dep) + keys + set)) + (defn deps-expressions "Attempt to detect the SPDX license expression(s) in a tools.deps 'lib map', returning a new lib map with the licenses assoc'ed in (in key - `:lice-comb/license-expressions`)" + `:lice-comb/license-info`)" [deps] (when deps - (into {} -;####TODO: CHECK WHETHER METADATA MAPS NEED TO BE MERGED!!!! - (pmap #(let [[k v] %] [k (assoc v :lice-comb/license-expressions (dep->expressions [k v]))]) deps)))) + (into {} (pmap #(let [[k v] %] [k (assoc v :lice-comb/license-info (dep->expressions-info [k v]))]) deps)))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index b3360a0..c8c4fa3 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -40,7 +40,7 @@ (throw (java.nio.file.NotDirectoryException. (str dir)))) (throw (java.io.FileNotFoundException. (str dir))))))) -(defn probable-license-file? +(defn- probable-license-file? "Returns true if the given file-like thing (String, File, ZipEntry) is a probable license file, false otherwise." [f] @@ -50,35 +50,52 @@ (or (contains? probable-license-filenames fname) (s/ends-with? fname ".pom")))))) -(defn probable-license-files +(defn- probable-license-files "Returns all probable license files in the given directory, recursively, as a set of java.io.File objects. dir may be a String or a java.io.File, either of which must refer to a readable directory." [dir] (when-let [dir (ensure-readable-dir dir)] - (lcu/nset (filter #(and (.isFile ^java.io.File %) (probable-license-file? %)) (file-seq dir))))) + (some-> (seq (filter #(and (.isFile ^java.io.File %) (probable-license-file? %)) (file-seq dir))) + set))) -(defn file->expressions - "Attempts to determine the SPDX license expression(s) (a set) from the given +(defn file->expressions-info + "Attempts to determine the SPDX license expression(s) (a map) from the given file (an InputStream or something that can have an io/input-stream opened on it). If an InputStream is provided, it must already be open and the associated - filename should also be provided as the second parameter (it is optional in + filepath should also be provided as the second parameter (it is optional in other cases). The result has metadata attached that describes how the identifiers in the expression(s) were determined." - ([f] (file->expressions f (lcu/filename f))) - ([f fname] - (when (and f fname) - (let [lfname (s/lower-case fname)] - (lcimd/prepend-source (cond (= lfname "pom.xml") (lcmvn/pom->expressions f fname) - (s/ends-with? lfname ".pom") (lcmvn/pom->expressions f fname) + ([f] (file->expressions-info f (lcu/filepath f))) + ([f filepath] + (when (and f (not (s/blank? filepath))) + (let [fname (lcu/filename filepath) + lfname (s/lower-case fname)] + (lcimd/prepend-source (cond (= lfname "pom.xml") (lcmvn/pom->expressions-info f fname) + (s/ends-with? lfname ".pom") (lcmvn/pom->expressions-info f fname) (instance? java.io.InputStream f) (lcmtch/text->ids f) :else (with-open [is (io/input-stream f)] (doall (lcmtch/text->ids is)))) ; Default is to assume it's a plain text file containing license text(s) - fname))))) + filepath))))) -(defn zip->expressions - "Attempt to detect the SPDX license expression(s) in a ZIP file. zip may be a +(defn file->expressions + "Attempts to determine the SPDX license expression(s) (a set) from the given + file (an InputStream or something that can have an io/input-stream opened on + it). If an InputStream is provided, it must already be open and the associated + filepath should also be provided as the second parameter (it is optional in + other cases). + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." + ([f] (file->expressions f (lcu/filepath f))) + ([f filepath] + (some-> (file->expressions-info f filepath) + keys + set))) + +(defn zip->expressions-info + "Attempt to detect the SPDX license expression(s) (a map) in a ZIP file. zip may be a String or a java.io.File, both of which must refer to a ZIP-format compressed file. @@ -91,14 +108,28 @@ (let [zip-file (io/file zip)] (java.util.zip.ZipFile. zip-file) ; This no-op forces validation of the zip file - ZipInputStream does not reliably perform validation (with-open [zip-is (java.util.zip.ZipInputStream. (io/input-stream zip-file))] - (loop [result #{} + (loop [result {} entry (.getNextEntry zip-is)] (if entry (if (probable-license-file? entry) - (recur (lcimd/union result (lcimd/prepend-source (file->expressions zip-is (lcu/filename entry)) (lcu/filename zip-file))) + (recur (merge result (lcimd/prepend-source (file->expressions-info zip-is (lcu/filename entry)) (lcu/filepath zip-file))) (.getNextEntry zip-is)) (recur result (.getNextEntry zip-is))) - (doall (some-> (seq result) set)))))))) ; De-lazy the result before we exit the with-open scope + (when-not (empty? result) result))))))) + +(defn zip->expressions + "Attempt to detect the SPDX license expression(s) (a set) in a ZIP file. zip may be a + String or a java.io.File, both of which must refer to a ZIP-format compressed + file. + + Throws on invalid zip file (doesn't exist, not readable, not ZIP format, etc.). + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." + [zip] + (some-> (zip->expressions-info zip) + keys + set)) (defn- zip-compressed-files "Returns all probable ZIP compressed files in the given directory, @@ -106,12 +137,13 @@ java.io.File, either of which must refer to a readable directory." [dir] (when-let [dir (ensure-readable-dir dir)] - (lcu/nset (filter #(and (.isFile ^java.io.File %) - (or (s/ends-with? (str %) ".zip") - (s/ends-with? (str %) ".jar"))) - (file-seq dir))))) + (some-> (seq (filter #(and (.isFile ^java.io.File %) + (or (s/ends-with? (str %) ".zip") + (s/ends-with? (str %) ".jar"))) + (file-seq dir))) + set))) -(defn dir->expressions +(defn dir->expressions-info "Attempt to detect the SPDX license expression(s) (a set) in a directory. dir may be a String or a java.io.File, both of which must refer to a readable directory. @@ -122,15 +154,32 @@ The result has metadata attached that describes how the identifiers in the expression(s) were determined." - ([dir] (dir->expressions dir nil)) + ([dir] (dir->expressions-info dir nil)) ([dir {:keys [include-zips?] :or {include-zips? false}}] (when dir - (let [file-expressions (apply lcimd/union (map file->expressions (probable-license-files dir)))] + (let [file-expressions (into {} (map file->expressions-info (probable-license-files dir)))] (if include-zips? - (let [zip-expressions (apply lcimd/union (map #(try (zip->expressions %) (catch Exception _ nil)) (zip-compressed-files dir)))] - (lcimd/union file-expressions zip-expressions)) + (let [zip-expressions (into {} (map #(try (zip->expressions-info %) (catch Exception _ nil)) (zip-compressed-files dir)))] + (merge file-expressions zip-expressions)) file-expressions))))) +(defn dir->expressions + "Attempt to detect the SPDX license expression(s) (a map) in a directory. dir + may be a String or a java.io.File, both of which must refer to a + readable directory. + + The optional `opts` map has these keys: + * `include-zips?` (boolean, default false) - controls whether zip compressed + files found in the directory are included in the scan or not + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." + ([dir] (dir->expressions dir nil)) + ([dir opts] + (some-> (dir->expressions-info dir opts) + keys + set))) + (defn init! "Initialises this namespace upon first call (and does nothing on subsequent calls), returning nil. Consumers of this namespace are not required to call diff --git a/src/lice_comb/impl/http.clj b/src/lice_comb/impl/http.clj index 7b7eabd..9d27bfc 100644 --- a/src/lice_comb/impl/http.clj +++ b/src/lice_comb/impl/http.clj @@ -33,14 +33,15 @@ Note: does not throw - returns false on errors." [uri] - (when (lcu/valid-http-uri? (str uri)) - (try - (when-let [response (hc/head (str uri) - {:http-client @http-client-d - :header {"user agent" "com.github.pmonks/lice-comb"}})] - (= 200 (:status response))) - (catch Exception _ - false)))) + (boolean + (when (lcu/valid-http-uri? (str uri)) + (try + (when-let [response (hc/head (str uri) + {:http-client @http-client-d + :header {"user agent" "com.github.pmonks/lice-comb"}})] + (= 200 (:status response))) + (catch Exception _ + false))))) (defn- cdn-uri "Converts raw URIs into CDN URIs, for these 'known' hosts: diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj index 4f4b7d9..21209e4 100644 --- a/src/lice_comb/impl/matching.clj +++ b/src/lice_comb/impl/matching.clj @@ -55,8 +55,14 @@ "LGPL-2.1" "LGPL-3.0"}) +(defn- dis + "Remove the given key(s) from the associative collection (set or map)." + [associative & ks] + (cond (set? associative) (apply disj associative ks) + (map? associative) (apply dissoc associative ks))) + (defn- fix-gpl-only-or-later - "If the set of ids includes both an 'only' and an 'or-later' variant of the + "If the keys of ids includes both an 'only' and an 'or-later' variant of the same underlying GNU family identifier, remove the 'only' variant." [ids] (loop [result ids @@ -65,47 +71,42 @@ (if f (recur (if (and (contains? result (str f "-only")) (contains? result (str f "-or-later"))) - (disj result (str f "-only")) + (dis result (str f "-only")) result) (first r) (rest r)) result))) (defn- fix-public-domain-cc0 - "If the set of ids includes both CC0-1.0 and lice-comb's public domain + "If the keys of ids includes both CC0-1.0 and lice-comb's public domain LicenseRef, remove the LicenseRef as it's redundant." [ids] (if (and (contains? ids (lcis/public-domain)) (contains? ids "CC0-1.0")) - (disj ids (lcis/public-domain)) + (dis ids (lcis/public-domain)) ids)) (defn- fix-mpl-2 - "If the set of ids includes both MPL-2.0 and MPL-2.0-no-copyleft-exception, + "If the keys of ids includes both MPL-2.0 and MPL-2.0-no-copyleft-exception, remove the MPL-2.0-no-copyleft-exception as it's redundant." [ids] (if (and (contains? ids "MPL-2.0") (contains? ids "MPL-2.0-no-copyleft-exception")) - (disj ids "MPL-2.0-no-copyleft-exception") + (dis ids "MPL-2.0-no-copyleft-exception") ids)) (defn manual-fixes - "Manually fix certain invalid combinations of license identifiers in a set." + "Manually fix certain invalid combinations of license identifiers in a set or + map." [ids] - (when ids - (let [m (meta ids) - result (some-> ids - direct-replacements - fix-gpl-only-or-later - fix-public-domain-cc0 - fix-mpl-2 - set) - removed-ids (apply disj (set (keys m)) result) - m (apply dissoc m removed-ids)] - (with-meta result m)))) + (some-> ids + direct-replacements + fix-gpl-only-or-later + fix-public-domain-cc0 + fix-mpl-2)) (defmulti text->ids - "Attempts to determine the SPDX license and/or exception identifier(s) (a set) + "Attempts to determine the SPDX license and/or exception identifier(s) (a map) within the given license text (a String, Reader, InputStream, or something that is accepted by clojure.java.io/reader - File, URL, URI, Socket, etc.). The result has metadata attached that describes how the identifiers were @@ -126,9 +127,9 @@ ; These clj-spdx APIs are *expensive*, so we paralellise them (let [f-lic (future (sm/licenses-within-text s @lcis/license-ids-d)) f-exc (future (sm/exceptions-within-text s @lcis/exception-ids-d)) - ids (manual-fixes (set/union @f-lic @f-exc))] + ids (set/union @f-lic @f-exc)] (when ids - (with-meta ids (into {} (map #(vec [% {:type :concluded :confidence :high :strategy :spdx-text-matching}]) ids)))))) + (manual-fixes (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :high :strategy :spdx-text-matching})) ids)))))) (defmethod text->ids java.io.Reader [r] @@ -147,7 +148,7 @@ (text->ids r)))) (defn uri->ids - "Returns the SPDX license and/or exception identifiers (a set) for the given + "Returns the SPDX license and/or exception identifiers (a map) for the given uri, or nil if there aren't any. It does this via two steps: 1. Seeing if the given URI is in the license or exception list, and returning the ids of the associated licenses and/or exceptions if so @@ -171,19 +172,18 @@ (let [suri (lcu/simplify-uri uri)] ; 1. see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) (if-let [ids (get @lcis/index-uri-to-id-d suri)] - (let [metadata (into {} (map #(vec [% {:type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list uri)}]) ids))] - (with-meta ids metadata)) + (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list uri)})) ids)) ; 2. attempt to retrieve the text/plain contents of the uri and perform full license matching on it (when-let [license-text (lcihttp/get-text uri)] (when-let [ids (text->ids license-text)] - (lcimd/prepend-source ids (str uri " (retrieved text)"))))))))) + (lcimd/prepend-source ids (str "Text retrieved from " uri))))))))) (defn- string->ids-info - "Converts the given String into a sequence of singleton maps, each of which - has a key is that is an SPDX identifier (either a listed SPDX license or - exception id), and whose value is meta-information about how that identifier - was found. The result sequence is ordered in the same order of appearance as - the source values in s. + "Converts the given String into a sequence of singleton maps (NOT A SINGLE + MAP!), each of which has a key is that is an SPDX identifier (either a listed + SPDX license or exception id), and whose value is a list of meta-information + about how that identifier was found. The result sequence is ordered in the + same order of appearance as the source values in s. If no listed SPDX license or exception identifiers are found, returns a singleton sequence containing a map with a lice-comb specific 'unlisted' @@ -202,19 +202,20 @@ (let [s (s/trim s)] (if-let [id (get @lcis/spdx-ids-d (s/lower-case s))] (if (= id s) - (list {id {:type :declared :strategy :spdx-listed-identifier-exact-match :source (list s)}}) - (list {id {:type :concluded :confidence :high :strategy :spdx-listed-identifier-case-insensitive-match :source (list s)}})) + (list {id (list {:id id :type :declared :strategy :spdx-listed-identifier-exact-match :source (list s)})}) + (list {id (list {:id id :type :concluded :confidence :high :strategy :spdx-listed-identifier-case-insensitive-match :source (list s)})})) ; 2. Is it an SPDX license or exception name? (if-let [ids (get @lcis/index-name-to-id-d (s/trim (s/lower-case s)))] - (map #(hash-map % {:type :concluded :confidence :medium :strategy :spdx-listed-name :source (list s)}) ids) + (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-name :source (list s)})) ids) ; 3. Is it a URI? If so, perform URI matching on it (this is to handle some dumb corner cases that exist in pom.xml files hosted on Clojars & Maven Central) (if-let [ids (uri->ids s)] - (let [metadata (meta ids)] - (map #(hash-map % (get metadata %)) ids)) ; Convert metadata from uri->ids back into a regular map (so that it survives expression building) + ids ; 4. Attempt regex name matching - (if-let [ids (lcirm/match-regexes s)] - (map #(hash-map % (get (meta ids) %)) ids) ; Convert metadata from match-regexes back into a regular map (so that it survives expression building) - (list {(lcis/name->unlisted s) {:type :concluded :confidence :low :strategy :unlisted :source (list s)}})))))))) + (if-let [ids (lcirm/matches s)] + ids + ; 5. No clue, so return a single unlisted SPDX LicenseRef + (let [id (lcis/name->unlisted s)] + (list {id (list {:id id :type :concluded :confidence :low :strategy :unlisted :source (list s)})}))))))))) (defn- filter-blanks "Filter blank strings out of coll" @@ -252,7 +253,8 @@ (defn- process-expression-element "Processes a single new expression element e (either a keyword representing - an SPDX operator, or an SPDX identifier) in the context of stack (list) s." + an SPDX operator, or a map representing an SPDX identifier) in the context of + stack (list) s." [s e] (if (keyword? e) ; e is a keyword (SPDX operator): only push a keyword if the prior element was an id, or it's different to the prior keyword @@ -272,41 +274,42 @@ (if (nil? prior) (push s-minus-2 e) ; s had one keyword on it (which is invalid), so drop it and push e on (if (or (not= :with kw) ; If the prior keyword was :and or :or, or :with and the current element is a listed exception id, build an SPDX expression fragment and push the result onto s - (se/listed-id? e)) - (push s-minus-2 (s/join " " [prior operator e])) + (se/listed-id? (first (keys e)))) + (let [k (s/join " " [(first (keys prior)) operator (first (keys e))]) + v (distinct (concat (list {:type :concluded :confidence :low :strategy :expression-inference}) + (first (vals prior)) + (first (vals e))))] + (push s-minus-2 {k v})) (push s-minus-1 e)))) ; We had a :with operator without a valid exception id following it, so simply drop the :with keyword from the stack and push the current element on ; Many keywords? That's invalid (since we dedupe them when they get pushed on, so this means they're different), so drop all of them and push e onto s (push (drop-while keyword? s) e)))) -(defn- build-spdx-expressions - "Builds a set of SPDX expressions from the given list of strings & keywords." +(defn- build-spdx-expressions-map + "Builds a single SPDX expressions map from the given list of keywords and SPDX expession maps." [l] (loop [result '() f (first l) r (rest l)] (if f (recur (process-expression-element result f) (first r) (rest r)) - (some-> (seq (reverse result)) ; Remember to reverse the expressions, since lists-as-stacks grow at the front, not the end - set - manual-fixes)))) + (manual-fixes (into {} result))))) (defn attempt-to-build-expressions - "Attempts to build SPDX expression(s) (a set of strings) from the - given name. The result has metadata attached that describes how the - identifiers were determined." + "Attempts to build SPDX expression(s) (a map) from the given name. + + The keys in the maps are the detected SPDX license and exception identifiers, + and each value contains information about how that identifiers was determined." [name] - (when-let [partial-expressions (some->> (split-on-operators name) - (drop-while keyword?) - (lc3/rdrop-while keyword?) - (map #(if (keyword? %) % (string->ids-info %))) - flatten - (filter identity) - (drop-while keyword?) - (lc3/rdrop-while keyword?) - seq)] - (let [spdx-expressions (build-spdx-expressions (map #(if (keyword? %) % (first (keys %))) partial-expressions)) - metadata (into {} (filter (complement keyword?) partial-expressions))] - (with-meta spdx-expressions metadata)))) + (some->> (split-on-operators name) + (drop-while keyword?) + (lc3/rdrop-while keyword?) + (map #(if (keyword? %) % (string->ids-info %))) + flatten + (filter identity) + (drop-while keyword?) + (lc3/rdrop-while keyword?) + seq + build-spdx-expressions-map)) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/src/lice_comb/impl/metadata.clj b/src/lice_comb/impl/metadata.clj index a627109..6c530eb 100644 --- a/src/lice_comb/impl/metadata.clj +++ b/src/lice_comb/impl/metadata.clj @@ -19,76 +19,30 @@ (ns lice-comb.impl.metadata "Metadata helper functionality. Note: this namespace is not part of the public API of lice-comb and may change without notice." - (:require [clojure.string :as s] - [clojure.set :as set] - [lice-comb.impl.utils :as lcu])) + (:require [clojure.string :as s])) (defn prepend-source - "Prepends the given source (a string) onto the list of sources for all of - the entries of the metadata for object o. Returns o with the new metadata." - [o s] - (if (and o (not (s/blank? s))) - (if-let [m (meta o)] - (with-meta o (lcu/mapfonv #(if (map? %) (assoc % :source (conj (seq (:source %)) s)) %) m)) - o) - o)) + "Prepends the given source s (a String) onto all metadata sub-maps in m (a + lice-comb id+metadata-list map)." + [m s] + (if (or (empty? m) (s/blank? s)) + m + (into {} (map #(if (sequential? (val %)) + (let [id (key %) + metadata-list (val %)] + (hash-map id (map (fn [x] (assoc x :source (conj (seq (:source x)) s))) metadata-list))) + %) + m)))) -(defn- merge-conflicting-key - "Merges the metadata values for a single key that exists in both m1 and m2." - [m1 m2 k] -;####TODO: IMPROVE THIS SIMPLISTIC "PICK A WINNER" IMPLEMENTATION!!!!! - (let [m1v (get m1 k) - m2v (get m2 k)] - ; If both values are maps, perhaps lice-comb specific metadata merging - (if (and (map? m1v) (map? m2v)) - (if (= :declared (:type m1v)) - m1v - (if (= :declared (:type m2v)) - m2v - (case [(:confidence m1v) (:confidence m2v)] - ([:high :high] [:high :medium] [:high :low] [:high nil]) m1v - ([:medium :medium] [:medium :low] [:medium nil]) m1v - ([:low :low] [:low nil]) m1v - m2v))) - (throw (ex-info "Attempt to merge non-lice-comb metadata maps" {}))))) - - -(defn merge-metadata - "Merges lice-comb metadata maps." - ([] {}) - ([m] m) - ([m1 m2] - (if (and m1 m2) - (let [keys-in-both (set/intersection (set (keys m1)) (set (keys m2))) - keys-in-m1-only (apply disj (set (keys m1)) keys-in-both) - keys-in-m2-only (apply disj (set (keys m2)) keys-in-both)] - (merge {} - (into {} (map #(vec [% (merge-conflicting-key m1 m2 %)]) keys-in-both)) - (into {} (map #(vec [% (get m1 %)]) keys-in-m1-only)) - (into {} (map #(vec [% (get m2 %)]) keys-in-m2-only)))) - (if m1 - m1 - m2))) - ([m1 m2 & maps] - (loop [result (merge-metadata m1 m2) - f (first maps) - r (rest maps)] - (if f - (recur (merge-metadata result f) (first r) (rest r)) - result)))) - -(defn union - "Equivalent to set/union, but preserves lice-comb metadata from the sets using - merge-metadata." - ([] #{}) - ([s] s) - ([s1 s2] - (with-meta (set/union s1 s2) - (merge-metadata (meta s1) (meta s2)))) - ([s1 s2 & sets] - (let [data (apply set/union (concat [s1 s2] sets)) - metadata (apply merge-metadata (concat [(meta s1) (meta s2)] (filter identity (map meta sets))))] - (with-meta data metadata)))) +(defn merge-maps + "Merges any number of lice-comb maps, by concatenating and de-duping values + for the same key (expression)." + [& maps] + (let [maps (filter identity maps)] + (when-not (empty? maps) + (let [grouped-maps (group-by first (mapcat identity maps))] + (into {} (map #(vec [% (seq (distinct (mapcat second (get grouped-maps %))))]) + (keys grouped-maps))))))) (def ^:private strategies { :spdx-expression "SPDX expression" @@ -97,25 +51,56 @@ :spdx-text-matching "SPDX license text matching" :spdx-listed-name "SPDX listed name (case insensitive match)" :spdx-listed-uri "SPDX listed URI (relaxed matching)" - :regex-name-matching "Regular expression name matching" + :expression-inference "Inferred SPDX expression" + :regex-matching "Regular expression matching" :unlisted "Unlisted"}) +(defn- metadata-keyfn + "sort-by keyfn for lice-comb metadata maps" + [metadata] + (str (case (:id metadata) + nil "0" + "1") + "-" + (case (:type metadata) + :declared "0" + :concluded "1") + "-" + (case (:confidence metadata) + nil "0" + :high "1" + :medium "2" + :low "3") + "-" + (case (:strategy metadata) + :spdx-expression "0" + :spdx-listed-identifier-exact-match "1" + :spdx-listed-identifier-case-insensitive-match "2" + :spdx-text-matching "3" + :spdx-listed-name "4" + :spdx-listed-uri "5" + :expression-inference "6" + :regex-matching "7" + :unlisted "8"))) + (defn- metadata-element->string - "Converts a single element in a lice-comb metadata map (identified by id) - into a human-readable string." + "Converts the metadata list for the given identifier into a human-readable + string." [m id] - (when-let [metadata (get m id)] - (str id ": " - (name (:type metadata)) - (when-let [confidence (:confidence metadata)] - (str "\n Confidence: " (name confidence))) - (when-let [strategy (:strategy metadata)] - (str "\n Strategy: " (get strategies strategy (str "#### MISSING VALUE: " strategy " ####")))) - (when-let [source (seq (:source metadata))] - (str "\n Source: " (s/join " > " source)))))) + (str id ":\n" + (when-let [metadata-list (sort-by metadata-keyfn (seq (get m id)))] + (s/join "\n" (map #(str " " + (when-let [md-id (:id %)] (when (not= id md-id) (str md-id " "))) + (case (:type %) + :declared "Declared" + :concluded "Concluded") + (when-let [confidence (:confidence %)] (str "\n Confidence: " (name confidence))) + (when-let [strategy (:strategy %)] (str "\n Strategy: " (get strategies strategy (name strategy)))) + (when-let [source (seq (:source %))] (str "\n Source:\n > " (s/join "\n > " source)))) + metadata-list))))) (defn metadata->string - "Converts a lice-comb metadata map m into a human-readable string." + "Converts lice-comb map m into a human-readable string." [m] (when m (let [ids (sort (keys m))] diff --git a/src/lice_comb/impl/regex_matching.clj b/src/lice_comb/impl/regex_matching.clj index 94f1c3f..7d3fab4 100644 --- a/src/lice_comb/impl/regex_matching.clj +++ b/src/lice_comb/impl/regex_matching.clj @@ -19,10 +19,11 @@ (ns lice-comb.impl.regex-matching "Helper functionality focused on regex matching. Note: this namespace is not part of the public API of lice-comb and may change without notice." - (:require [clojure.string :as s] - [rencg.api :as rencg] - [lice-comb.impl.spdx :as lcis] - [lice-comb.impl.utils :as lcu])) + (:require [clojure.string :as s] + [medley.core :as med] + [rencg.api :as rencg] + [lice-comb.impl.spdx :as lcis] + [lice-comb.impl.utils :as lcu])) (defn- get-rencgs "Get a value for an re-ncg, potentially looking at multiple ncgs in order @@ -50,7 +51,7 @@ (contains? @lcis/exception-ids-d id)) id (throw (ex-info (str "Invalid SPDX id constructed: '" id - "'' - please raise an issue at " + "' - please raise an issue at " "https://github.com/pmonks/lice-comb/issues/new?assignees=pmonks&labels=bug&template=Invalid_id_constructed.md&title=Invalid+SPDX+identifer+constructed:+" id) {:id id})))) @@ -269,7 +270,7 @@ :pad-ver? true :latest-ver "1.0"} {:id "Creative commons family" - :regex #"(?i)(\bCC\sBY|Creative[\s-]+Commons(?!([\s-]+Legal[\s-]+Code)?[\s-]+Attribution)|(Creative[\s-]+Commons[\s-]+([\s-]+Legal[\s-]+Code)?)?(?Non\s*Commercial|NC)|(?No[\s-]*Deriv(ative)?s?|ND)|(?Share[\s-]*Alike|SA)))*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\s*(?Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?" + :regex #"(?i)(\bCC[\s-]BY|Creative[\s-]+Commons(?!([\s-]+Legal[\s-]+Code)?[\s-]+Attribution)|(Creative[\s-]+Commons[\s-]+([\s-]+Legal[\s-]+Code)?)?(?Non\s*Commercial|NC)|(?No[\s-]*Deriv(ative)?s?|ND)|(?Share[\s-]*Alike|SA)))*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\s*(?Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?" :fn cc-id-constructor :pad-ver? true :latest-ver "4.0"} @@ -341,16 +342,18 @@ :fn (constantly ["Zlib" :medium])} ]))) -(defn- match-regex +(defn- match "If a match occured for the given regex element when tested against string s, - returns a map containing the following keys, or nil if there was no match: - * :id The SPDX identifier of the found license or exception + returns a map containing the following keys: + * :id The SPDX license or exception identifier that was determined * :type The 'type' of match - will always have the value :concluded * :confidence The confidence of the match: either :high, :medium, or :low - * :strategy The matching strategy - will always have the value :regex-name-matching + * :strategy The matching strategy - will always have the value :regex-matching * :source A list of strings containing source information (specifically the portion of the string s that matched this regex element) - *: start The start index of the given match within s" + *: start The start index of the given match within s + + Returns nil if there was no match." [s elem] (when-let [match (rencg/re-find-ncg (:regex elem) s)] (let [[id confidence] ((:fn elem) (merge {:name s} elem match)) @@ -358,29 +361,33 @@ {:id id :type :concluded :confidence (if (= source id) :high confidence) - :strategy :regex-name-matching + :strategy :regex-matching :source (list source) :start (:start match)}))) -(defn match-regexes - "Returns a sequence (NOT A SET!) of the SPDX license or exception ids that - were found in the string s, or nil if there were no matches. Results are in - the order in which they appear in the string. The result also has metadata - attached, which is a map whose keys are each of the SPDX license or exception - ids, and whose values are a map containing these keys: +(defn matches + "Returns a sequence (NOT A SET!) of maps where each key is a SPDX license or + exception identifier (a String) that was found in s, and the value is a + sequence containing a single map describing how the identifier was determined. + The map contains these keys: * :type The 'type' of match - will always have the value :concluded * :confidence The confidence of the match: either :high, :medium, or :low - * :strategy The matching strategy - will always have the value :regex-name-matching - * :source A list of strings containing source information (specifically - the portion of the string s that matched this identifier" + * :strategy The matching strategy - will always have the value :regex-matching + * :source A sequence of strings containing source information + (specifically the substring of s that matched this identifier) + + Results are in the order in which they appear in the string, and the function + returns nil if there were no matches." [s] - (when-let [matches (seq (distinct (filter identity (pmap (partial match-regex s) @license-name-matching-d))))] - (let [ids (some->> matches - (sort-by :start) - (map :id) - (distinct)) - metadata (into {} (map #(vec [% (dissoc (first (filter (fn [x] (= % (:id x))) matches)) :start :id)]) ids))] - (with-meta ids metadata)))) + (when-let [matches (seq (filter identity (map (partial match s) @license-name-matching-d)))] + (some->> matches + (med/distinct-by :id) ;####TODO: THINK ABOUT MERGING INSTEAD OF DROPPING + (sort-by :start) + (map #(hash-map (:id %) (list {:id (:id %) ; We duplicate this here in case the result gets merged into an expression + :type (:type %) + :confidence (:confidence %) + :strategy (:strategy %) + :source (:source %)})))))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/src/lice_comb/impl/utils.clj b/src/lice_comb/impl/utils.clj index 1d1d143..d635402 100644 --- a/src/lice_comb/impl/utils.clj +++ b/src/lice_comb/impl/utils.clj @@ -124,9 +124,45 @@ (s/replace #"\.[\p{Alnum}]{3,}\z" "")) ; Strip file type extension (if any) luri))))) +(defmulti filepath + "Returns the full path and name of the given file-like thing (String, File, + ZipEntry, URI, URL)." + type) + +(defmethod filepath nil + [_]) + +(defmethod filepath java.io.File + [^java.io.File f] + (.getPath f)) + +(defmethod filepath java.lang.String + [s] + (when s + (let [s (s/trim s)] + (if (valid-http-uri? s) + (filepath (io/as-url s)) + (filepath (io/file s)))))) + +(defmethod filepath java.util.zip.ZipEntry + [^java.util.zip.ZipEntry ze] + (.getName ze)) + +(defmethod filepath java.net.URI + [^java.net.URI uri] + (str uri)) + +(defmethod filepath java.net.URL + [^java.net.URL url] + (str url)) + +(defmethod filepath java.io.InputStream + [_] + (throw (ex-info "Cannot determine filepath of an InputStream - did you forget to provide it separately?" {}))) + (defmulti filename - "Returns just the name component of the given file or path string, excluding - any parents." + "Returns just the name component of the given file-like thing (String, File, + ZipEntry, URI, URL), excluding any parents." type) (defmethod filename nil @@ -138,11 +174,15 @@ (defmethod filename java.lang.String [s] - (filename (io/file s))) + (when s + (let [s (s/trim s)] + (if (valid-http-uri? s) + (filename (io/as-url s)) + (filename (io/file s)))))) (defmethod filename java.util.zip.ZipEntry [^java.util.zip.ZipEntry ze] - (filename (.getName ze))) ; Note that Zip Entry names include the entire path + (filename (.getName ze))) (defmethod filename java.net.URI [^java.net.URI uri] diff --git a/src/lice_comb/matching.clj b/src/lice_comb/matching.clj index 7ee7601..7d12888 100644 --- a/src/lice_comb/matching.clj +++ b/src/lice_comb/matching.clj @@ -56,11 +56,14 @@ (unlisted? id) (lcis/unlisted->name id) :else id))) -(defn text->ids - "Attempts to determine the SPDX license and/or exception identifier(s) (a set) +(defn text->ids-info + "Attempts to determine the SPDX license and/or exception identifier(s) (a map) within the given license text (a String, Reader, InputStream, or something that is accepted by clojure.java.io/reader - File, URL, URI, Socket, etc.). + The keys in the maps are the detected SPDX license and exception identifiers, + and each value contains information about how that identifiers was determined. + Notes: * this function implements the SPDX matching guidelines (via clj-spdx). See https://spdx.github.io/spdx-spec/v2.3/license-matching-guidelines-and-templates/ @@ -74,8 +77,29 @@ [text] (lcim/text->ids text)) -(defn uri->ids - "Returns the SPDX license and/or exception identifiers (a set) for the given +(defn text->ids + "Attempts to determine the SPDX license and/or exception identifier(s) (a set + of Strings) within the given license text (a String, Reader, InputStream, or + something that is accepted by clojure.java.io/reader - File, URL, URI, Socket, + etc.). + + Notes: + * this function implements the SPDX matching guidelines (via clj-spdx). + See https://spdx.github.io/spdx-spec/v2.3/license-matching-guidelines-and-templates/ + * the caller is expected to open & close a Reader or InputStream passed to + this function (e.g. using clojure.core/with-open) + * you cannot pass a String representation of a filename to this method - you + should pass filenames through clojure.java.io/file first + + The result has metadata attached that describes how the identifiers were + determined." + [text] + (some-> (text->ids-info text) + keys + set)) + +(defn uri->ids-info + "Returns the SPDX license and/or exception identifiers (a map) for the given uri, or nil if there aren't any. It does this via two steps: 1. Seeing if the given URI is in the license or exception list, and returning the ids of the associated licenses and/or exceptions if so @@ -91,39 +115,72 @@ 2. URIs in the SPDX license and exception lists are not unique - the same URI may represent multiple licenses and/or exceptions. - The result has metadata attached that describes how the identifiers were - determined." + The keys in the maps are the detected SPDX license and exception identifiers, + and each value contains information about how that identifiers was determined." [uri] (lcim/uri->ids uri)) -(defn name->expressions - "Attempts to determine the SPDX license expression(s) (a set of Strings) - from the given 'license name' (a String), or nil if there aren't any. - This involves: +(defn uri->ids + "Returns the SPDX license and/or exception identifiers (a set of Strings) for + the given uri, or nil if there aren't any. It does this via two steps: + 1. Seeing if the given URI is in the license or exception list, and returning + the ids of the associated licenses and/or exceptions if so + 2. Attempting to retrieve the plain text content of the given URI and + performing full SPDX license matching on the result if there was one + + Notes on step 1: + 1. this does not perform exact matching; rather it simplifies URIs in various + ways to avoid irrelevant differences, including performing a + case-insensitive comparison, ignoring protocol differences (http vs https), + ignoring extensions representing MIME types (.txt vs .html, etc.), etc. + See lice-comb.impl.utils/simplify-uri for exact details. + 2. URIs in the SPDX license and exception lists are not unique - the same URI + may represent multiple licenses and/or exceptions." + [uri] + (some-> (uri->ids-info uri) + keys + set)) + +(defn name->expressions-info + "Attempts to determine the SPDX license expression(s) (a map) from the given + 'license name' (a String), or nil if there aren't any. This involves: 1. Determining whether the name is a valid SPDX license expression, and if so normalising (see clj-spdx's spdx.expressions/normalise fn) and returning it - 2. attempting to construct one or more SPDX license expressions from the + 2. Checking if the name is actually a URI, and if so performing URL matching + on it (as per url->ids-info) + 3. attempting to construct one or more SPDX license expressions from the name - The result has metadata attached that describes how the identifiers were - determined." + The keys in the maps are the detected SPDX license and exception identifiers, + and each value contains information about how that identifiers was determined." [name] (when-not (s/blank? name) (let [name (s/trim name)] ; 1. If it's a valid SPDX expression, return the normalised rendition of it in a set - (if-let [parsed-expression (sexp/parse name)] - (let [ids (sexp/extract-ids parsed-expression) - normalised-expression (sexp/unparse parsed-expression) - metadata (into {} (map #(vec [% {:type :declared :strategy :spdx-expression :source (list normalised-expression)}]) ids))] - (with-meta #{normalised-expression} metadata)) + (if-let [normalised-expression (sexp/normalise name)] + {normalised-expression (list {:type :declared :strategy :spdx-expression :source (list name)})} ; 2. If it's a URI, use URI matching (this is to handle messed up real world cases where license names in POMs contain a URI) (if (lcu/valid-http-uri? name) - (if-let [ids (uri->ids name)] + (if-let [ids (uri->ids-info name)] ids - (with-meta #{(lcis/name->unlisted name)} {(lcis/name->unlisted name) {:type :concluded :confidence :low :strategy :unlisted :source (list name)}})) + {(lcis/name->unlisted name) (list {:type :concluded :confidence :low :strategy :unlisted :source (list name)})}) ; It was a URL, but we weren't able to resolve it to any ids, so return it as unlisted ; 3. Attempt to build SPDX expression(s) from the name (lcim/attempt-to-build-expressions name)))))) +(defn name->expressions + "Attempts to determine the SPDX license expression(s) (a set of Strings) from + the given 'license name' (a String), or nil if there aren't any. This involves: + 1. Determining whether the name is a valid SPDX license expression, and if so + normalising (see clj-spdx's spdx.expressions/normalise fn) and returning it + 2. Checking if the name is actually a URI, and if so performing URL matching + on it (as per url->ids) + 3. attempting to construct one or more SPDX license expressions from the + name" + [name] + (some-> (name->expressions-info name) + keys + set)) + (defn init! "Initialises this namespace upon first call (and does nothing on subsequent calls), returning nil. Consumers of this namespace are not required to call diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index c45fc4c..d452023 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -25,7 +25,6 @@ [clojure.java.shell :as sh] [clojure.tools.logging :as log] [xml-in.core :as xi] - [spdx.expressions :as sexp] [lice-comb.matching :as lcmtch] [lice-comb.impl.matching :as lcim] [lice-comb.impl.metadata :as lcimd] @@ -48,7 +47,6 @@ ; TODO: make this configurable (def ^:private remote-maven-repos #{"https://repo.maven.apache.org/maven2" "https://repo.clojars.org"}) -;####TODO: MOVE THIS TO AN IMPL NS?? (defn pom-uri-for-gav "Attempts to locate the POM for the given GAV, which is a URI that may point to a file in the local Maven repository or a remote Maven repository (e.g. on @@ -66,44 +64,72 @@ (first (filter lcihttp/uri-resolves? (map #(str % "/" gav-path) remote-maven-repos)))))))) (defn- licenses-from-pair - "Attempts to determine the license(s) (a set) from a POM license name/URL pair. - - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." + "Attempts to determine the license(s) (a map) from a POM license name/URL + pair. Returns nil if no matches were found." [{:keys [name url]}] - (let [name-expressions (when-not (s/blank? name) (lcmtch/name->expressions name)) - name-ids (some-> (seq (mapcat #(sexp/extract-ids (sexp/parse %)) name-expressions)) set) - uri-ids (when-not (s/blank? url) (apply disj (lcmtch/uri->ids url) name-ids))] ; Only include ids detected from the URL that weren't already detected in the name - (lcimd/union name-expressions uri-ids))) + ; 1. Look in the name field(s) + (if-let [name-expressions (lcimd/prepend-source (lcmtch/name->expressions-info name) " tag")] + name-expressions + ; 2. If the names didn't give us any licenses, look in the url field(s) (this tends to be slower and less accurate) + (when-let [uri-ids (lcimd/prepend-source (lcmtch/uri->ids-info url) " tag")] + uri-ids))) (xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") -(defmulti pom->expressions - "Attempt to detect the license expression(s) (a set) reported in a pom.xml +(defn- xml-find-all-alts + "As for xi/find-all, but supports an alternative fallback set of tags (to + help with namespace messes in pom.xml files)." + [xml ks1 ks2] + (if-let [result (seq (xi/find-all xml ks1))] + result + (seq (xi/find-all xml ks2)))) + +(defn- xml-find-first-string + "As for xi/find-first, but assumes the target is a single content tag (and + returns that, or nil if it's blank or the tag doesn't exist." + [xml ks] + (when-let [result (first (xi/find-first xml ks))] + (when-not (s/blank? result) + result))) + +(defn- xml-find-first-string-alts + "As for xml-find-first-string, but supports an alternative fallback set of + tags (to help with namespace messes in pom.xml files)." + [xml ks1 ks2] + (if-let [result (xml-find-first-string xml ks1)] + result + (xml-find-first-string xml ks2))) + +(defmulti pom->expressions-info + "Attempt to detect the license expression(s) (a map) reported in a pom.xml file. pom may be a java.io.InputStream, or anything that can be opened by clojure.java.io/input-stream. Note that if an InputStream is provided: 1. it's the caller's responsibility to open and close it - 2. a filename *must* be provided along with the stream (2nd arg) + 2. a filepath *must* be provided along with the stream (the 2nd arg) The result has metadata attached that describes how the identifiers in the expression(s) were determined." - {:arglists '([pom] [pom file-name])} + {:arglists '([pom] [pom filepath])} (fn [& args] (type (first args)))) -; Note: a few rare pom.xml files are missing the xmlns declation (e.g. software.amazon.ion/ion-java) - so we look for both namespaced and non-namespaced versions of all tags here -(defmethod pom->expressions java.io.InputStream - [pom-is fname] - (let [pom-xml (xml/parse pom-is) - licenses (seq (xi/find-all pom-xml [::pom/project ::pom/licenses ::pom/license])) - licenses-no-ns (seq (xi/find-all pom-xml [:project :licenses :license]))] - (if (or licenses licenses-no-ns) +; Note: a few rare pom.xml files are missing the xmlns declation (e.g. software.amazon.ion/ion-java) - so we look for both namespaced and non-namespaced versions of all tags +(defmethod pom->expressions-info java.io.InputStream + [pom-is filepath] + (let [pom-xml (xml/parse pom-is)] + (if-let [pom-licenses (xml-find-all-alts pom-xml [::pom/project ::pom/licenses] [:project :licenses])] ; block exists - process it - (let [name-uri-pairs (lcu/nset (concat (lcu/map-pad #(hash-map :name (lcu/strim %1) :url (lcu/strim %2)) (xi/find-all licenses [::pom/name]) (xi/find-all licenses [::pom/url])) - (lcu/map-pad #(hash-map :name (lcu/strim %1) :url (lcu/strim %2)) (xi/find-all licenses-no-ns [:name]) (xi/find-all licenses-no-ns [:url])))) - licenses (map #(lcimd/prepend-source (licenses-from-pair %) fname) name-uri-pairs)] - (lcim/manual-fixes (apply lcimd/union licenses))) + (let [name-uri-pairs (some->> pom-licenses + (filter map?) ; Get rid of non-tag content (whitespace etc.) + (filter #(or (= ::pom/license (:tag %)) (= :license (:tag %)))) ; Get rid of non tags (which shouldn't exist, but Maven POMs are a shitshow...) + (map #(identity (let [name (xml-find-first-string-alts % [::pom/license ::pom/name] [:license :name]) + url (xml-find-first-string-alts % [::pom/license ::pom/url] [:license :url])] + (when (or name url) + {:name name :url url})))) + set) + licenses (into {} (map #(lcimd/prepend-source (licenses-from-pair %) filepath) name-uri-pairs))] + (lcim/manual-fixes licenses)) ; License block doesn't exist, so attempt to lookup the parent pom and get it from there (let [parent (seq (xi/find-first pom-xml [::pom/project ::pom/parent])) parent-no-ns (seq (xi/find-first pom-xml [:project :parent])) @@ -115,16 +141,33 @@ :artifact-id (lcu/strim (first (xi/find-first parent-no-ns [:artifactId]))) :version (lcu/strim (first (xi/find-first parent-no-ns [:version])))}))] (when-not (empty? parent-gav) - (pom->expressions (pom-uri-for-gav parent-gav))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep + (pom->expressions-info (pom-uri-for-gav parent-gav))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep -(defmethod pom->expressions :default - ([pom] (pom->expressions pom (lcu/filename pom))) - ([pom fname] +(defmethod pom->expressions-info :default + ([pom] (pom->expressions-info pom (lcu/filepath pom))) + ([pom filepath] (when pom (with-open [pom-is (io/input-stream pom)] - (if-let [expressions (pom->expressions pom-is fname)] + (if-let [expressions (pom->expressions-info pom-is filepath)] expressions - (log/info (str "'" pom "'") "contains no license information")))))) + (log/info (str "'" filepath "'") "contains no license information")))))) + +(defn pom->expressions + "Attempt to detect the license expression(s) (a set) reported in a pom.xml + file. pom may be a java.io.InputStream, or anything that can be opened by + clojure.java.io/input-stream. + + Note that if an InputStream is provided: + 1. it's the caller's responsibility to open and close it + 2. a filepath *must* be provided along with the stream (the 2nd arg) + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." + ([pom] (pom->expressions pom (lcu/filepath pom))) + ([pom filepath] + (some-> (pom->expressions-info pom filepath) + keys + set))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/test/lice_comb/impl/metadata_test.clj b/test/lice_comb/impl/metadata_test.clj index 8d69bec..cc95a43 100644 --- a/test/lice_comb/impl/metadata_test.clj +++ b/test/lice_comb/impl/metadata_test.clj @@ -19,60 +19,70 @@ (ns lice-comb.impl.metadata-test (:require [clojure.test :refer [deftest testing is use-fixtures]] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.impl.metadata :refer [prepend-source union]])) + [lice-comb.impl.metadata :refer [prepend-source merge-maps]])) (use-fixtures :once fixture) (def md1 { - "Apache-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("Apache Software Licence v2.0")} - "MIT" {:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source '("MIT")} -}) + "Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("Apache Software Licence v2.0")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("MIT")})}) (def md2 { - "Apache-2.0" {:type :concluded :confidence :low :strategy :regex-matching :source '("Apache style license")} - "BSD-4-Clause" {:type :concluded :confidence :low :strategy :regex-matching :source '("BSD")} - }) + "Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")}) + "BSD-4-Clause" '({:type :concluded :confidence :low :strategy :regex-matching :source ("BSD")})}) + +(def md3 { + "Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")} + {:type :concluded :confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("apache-2.0")} + {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("Apache-2.0")}) + "GPL-3.0-or-later" '({:type :concluded :confidence :low :strategy :regex-matching :source ("GNU General Public License 3.0 or later")})}) + +(def mds (list md1 md2 md3)) (deftest prepend-source-tests (testing "nil/empty/blank" - (is (nil? (prepend-source nil nil))) - (is (nil? (prepend-source nil ""))) - (is (= #{} (prepend-source #{} nil))) - (is (nil? (meta (prepend-source #{} nil)))) - (is (= #{} (prepend-source #{} ""))) - (is (nil? (meta (prepend-source #{} ""))))) + (is (nil? (prepend-source nil nil))) + (is (nil? (prepend-source nil ""))) + (is (= {} (prepend-source {} nil))) + (is (= {} (prepend-source {} "")))) (testing "non-nil metadata that isn't lice-comb specific" - (is (= {} (meta (prepend-source (with-meta #{:a} {}) "foo")))) - (is (= {:foo "foo"} (meta (prepend-source (with-meta #{:a} {:foo "foo"}) "bar")))) + (is (= {:a "a"} (prepend-source {:a "a"} "foo")))) (testing "non-nil metadata that is lice-comb specific" - (is (= {"Apache-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("pom.xml" "Apache Software Licence v2.0")} - "MIT" {:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source '("pom.xml" "MIT")}} - (meta (prepend-source (with-meta #{:a} md1) "pom.xml")))) - (is (= {"Apache-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("library.jar" "pom.xml" "Apache Software Licence v2.0")} - "MIT" {:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source '("library.jar" "pom.xml" "MIT")}} - (meta (prepend-source (prepend-source (with-meta #{:a} md1) "pom.xml") "library.jar"))))))) - -(deftest union-tests - (testing "zero arg" - (is (= #{} (union)))) - (testing "one arg" - (is (nil? (union nil))) - (is (= #{} (union #{}))) - (is (= #{:foo} (union #{:foo})))) - (testing "two arg" - (is (= #{:foo :bar} (union #{:foo} #{:bar})))) - (testing "multi-arg" - (is (= #{:a :b :c} (union #{:a} #{:b} #{:c}))) - (is (= #{:a :b :c :d} (union #{:a} #{:b} #{:c} #{:d}))) - (is (= #{:a :b :c :d :e :f :g :h :i :j :k :l :m :n :o} (union #{:a :b} #{:c :d :e} #{:f :g :h :i} #{:j :k :l :m :n :o})))) - (testing "metadata" - (is (= {:foo "foo"} (meta (union (with-meta #{:a :b :c} {:foo "foo"}))))) - (is (= {:foo "foo" :bar "bar"} (meta (union (with-meta #{:a :b :c} {:foo "foo"}) (with-meta #{:d :e :f} {:bar "bar"}))))) - (is (= {:foo "foo" :bar "bar" :blah "blah"} (meta (union (with-meta #{:a :b :c} {:foo "foo"}) (with-meta #{:d :e :f} {:bar "bar"}) (with-meta #{:g :h :i} {:blah "blah"}))))) - (is (thrown? clojure.lang.ExceptionInfo (meta (union (with-meta #{:a :b :c} {:foo "foo"}) (with-meta #{:d :e :f} {:foo "bar"}))))) ; Non lice-comb conflicting key in metadata maps = exception - (is (= {"Apache-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("Apache Software Licence v2.0")} - "MIT" {:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source '("MIT")} - "BSD-4-Clause" {:type :concluded :confidence :low :strategy :regex-matching :source '("BSD")} - } - (meta (union (with-meta #{:a :b :c} md1) (with-meta #{:d :e :f} md2))))))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("pom.xml" "Apache Software Licence v2.0")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("pom.xml" "MIT")})} + (prepend-source md1 "pom.xml"))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("library.jar" "pom.xml" "Apache Software Licence v2.0")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("library.jar" "pom.xml" "MIT")})} + (prepend-source (prepend-source md1 "pom.xml") "library.jar"))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("pom.xml" "Apache style license")} + {:type :concluded :confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("pom.xml" "apache-2.0")} + {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("pom.xml" "Apache-2.0")}) + "GPL-3.0-or-later" '({:type :concluded :confidence :low :strategy :regex-matching :source ("pom.xml" "GNU General Public License 3.0 or later")})} + (prepend-source md3 "pom.xml"))))) +(deftest merge-maps-tests + (testing "nil/empty" + (is (nil? (merge-maps))) + (is (nil? (merge-maps nil)))) + (testing "identity" + (is (= md1 (merge-maps md1)))) + (testing "merges" + (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("Apache Software Licence v2.0")} + {:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("MIT")}) + "BSD-4-Clause" '({:type :concluded :confidence :low :strategy :regex-matching :source ("BSD")})} + (merge-maps md1 md2))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")} ; Note de-duping + {:type :concluded :confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("apache-2.0")} + {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("Apache-2.0")}) + "BSD-4-Clause" '({:type :concluded :confidence :low :strategy :regex-matching :source ("BSD")}) + "GPL-3.0-or-later" '({:type :concluded :confidence :low :strategy :regex-matching :source ("GNU General Public License 3.0 or later")})} + (merge-maps md2 md3))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("Apache Software Licence v2.0")} + {:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")} ; Note de-duping + {:type :concluded :confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("apache-2.0")} + {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("Apache-2.0")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("MIT")}) + "BSD-4-Clause" '({:type :concluded :confidence :low :strategy :regex-matching :source ("BSD")}) + "GPL-3.0-or-later" '({:type :concluded :confidence :low :strategy :regex-matching :source ("GNU General Public License 3.0 or later")})} + (apply merge-maps mds))))) diff --git a/test/lice_comb/impl/regex_matching_test.clj b/test/lice_comb/impl/regex_matching_test.clj index 44116f9..4bffced 100644 --- a/test/lice_comb/impl/regex_matching_test.clj +++ b/test/lice_comb/impl/regex_matching_test.clj @@ -22,7 +22,7 @@ [rencg.api :as rencg] [lice-comb.impl.utils :as lcu] [lice-comb.test-boilerplate :refer [fixture testing-with-data]] - [lice-comb.impl.regex-matching :refer [init! version-re only-or-later-re agpl-re lgpl-re gpl-re gnu-re match-regexes]])) + [lice-comb.impl.regex-matching :refer [init! version-re only-or-later-re agpl-re lgpl-re gpl-re gnu-re matches]])) (use-fixtures :once fixture) @@ -234,5 +234,5 @@ (is (every? not-nil? (map (partial test-regex gnu-re) gnu-licenses))))) (deftest match-regexes-tests - (testing-with-data "GNU Family Regexes - correct identifier results" match-regexes gnu-licenses-and-ids) - (testing-with-data "CC Family Regexes - correct identifier results" match-regexes cc-by-licenses-and-ids)) + (testing-with-data "GNU Family Regexes - correct identifier results" #(mapcat keys (matches %)) gnu-licenses-and-ids) + (testing-with-data "CC Family Regexes - correct identifier results" #(mapcat keys (matches %)) cc-by-licenses-and-ids)) diff --git a/test/lice_comb/utils_test.clj b/test/lice_comb/impl/utils_test.clj similarity index 50% rename from test/lice_comb/utils_test.clj rename to test/lice_comb/impl/utils_test.clj index 67511a3..29927c2 100644 --- a/test/lice_comb/utils_test.clj +++ b/test/lice_comb/impl/utils_test.clj @@ -16,10 +16,11 @@ ; SPDX-License-Identifier: Apache-2.0 ; -(ns lice-comb.utils-test +(ns lice-comb.impl.utils-test (:require [clojure.test :refer [deftest testing is use-fixtures]] + [clojure.java.io :as io] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.impl.utils :refer [simplify-uri]])) + [lice-comb.impl.utils :refer [simplify-uri filepath filename]])) (use-fixtures :once fixture) @@ -58,3 +59,60 @@ (is (= "http://gnu.org/software/classpath/license" (simplify-uri "https://www.gnu.org/software/classpath/license.html"))) (is (= "http://raw.githubusercontent.com/pmonks/lice-comb/main/license" (simplify-uri "https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE"))) (is (= "http://github.com/pmonks/lice-comb/blob/main/license" (simplify-uri "https://github.com/pmonks/lice-comb/blob/main/LICENSE"))))) + +(deftest filepath-tests + (testing "Nil, empty or blank values" + (is (nil? (filepath nil))) + (is (= "" (filepath ""))) + (is (= "" (filepath " "))) + (is (= "" (filepath "\n"))) + (is (= "" (filepath "\t")))) + (testing "Files" + (is (= "/file.txt" (filepath (io/file "/file.txt")))) + (is (= "/some/path/or/other/file.txt" (filepath (io/file "/some/path/or/other/file.txt"))))) + (testing "Strings" + (is (= "file.txt" (filepath "file.txt"))) + (is (= "/some/path/or/other/file.txt" (filepath "/some/path/or/other/file.txt"))) + (is (= "https://www.google.com/" (filepath "https://www.google.com/"))) + (is (= "https://www.google.com/" (filepath " https://www.google.com/ "))) + (is (= "https://github.com/pmonks/lice-comb/blob/main/deps.edn" (filepath "https://github.com/pmonks/lice-comb/blob/main/deps.edn")))) + (testing "ZipEntries" + (is (= "file.txt" (filepath (java.util.zip.ZipEntry. "file.txt")))) + (is (= "/some/path/or/other/file.txt" (filepath (java.util.zip.ZipEntry. "/some/path/or/other/file.txt"))))) + (testing "URLs" + (is (= "https://www.google.com/" (filepath (io/as-url "https://www.google.com/")))) + (is (= "https://github.com/pmonks/lice-comb/blob/main/deps.edn" (filepath (io/as-url "https://github.com/pmonks/lice-comb/blob/main/deps.edn"))))) + (testing "URIs" + (is (= "https://www.google.com/" (filepath (java.net.URI. "https://www.google.com/")))) + (is (= "https://github.com/pmonks/lice-comb/blob/main/deps.edn" (filepath (java.net.URI. "https://github.com/pmonks/lice-comb/blob/main/deps.edn"))))) + (testing "InputStream" + (is (thrown? clojure.lang.ExceptionInfo (filepath (io/input-stream "deps.edn")))))) + +(deftest filename-tests + (testing "Nil, empty or blank values" + (is (nil? (filename nil))) + (is (= "" (filename ""))) + (is (= "" (filename " "))) + (is (= "" (filename "\n"))) + (is (= "" (filename "\t")))) + (testing "Files" + (is (= "file.txt" (filename (io/file "file.txt")))) + (is (= "file.txt" (filename (io/file "/some/path/or/other/file.txt"))))) + (testing "Strings" + (is (= "file.txt" (filename "file.txt"))) + (is (= "file.txt" (filename "/some/path/or/other/file.txt"))) + (is (= "" (filename "https://www.google.com"))) + (is (= "" (filename "https://www.google.com/"))) + (is (= "deps.edn" (filename "https://github.com/pmonks/lice-comb/blob/main/deps.edn")))) + (testing "ZipEntries" + (is (= "file.txt" (filename (java.util.zip.ZipEntry. "file.txt")))) + (is (= "file.txt" (filename (java.util.zip.ZipEntry. "/some/path/or/other/file.txt"))))) + (testing "URLs" + (is (= "" (filename (io/as-url "https://www.google.com/")))) + (is (= "deps.edn" (filename (io/as-url "https://github.com/pmonks/lice-comb/blob/main/deps.edn"))))) + (testing "URIs" + (is (= "" (filename (java.net.URI. "https://www.google.com/")))) + (is (= "deps.edn" (filename (java.net.URI. "https://github.com/pmonks/lice-comb/blob/main/deps.edn"))))) + (testing "InputStream" + (is (thrown? clojure.lang.ExceptionInfo (filename (io/input-stream "deps.edn")))))) + diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index 2da63c3..561fd4b 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -18,9 +18,9 @@ (ns lice-comb.matching-test (:require [clojure.test :refer [deftest testing is use-fixtures]] - [lice-comb.test-boilerplate :refer [fixture valid=]] + [lice-comb.test-boilerplate :refer [fixture valid= valid-info=]] [lice-comb.impl.spdx :as lcis] - [lice-comb.matching :refer [init! unlisted? proprietary-commercial? text->ids name->expressions uri->ids]] + [lice-comb.matching :refer [init! unlisted? proprietary-commercial? text->ids name->expressions name->expressions-info uri->ids]] [spdx.licenses :as sl] [spdx.exceptions :as se])) @@ -71,7 +71,7 @@ (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Proprietary"))) (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Commercial"))) (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "All rights reserved")))) - (testing "Expressions that are valid SPDX" + (testing "SPDX expressions" (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0 WITH Classpath-exception-2.0"))) (is (valid= #{"Apache-2.0 OR GPL-3.0-only"} (name->expressions "Apache-2.0 OR GPL-3.0"))) (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR (BSD-3-Clause AND Apache-2.0)"} (name->expressions "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR (BSD-3-Clause AND Apache-2.0)")))) @@ -704,31 +704,56 @@ (is (unlisted-only? (name->expressions "wisdragon"))) (is (unlisted-only? (name->expressions "wiseloong"))))) -;####TEST!!!! -(comment +(deftest name->expressions-info-tests + (testing "Nil, empty or blank" + (is (nil? (name->expressions-info nil))) + (is (nil? (name->expressions-info ""))) + (is (nil? (name->expressions-info " "))) + (is (nil? (name->expressions-info "\n"))) + (is (nil? (name->expressions-info "\t")))) + (testing "SPDX license ids" + (is (valid-info= {"AGPL-3.0-only" {:type :declared :strategy :spdx-expression :source '("AGPL-3.0")}} + (name->expressions-info "AGPL-3.0"))) + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" {:type :declared :strategy :spdx-expression :source '("GPL-2.0-with-classpath-exception")}} + (name->expressions-info "GPL-2.0-with-classpath-exception")))) + (testing "SPDX expressions" + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" {:type :declared :strategy :spdx-expression :source '("GPL-2.0 WITH Classpath-exception-2.0")}} + (name->expressions-info "GPL-2.0 WITH Classpath-exception-2.0")))) + (testing "Single expressions that are not valid SPDX" + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" {:type :declared :strategy :spdx-expression :source '("GPL-2.0 WITH Classpath-exception-2.0")}} + (name->expressions-info "GNU General Public License, version 2 with the GNU Classpath Exception")))) + (testing "Multiple expressions" + (is (valid-info= {"MIT" {:type :declared :strategy :spdx-listed-identifier-exact-match :source '("MIT")} + "BSD-4-Clause" {:type :concluded :confidence :low :strategy :regex-name-matching :source '("BSD")}} + (name->expressions-info "MIT / BSD")))) + (testing "All names seen in POMs on Clojars as of 2023-07-13" + (is (valid-info= {"BSD-3-Clause" {:type :concluded :confidence :medium :strategy :spdx-listed-uri :source '("https://opensource.org/licenses/BSD-3-Clause")}} + (name->expressions-info "https://opensource.org/licenses/BSD-3-Clause"))) + (is (valid-info= {"EPL-2.0" {:type :concluded :confidence :medium :strategy :regex-name-matching :source '("Eclipse Public License - v 2.0")}} + (name->expressions-info "Eclipse Public License - v 2.0"))))) + (deftest uri->ids-tests (testing "Nil, empty or blank uri" - (is (nil? (uri->ids nil))) - (is (nil? (uri->ids ""))) - (is (nil? (uri->ids " "))) - (is (nil? (uri->ids "\n"))) - (is (nil? (uri->ids "\t")))) + (is (nil? (uri->ids nil))) + (is (nil? (uri->ids ""))) + (is (nil? (uri->ids " "))) + (is (nil? (uri->ids "\n"))) + (is (nil? (uri->ids "\t")))) (testing "URIs that appear verbatim in the SPDX license or exception lists" - (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= #{"Apache-2.0"} (uri->ids " http://www.apache.org/licenses/LICENSE-2.0.html "))) ; Test whitespace - (is (= #{"AGPL-3.0-or-later" "AGPL-3.0-only" "AGPL-3.0"} (uri->ids "https://www.gnu.org/licenses/agpl.txt"))) - (is (= #{"CC-BY-SA-4.0"} (uri->ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) - (is (= #{"Classpath-exception-2.0"} (uri->ids "https://www.gnu.org/software/classpath/license.html")))) + (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (uri->ids " http://www.apache.org/licenses/LICENSE-2.0.html "))) ; Test whitespace + (is (= #{"AGPL-3.0-or-later"} (uri->ids "https://www.gnu.org/licenses/agpl.txt"))) + (is (= #{"CC-BY-SA-4.0"} (uri->ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) + (is (= #{"Classpath-exception-2.0"} (uri->ids "https://www.gnu.org/software/classpath/license.html")))) (testing "URI variations that should be handled identically" - (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) - (is (= #{"Apache-2.0"} (uri->ids "http://apache.org/licenses/LICENSE-2.0.pdf")))) + (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) + (is (= #{"Apache-2.0"} (uri->ids "http://apache.org/licenses/LICENSE-2.0.pdf")))) (testing "URIs that appear in licensey things, but aren't in the SPDX license list as shown" - (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0"))) - (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt")))) + (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0"))) + (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt")))) (testing "URIs that aren't in the SPDX license list, but do match via retrieval and full text matching" - (is (= #{"Apache-2.0"} (uri->ids "https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE"))) - (is (= #{"Apache-2.0"} (uri->ids "https://github.com/pmonks/lice-comb/blob/main/LICENSE"))) - (is (= #{"Apache-2.0"} (uri->ids "HTTPS://GITHUB.COM/pmonks/lice-comb/blob/main/LICENSE"))))) -) \ No newline at end of file + (is (= #{"Apache-2.0"} (uri->ids "https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE"))) + (is (= #{"Apache-2.0"} (uri->ids "https://github.com/pmonks/lice-comb/blob/main/LICENSE"))) + (is (= #{"Apache-2.0"} (uri->ids "HTTPS://GITHUB.COM/pmonks/lice-comb/blob/main/LICENSE"))))) diff --git a/test/lice_comb/test_boilerplate.clj b/test/lice_comb/test_boilerplate.clj index 3afaef7..c4496b5 100644 --- a/test/lice_comb/test_boilerplate.clj +++ b/test/lice_comb/test_boilerplate.clj @@ -58,26 +58,48 @@ (defn valid= "Returns true if all of the following are true: - * s2 has metadata - * s2 is a set - * s2 is equal to s1 - * every entry in s2 is a valid SPDX license expression + * actual is a set + * actual equals expected + * everything in actual is a valid SPDX license expression Also prints (to stdout) which of the above is not true, in the event that any of them are not true." - [s1 s2] - (let [metadata? (or (nil? s2) (not-nil? (meta s2))) - is-a-set? (or (nil? s2) (set? s2)) - is-equal? (= s1 s2) - all-valid-expressions? (and (set? s2) (every? true? (map sexp/valid? s2))) - result (and metadata? - is-a-set? + [expected actual] + (let [is-a-set? (or (nil? actual) (set? actual)) + is-equal? (= (set expected) actual) + all-valid-expressions? (and is-a-set? (every? true? (map sexp/valid? actual))) + result (and is-a-set? is-equal? all-valid-expressions?)] ; Yes print here is deliberate, to ensure the output lines are grouped with the associated test failure message (when-not result (print "\n☔️☔️☔️ Invalid result produced:")) - (when-not metadata? (print "\n* Missing metadata")) (when-not is-a-set? (print "\n* Not a set")) - (when-not is-equal? (print "\n* Not equal to expected value")) + (when-not is-equal? (print "\n* Not equal to expected")) (when-not all-valid-expressions? (print "\n* Not all valid SPDX expressions")) result)) + +(defn valid-info= + "Returns true if all of the following are true: + * actual is a map + * the keys in actual are identical to expected-keys + * all vals in actual are maps + * every key in actual is a valid SPDX license expression + + Also prints (to stdout) which of the above is not true, in the event that any + of them are not true." + [expected actual] + (let [is-a-map? (or (nil? actual) (map? actual)) + is-equal? (= expected actual) + values-are-maps? (or (nil? actual) (every? map? (vals actual))) + all-valid-expressions? (and is-a-map? (every? true? (map sexp/valid? (keys actual)))) + result (and values-are-maps? + is-a-map? + is-equal? + all-valid-expressions?)] + ; Yes print here is deliberate, to ensure the output lines are grouped with the associated test failure message + (when-not result (print "\n☔️☔️☔️ Invalid result produced:")) + (when-not is-a-map? (print "\n* Not a map")) + (when-not is-equal? (print "\n* Not equal to expected")) + (when-not values-are-maps? (print "\n* Not all values are maps")) + (when-not all-valid-expressions? (print "\n* Not all keys are valid SPDX expressions")) + result))