Skip to content

Commit

Permalink
Keep going if rdf data unexpectedly fails
Browse files Browse the repository at this point in the history
  • Loading branch information
cldwalker committed Aug 29, 2021
1 parent b04b340 commit 98052ae
Showing 1 changed file with 14 additions and 6 deletions.
20 changes: 14 additions & 6 deletions bin/bb-logseq-convert
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,8 @@ process that generates this map from your logseq data"
;; rdf data is converted
{"http://www.w3.org/1999/02/22-rdf-syntax-ns#type" "type"})}))

(defn- get-rdf-properties [url-obj config {:keys [debug]}]
(defn- get-rdf-properties* [triples url-obj config {:keys [debug]}]
(let [dynamic-config (build-dynamic-config config)
triples (-> ["rdf-dereference" (str url-obj)]
;; Need a timeout as rdf-dereference has been weirdly hanging
;; out for imdb and wikipedia
(process-by-timeout 2500)
(json/parse-string true))
host (str/replace-first (.getHost url-obj) #"^www\." "")
_ (when debug (println "[DEBUG] Host: " host))
properties-to-keep (get-in config [:host-properties host]
Expand All @@ -138,6 +133,19 @@ process that generates this map from your logseq data"
(into {}))]
properties))

(defn get-rdf-properties
  "Runs the rdf-dereference command for url-obj (bounded by a timeout), parses
  its output as JSON and, when that yields triples, hands them to
  get-rdf-properties* to build the properties. If fetching or parsing throws,
  the failure is printed and nil is returned so the caller can keep going."
  [url-obj config options]
  (let [command ["rdf-dereference" (str url-obj)]
        triples (try
                  ;; Need a timeout as rdf-dereference has been weirdly hanging
                  ;; out for imdb and wikipedia
                  (doall
                   (json/parse-string (process-by-timeout command 2500) true))
                  ;; nytimes intermittently fails on partially streamed data
                  (catch Exception e
                    (println "Rdf properties unexpectedly failed with: " (str e))
                    ;; println already returns nil; spelled out to make the
                    ;; fall-through value explicit
                    nil))]
    (if triples
      (get-rdf-properties* triples url-obj config options)
      nil)))

(defn- url->properties
  "Combines the rdf-derived properties for url-obj with the url-derived ones.
  The rdf lookup runs first; on a key collision the url-derived value wins
  because it is the later argument to merge."
  [url-obj config options]
  (let [rdf-props (get-rdf-properties url-obj config options)
        url-props (get-url-properties url-obj config options)]
    (merge rdf-props url-props)))
Expand Down

0 comments on commit 98052ae

Please sign in to comment.