Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Better boot lib version scraping #109

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 39 additions & 40 deletions stack/global-hints.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,45 @@
# This file auto-generated by update-global-hints.hs.
# Please ensure this is updated when a new version of GHC is released.

ghc-9.4.0.20220721:
exceptions: 0.10.5
ghc: 9.4.0.20220721
bytestring: 0.11.3.1
system-cxx-std-lib: '1.0'
unix: 2.7.3
haskeline: 0.8.2
stm: 2.5.1.0
Cabal: 3.8.0.20220526
base: 4.17.0.0
time: 1.12.2
xhtml: 3000.2.2.1
text: '2.0'
hpc: 0.6.1.0
ghc-bignum: '1.3'
filepath: 1.4.2.2
process: 1.6.14.0
parsec: 3.1.15.0
ghc-compact: 0.1.0.0
array: 0.5.4.0
Win32: 2.12.0.0
integer-gmp: '1.1'
Cabal-syntax: 3.8.0.20220526
containers: 0.6.5.1
libiserv: 9.4.0.20220721
ghc-boot: 9.4.0.20220721
binary: 0.8.9.0
ghc-prim: 0.9.0
mtl: 2.2.2
ghc-heap: 9.4.0.20220721
ghci: 9.4.0.20220721
rts: 1.0.2
terminfo: 0.4.1.5
transformers: 0.5.6.2
deepseq: 1.4.8.0
ghc-boot-th: 9.4.0.20220721
pretty: 1.1.3.6
template-haskell: 2.19.0.0
directory: 1.3.7.1
ghc-8.4.2:
ghc: 8.4.2
bytestring: 0.10.8.2
Expand Down Expand Up @@ -1034,7 +1073,6 @@ ghc-9.2.4:
parsec: 3.1.15.0
ghc-compact: 0.1.0.0
array: 0.5.4.0
Win32: 2.12.0.1
integer-gmp: '1.1'
containers: 0.6.5.1
libiserv: 9.2.4
Expand Down Expand Up @@ -1148,42 +1186,3 @@ ghc-7.10.1:
pretty: 1.1.2.0
template-haskell: 2.10.0.0
directory: 1.2.2.0
ghc-9.4.0.20220721:
Cabal-syntax: 3.8.0.20220526
Cabal: 3.8.0.20220526
Win32: 2.12.0.0
array: 0.5.4.0
base: 4.17.0.0
binary: 0.8.9.0
bytestring: 0.11.3.1
containers: 0.6.5.1
deepseq: 1.4.8.0
directory: 1.3.7.1
exceptions: 0.10.5
filepath: 1.4.2.2
ghc-bignum: '1.3'
ghc-boot-th: 9.4.0.20220721
ghc-boot: 9.4.0.20220721
ghc-compact: 0.1.0.0
ghc-heap: 9.4.0.20220721
ghc-prim: 0.9.0
ghc: 9.4.0.20220721
ghci: 9.4.0.20220721
haskeline: 0.8.2
hpc: 0.6.1.0
integer-gmp: '1.1'
libiserv: 9.4.0.20220721
mtl: 2.2.2
parsec: 3.1.15.0
pretty: 1.1.3.6
process: 1.6.14.0
rts: 1.0.2
stm: 2.5.1.0
system-cxx-std-lib: '1.0'
template-haskell: 2.19.0.0
terminfo: 0.4.1.5
text: '2.0'
time: 1.12.2
transformers: 0.5.6.2
unix: 2.7.3
xhtml: 3000.2.2.1
38 changes: 17 additions & 21 deletions stack/update-global-hints.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,21 @@
-- stack --resolver lts-18.5 script
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS -Wall #-}
import Data.Yaml
import RIO
import Data.String.Conversions
import Data.List ((!!))
import Data.Maybe (fromJust)
import qualified RIO.Map as Map
import qualified RIO.Text as T
import System.Environment (getArgs)
import RIO.Process
import qualified Data.Text as T
import Distribution.Types.PackageId
import qualified RIO.ByteString.Lazy as BL
import qualified Distribution.Text as DT (simpleParse, display)

import Text.HTML.DOM (parseBSChunks)
import Text.XML.Cursor
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Network.HTTP.Simple

comment :: ByteString
Expand All @@ -29,6 +31,7 @@ type PackageId' = Text
type PackageVersion = Text
type GlobalHintsFragment = Map GhcVer (Map PackageId' PackageVersion)

globalHintsFile :: FilePath
globalHintsFile = "global-hints.yaml"

readGlobalHintsFile :: RIO SimpleApp GlobalHintsFragment
Expand Down Expand Up @@ -80,29 +83,22 @@ scrapeGhcReleaseNotes vers = do
pairs <- for vers myScrapeURL
pure $ Map.fromList pairs
where
url ver = T.unpack $ mconcat
[ "https://downloads.haskell.org/~ghc/"
, ver'
, "/docs/html/users_guide/"
, ver'
, "-notes.html"
] where ver' = fromMaybe ver (T.stripPrefix "ghc-" ver)
url _ = "https://gitlab.haskell.org/bgamari/ghc-utils/-/raw/master/library-versions/pkg_versions.txt"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In respect of this URL, looking at https://gitlab.haskell.org/bgamari/ghc-utils/-/tree/master/library-versions, I think things have moved on. When I put this URL into my browser, I get a 404 error.

myScrapeURL ghcVer = do
let url' = url ghcVer
req <- parseRequest url'
response <- httpBS req
let doc = parseBSChunks [getResponseBody response]
cursor = fromDocument doc
rows = cursor $// attributeIs "id" "included-libraries" &// element "tbody" &/ element "tr"
pairs <- traverse toPair rows
if null pairs
then error $ "Unable to parse HTML at " ++ url'
else pure (ghcVer, Map.fromList pairs)
let body = getResponseBody response
let ghcVerNo = fromJust $ T.stripPrefix "ghc-" ghcVer
pure (ghcVer, Map.fromList $ parsePkgVersions ghcVerNo body)

toPair row =
case map (\td -> fold $ td $// content) $ row $/ element "td" of
(pkg:ver:_) -> pure (pkg, ver)
_ -> error $ "Could not parse row " ++ show row
parsePkgVersions :: Text -> ByteString -> [(Text, Text)]
parsePkgVersions ghcVerNo = concatMap fun . T.lines . cs
where
fun :: Text -> [(Text, Text)]
fun t = case T.stripPrefix (ghcVerNo <> " ") t of
Nothing -> []
Just rest -> map (\e -> let pv = T.splitOn "/" e in if length pv /= 2 then error (show e) else (pv !! 0, pv !! 1)) $ T.splitOn " " $ T.dropWhile (== ' ') rest

globalHintsFragmentProviders :: [GhcVer] -> [RIO SimpleApp GlobalHintsFragment]
globalHintsFragmentProviders vers =
Expand Down