diff --git a/.github/workflows/haskell.yml b/.github/workflows/haskell.yml index 1319db3a0..b1d945b4f 100644 --- a/.github/workflows/haskell.yml +++ b/.github/workflows/haskell.yml @@ -54,7 +54,7 @@ jobs: shell: bash run: | if [ ${{ matrix.ospath }} = "true" ]; then - cabal configure --constraint="filepath ^>= 1.4.100.0" + cabal configure --flags="force-ospath" fi - name: Build using cabal run: cabal build all diff --git a/lsp-types/lsp-types.cabal b/lsp-types/lsp-types.cabal index b71798582..0b8b63dd5 100644 --- a/lsp-types/lsp-types.cabal +++ b/lsp-types/lsp-types.cabal @@ -16,6 +16,11 @@ category: Development build-type: Simple extra-source-files: ChangeLog.md, README.md +flag force-ospath + default: False + manual: False + description: Force a version bound on filepath library, to enable 'OsPath'. + library exposed-modules: Language.LSP.Types , Language.LSP.Types.Capabilities @@ -79,7 +84,6 @@ library , deepseq , Diff >= 0.2 , dlist - , filepath , hashable , lens >= 4.15.2 , mtl < 2.4 @@ -92,7 +96,10 @@ library , unordered-containers , exceptions , safe - , bytestring + if flag(force-ospath) + build-depends: filepath ^>= 1.4.100.0 + else + build-depends: filepath hs-source-dirs: src default-language: Haskell2010 default-extensions: StrictData diff --git a/lsp-types/src/Language/LSP/Types/Uri.hs b/lsp-types/src/Language/LSP/Types/Uri.hs index 4f80348cb..c7235d26e 100644 --- a/lsp-types/src/Language/LSP/Types/Uri.hs +++ b/lsp-types/src/Language/LSP/Types/Uri.hs @@ -26,27 +26,21 @@ module Language.LSP.Types.Uri where import Control.DeepSeq -import qualified Data.Aeson as A -import Data.Binary (Binary, Get, get, put) -import Data.ByteString.Short (ShortByteString) -import qualified Data.ByteString.Short as BS +import qualified Data.Aeson as A +import Data.Binary (Binary, Get, get, put) import Data.Hashable -import Data.List (stripPrefix) -import Data.String (IsString (fromString)) -import Data.Text (Text) -import qualified Data.Text as T -import qualified Data.Text.Encoding as T -import Data.Text.Encoding.Error (UnicodeException) +import Data.List (stripPrefix) +import Data.String (IsString (fromString)) +import Data.Text (Text) +import qualified Data.Text as T import GHC.Generics -import GHC.Stack (HasCallStack) -import Network.URI hiding (authority) -import Safe (tailMay) -import qualified System.FilePath as FP -import qualified System.FilePath.Posix as FPP -import qualified System.FilePath.Windows as FPW +import Network.URI hiding (authority) +import Safe (tailMay) +import qualified System.FilePath as FP +import qualified System.FilePath.Posix as FPP +import qualified System.FilePath.Windows as FPW import qualified System.Info - newtype Uri = Uri { getUri :: Text } deriving (Eq,Ord,Read,Show,Generic,A.FromJSON,A.ToJSON,Hashable,A.ToJSONKey,A.FromJSONKey) @@ -164,14 +158,30 @@ platformAdjustToUriPath systemOS srcPath FPP.addTrailingPathSeparator (init drv) | otherwise = drv --- | A file path that is already normalized. It is stored as an UTF-8 encoded 'ShortByteString' --- --- The 'NormalizedUri' is cached to avoided --- repeated normalisation when we need to compute them (which is a lot). --- --- This is one of the most performance critical parts of ghcide, do not --- modify it without profiling. -data NormalizedFilePath = NormalizedFilePath !NormalizedUri {-# UNPACK #-} !ShortByteString +{-| A file path that is already normalized. + +The 'NormalizedUri' is cached to avoided +repeated normalisation when we need to compute them (which is a lot). + +This is one of the most performance critical parts of HLS, do not +modify it without profiling. + +== Adoption Plan of OsPath + +Currently we store 'Text'. We may change it to OsPath in the future if +the following steps are executed. + +1. In the client codebase, use 'osPathToNormalizedFilePath' and 'normalizedFilePathToOsPath' instead of 'fromNormalizedFilePath' + and 'toNormalizedFilePath'. For HLS, we could wait until GHC 9.6 becomes the oldest + GHC we support, then change 'FilePath' to OsPath everywhere in the codebase. +2. Deprecate and remove 'fromNormalizedFilePath' and 'toNormalizedFilePath'. +3. Change 'Text' to OsPath and benchmark it to make sure performance doesn't go down. Don't forget to check Windows, + as OsPath on Windows uses UTF-16, which may consume more memory. + +See [#453](https://github.com/haskell/lsp/pull/453) and [#446](https://github.com/haskell/lsp/pull/446) +for more discussions on this topic. +-} +data NormalizedFilePath = NormalizedFilePath !NormalizedUri {-# UNPACK #-} !Text deriving (Generic, Eq, Ord) instance NFData NormalizedFilePath @@ -179,17 +189,8 @@ instance NFData NormalizedFilePath instance Binary NormalizedFilePath where put (NormalizedFilePath _ fp) = put fp get = do - v <- Data.Binary.get :: Get ShortByteString - case decodeFilePath v of - Left e -> fail (show e) - Right v' -> - return (NormalizedFilePath (internalNormalizedFilePathToUri v') v) - -encodeFilePath :: String -> ShortByteString -encodeFilePath = BS.toShort . T.encodeUtf8 . T.pack - -decodeFilePath :: ShortByteString -> Either UnicodeException String -decodeFilePath = fmap T.unpack . T.decodeUtf8' . BS.fromShort + v <- Data.Binary.get :: Get Text + return (NormalizedFilePath (internalNormalizedFilePathToUri (T.unpack v)) v) -- | Internal helper that takes a file path that is assumed to -- already be normalized to a URI. It is up to the caller @@ -213,24 +214,20 @@ instance IsString NormalizedFilePath where fromString = toNormalizedFilePath toNormalizedFilePath :: FilePath -> NormalizedFilePath -toNormalizedFilePath fp = NormalizedFilePath nuri . encodeFilePath $ nfp +toNormalizedFilePath fp = NormalizedFilePath nuri . T.pack $ nfp where nfp = FP.normalise fp nuri = internalNormalizedFilePathToUri nfp -- | Extracts 'FilePath' from 'NormalizedFilePath'. --- The function is total. The 'HasCallStack' constraint is added for debugging purpose only. -fromNormalizedFilePath :: HasCallStack => NormalizedFilePath -> FilePath -fromNormalizedFilePath (NormalizedFilePath _ fp) = - case decodeFilePath fp of - Left e -> error $ show e - Right x -> x +fromNormalizedFilePath :: NormalizedFilePath -> FilePath +fromNormalizedFilePath (NormalizedFilePath _ fp) = T.unpack fp normalizedFilePathToUri :: NormalizedFilePath -> NormalizedUri normalizedFilePathToUri (NormalizedFilePath uri _) = uri uriToNormalizedFilePath :: NormalizedUri -> Maybe NormalizedFilePath -uriToNormalizedFilePath nuri = fmap (NormalizedFilePath nuri . encodeFilePath) mbFilePath +uriToNormalizedFilePath nuri = fmap (NormalizedFilePath nuri . T.pack) mbFilePath where mbFilePath = platformAwareUriToFilePath System.Info.os (fromNormalizedUri nuri) emptyNormalizedUri :: NormalizedUri diff --git a/lsp-types/src/Language/LSP/Types/Uri/OsPath.hs b/lsp-types/src/Language/LSP/Types/Uri/OsPath.hs index bdb7fb7c0..317b1dd33 100644 --- a/lsp-types/src/Language/LSP/Types/Uri/OsPath.hs +++ b/lsp-types/src/Language/LSP/Types/Uri/OsPath.hs @@ -10,38 +10,44 @@ module Language.LSP.Types.Uri.OsPath #ifdef OS_PATH osPathToNormalizedFilePath , normalizedFilePathToOsPath + , EncodingException #endif ) where #ifdef OS_PATH -import Control.DeepSeq (NFData, force) import Control.Exception hiding (try) import Control.Monad.Catch +import GHC.IO.Encoding (getFileSystemEncoding) import Language.LSP.Types.Uri +import System.IO import System.IO.Unsafe (unsafePerformIO) import System.OsPath +import System.OsPath.Encoding (EncodingException) {-| -Constructs 'NormalizedFilePath' from 'OsPath'. Throws 'IOException' if the conversion fails. +Constructs 'NormalizedFilePath' from 'OsPath'. Throws 'EncodingException' if the conversion fails. + +We store a 'Text' in 'NormalizedFilePath', which is UTF-16 or UTF-8 depending on the verion of text library. +'OsPath' may have a different encoding than 'Text', so this function may fail. +But DO NOTE THAT encoding mismatch doesn't always mean an exception will be thrown. +[Possibly your encoding simply won't throw exception on failure](https://hackage.haskell.org/package/base-4.17.0.0/docs/src/GHC.IO.Encoding.html#initFileSystemEncoding). +Possibly the conversion function can't find any invalid byte sequence, giving a sucessful but wrong result. -} osPathToNormalizedFilePath :: MonadThrow m => OsPath -> m NormalizedFilePath -osPathToNormalizedFilePath = fmap toNormalizedFilePath . unsafePerformIO' . decodeFS +osPathToNormalizedFilePath = fmap toNormalizedFilePath . liftException . decodeWith systemEnc utf16le {-| -Extracts 'OsPath' from 'NormalizedFilePath'. Throws 'IOException' if the conversion fails. +Extracts 'OsPath' from 'NormalizedFilePath'. Throws 'EncodingException' if the conversion fails. -} normalizedFilePathToOsPath :: MonadThrow m => NormalizedFilePath -> m OsPath -normalizedFilePathToOsPath = unsafePerformIO' . encodeFS . fromNormalizedFilePath - -unsafePerformIO' :: (MonadThrow m, NFData a) => IO a -> m a -unsafePerformIO' action = - case fp of - Left (e :: SomeException) -> throwM e - Right fp' -> pure fp' - where - fp = unsafePerformIO . try $ do - x <- action - evaluate . force $ x +normalizedFilePathToOsPath = liftException . encodeWith systemEnc utf16le . fromNormalizedFilePath + +liftException :: (MonadThrow m, Exception e) => Either e a -> m a +liftException (Right x) = pure x +liftException (Left err) = throwM err + +systemEnc :: TextEncoding +systemEnc = unsafePerformIO getFileSystemEncoding #endif diff --git a/lsp-types/test/URIFilePathSpec.hs b/lsp-types/test/URIFilePathSpec.hs index f63104609..f1c2bdfeb 100644 --- a/lsp-types/test/URIFilePathSpec.hs +++ b/lsp-types/test/URIFilePathSpec.hs @@ -307,5 +307,5 @@ normalizedFilePathSpec = beforeAll (setFileSystemEncoding utf8) $ do case OsPath.encodeWith utf16be utf16be "\184921" of Left err -> throwIO err Right osPath -> do - osPathToNormalizedFilePath osPath `shouldThrow` \(_ :: IOException) -> True + osPathToNormalizedFilePath osPath `shouldThrow` \(_ :: EncodingException) -> True #endif