Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add support for NumericValue #71

Merged
merged 9 commits into from
May 31, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion lib/Unicode/Char/Numeric.hs
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,18 @@ isNumber c = case generalCategory c of
OtherNumber -> True
_ -> False


-- | Numeric value of a character, if relevant.
--
-- __Note:__ a character may have a numeric value but return 'False' with
-- the predicate 'isNumber', because 'isNumber' only tests 'GeneralCategory'.
wismill marked this conversation as resolved.
Show resolved Hide resolved
{-# INLINE numericValue #-}
numericValue :: Char -> Maybe Rational
numericValue = V.numericValue

-- | Integer value of a character, if relevant.
--
-- __Note:__ a character may have a numeric value but return 'False' with
-- the predicate 'isNumber', because 'isNumber' only tests 'GeneralCategory'.
intValue :: Char -> Maybe Int
intValue c = V.numericValue c >>= \r -> if denominator r == 1
wismill marked this conversation as resolved.
Show resolved Hide resolved
then Just (fromIntegral (numerator r))
Expand Down
15 changes: 13 additions & 2 deletions test/Unicode/CharSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@ module Unicode.CharSpec
) where

import qualified Data.Char as Char
import Data.Maybe (isJust)
import qualified Unicode.Char as UChar
-- [TODO] Remove the following qualified imports once isLetter and isSpace
-- are removed from Unicode.Char.General
import qualified Unicode.Char.General.Compat as UCharCompat
-- [TODO] Remove the following qualified imports once isUpper and isLower
-- are removed from Unicode.Char.Case
import qualified Unicode.Char.Case.Compat as UCharCompat
import qualified Unicode.Char.Numeric as UNumeric
import Data.Foldable (traverse_)
import Test.Hspec

Expand All @@ -35,9 +37,12 @@ spec :: Spec
spec = do
#ifdef COMPATIBLE_GHC_UNICODE
let describe' = describe
let it' = it
#else
let describe' t = before_ (pendingWith "Incompatible GHC Unicode version")
. describe t
let it' = before_ (pendingWith "Incompatible GHC Unicode version")
. it t
#endif
describe' "Unicode general categories" do
it "generalCategory" do
Expand All @@ -54,8 +59,6 @@ spec = do
UCharCompat.isLetter `shouldBeEqualTo` Char.isLetter
it "isMark" do
UChar.isMark `shouldBeEqualTo` Char.isMark
it "isNumber" do
UChar.isNumber `shouldBeEqualTo` Char.isNumber
it "isPrint" do
UChar.isPrint `shouldBeEqualTo` Char.isPrint
it "isPunctuation" do
Expand All @@ -77,6 +80,14 @@ spec = do
UChar.toUpper `shouldBeEqualTo` Char.toUpper
it "toTitle" do
UChar.toTitle `shouldBeEqualTo` Char.toTitle
describe "Numeric" do
it' "isNumber" do
UNumeric.isNumber `shouldBeEqualTo` Char.isNumber
it "isNumber implies a numeric value" do
-- [NOTE] the following does not hold with the current predicate `isNumber`.
-- 'let check c = (UNumeric.isNumber c `xor` isNothing (UNumeric.numericValue c))
let check c = not (UNumeric.isNumber c) || isJust (UNumeric.numericValue c)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder how many such cases are there where isNumber returns False but the value is numeric. Is it a lot of cases or just a few?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

81 CJK characters:

ns = filter (\c -> not (isNumber c `xor`  isNothing (numericValue c))) [minBound .. maxBound]
-- 81
(\c -> (showHex (ord c) "", generalCategory c)) <$> ns
-- [("3405",OtherLetter),("3483",OtherLetter),("382a",OtherLetter),("3b4d",OtherLetter),("4e00",OtherLetter),("4e03",OtherLetter),("4e07",OtherLetter),("4e09",OtherLetter),("4e5d",OtherLetter),("4e8c",OtherLetter),("4e94",OtherLetter),("4e96",OtherLetter),("4ebf",OtherLetter),("4ec0",OtherLetter),("4edf",OtherLetter),("4ee8",OtherLetter),("4f0d",OtherLetter),("4f70",OtherLetter),("5104",OtherLetter),("5146",OtherLetter),("5169",OtherLetter),("516b",OtherLetter),("516d",OtherLetter),("5341",OtherLetter),("5343",OtherLetter),("5344",OtherLetter),("5345",OtherLetter),("534c",OtherLetter),("53c1",OtherLetter),("53c2",OtherLetter),("53c3",OtherLetter),("53c4",OtherLetter),("56db",OtherLetter),("58f1",OtherLetter),("58f9",OtherLetter),("5e7a",OtherLetter),("5efe",OtherLetter),("5eff",OtherLetter),("5f0c",OtherLetter),("5f0d",OtherLetter),("5f0e",OtherLetter),("5f10",OtherLetter),("62fe",OtherLetter),("634c",OtherLetter),("67d2",OtherLetter),("6f06",OtherLetter),("7396",OtherLetter),("767e",OtherLetter),("8086",OtherLetter),("842c",OtherLetter),("8cae",OtherLetter),("8cb3",OtherLetter),("8d30",OtherLetter),("9621",OtherLetter),("9646",OtherLetter),("964c",OtherLetter),("9678",OtherLetter),("96f6",OtherLetter),("f96b",OtherLetter),("f973",OtherLetter),("f978",OtherLetter),("f9b2",OtherLetter),("f9d1",OtherLetter),("f9d3",OtherLetter),("f9fd",OtherLetter),("20001",OtherLetter),("20064",OtherLetter),("200e2",OtherLetter),("20121",OtherLetter),("2092a",OtherLetter),("20983",OtherLetter),("2098c",OtherLetter),("2099c",OtherLetter),("20aea",OtherLetter),("20afd",OtherLetter),("20b19",OtherLetter),("22390",OtherLetter),("22998",OtherLetter),("23b1b",OtherLetter),("2626d",OtherLetter),("2f890",OtherLetter)]d","4f70","5104","5146","5169","516b","516d","5341","5343","5344","5345","534c","53c1","53c2","53c3","53c4","56db","58f1","58f9","5e7a","5efe","5eff","5f0c","5f0d","5f0e","5f10","62fe","634c","67d2","6f06","7396","767e","8086","842c","8cae","8cb3","8d30","9621","9646","964c","9678","96f6","f96b","f973","f978","f9b2","f9d1","f9d3","f9fd","20001","20064","200e2","20121","2092a","20983","2098c","2099c","20aea","20afd","20b19","22390","22998","23b1b","2626d","2f890"]

traverse_ (`shouldSatisfy` check) [minBound..maxBound]
where
shouldBeEqualTo
:: forall a b. (Bounded a, Enum a, Show a, Eq b, Show b)
Expand Down