-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathCharacter.hs
50 lines (40 loc) · 2.1 KB
/
Character.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
--- Character.hs
--- Generate character stylometric features
module Character (genSentenceCharLength, genCharCount, genAlphaRatio, genUpperRatio, genDigitRatio, genWhitespaceRatio, genCharFrequency) where
import AnalyzedText
import Data.List (genericLength, elemIndices)
import Features
import Data.Char (isAlpha, isUpper, isDigit, isSeparator, toLower)
import Data.Set (Set)
import qualified Data.Set as Set
-- | Average sentence length as a single-element feature set.
--
-- The length of every element of @sentences t@ is taken with 'length', and
-- the average is the integer quotient ('div') of their sum by their count.
-- A text without any sentences yields an average of 0 rather than a
-- divide-by-zero exception.
genSentenceCharLength :: AnalyzedText -> Set Feature
genSentenceCharLength t = Set.singleton $ AverageSentenceCharLength average
  where
    lengths = map length $ sentences t
    average
      | null lengths = 0  -- 'div' by zero would throw for an empty list
      | otherwise    = sum lengths `div` length lengths
-- | Order of magnitude of the text size as a single-element feature set.
--
-- The feature value is the base-2 logarithm of the number of characters in
-- @text t@, passed through 'round'.
--
-- NOTE(review): for an empty text the logarithm's argument is 0, so the
-- value handed to 'round' is not finite -- confirm callers never pass an
-- empty text or tolerate the result.
genCharCount :: AnalyzedText -> Set Feature
genCharCount =
  Set.singleton . CharCount . round . logBase 2 . genericLength . text
-- | Share of alphabetic characters as a single-element feature set.
--
-- The feature value is the base-2 logarithm of
-- (characters satisfying 'isAlpha') / (all characters), passed through
-- 'round'.
--
-- NOTE(review): when no character matches (or the text is empty) the
-- logarithm's argument is 0 or 0/0, so the value handed to 'round' is not
-- finite -- confirm this is acceptable downstream.
genAlphaRatio :: AnalyzedText -> Set Feature
genAlphaRatio t = Set.singleton (AlphaRatio (round (logBase 2 fraction)))
  where
    chars    = text t
    matching = genericLength (filter isAlpha chars)
    total    = genericLength chars
    fraction = matching / total :: Double
-- | Share of upper-case characters as a single-element feature set.
--
-- The feature value is the base-2 logarithm of
-- (characters satisfying 'isUpper') / (all characters), passed through
-- 'round'.
--
-- NOTE(review): when no character matches (or the text is empty) the
-- logarithm's argument is 0 or 0/0, so the value handed to 'round' is not
-- finite -- confirm this is acceptable downstream.
genUpperRatio :: AnalyzedText -> Set Feature
genUpperRatio t = Set.singleton (UpperRatio (round (logBase 2 fraction)))
  where
    chars    = text t
    matching = genericLength (filter isUpper chars)
    total    = genericLength chars
    fraction = matching / total :: Double
-- | Share of digit characters as a single-element feature set.
--
-- The feature value is the base-2 logarithm of
-- (characters satisfying 'isDigit') / (all characters), passed through
-- 'round'.
--
-- NOTE(review): when no character matches (or the text is empty) the
-- logarithm's argument is 0 or 0/0, so the value handed to 'round' is not
-- finite -- confirm this is acceptable downstream.
genDigitRatio :: AnalyzedText -> Set Feature
genDigitRatio t = Set.singleton (DigitRatio (round (logBase 2 fraction)))
  where
    chars    = text t
    matching = genericLength (filter isDigit chars)
    total    = genericLength chars
    fraction = matching / total :: Double
-- | Share of separator characters as a single-element feature set.
--
-- The feature value is the base-2 logarithm of
-- (characters satisfying 'isSeparator') / (all characters), passed through
-- 'round'.
--
-- NOTE(review): 'isSeparator' selects the Unicode space and separator
-- categories only; control characters such as @'\n'@ and @'\t'@ are not
-- counted.  Confirm that 'Data.Char.isSpace' was not the intended predicate.
--
-- NOTE(review): when no character matches (or the text is empty) the
-- logarithm's argument is 0 or 0/0, so the value handed to 'round' is not
-- finite -- confirm this is acceptable downstream.
genWhitespaceRatio :: AnalyzedText -> Set Feature
genWhitespaceRatio t =
  Set.singleton (WhitespaceRatio (round (logBase 2 fraction)))
  where
    chars    = text t
    matching = genericLength (filter isSeparator chars)
    total    = genericLength chars
    fraction = matching / total :: Double
-- | Relative frequency of each tracked character, one feature per character.
--
-- The tracked characters are the lower-case letters @a@..@z@ plus a fixed
-- list of punctuation symbols.  The text is lower-cased before counting, so
-- letters are matched case-insensitively.  Each feature value is the base-2
-- logarithm of (occurrences of the character) / (all characters), passed
-- through 'round'.
--
-- The lower-cased text and the total character count do not depend on the
-- character being examined, so they are bound once and shared by every
-- tracked character instead of being rebuilt per character.
--
-- NOTE(review): a character that never occurs gives a logarithm argument of
-- 0 (0/0 for an empty text), so the value handed to 'round' is not finite --
-- confirm this is acceptable downstream.
genCharFrequency :: AnalyzedText -> Set Feature
genCharFrequency t = Set.fromList [CharRatio c (ratio c) | c <- tracked]
  where
    tracked = ['a'..'z'] ++ "~@#$%^&*-_=+><[]{}/\\|"
    -- Shared across all tracked characters ('map' preserves the length, so
    -- the total equals the length of the original text).
    lowered = map toLower (text t)
    total   = genericLength lowered :: Double
    occurrences c = genericLength (elemIndices c lowered)
    ratio c = round $ logBase 2 $ occurrences c / total