Safe Haskell | None |
---|---|
Language | Haskell2010 |
Unicode.Grapheme
Description
Text grapheme utilities.
Since: 0.1
Synopsis
- data UnicodeFunction a b
- breakGraphemeClusters :: UnicodeFunction Text [Text]
- textWidth :: UnicodeFunction Text Int
- clusterWidth :: UnicodeFunction Text Int
- dimap :: (c -> a) -> (b -> d) -> UnicodeFunction a b -> UnicodeFunction c d
- map :: ((a -> b) -> c -> d) -> UnicodeFunction a b -> UnicodeFunction c d
- runUnicodeFunction :: UnicodeFunction a b -> a -> b
- runUnicodeFunctionVersion :: UnicodeVersion -> UnicodeFunction a b -> a -> b
- data UnicodeVersion
- getBaseUnicodeVersion :: Either UnsupportedUnicodeE UnicodeVersion
- getBaseUnicodeVersionIO :: IO UnicodeVersion
- getBaseUnicodeVersionOrLatest :: UnicodeVersion
- displayVersion :: IsString s => UnicodeVersion -> s
- newtype UnsupportedUnicodeE = MkUnsupportedUnicodeE Version
Documentation
Unicode functions are defined in terms of the abstract UnicodeFunction
type, which allows us to conveniently wrap functionality across multiple
unicode versions.
These can then be combined in a variety of ways for handling the unicode version.
For example, the following function will break the text into grapheme
clusters, using either base's unicode version if it is supported, or
falling back to the latest supported version.
>>>
:{
break :: Text -> [Text]
break = runUnicodeFunction breakGraphemeClusters
:}
data UnicodeFunction a b Source #
UnicodeFunction represents some function that works across all
UnicodeVersions. It can be extended via its Category and Arrow instances.
>>>
:{
textWidth :: UnicodeFunction Text Int
textWidth = arr F.sum . map fmap clusterWidth . breakGraphemeClusters
:}
Since: 0.1
Instances
Construction
breakGraphemeClusters :: UnicodeFunction Text [Text] Source #
Breaks Text into grapheme clusters.
Examples
>>>
runUnicodeFunction breakGraphemeClusters "abc"
["a","b","c"]
>>>
-- U+004F U+0308
>>>
runUnicodeFunction breakGraphemeClusters "Ö"
["O\776"]
>>>
-- 🧑🌾
>>>
runUnicodeFunction breakGraphemeClusters "\x1F9D1\x200D\x1F33E"
["\129489\8205\127806"]
Since: 0.1
textWidth :: UnicodeFunction Text Int Source #
Splits the text into grapheme clusters and sums the widths of the clusters.
Examples
>>>
runUnicodeFunction textWidth "abc"
3
>>>
-- U+004F U+0308
>>>
runUnicodeFunction textWidth "Ö"
1
>>>
-- 🧑🌾
>>>
runUnicodeFunction textWidth "\x1F9D1\x200D\x1F33E"
2
Since: 0.1
clusterWidth :: UnicodeFunction Text Int Source #
Given a single grapheme cluster -- of possibly multiple codepoints -- returns the width 1 or 2. This is a heuristic: if the cluster contains at least one codepoint matching any of the following:
- East_Asian_Width = Fullwidth or Wide
- Emoji_Presentation
- U+FE0F (emoji-style)
then the width is 2. Otherwise it is 1.
Examples
>>>
runUnicodeFunction clusterWidth "a"
1
>>>
runUnicodeFunction clusterWidth "🇯🇵"
2
>>>
-- Using this on multiple clusters can lead to unexpected results!
>>>
runUnicodeFunction clusterWidth "abc"
1
Since: 0.1
Operations
Arguments
:: (c -> a) | Contravariantly map input. |
-> (b -> d) | Covariantly map output. |
-> UnicodeFunction a b | |
-> UnicodeFunction c d |
Dimaps a UnicodeFunction.
Since: 0.1
Arguments
:: ((a -> b) -> c -> d) | Function mapper. |
-> UnicodeFunction a b | Unicode function. |
-> UnicodeFunction c d |
Maps a UnicodeFunction.
Since: 0.1
Elimination
runUnicodeFunction :: UnicodeFunction a b -> a -> b Source #
Runs the UnicodeFunction with base's unicode version, if it is
supported. Otherwise uses the latest supported version.
Since: 0.1
runUnicodeFunctionVersion :: UnicodeVersion -> UnicodeFunction a b -> a -> b Source #
Runs the UnicodeFunction with the given unicode version.
Since: 0.1
Unicode versions
data UnicodeVersion #
Instances
Functions
Display
displayVersion :: IsString s => UnicodeVersion -> s #
Errors
newtype UnsupportedUnicodeE #
Constructors
MkUnsupportedUnicodeE Version |
Instances
Exception UnsupportedUnicodeE | |
Defined in Unicode.Grapheme.Internal.Version | |
Show UnsupportedUnicodeE | |
Defined in Unicode.Grapheme.Internal.Version Methods showsPrec :: Int -> UnsupportedUnicodeE -> ShowS # show :: UnsupportedUnicodeE -> String # showList :: [UnsupportedUnicodeE] -> ShowS # | |
Eq UnsupportedUnicodeE | |
Defined in Unicode.Grapheme.Internal.Version Methods (==) :: UnsupportedUnicodeE -> UnsupportedUnicodeE -> Bool # (/=) :: UnsupportedUnicodeE -> UnsupportedUnicodeE -> Bool # |