-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Haskell implementation of the Unicode Collation Algorithm
--   
--   This library provides a pure Haskell implementation of the Unicode
--   Collation Algorithm described at
--   <a>http://www.unicode.org/reports/tr10/</a>. It is not as
--   fully-featured or as performant as <tt>text-icu</tt>, but it avoids a
--   dependency on a large C library. Locale-specific tailorings are also
--   provided.
@package unicode-collation
@version 0.1.3.6

module Text.Collate.Lang

-- | Represents a BCP 47 language tag
--   (<a>https://tools.ietf.org/html/bcp47</a>).
data Lang
Lang :: Text -> Maybe Text -> Maybe Text -> [Text] -> [(Text, [(Text, Text)])] -> [Text] -> Lang
[langLanguage] :: Lang -> Text
[langScript] :: Lang -> Maybe Text
[langRegion] :: Lang -> Maybe Text
[langVariants] :: Lang -> [Text]
[langExtensions] :: Lang -> [(Text, [(Text, Text)])]
[langPrivateUse] :: Lang -> [Text]

-- | Parse a BCP 47 language tag as a <a>Lang</a>.
parseLang :: Text -> Either String Lang

-- | Render a <a>Lang</a> in BCP 47 form.
renderLang :: Lang -> Text

-- | Find best match for a <a>Lang</a> in an association list.
lookupLang :: Lang -> [(Lang, a)] -> Maybe (Lang, a)
instance Language.Haskell.TH.Syntax.Lift Text.Collate.Lang.Lang
instance GHC.Show.Show Text.Collate.Lang.Lang
instance GHC.Classes.Ord Text.Collate.Lang.Lang
instance GHC.Classes.Eq Text.Collate.Lang.Lang
instance Data.String.IsString Text.Collate.Lang.Lang
instance Data.Binary.Class.Binary Text.Collate.Lang.Lang


-- | We define our own normalization function instead of depending on
--   unicode-transforms, because we need a lazy (streaming) normalization
--   function for maximum efficiency. No point normalizing two whole
--   <tt>Text</tt>s if we can see from the first few characters how they
--   should be ordered. See <a>https://unicode.org/reports/tr15/</a> for a
--   description of the algorithm implemented here.
module Text.Collate.Normalize

-- | Lazily normalize a list of code points to its canonical decomposition
--   (NFD).
toNFD :: [Int] -> [Int]


-- | This library provides a pure Haskell implementation of the <a>Unicode
--   Collation Algorithm</a>, allowing proper sorting of Unicode strings.
--   
--   The simplest way to use the library is to use the <tt>IsString</tt>
--   instance of <a>Collator</a> (together with the
--   <tt>OverloadedStrings</tt> extension):
--   
--   <pre>
--   &gt;&gt;&gt; import Data.List (sortBy)
--   
--   &gt;&gt;&gt; import qualified Data.Text.IO as T
--   
--   &gt;&gt;&gt; mapM_ T.putStrLn $ sortBy (collate "en-US") ["𝒶bc","abC","𝕒bc","Abc","abç","äbc"]
--   abC
--   𝒶bc
--   𝕒bc
--   Abc
--   abç
--   äbc
--   </pre>
--   
--   Note the difference from the default sort:
--   
--   <pre>
--   &gt;&gt;&gt; import Data.List (sort)
--   
--   &gt;&gt;&gt; import qualified Data.Text.IO as T
--   
--   &gt;&gt;&gt; mapM_ T.putStrLn $ sort ["𝒶bc","abC","𝕒bc","Abc","abç","äbc"]
--   Abc
--   abC
--   abç
--   äbc
--   𝒶bc
--   𝕒bc
--   </pre>
--   
--   A <a>Collator</a> provides a function <a>collate</a> that compares two
--   texts, and a function <a>sortKey</a> that returns the sort key. Most
--   users will just need <a>collate</a>.
--   
--   <pre>
--   &gt;&gt;&gt; let de = collatorFor "de"
--   
--   &gt;&gt;&gt; let se = collatorFor "se"
--   
--   &gt;&gt;&gt; collate de "ö" "z"
--   LT
--   
--   &gt;&gt;&gt; collate se "ö" "z"
--   GT
--   
--   &gt;&gt;&gt; sortKey de "ö"
--   SortKey [0x213C,0x0000,0x0020,0x002B,0x0000,0x0002,0x0002]
--   
--   &gt;&gt;&gt; sortKey se "ö"
--   SortKey [0x22FD,0x0000,0x0020,0x0000,0x0002]
--   </pre>
--   
--   To sort a string type other than <tt>Text</tt>, the function
--   <a>collateWithUnpacker</a> may be used. It takes as a parameter a
--   function that lazily unpacks the string type into a list of
--   <a>Char</a>.
--   
--   <pre>
--   &gt;&gt;&gt; let seCollateString = collateWithUnpacker "se" id
--   
--   &gt;&gt;&gt; seCollateString ("ö" :: String) ("z" :: String)
--   GT
--   </pre>
--   
--   Because <a>Collator</a> and <a>Lang</a> have <tt>IsString</tt>
--   instances, you can just specify them using string literals, as in the
--   above examples. Note, however, that you won't get any feedback if the
--   string doesn't parse correctly as a BCP47 language tag, or if no
--   collation is defined for the specified language; instead, you'll just
--   get the default (root) collator. For this reason, we don't recommend
--   relying on the <tt>IsString</tt> instance.
--   
--   If you won't know the language until run time, use <a>parseLang</a> to
--   parse it to a <a>Lang</a>, handling parse errors, and then pass the
--   <a>Lang</a> to <a>collatorFor</a>.
--   
--   <pre>
--   &gt;&gt;&gt; let handleParseError = error  -- or something fancier
--   
--   &gt;&gt;&gt; lang &lt;- either handleParseError return $ parseLang "bs-Cyrl"
--   
--   &gt;&gt;&gt; collate (collatorFor lang) "a" "b"
--   LT
--   </pre>
--   
--   If you know the language at compile-time, use the <a>collator</a>
--   quasi-quoter and you'll get compile-time errors and warnings:
--   
--   <pre>
--   &gt;&gt;&gt; :set -XQuasiQuotes
--   
--   &gt;&gt;&gt; let esTraditional = [collator|es-u-co-trad|]
--   
--   &gt;&gt;&gt; let esStandard = [collator|es|]
--   
--   &gt;&gt;&gt; collate esStandard "Co" "Ch"
--   GT
--   
--   &gt;&gt;&gt; collate esTraditional "Co" "Ch"
--   LT
--   </pre>
--   
--   Note that the unicode extension syntax for BCP47 can be used to
--   specify a particular collation for the language (here, Spanish
--   "traditional" instead of the default ordering; the alias <tt>trad</tt>
--   is used because of length limits for BCP47 keywords).
--   
--   The extension syntax can also be used to set collator options. The
--   keyword <tt>kb</tt> can be used to specify the "backwards" accent
--   sorting that is sometimes used in French:
--   
--   <pre>
--   &gt;&gt;&gt; collate "fr" "côte" "coté"
--   GT
--   
--   &gt;&gt;&gt; collate "fr-u-kb" "côte" "coté"
--   LT
--   </pre>
--   
--   The keyword <tt>ka</tt> can be used to specify the variable weighting
--   options which affect how punctuation and whitespace are treated:
--   
--   <pre>
--   &gt;&gt;&gt; collate "en-u-ka-shifted" "de-luge" "de Luge"
--   LT
--   
--   &gt;&gt;&gt; collate "en-u-ka-noignore" "de-luge" "de Luge"
--   GT
--   </pre>
--   
--   The keyword <tt>kk</tt> can be used to turn off the normalization step
--   (which is required by the algorithm but can be omitted for better
--   performance if the input is already in NFD form, i.e. canonical
--   decomposition).
--   
--   <pre>
--   &gt;&gt;&gt; let noNormalizeCollator = [collator|en-u-kk-false|]
--   </pre>
--   
--   The keyword <tt>kf</tt> can be used to say whether uppercase or
--   lowercase letters should be sorted first.
--   
--   <pre>
--   &gt;&gt;&gt; collate "en-u-kf-upper" "A" "a"
--   LT
--   
--   &gt;&gt;&gt; collate "en-u-kf-lower" "A" "a"
--   GT
--   </pre>
--   
--   These options can be combined:
--   
--   <pre>
--   &gt;&gt;&gt; collate "de-DE-u-co-phonebk-kb-false-ka-shifted" "Udet" "Über"
--   LT
--   </pre>
--   
--   Options can also be set using the functions
--   <a>setVariableWeighting</a>, <a>setNormalization</a>,
--   <a>setUpperBeforeLower</a>, and <a>setFrenchAccents</a>:
--   
--   <pre>
--   &gt;&gt;&gt; let frC = setFrenchAccents True [collator|fr|]
--   
--   &gt;&gt;&gt; collate frC "côte" "coté"
--   LT
--   </pre>
module Text.Collate
data Collator

-- | Compare two <a>Text</a>s
collate :: Collator -> Text -> Text -> Ordering

-- | Compare two strings of any type that can be unpacked lazily into a
--   list of <a>Char</a>s.
collateWithUnpacker :: Collator -> forall a. Eq a => (a -> [Char]) -> a -> a -> Ordering

-- | Returns a collator based on a BCP 47 language tag. If no exact match
--   is found, we try to find the best match (falling back to the root
--   collation if nothing else succeeds). If something other than the
--   default collation for a language is desired, the <tt>co</tt> keyword
--   of the unicode extensions can be used (e.g. <tt>es-u-co-trad</tt> for
--   traditional Spanish). Other unicode extensions affect the collator
--   options:
--   
--   <ul>
--   <li>The <tt>kb</tt> keyword has the same effect as
--   <a>setFrenchAccents</a> (e.g. <tt>fr-FR-u-kb-true</tt>).</li>
--   <li>The <tt>ka</tt> keyword has the same effect as
--   <a>setVariableWeighting</a> (e.g. <tt>fr-FR-u-kb-ka-shifted</tt> or
--   <tt>en-u-ka-noignore</tt>).</li>
--   <li>The <tt>kf</tt> keyword has the same effect as
--   <a>setUpperBeforeLower</a> (e.g. <tt>fr-u-kf-upper</tt> or
--   <tt>fr-u-kf-lower</tt>).</li>
--   <li>The <tt>kk</tt> keyword has the same effect as
--   <a>setNormalization</a> (e.g. <tt>fr-u-kk-false</tt>).</li>
--   </ul>
collatorFor :: Lang -> Collator

-- | Create a collator at compile time based on a BCP 47 language tag:
--   e.g., <tt>[collator|es-u-co-trad|]</tt>. Requires the
--   <tt>QuasiQuotes</tt> extension.
collator :: QuasiQuoter

-- | Default collator based on DUCET table (<tt>allkeys.txt</tt>).
rootCollator :: Collator
newtype SortKey
SortKey :: [Word16] -> SortKey

-- | The sort key used to compare a <a>Text</a>
sortKey :: Collator -> Text -> SortKey

-- | Render sort key in the manner used in the CLDR collation test data:
--   the character '|' is used to separate the levels of the key and
--   corresponds to a 0 in the actual sort key.
renderSortKey :: SortKey -> String

-- | <a>VariableWeighting</a> affects how punctuation is treated. See
--   <a>http://www.unicode.org/reports/tr10/#Variable_Weighting</a>.
data VariableWeighting

-- | Don't ignore punctuation (Deluge &lt; deluge-)
NonIgnorable :: VariableWeighting

-- | Completely ignore punctuation (Deluge = deluge-)
Blanked :: VariableWeighting

-- | Consider punctuation at lower priority (de-luge &lt; delu-ge &lt;
--   deluge &lt; deluge- &lt; Deluge)
Shifted :: VariableWeighting

-- | Variant of Shifted (deluge &lt; de-luge &lt; delu-ge)
ShiftTrimmed :: VariableWeighting
data CollatorOptions
CollatorOptions :: Maybe Lang -> VariableWeighting -> Bool -> Bool -> Bool -> CollatorOptions

-- | <a>Lang</a> used for tailoring. Note that because of fallback rules,
--   this may be somewhat different from the <a>Lang</a> passed to
--   <a>collatorFor</a>. This <a>Lang</a> won't contain unicode extensions
--   used to set options, but it will specify the collation if a
--   non-default collation is being used.
[optLang] :: CollatorOptions -> Maybe Lang

-- | Method for handling variable elements (see
--   <a>http://www.unicode.org/reports/tr10/</a>, Tables 11 and 12).
[optVariableWeighting] :: CollatorOptions -> VariableWeighting

-- | If True, secondary weights are scanned in reverse order, so we get the
--   sorting "cote côte coté côté" instead of "cote coté côte côté".
[optFrenchAccents] :: CollatorOptions -> Bool

-- | Sort uppercase letters before lower
[optUpperBeforeLower] :: CollatorOptions -> Bool

-- | If True, strings are normalized to NFD before collation elements are
--   constructed. If the input is already normalized, this option can be
--   set to False for better performance.
[optNormalize] :: CollatorOptions -> Bool

-- | The options used for this <a>Collator</a>
collatorOptions :: Collator -> CollatorOptions

-- | <a>Lang</a> used for tailoring. Because of fallback rules, this may be
--   somewhat different from the <a>Lang</a> passed to <a>collatorFor</a>.
--   This <a>Lang</a> won't contain unicode extensions used to set options,
--   but it will specify the collation if a non-default collation is being
--   used.

-- | <i>Deprecated: Use (optLang . collatorOptions)</i>
collatorLang :: Collator -> Maybe Lang

-- | Set method for handling variable elements (punctuation and spaces):
--   see <a>http://www.unicode.org/reports/tr10/</a>, Tables 11 and 12.
setVariableWeighting :: VariableWeighting -> Collator -> Collator

-- | The Unicode Collation Algorithm expects input to be normalized into
--   its canonical decomposition (NFD). By default, collators perform this
--   normalization. If your input is already normalized, you can increase
--   performance by disabling this step: <tt>setNormalization False</tt>.
setNormalization :: Bool -> Collator -> Collator

-- | <tt>setFrenchAccents True</tt> causes secondary weights to be scanned
--   in reverse order, so we get the sorting <tt>cote côte coté côté</tt>
--   instead of <tt>cote coté côte côté</tt>. The default is usually
--   <tt>False</tt>, except for <tt>fr-CA</tt> where it is <tt>True</tt>.
setFrenchAccents :: Bool -> Collator -> Collator

-- | Most collations default to sorting lowercase letters before uppercase
--   (exceptions: <tt>mt</tt>, <tt>da</tt>, <tt>cu</tt>). To select the
--   opposite behavior, use <tt>setUpperBeforeLower True</tt>.
setUpperBeforeLower :: Bool -> Collator -> Collator

-- | An association list matching <a>Lang</a>s with tailored
--   <a>Collation</a>s.
tailorings :: [(Lang, Collation)]

-- | Represents a BCP 47 language tag
--   (<a>https://tools.ietf.org/html/bcp47</a>).
data Lang
Lang :: Text -> Maybe Text -> Maybe Text -> [Text] -> [(Text, [(Text, Text)])] -> [Text] -> Lang
[langLanguage] :: Lang -> Text
[langScript] :: Lang -> Maybe Text
[langRegion] :: Lang -> Maybe Text
[langVariants] :: Lang -> [Text]
[langExtensions] :: Lang -> [(Text, [(Text, Text)])]
[langPrivateUse] :: Lang -> [Text]

-- | Find best match for a <a>Lang</a> in an association list.
lookupLang :: Lang -> [(Lang, a)] -> Maybe (Lang, a)

-- | Parse a BCP 47 language tag as a <a>Lang</a>.
parseLang :: Text -> Either String Lang

-- | Render a <a>Lang</a> in BCP 47 form.
renderLang :: Lang -> Text
