{-# LANGUAGE OverloadedStrings #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  HSP.XML.PCDATA
-- Copyright   :  (c) Niklas Broberg 2008-2012
-- License     :  BSD-style (see the file LICENSE.txt)
--
-- Maintainer  :  Niklas Broberg, nibro@cs.chalmers.se
-- Stability   :  experimental
-- Portability :  Haskell 98
--
-- Escaping between CDATA <=> PCDATA
-----------------------------------------------------------------------------
module HSP.XML.PCDATA (
	  escape
--	, unescape
        , escaper
--        , unescaper
        , xmlEscapeChars
	) where

import Data.Monoid              ((<>), mempty)
import Data.Text.Lazy           (Text, uncons)
import qualified Data.Text.Lazy as Text
import Data.Text.Lazy.Builder   (Builder, fromLazyText, singleton)

-- | Take a normal string and transform it to PCDATA by escaping special characters.
-- calls 'escaper' with 'xmlEscapeChars'
-- See also: 'escaper'
escape :: Text -> Builder
escape :: Text -> Builder
escape = [(Char, Builder)] -> Text -> Builder
escaper [(Char, Builder)]
xmlEscapeChars

-- | Take a normal string and transform it to PCDATA by escaping special characters.
-- See also: 'escape', 'xmlEscapeChars'
escaper :: [(Char, Builder)] -- ^ table of escape characters
        -> Text -- ^ String to escape
        -> Builder -- ^ Escaped String
escaper :: [(Char, Builder)] -> Text -> Builder
escaper escapeTable :: [(Char, Builder)]
escapeTable = Text -> Builder
go
    where
      escapeChars :: [Char]
escapeChars = ((Char, Builder) -> Char) -> [(Char, Builder)] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map (Char, Builder) -> Char
forall a b. (a, b) -> a
fst [(Char, Builder)]
escapeTable
      go :: Text -> Builder
go txt :: Text
txt | Text -> Bool
Text.null Text
txt = Builder
forall a. Monoid a => a
mempty
      go txt :: Text
txt =
          case (Char -> Bool) -> Text -> (Text, Text)
Text.break (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
escapeChars) Text
txt of
            (hd :: Text
hd,tl :: Text
tl) ->
                case Text -> Maybe (Char, Text)
uncons Text
tl of
                  Nothing -> Text -> Builder
fromLazyText Text
hd
                  (Just (c :: Char
c, tl' :: Text
tl')) -> Text -> Builder
fromLazyText Text
hd Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> [(Char, Builder)] -> Char -> Builder
pChar [(Char, Builder)]
escapeTable Char
c Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Text -> Builder
go Text
tl'

pChar :: [(Char, Builder)] -- ^ table of escape characters
      -> Char              -- ^ character to escape
      -> Builder           -- ^ escaped character
pChar :: [(Char, Builder)] -> Char -> Builder
pChar escapeChars :: [(Char, Builder)]
escapeChars c :: Char
c =
    case Char -> [(Char, Builder)] -> Maybe Builder
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Char
c [(Char, Builder)]
escapeChars of
      Nothing -> Char -> Builder
singleton Char
c
      Just s :: Builder
s  -> Char -> Builder
singleton '&' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
s Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
singleton ';'

-- This list should be extended.
xmlEscapeChars :: [(Char, Builder)]
xmlEscapeChars :: [(Char, Builder)]
xmlEscapeChars = [
	('&',	"amp"	),
	('\"',	"quot"	),
	('\'',	"apos"	),
	('<',	"lt"	),
	('>',	"gt"	)
	]
{-
-- | Take a PCDATA string and translate all escaped characters in it to the normal
-- characters they represent.
-- Does no error checking of input string, will fail if input is not valid PCDATA.
-- calls 'unescaper' with 'xmlEscapeChars'
-- See also: 'unescaper'
unescape :: String -> String
unescape = unescaper xmlEscapeChars

-- | Take a PCDATA string and translate all escaped characters in it to the normal
-- characters they represent.
-- Does no error checking of input string, will fail if input is not valid PCDATA.
-- See also: 'unescape', 'xmlEscapeChars'
unescaper :: [(Char, Builder)] -- ^ table of escape characters
          -> Text -- ^ String to unescape
          -> Builder -- ^ unescaped String
unescaper escapeChars = reverse . unE ""
  where unE acc "" = acc
  	unE acc (c:cs) =
  	  case c of
  	    '&' -> let (esc, ';':rest) = break (==';') cs
  	               Just ec = revLookup esc escapeChars
  	            in unE (ec:acc) rest
  	    _ -> unE (c:acc) cs

  	revLookup e = lookup e . map (\(a,b) -> (b,a))
-}