Package char-utf8: The UTF-8 encoding of Unicode characters

Information

namechar-utf8
version1.51
descriptionThe UTF-8 encoding of Unicode characters
authorJoe Hurd <joe@gilith.com>
licenseMIT
requiresbool
pair
natural
option
list
byte
word16
parser
char-def
char-thm
showData.Bool
Data.Byte
Data.Byte.Bits
Data.Char
Data.Char.UTF8
Data.List
Data.Option
Data.Pair
Data.Word16
Data.Word16.Bits
Number.Natural
Parser
Parser.Stream

Files

Defined Constants

Theorems

isParser decoder.parse

inverse decoder encoder

strongInverse decoder encoder

parseContinuationByte = parseSome isContinuationByte

decoder = mkParser decoder.parse

decodeStream = parseStream decoder

destParser decoder = decoder.parse

parseTwoContinuationBytes =
  parsePair parseContinuationByte parseContinuationByte

parseThreeContinuationBytes =
  parsePair parseContinuationByte parseTwoContinuationBytes

parse decoder = case none none decoder.parse

cs. length cs length (encode cs)

cs. decode (encode cs) = some cs

bs. length (decodeStream bs) length bs

bs. decode bs = toList (decodeStream (fromList bs))

chs. encode chs = concat (map encoder chs)

bs. case decode bs of none | some cs encode cs = bs

bs. case decode bs of none | some cs length cs length bs

b. isContinuationByte b bit b 7 ¬bit b 6

p0 p1.
    encoder.encode1 p0 p1 =
    let b00 shiftLeft p0 2 in
    let b01 shiftRight (and p1 192) 6 in
    let b0 or 192 (or b00 b01) in
    let b10 and p1 63 in
    let b1 or 128 b10 in
    b0 :: b1 :: []

ch.
    encoder ch =
    let (pl, pos) destChar ch in
    let p destPlane pl in
    let (p0, p1) toBytes (destPosition pos) in
    if p = 0 then
      if p0 = 0 ¬bit p1 7 then p1 :: []
      else if and 248 p0 = 0 then encoder.encode1 p0 p1
      else encoder.encode2 p0 p1
    else encoder.encode3 p p0 p1

b0 s.
    decoder.parse b0 s =
    if bit b0 7 then
      if bit b0 6 then
        if bit b0 5 then
          if bit b0 4 then
            if bit b0 3 then none
            else
              parse
                (partialMap (decoder.decode3 b0)
                   parseThreeContinuationBytes) s
          else
            parse
              (partialMap (decoder.decode2 b0) parseTwoContinuationBytes) s
        else
          parse (partialMap (decoder.decode1 b0) parseContinuationByte) s
      else none
    else
      let pl mkPlane 0 in
      let pos mkPosition (fromBytes 0 b0) in
      let ch mkChar (pl, pos) in
      some (ch, s)

b0 b1.
    decoder.decode1 b0 b1 =
    let pl mkPlane 0 in
    let p0 shiftRight (and b0 28) 2 in
    let y1 shiftLeft (and b0 3) 6 in
    let x1 and b1 63 in
    let p1 or y1 x1 in
    if p0 = 0 p1 < 128 then none
    else
      let pos mkPosition (fromBytes p0 p1) in
      let ch mkChar (pl, pos) in
      some ch

p0 p1.
    encoder.encode2 p0 p1 =
    let b00 shiftRight (and p0 240) 4 in
    let b0 or 224 b00 in
    let b10 shiftLeft (and p0 15) 2 in
    let b11 shiftRight (and p1 192) 6 in
    let b1 or 128 (or b10 b11) in
    let b20 and p1 63 in
    let b2 or 128 b20 in
    b0 :: b1 :: b2 :: []

b0 b1 b2 b3.
    decoder.decode3 b0 (b1, b2, b3) =
    let w shiftLeft (and b0 7) 2 in
    let z shiftRight (and b1 48) 4 in
    let p or w z in
    if p = 0 16 < p then none
    else
      let pl mkPlane p in
      let z0 shiftLeft (and b1 15) 4 in
      let y0 shiftRight (and b2 60) 2 in
      let p0 or z0 y0 in
      let y1 shiftLeft (and b2 3) 6 in
      let x1 and b3 63 in
      let p1 or y1 x1 in
      let pos mkPosition (fromBytes p0 p1) in
      let ch mkChar (pl, pos) in
      some ch

b0 b1 b2.
    decoder.decode2 b0 (b1, b2) =
    let z0 shiftLeft (and b0 15) 4 in
    let y0 shiftRight (and b1 60) 2 in
    let p0 or z0 y0 in
    if p0 < 8 216 p0 p0 223 then none
    else
      let y1 shiftLeft (and b1 3) 6 in
      let x1 and b2 63 in
      let p1 or y1 x1 in
      if p0 = 255 254 p1 then none
      else
        let pl mkPlane 0 in
        let pos mkPosition (fromBytes p0 p1) in
        let ch mkChar (pl, pos) in
        some ch

p p0 p1.
    encoder.encode3 p p0 p1 =
    let b00 shiftRight (and p 28) 2 in
    let b0 or 240 b00 in
    let b10 shiftLeft (and p 3) 4 in
    let b11 shiftRight (and p0 240) 4 in
    let b1 or 128 (or b10 b11) in
    let b20 shiftLeft (and p0 15) 2 in
    let b21 shiftRight (and p1 192) 6 in
    let b2 or 128 (or b20 b21) in
    let b30 and p1 63 in
    let b3 or 128 b30 in
    b0 :: b1 :: b2 :: b3 :: []

Input Type Operators

Input Constants

Assumptions

¬

¬

length [] = 0

bit0 0 = 0

x. x = x

t. t t

n. 0 n

x. isSuffix x x

p. p

toByte [] = 0

toWord [] = 0

t. t ¬t

(¬) = λp. p

t. (x. t) t

t. (λx. t x) = t

() = λp. p = λx.

a'. ¬(none = some a')

x. replicate 0 x = []

t. ¬¬t t

t. ( t) t

t. (t ) t

t. t

t. t t

t. t

t. t t

t. t t t

t. t

t. t t

t. t

t. t t

t. t

t. t t

t. t

r. destPosition (mkPosition r) = r

n. ¬(suc n = 0)

n. 0 * n = 0

m. m * 0 = 0

n. 0 + n = n

m. m + 0 = m

l. [] @ l = l

l. drop 0 l = l

l. take 0 l = []

s. append [] s = s

p. parse p eof = none

p. parse p error = none

width = 8

t. ( t) ¬t

t. (t ) ¬t

t. t ¬t

q. compare q [] [] q

q. compare q [] [] q

n. bit1 n = suc (bit0 n)

m. m * 1 = m

m. 1 * m = m

l. length (fromList l) = length l

l. toList (fromList l) = some l

f. zipWith f [] [] = []

width = 16

() = λp q. p q p

t. (t ) (t )

n. even (suc n) ¬even n

n. odd (suc n) ¬odd n

m. m 0 m = 0

t1 t2. (if then t1 else t2) = t2

t1 t2. (if then t1 else t2) = t1

x y. fst (x, y) = x

x y. snd (x, y) = y

b f. case b f none = b

p x. p x p ((select) p)

r. isPlane r destPlane (mkPlane r) = r

n. bit0 (suc n) = suc (suc (bit0 n))

r. isChar r destChar (mkChar r) = r

r. isParser r destParser (mkParser r) = r

x y. x = y y = x

x y. x = y y = x

h t. nth 0 (h :: t) = h

t1 t2. t1 t2 t2 t1

m n. m + n = n + m

m n. m + n - n = m

x y. isProperSuffix x y isSuffix x y

p s. length (parseStream p s) length s

n. 2 * n = n + n

h t. length (h :: t) = suc (length t)

m n. ¬(m < n n m)

m n. ¬(m n n < m)

m n. ¬(m n) n < m

m n. suc m n m < n

x. x = none a. x = some a

s. case toList s of none | some l length l = length s

() = λp q. (λf. f p q) = λf. f

e b f. case e b f eof = b

e b f. case e b f error = e

p. ¬(x. p x) x. ¬p x

() = λp. q. (x. p x q) q

a a'. some a = some a' a = a'

w1 w2. fromByte w1 = fromByte w2 w1 = w2

pos. w. pos = mkPosition w destPosition pos = w

m n. m + suc n = suc (m + n)

m n. suc m + n = suc (m + n)

m n. suc m = suc n m = n

b f a. case b f (some a) = f a

x n. replicate (suc n) x = x :: replicate n x

w1 w2. compare (fromByte w1) (fromByte w2) w1 < w2

w1 w2. compare (fromByte w1) (fromByte w2) w1 w2

b0 b1. toWord (fromByte b1 @ fromByte b0) = fromBytes b0 b1

w1 w2. compare (fromWord w1) (fromWord w2) w1 < w2

m n. even (m * n) even m even n

m n. even (m + n) even m even n

m n. m * suc n = m + m * n

m n. suc m * n = m * n + n

p. isPlane p p < 17

f g. (x. f x = g x) f = g

() = λp q. r. (p r) (q r) r

w1 w2. and w1 w2 = toByte (zipWith () (fromByte w1) (fromByte w2))

w1 w2. or w1 w2 = toByte (zipWith () (fromByte w1) (fromByte w2))

m n. m n m < n m = n

m n. odd (m + n) ¬(odd m odd n)

m n. m n n m m = n

l n. shiftLeft (toByte l) n = toByte (replicate n @ l)

PAIR'. fn. a0 a1. fn (a0, a1) = PAIR' a0 a1

p a s. parse p (stream a s) = destParser p a s

p q. p (x. q x) x. p q x

pl. b. isPlane b pl = mkPlane b destPlane pl = b

h t s. append (h :: t) s = stream h (append t s)

t1 t2 t3. (t1 t2) t3 t1 t2 t3

m n p. m + (n + p) = m + n + p

n. toByte (odd n :: fromByte (fromNatural (n div 2))) = fromNatural n

n. toWord (odd n :: fromWord (fromNatural (n div 2))) = fromNatural n

l h t. (h :: t) @ l = h :: t @ l

m n. m suc n m = suc n m n

m n. m * n = 0 m = 0 n = 0

w. b0 b1. w = fromBytes b0 b1 toBytes w = (b0, b1)

P. P 0 (n. P n P (suc n)) n. P n

b t1 t2. (if b then t1 else t2) (¬b t1) (b t2)

m n p. m * (n + p) = m * n + m * p

m n p. (m + n) * p = m * p + n * p

x. x = error x = eof a0 a1. x = stream a0 a1

p.
    parse (parseSome p) =
    case none none (λa s. if p a then some (a, s) else none)

b f x y. f (if b then x else y) = if b then f x else f y

b f g x. (if b then f else g) x = if b then f x else g x

e b f a s. case e b f (stream a s) = f a s

p q. (x. p x q x) (x. p x) x. q x

p q. (x. p x) (x. q x) x. p x q x

p e l.
    inverse p e parseStream p (fromList (concat (map e l))) = fromList l

h t n. n < length t nth (suc n) (h :: t) = nth n t

n h t. n length t drop (suc n) (h :: t) = drop n t

m n p. m * n = m * p m = 0 n = p

m n p. m * n m * p m = 0 n p

l n. bit (toByte l) n n < width n < length l nth n l

m n p. m * n < m * p ¬(m = 0) n < p

h1 h2 t1 t2. h1 :: t1 = h2 :: t2 h1 = h2 t1 = t2

x y a b. (x, y) = (a, b) x = a y = b

p e. inverse p e x s. parse p (append (e x) s) = some (x, s)

n h t. n length t take (suc n) (h :: t) = h :: take n t

l.
    fromByte (toByte l) =
    if length l width then l @ replicate (width - length l)
    else take width l

l.
    fromWord (toWord l) =
    if length l width then l @ replicate (width - length l)
    else take width l

c.
    pl pos.
      isChar (pl, pos) c = mkChar (pl, pos) destChar c = (pl, pos)

m n q r. m = q * n + r r < n m div n = q

m n q r. m = q * n + r r < n m mod n = r

p e s.
    strongInverse p e
    case toList (parseStream p s) of
      none
    | some l toList s = some (concat (map e l))

w.
    (toByte (drop 8 (fromWord w)), toByte (take 8 (fromWord w))) =
    toBytes w

p s.
    parse p s = none
    b s'. parse p s = some (b, s') isProperSuffix s' s

p.
    isParser p
    x xs. case p x xs of none | some (y, xs') isSuffix xs' xs

p e.
    strongInverse p e
    inverse p e s x s'. parse p s = some (x, s') s = append (e x) s'

f h1 h2 t1 t2.
    length t1 = length t2
    zipWith f (h1 :: t1) (h2 :: t2) = f h1 h2 :: zipWith f t1 t2

q h1 h2 t1 t2.
    compare q (h1 :: t1) (h2 :: t2)
    compare (¬h1 h2 ¬(h1 ¬h2) q) t1 t2

q h1 h2 t1 t2.
    compare q (h1 :: t1) (h2 :: t2)
    compare (¬h1 h2 ¬(h1 ¬h2) q) t1 t2

b.
    x0 x1 x2 x3 x4 x5 x6 x7.
      b = toByte (x0 :: x1 :: x2 :: x3 :: x4 :: x5 :: x6 :: x7 :: [])

f p s.
    parse (partialMap f p) s =
    case parse p s of
      none none
    | some (b, s') case f b of none none | some c some (c, s')

l n.
    shiftRight (toByte l) n =
    if length l width then
      if length l n then toByte [] else toByte (drop n l)
    else if width n then toByte []
    else toByte (drop n (take width l))

pb pc s.
    parse (parsePair pb pc) s =
    case parse pb s of
      none none
    | some (b, s')
        case parse pc s' of
          none none
        | some (c, s'') some ((b, c), s'')

pl pos.
    isChar (pl, pos)
    let pli destPlane pl in
    let posi destPosition pos in
    ¬(pli = 0) posi < 55296 57343 < posi posi < 65534