Package char-utf8: The UTF-8 encoding of Unicode characters

Information

namechar-utf8
version1.98
descriptionThe UTF-8 encoding of Unicode characters
authorJoe Leslie-Hurd <joe@gilith.com>
licenseMIT
requiresbool
byte
char-def
char-thm
list
natural
option
pair
parser
word16
showData.Bool
Data.Byte
Data.Byte.Bits
Data.Char
Data.Char.UTF8
Data.List
Data.Option
Data.Pair
Data.Word16
Data.Word16.Bits
Number.Natural
Parser
Parser.Stream

Files

Defined Constants

Theorems

isParser decoder.parse

inverse decoder encoder

strongInverse decoder encoder

parseContinuationByte = parseSome isContinuationByte

decoder = mkParser decoder.parse

decodeStream = parseStream decoder

destParser decoder = decoder.parse

parseTwoContinuationBytes =
  parsePair parseContinuationByte parseContinuationByte

parseThreeContinuationBytes =
  parsePair parseContinuationByte parseTwoContinuationBytes

parse decoder = case none none decoder.parse

cs. length cs length (encode cs)

cs. decode (encode cs) = some cs

bs. length (decodeStream bs) length bs

bs. decode bs = toList (decodeStream (fromList bs))

chs. encode chs = concat (map encoder chs)

bs. case decode bs of none | some cs encode cs = bs

bs. case decode bs of none | some cs length cs length bs

b. isContinuationByte b bit b 7 ¬bit b 6

p1 p0.
    encoder.encode1 p1 p0 =
    let b00 shiftLeft p1 2 in
    let b01 shiftRight (and p0 192) 6 in
    let b0 or 192 (or b00 b01) in
    let b10 and p0 63 in
    let b1 or 128 b10 in
    b0 :: b1 :: []

b0 b1.
    decoder.decode1 b0 b1 =
    let pl mkPlane 0 in
    let p1 shiftRight (and b0 28) 2 in
    let y0 shiftLeft (and b0 3) 6 in
    let x0 and b1 63 in
    let p0 or y0 x0 in
    if p1 = 0 ¬bit p0 7 then none
    else
      let pos mkPosition (fromBytes p0 p1) in
      let ch mkChar (pl, pos) in
      some ch

ch.
    encoder ch =
    let (pl, pos) destChar ch in
    let p destPlane pl in
    let (p0, p1) toBytes (destPosition pos) in
    if p = 0 then
      if p1 = 0 ¬bit p0 7 then p0 :: []
      else if and 248 p1 = 0 then encoder.encode1 p1 p0
      else encoder.encode2 p1 p0
    else encoder.encode3 p p1 p0

b0 s.
    decoder.parse b0 s =
    if bit b0 7 then
      if bit b0 6 then
        if bit b0 5 then
          if bit b0 4 then
            if bit b0 3 then none
            else
              parse
                (partialMap (decoder.decode3 b0)
                   parseThreeContinuationBytes) s
          else
            parse
              (partialMap (decoder.decode2 b0) parseTwoContinuationBytes) s
        else
          parse (partialMap (decoder.decode1 b0) parseContinuationByte) s
      else none
    else
      let pl mkPlane 0 in
      let pos mkPosition (fromBytes b0 0) in
      let ch mkChar (pl, pos) in
      some (ch, s)

p1 p0.
    encoder.encode2 p1 p0 =
    let b00 shiftRight (and p1 240) 4 in
    let b0 or 224 b00 in
    let b10 shiftLeft (and p1 15) 2 in
    let b11 shiftRight (and p0 192) 6 in
    let b1 or 128 (or b10 b11) in
    let b20 and p0 63 in
    let b2 or 128 b20 in
    b0 :: b1 :: b2 :: []

b0 b1 b2 b3.
    decoder.decode3 b0 (b1, b2, b3) =
    let w shiftLeft (and b0 7) 2 in
    let z shiftRight (and b1 48) 4 in
    let p or w z in
    if p = 0 16 < p then none
    else
      let pl mkPlane p in
      let z1 shiftLeft (and b1 15) 4 in
      let y1 shiftRight (and b2 60) 2 in
      let p1 or z1 y1 in
      let y0 shiftLeft (and b2 3) 6 in
      let x0 and b3 63 in
      let p0 or y0 x0 in
      let pos mkPosition (fromBytes p0 p1) in
      let ch mkChar (pl, pos) in
      some ch

b0 b1 b2.
    decoder.decode2 b0 (b1, b2) =
    let z1 shiftLeft (and b0 15) 4 in
    let y1 shiftRight (and b1 60) 2 in
    let p1 or z1 y1 in
    if p1 < 8 216 p1 p1 223 then none
    else
      let y0 shiftLeft (and b1 3) 6 in
      let x0 and b2 63 in
      let p0 or y0 x0 in
      if p1 = 255 254 p0 then none
      else
        let pl mkPlane 0 in
        let pos mkPosition (fromBytes p0 p1) in
        let ch mkChar (pl, pos) in
        some ch

p p1 p0.
    encoder.encode3 p p1 p0 =
    let b00 shiftRight (and p 28) 2 in
    let b0 or 240 b00 in
    let b10 shiftLeft (and p 3) 4 in
    let b11 shiftRight (and p1 240) 4 in
    let b1 or 128 (or b10 b11) in
    let b20 shiftLeft (and p1 15) 2 in
    let b21 shiftRight (and p0 192) 6 in
    let b2 or 128 (or b20 b21) in
    let b30 and p0 63 in
    let b3 or 128 b30 in
    b0 :: b1 :: b2 :: b3 :: []

External Type Operators

External Constants

Assumptions

¬

¬

length [] = 0

bit0 0 = 0

x. x = x

t. t t

n. 0 n

n. n n

x. isSuffix x x

p. p

toByte [] = 0

toWord [] = 0

t. t ¬t

(¬) = λp. p

t. (x. t) t

t. (λx. t x) = t

() = λp. p = λx.

a. ¬(some a = none)

x. replicate x 0 = []

t. ¬¬t t

t. ( t) t

t. (t ) t

t. t

t. t t

t. t

t. t t

t. t t t

t. t

t. t t

t. t

t. t t

t. t

t. t t

t. t

r. destPosition (mkPosition r) = r

n. ¬(suc n = 0)

n. 0 * n = 0

m. m * 0 = 0

n. 0 + n = n

m. m + 0 = m

l. [] @ l = l

l. drop 0 l = l

l. take 0 l = []

s. append [] s = s

p. parse p eof = none

p. parse p error = none

width = 8

t. ( t) ¬t

t. (t ) ¬t

t. t ¬t

q. compare q [] [] q

q. compare q [] [] q

n. bit1 n = suc (bit0 n)

m. m 0 = 1

m. m * 1 = m

m. 1 * m = m

l. length (fromList l) = length l

l. toList (fromList l) = some l

f. zipWith f [] [] = []

m n. m m + n

width = 16

() = λp q. p q p

t. (t ) (t )

n. even (suc n) ¬even n

n. odd (suc n) ¬odd n

m. m 0 m = 0

t1 t2. (if then t1 else t2) = t2

t1 t2. (if then t1 else t2) = t1

a b. fst (a, b) = a

a b. snd (a, b) = b

b f. case b f none = b

p x. p x p ((select) p)

r. isPlane r destPlane (mkPlane r) = r

n. bit0 (suc n) = suc (suc (bit0 n))

r. isChar r destChar (mkChar r) = r

r. isParser r destParser (mkParser r) = r

x y. x = y y = x

x y. x = y y = x

h t. nth (h :: t) 0 = h

t1 t2. t1 t2 t2 t1

m n. m * n = n * m

m n. m + n = n + m

m n. m + n - n = m

x y. isProperSuffix x y isSuffix x y

p s. length (parseStream p s) length s

n. n 2 = n * n

n. 2 * n = n + n

h t. length (h :: t) = suc (length t)

m n. ¬(m < n n m)

m n. ¬(m n n < m)

m n. ¬(m n) n < m

m n. suc m n m < n

x. x = none a. x = some a

s. case toList s of none | some l length l = length s

() = λp q. (λf. f p q) = λf. f

e b f. case e b f eof = b

e b f. case e b f error = e

p. ¬(x. p x) x. ¬p x

p. ¬(x. p x) x. ¬p x

() = λp. q. (x. p x q) q

a a'. some a = some a' a = a'

w1 w2. fromByte w1 = fromByte w2 w1 = w2

pos. w. pos = mkPosition w destPosition pos = w

m n. m + suc n = suc (m + n)

m n. suc m + n = suc (m + n)

m n. suc m = suc n m = n

b f a. case b f (some a) = f a

x n. replicate x (suc n) = x :: replicate x n

t1 t2. ¬(t1 t2) ¬t1 ¬t2

t1 t2. ¬(t1 t2) ¬t1 ¬t2

w1 w2. compare (fromByte w1) (fromByte w2) w1 < w2

w1 w2. compare (fromByte w1) (fromByte w2) w1 w2

b0 b1. toWord (fromByte b0 @ fromByte b1) = fromBytes b0 b1

w1 w2. compare (fromWord w1) (fromWord w2) w1 < w2

m n. even (m * n) even m even n

m n. even (m + n) even m even n

m n. m * suc n = m + m * n

m n. m suc n = m * m n

m n. suc m * n = m * n + n

p. isPlane p p < 17

f g. (x. f x = g x) f = g

() = λp q. r. (p r) (q r) r

w1 w2. and w1 w2 = toByte (zipWith () (fromByte w1) (fromByte w2))

w1 w2. or w1 w2 = toByte (zipWith () (fromByte w1) (fromByte w2))

m n. m n m < n m = n

m n. odd (m + n) ¬(odd m odd n)

m n. m n n m m = n

l n. shiftLeft (toByte l) n = toByte (replicate n @ l)

f. fn. a b. fn (a, b) = f a b

m n. m < n d. n = m + suc d

p a s. parse p (cons a s) = destParser p a s

p q. p (x. q x) x. p q x

pl. b. isPlane b pl = mkPlane b destPlane pl = b

h t s. append (h :: t) s = cons h (append t s)

t1 t2 t3. (t1 t2) t3 t1 t2 t3

m n p. m * (n * p) = n * (m * p)

m n p. m * (n * p) = m * n * p

m n p. m + (n + p) = m + n + p

n. toByte (odd n :: fromByte (fromNatural (n div 2))) = fromNatural n

n. toWord (odd n :: fromWord (fromNatural (n div 2))) = fromNatural n

l h t. (h :: t) @ l = h :: t @ l

m n. m suc n m = suc n m n

m n. m * n = 0 m = 0 n = 0

w. b0 b1. w = fromBytes b0 b1 toBytes w = (b0, b1)

p. p 0 (n. p n p (suc n)) n. p n

b t1 t2. (if b then t1 else t2) (¬b t1) (b t2)

m n p. m * (n + p) = m * n + m * p

m n p. m (n + p) = m n * m p

m n p. (m + n) * p = m * p + n * p

x. x = error x = eof a0 a1. x = cons a0 a1

p.
    parse (parseSome p) =
    case none none (λa s. if p a then some (a, s) else none)

b f x y. f (if b then x else y) = if b then f x else f y

b f g x. (if b then f else g) x = if b then f x else g x

e b f a s. case e b f (cons a s) = f a s

p q. (x. p x q x) (x. p x) x. q x

p q. (x. p x) (x. q x) x. p x q x

p e l.
    inverse p e parseStream p (fromList (concat (map e l))) = fromList l

h t n. n < length t nth (h :: t) (suc n) = nth t n

n h t. n length t drop (suc n) (h :: t) = drop n t

m n p. m * n = m * p m = 0 n = p

m n p. m * n m * p m = 0 n p

l n. bit (toByte l) n n < width n < length l nth l n

m n p. m * n < m * p ¬(m = 0) n < p

h1 h2 t1 t2. h1 :: t1 = h2 :: t2 h1 = h2 t1 = t2

a b a' b'. (a, b) = (a', b') a = a' b = b'

p e. inverse p e x s. parse p (append (e x) s) = some (x, s)

n h t. n length t take (suc n) (h :: t) = h :: take n t

l.
    fromByte (toByte l) =
    if length l width then l @ replicate (width - length l)
    else take width l

l.
    fromWord (toWord l) =
    if length l width then l @ replicate (width - length l)
    else take width l

c.
    pl pos.
      isChar (pl, pos) c = mkChar (pl, pos) destChar c = (pl, pos)

m n q r. m = q * n + r r < n m div n = q

m n q r. m = q * n + r r < n m mod n = r

p e s.
    strongInverse p e
    case toList (parseStream p s) of
      none
    | some l toList s = some (concat (map e l))

w.
    (toByte (take 8 (fromWord w)), toByte (drop 8 (fromWord w))) =
    toBytes w

p s.
    parse p s = none
    b s'. parse p s = some (b, s') isProperSuffix s' s

p.
    isParser p
    x xs. case p x xs of none | some (y, xs') isSuffix xs' xs

p e.
    strongInverse p e
    inverse p e s x s'. parse p s = some (x, s') s = append (e x) s'

f h1 h2 t1 t2.
    length t1 = length t2
    zipWith f (h1 :: t1) (h2 :: t2) = f h1 h2 :: zipWith f t1 t2

q h1 h2 t1 t2.
    compare q (h1 :: t1) (h2 :: t2)
    compare (¬h1 h2 ¬(h1 ¬h2) q) t1 t2

q h1 h2 t1 t2.
    compare q (h1 :: t1) (h2 :: t2)
    compare (¬h1 h2 ¬(h1 ¬h2) q) t1 t2

b.
    x0 x1 x2 x3 x4 x5 x6 x7.
      b = toByte (x0 :: x1 :: x2 :: x3 :: x4 :: x5 :: x6 :: x7 :: [])

f p s.
    parse (partialMap f p) s =
    case parse p s of
      none none
    | some (b, s') case f b of none none | some c some (c, s')

l n.
    shiftRight (toByte l) n =
    if length l width then
      if length l n then toByte [] else toByte (drop n l)
    else if width n then toByte []
    else toByte (drop n (take width l))

pb pc s.
    parse (parsePair pb pc) s =
    case parse pb s of
      none none
    | some (b, s')
        case parse pc s' of
          none none
        | some (c, s'') some ((b, c), s'')

pl pos.
    isChar (pl, pos)
    let pli destPlane pl in
    let posi destPosition pos in
    ¬(pli = 0) posi < 55296 57343 < posi posi < 65534