Package char-utf8: The UTF-8 encoding of Unicode characters

Information

namechar-utf8
version1.115
descriptionThe UTF-8 encoding of Unicode characters
authorJoe Leslie-Hurd <joe@gilith.com>
licenseMIT
checksum05626571180d834245631e7e0ac05ab643088ba0
requiresbase
byte
char-def
char-thm
natural-bits
parser
showData.Bool
Data.Byte
Data.Char
Data.List
Data.Option
Data.Pair
Data.Sum
Number.Natural
Parser
Parser.Stream

Files

Defined Constants

Theorems

inverse UTF8.parseChar UTF8.encodeChar

strongInverse UTF8.parseChar UTF8.encodeChar

UTF8.parseNatural = orelse UTF8.parseAscii UTF8.parseMultibyte

b. UTF8.reencode (UTF8.decode b) = b

c. 1 length (UTF8.encodeChar c)

c. UTF8.reencodeChar (right c) = UTF8.encodeChar c

n. length (UTF8.encodeAscii n) = 1

b. length (UTF8.decode b) length b

b. UTF8.reencode (map left b) = b

c. length c length (UTF8.encode c)

c. 1 length (UTF8.reencodeChar c)

c. length c length (UTF8.reencode c)

b. UTF8.reencodeChar (left b) = b :: []

n. UTF8.encodeAscii n = fromNatural n :: []

n. length (UTF8.encodeChar.encode2 n) = 2

n. length (UTF8.encodeChar.encode3 n) = 3

c. UTF8.encode c = concat (map UTF8.encodeChar c)

c. UTF8.decode (UTF8.encode c) = map right c

c. UTF8.reencode (map right c) = UTF8.encode c

c. UTF8.reencode c = concat (map UTF8.reencodeChar c)

UTF8.parse = orelse (map UTF8.parseChar right) (map any left)

n. length (UTF8.encodeChar.encode4 n) = 4

b. UTF8.decode b = fst (toList (parse UTF8.parse (fromList b)))

n b. apply UTF8.parseAscii (append (UTF8.encodeChar.encode2 n) b) = none

n b. apply UTF8.parseAscii (append (UTF8.encodeChar.encode3 n) b) = none

n b. apply UTF8.parseAscii (append (UTF8.encodeChar.encode4 n) b) = none

UTF8.parseChar =
  mapPartial UTF8.parseNatural
    (λn. if invariant n then some (mk n) else none)

x.
    UTF8.reencodeChar x =
    case x of left b b :: [] | right c UTF8.encodeChar c

UTF8.parseAscii =
  token (λb. if bit b 7 then none else some (toNatural b))

b. UTF8.isContinuationByte b bit b 7 ¬bit b 6

n. UTF8.isContinuationByte (or 128 (fromNatural (Bits.bound n 6)))

n b.
    n < 128
    apply UTF8.parseAscii (append (UTF8.encodeAscii n) b) = some (n, b)

b n.
    UTF8.parseMultibyte.addContinuationByte b n =
    if UTF8.isContinuationByte b then
      some (toNatural (and b 63) + Bits.shiftLeft n 6)
    else none

b.
    UTF8.parseMultibyte.parse2 b =
    filter
      (foldN UTF8.parseMultibyte.addContinuationByte 0
         (toNatural (and b 31))) (λn. 128 n)

b.
    UTF8.parseMultibyte.parse3 b =
    filter
      (foldN UTF8.parseMultibyte.addContinuationByte 1
         (toNatural (and b 15))) (λn. 2048 n)

n m.
    UTF8.parseMultibyte.addContinuationByte
      (or 128 (fromNatural (Bits.bound n 6))) m =
    some (Bits.bound n 6 + Bits.shiftLeft m 6)

b.
    UTF8.parseMultibyte.parse4 b =
    filter
      (foldN UTF8.parseMultibyte.addContinuationByte 2
         (toNatural (and b 7))) (λn. 65536 n)

n b.
    128 n n < 2048
    apply UTF8.parseMultibyte (append (UTF8.encodeChar.encode2 n) b) =
    some (n, b)

UTF8.parseMultibyte =
  sequence
    (token
       (λb.
          if bit b 6 then
            if bit b 5 then
              if bit b 4 then
                if bit b 3 then none
                else some (UTF8.parseMultibyte.parse4 b)
              else some (UTF8.parseMultibyte.parse3 b)
            else some (UTF8.parseMultibyte.parse2 b)
          else none))

n b.
    65536 n Bits.width n 21
    apply UTF8.parseMultibyte (append (UTF8.encodeChar.encode4 n) b) =
    some (n, b)

n.
    UTF8.encodeChar.encode2 n =
    let n1 Bits.shiftRight n 6 in
    let b0 or 192 (fromNatural n1) in
    let b1 or 128 (fromNatural (Bits.bound n 6)) in
    b0 :: b1 :: []

n b.
    2048 n n < 65536
    apply UTF8.parseMultibyte (append (UTF8.encodeChar.encode3 n) b) =
    some (n, b)

UTF8.encodeChar =
  λc.
    let n dest c in
    if n < 128 then UTF8.encodeAscii n
    else if n < 2048 then UTF8.encodeChar.encode2 n
    else if n < 65536 then UTF8.encodeChar.encode3 n
    else UTF8.encodeChar.encode4 n

c.
    UTF8.encodeChar c =
    let n dest c in
    if n < 128 then UTF8.encodeAscii n
    else if n < 2048 then UTF8.encodeChar.encode2 n
    else if n < 65536 then UTF8.encodeChar.encode3 n
    else UTF8.encodeChar.encode4 n

n.
    UTF8.encodeChar.encode3 n =
    let n1 Bits.shiftRight n 6 in
    let n2 Bits.shiftRight n1 6 in
    let b0 or 224 (fromNatural n2) in
    let b1 or 128 (fromNatural (Bits.bound n1 6)) in
    let b2 or 128 (fromNatural (Bits.bound n 6)) in
    b0 :: b1 :: b2 :: []

n.
    UTF8.encodeChar.encode4 n =
    let n1 Bits.shiftRight n 6 in
    let n2 Bits.shiftRight n1 6 in
    let n3 Bits.shiftRight n2 6 in
    let b0 or 240 (fromNatural n3) in
    let b1 or 128 (fromNatural (Bits.bound n2 6)) in
    let b2 or 128 (fromNatural (Bits.bound n1 6)) in
    let b3 or 128 (fromNatural (Bits.bound n 6)) in
    b0 :: b1 :: b2 :: b3 :: []

External Type Operators

External Constants

Assumptions

Bits.head 1

¬

¬

length [] = 0

bit0 0 = 0

Bits.fromList [] = 0

concat [] = []

fromList [] = eof

t. t t

n. 0 n

n. n n

xs. isSuffix xs xs

p. p

t. t ¬t

m. ¬(m < 0)

n. ¬(n < n)

n. 0 < suc n

n. n < suc n

(¬) = λp. p

t. (x. t) t

t. (λx. t x) = t

() = λp. p = λx.

a. ¬(some a = none)

x. replicate x 0 = []

t. ¬¬t t

t. ( t) t

t. (t ) t

t. t

t. t t

t. t

t. t t

t. t t t

t. t

t. t t

t. t

t. t t

t. t

t. t t

t. t

x. fromNatural (toNatural x) = x

n. ¬(suc n = 0)

n. 0 * n = 0

m. m * 0 = 0

n. 0 + n = n

m. m + 0 = m

m. m - 0 = m

n. n - n = 0

n. min n n = n

n. Bits.or 0 n = n

l. [] @ l = l

f. map f [] = []

f. map f none = none

p. parse p eof = eof

p. parse p error = error

p. apply p eof = none

p. apply p error = none

width = 8

t. ( t) ¬t

t. (t ) ¬t

t. t ¬t

q. Bits.compare q [] [] q

n. bit1 n = suc (bit0 n)

n. Bits.bit n 0 Bits.head n

m. m 0 = 1

m. m * 1 = m

m. 1 * m = m

l. length (fromList l) = length l

l. Bits.toByte l = fromNatural (Bits.fromList l)

m n. m m + n

n k. Bits.bound n k n

() = λp q. p q p

t. (t ) (t )

w. Bits.bound (toNatural w) width = toNatural w

n. even (suc n) ¬even n

m. m 0 m = 0

n. toNatural (fromNatural n) = Bits.bound n width

l. toList (fromList l) = (l, )

x. (fst x, snd x) = x

t1 t2. (if then t1 else t2) = t2

t1 t2. (if then t1 else t2) = t1

a b. fst (a, b) = a

b f. case b f none = b

n k. Bits.width (Bits.bound n k) k

p x. p x p ((select) p)

r. invariant r dest (mk r) = r

n. bit0 (suc n) = suc (suc (bit0 n))

f y. (let x y in f x) = f y

x. a b. x = (a, b)

x y. x = y y = x

x y. x = y y = x

h t. nth (h :: t) 0 = h

t1 t2. t1 t2 t2 t1

m n. m * n = n * m

m n. m + n = n + m

m n. m + n - m = n

m n. m + n - n = m

n k. Bits.bound (Bits.shiftLeft n k) k = 0

n k. Bits.shiftRight (Bits.bound n k) k = 0

f l. length (map f l) = length l

p xs. length (parse p xs) length xs

n. Bits.cons n = 2 * n

n. n 2 = n * n

n. 2 * n = n + n

h t. length (h :: t) = suc (length t)

w n. bit w n Bits.bit (toNatural w) n

m n. ¬(m < n n m)

m n. ¬(m n n < m)

n i. Bits.bit n i i < Bits.width n

m n. ¬(m < n) n m

m n. ¬(m n) n < m

m n. m < suc n m n

m n. suc m n m < n

f a. map f (some a) = some (f a)

() = λp q. (λf. f p q) = λf. f

apply any = case none none (λx xs. some (x, xs))

k. Bits.shiftLeft 1 k = 2 k

p. ¬(x. p x) x. ¬p x

p. ¬(x. p x) x. ¬p x

() = λp. q. (x. p x q) q

a b. some a = some b a = b

h t. (h :: []) @ t = h :: t

t1 t2. ¬t1 ¬t2 t2 t1

h t. Bits.fromList (h :: t) = Bits.cons h (Bits.fromList t)

x y. x < y toNatural x < toNatural y

m n. m + suc n = suc (m + n)

m n. suc m + n = suc (m + n)

m n. suc m = suc n m = n

m n. suc m < suc n m < n

m n. suc m suc n m n

n k. Bits.bound (Bits.bound n k) k = Bits.bound n k

l1 l2. fromList (l1 @ l2) = append l1 (fromList l2)

h t. concat (h :: t) = h @ concat t

b f a. case b f (some a) = f a

k. Bits.width (2 k - 1) = k

f g a. case f g (left a) = f a

f g b. case f g (right b) = g b

t1 t2. ¬(t1 t2) ¬t1 ¬t2

w1 w2. and w1 w2 = fromNatural (Bits.and (toNatural w1) (toNatural w2))

w1 w2. or w1 w2 = fromNatural (Bits.or (toNatural w1) (toNatural w2))

w1 w2. Bits.compare (Bits.fromByte w1) (Bits.fromByte w2) w1 < w2

m n. min m n = if m n then m else n

m n. even (m * n) even m even n

m n. even (m + n) even m even n

m n. m * suc n = m + m * n

m n. m suc n = m * m n

m n. suc m * n = m * n + n

n k. Bits.bound n k = n Bits.width n k

l1 l2. length (l1 @ l2) = length l1 + length l2

n. invariant n Bits.width n 21

n. Bits.cons n = 1 + 2 * n

xs y ys. isProperSuffix xs (cons y ys) isSuffix xs ys

f g. (x. f x = g x) f = g

x. (a. x = left a) b. x = right b

() = λp q. r. (p r) (q r) r

m n. m n m < n m = n

n k. Bits.shiftLeft n k = 2 k * n

m n. m n n m m = n

n k. Bits.bound n k + Bits.shiftLeft (Bits.shiftRight n k) k = n

f. fn. a b. fn (a, b) = f a b

m n. m < n d. n = m + suc d

p x xs. apply p (cons x xs) = dest p x xs

p q. p (x. q x) x. p q x

c. n. invariant n c = mk n dest c = n

n k. Bits.width n k n < 2 k

x y z. x = y y = z x = z

t1 t2 t3. (t1 t2) t3 t1 t2 t3

n k i. Bits.bit n (k + i) Bits.bit (Bits.shiftRight n k) i

m n p. m * (n * p) = n * (m * p)

m n p. m * (n * p) = m * n * p

m n p. m + (n + p) = m + n + p

n k1 k2.
    Bits.shiftRight n (k1 + k2) = Bits.shiftRight (Bits.shiftRight n k1) k2

n j k. Bits.bound (Bits.bound n j) k = Bits.bound n (min j k)

p m n. m + p = n + p m = n

m n p. n + m p + m n p

n1 n2 k. Bits.bound (n1 + Bits.shiftLeft n2 k) k = Bits.bound n1 k

m n p. m < n n p m < p

m n p. m n n p m p

l h t. (h :: t) @ l = h :: t @ l

m n. m suc n m = suc n m n

m n. m * n = 0 m = 0 n = 0

l i. Bits.bit (Bits.fromList l) i i < length l nth l i

xs ys e. toList xs = (ys, e) length xs = length ys

f h t. map f (h :: t) = f h :: map f t

p. p 0 (n. p n p (suc n)) n. p n

n k. Bits.and n (2 k - 1) = Bits.bound n k

f s. apply (fold f s) = case none none (λx xs. fold.prs f s x xs)

k w1 w2. bit (or w1 w2) k bit w1 k bit w2 k

n i k. Bits.bit (Bits.bound n k) i i < k Bits.bit n i

m n i. Bits.bit (Bits.or m n) i Bits.bit m i Bits.bit n i

m n p. m * (n + p) = m * n + m * p

m n p. m (n + p) = m n * m p

m n p. (m + n) * p = m * p + n * p

m n k.
    Bits.bound (Bits.and m n) k =
    Bits.and (Bits.bound m k) (Bits.bound n k)

m n k.
    Bits.bound (Bits.or m n) k = Bits.or (Bits.bound m k) (Bits.bound n k)

n j k. Bits.width n j + k Bits.width (Bits.shiftRight n k) j

n1 n2 k.
    Bits.shiftRight (n1 + Bits.shiftLeft n2 k) k =
    Bits.shiftRight n1 k + n2

xs. xs = error xs = eof x xt. xs = cons x xt

f.
    apply (token f) =
    case none none (λx xs. case f x of none none | some y some (y, xs))

p. (xs. (ys. isProperSuffix ys xs p ys) p xs) xs. p xs

p q. (x. p x q x) (x. p x) x. q x

p q. (x. p x) (x. q x) x. p x q x

p. p [] (h t. p t p (h :: t)) l. p l

h t n. n < length t nth (h :: t) (suc n) = nth t n

w1 w2. (i. i < width (bit w1 i bit w2 i)) w1 = w2

n k i. Bits.bit (Bits.shiftLeft n k) i k i Bits.bit n (i - k)

m n p. m * n = m * p m = 0 n = p

m n p. m * n m * p m = 0 n p

l n. bit (Bits.toByte l) n n < width n < length l nth l n

p1 p2 xs.
    apply (orelse p1 p2) xs =
    case apply p1 xs of none apply p2 xs | some yys some yys

m n p. m * n < m * p ¬(m = 0) n < p

h1 h2 t1 t2. h1 :: t1 = h2 :: t2 h1 = h2 t1 = t2

a b a' b'. (a, b) = (a', b') a = a' b = b'

x xs y ys. cons x xs = cons y ys x = y xs = ys

p e. inverse p e x xs. apply p (append (e x) xs) = some (x, xs)

l.
    Bits.fromByte (Bits.toByte l) =
    if length l width then l @ replicate (width - length l)
    else take width l

p c x y. p (if c then x else y) (c p x) (¬c p y)

m n j k.
    Bits.width (Bits.bound m k + Bits.shiftLeft n k) j + k
    Bits.width n j

l xs ys e. toList (append l xs) = (l @ ys, e) toList xs = (ys, e)

(xs. append [] xs = xs)
  h t xs. append (h :: t) xs = cons h (append t xs)

p xs.
    apply p xs = none
    y ys. apply p xs = some (y, ys) isProperSuffix ys xs

x xs. toList (cons x xs) = let (l, e) toList xs in (x :: l, e)

p xs.
    apply (sequence p) xs =
    case apply p xs of none none | some (y, ys) apply y ys

p f xs.
    apply (map p f) xs =
    case apply p xs of none none | some (y, ys) some (f y, ys)

p e.
    strongInverse p e
    inverse p e
    xs y ys. apply p xs = some (y, ys) xs = append (e y) ys

p x xs.
    parse p (cons x xs) =
    case dest p x xs of none error | some (y, ys) cons y (parse p ys)

q h1 h2 t1 t2.
    Bits.compare q (h1 :: t1) (h2 :: t2)
    Bits.compare (¬h1 h2 ¬(h1 ¬h2) q) t1 t2

p xs.
    parse p xs =
    case apply p xs of
      none case xs of error error | eof eof | cons y ys error
    | some (y, ys) cons y (parse p ys)

p f xs.
    apply (filter p f) xs =
    case apply p xs of
      none none
    | some (y, ys) if f y then some (y, ys) else none

b.
    x0 x1 x2 x3 x4 x5 x6 x7.
      b = Bits.toByte (x0 :: x1 :: x2 :: x3 :: x4 :: x5 :: x6 :: x7 :: [])

p f xs.
    apply (mapPartial p f) xs =
    case apply p xs of
      none none
    | some (y, ys) case f y of none none | some z some (z, ys)

f s x xs.
    fold.prs f s x xs =
    case f x s of
      none none
    | some y
        case y of
          left z some (z, xs)
        | right t
            case xs of
              error none
            | eof none
            | cons z zs fold.prs f t z zs

toList error = ([], ) toList eof = ([], )
  x xs. toList (cons x xs) = let (l, e) toList xs in (x :: l, e)

f n s.
    foldN f n s =
    fold
      (λx (m, t).
         map (λu. if m = 0 then left u else right (m - 1, u)) (f x t))
      (n, s)

(e b f. case e b f error = e) (e b f. case e b f eof = b)
  e b f x xs. case e b f (cons x xs) = f x xs