Package char-utf8-thm: Properties of the UTF-8 encoding of Unicode characters

Information

namechar-utf8-thm
version1.121
descriptionProperties of the UTF-8 encoding of Unicode characters
authorJoe Leslie-Hurd <joe@gilith.com>
licenseMIT
provenanceHOL Light theory extracted on 2015-04-21
checksume1c1f0ed42c3640d02f5935999c66c106fdaae64
requiresbase
byte
char-def
char-thm
char-utf8-def
natural-bits
parser
showData.Bool
Data.Byte
Data.Char
Data.List
Data.Option
Data.Pair
Data.Sum
Number.Natural
Parser
Parser.Stream

Files

Theorems

inverse UTF8.parseChar UTF8.encodeChar

strongInverse UTF8.parseChar UTF8.encodeChar

b. UTF8.reencode (UTF8.decode b) = b

c. 1 length (UTF8.encodeChar c)

c. UTF8.reencodeChar (right c) = UTF8.encodeChar c

n. length (UTF8.encodeAscii n) = 1

b. length (UTF8.decode b) length b

b. UTF8.reencode (map left b) = b

c. length c length (UTF8.encode c)

c. 1 length (UTF8.reencodeChar c)

c. length c length (UTF8.reencode c)

b. UTF8.reencodeChar (left b) = b :: []

n. length (UTF8.encodeChar.encode2 n) = 2

n. length (UTF8.encodeChar.encode3 n) = 3

c. UTF8.decode (UTF8.encode c) = map right c

c. UTF8.reencode (map right c) = UTF8.encode c

n. length (UTF8.encodeChar.encode4 n) = 4

n b. apply UTF8.parseAscii (append (UTF8.encodeChar.encode2 n) b) = none

n b. apply UTF8.parseAscii (append (UTF8.encodeChar.encode3 n) b) = none

n b. apply UTF8.parseAscii (append (UTF8.encodeChar.encode4 n) b) = none

n. UTF8.isContinuationByte (or 128 (fromNatural (Bits.bound n 6)))

n b.
    n < 128
    apply UTF8.parseAscii (append (UTF8.encodeAscii n) b) = some (n, b)

n m.
    UTF8.parseMultibyte.addContinuationByte
      (or 128 (fromNatural (Bits.bound n 6))) m =
    some (Bits.bound n 6 + Bits.shiftLeft m 6)

n b.
    128 n n < 2048
    apply UTF8.parseMultibyte (append (UTF8.encodeChar.encode2 n) b) =
    some (n, b)

n b.
    65536 n Bits.width n 21
    apply UTF8.parseMultibyte (append (UTF8.encodeChar.encode4 n) b) =
    some (n, b)

n b.
    2048 n n < 65536
    apply UTF8.parseMultibyte (append (UTF8.encodeChar.encode3 n) b) =
    some (n, b)

UTF8.encodeChar =
  λc.
    let n dest c in
    if n < 128 then UTF8.encodeAscii n
    else if n < 2048 then UTF8.encodeChar.encode2 n
    else if n < 65536 then UTF8.encodeChar.encode3 n
    else UTF8.encodeChar.encode4 n

External Type Operators

External Constants

Assumptions

Bits.head 1

¬

¬

length [] = 0

bit0 0 = 0

Bits.fromList [] = 0

concat [] = []

fromList [] = eof

t. t t

n. 0 n

n. n n

xs. isSuffix xs xs

p. p

UTF8.parseNatural = orelse UTF8.parseAscii UTF8.parseMultibyte

t. t ¬t

m. ¬(m < 0)

n. ¬(n < n)

n. 0 < suc n

n. n < suc n

(¬) = λp. p

t. (x. t) t

t. (λx. t x) = t

() = λp. p = λx.

a. ¬(some a = none)

x. replicate x 0 = []

t. ¬¬t t

t. ( t) t

t. (t ) t

t. t

t. t t

t. t

t. t t

t. t t t

t. t

t. t t

t. t

t. t t

t. t

t. t t

t. t

x. fromNatural (toNatural x) = x

n. ¬(suc n = 0)

n. 0 * n = 0

m. m * 0 = 0

n. 0 + n = n

m. m + 0 = m

m. m - 0 = m

n. n - n = 0

n. min n n = n

n. Bits.or 0 n = n

l. [] @ l = l

f. map f [] = []

f. map f none = none

p. parse p eof = eof

p. parse p error = error

p. apply p eof = none

p. apply p error = none

width = 8

t. ( t) ¬t

t. (t ) ¬t

t. t ¬t

q. Bits.compare q [] [] q

n. bit1 n = suc (bit0 n)

n. Bits.bit n 0 Bits.head n

m. m 0 = 1

m. m * 1 = m

m. 1 * m = m

l. length (fromList l) = length l

l. Bits.toByte l = fromNatural (Bits.fromList l)

m n. m m + n

n k. Bits.bound n k n

() = λp q. p q p

t. (t ) (t )

w. Bits.bound (toNatural w) width = toNatural w

n. UTF8.encodeAscii n = fromNatural n :: []

n. even (suc n) ¬even n

m. m 0 m = 0

n. toNatural (fromNatural n) = Bits.bound n width

l. toList (fromList l) = (l, )

c. UTF8.encode c = concat (map UTF8.encodeChar c)

x. (fst x, snd x) = x

c. UTF8.reencode c = concat (map UTF8.reencodeChar c)

t1 t2. (if then t1 else t2) = t2

t1 t2. (if then t1 else t2) = t1

a b. fst (a, b) = a

b f. case b f none = b

n k. Bits.width (Bits.bound n k) k

p x. p x p ((select) p)

UTF8.parse = orelse (map UTF8.parseChar right) (map any left)

r. invariant r dest (mk r) = r

n. bit0 (suc n) = suc (suc (bit0 n))

f y. (let x y in f x) = f y

x. a b. x = (a, b)

x y. x = y y = x

x y. x = y y = x

h t. nth (h :: t) 0 = h

t1 t2. t1 t2 t2 t1

m n. m * n = n * m

m n. m + n = n + m

m n. m + n - m = n

m n. m + n - n = m

n k. Bits.bound (Bits.shiftLeft n k) k = 0

n k. Bits.shiftRight (Bits.bound n k) k = 0

f l. length (map f l) = length l

p xs. length (parse p xs) length xs

n. Bits.cons n = 2 * n

n. n 2 = n * n

n. 2 * n = n + n

b. UTF8.decode b = fst (toList (parse UTF8.parse (fromList b)))

h t. length (h :: t) = suc (length t)

w n. bit w n Bits.bit (toNatural w) n

m n. ¬(m < n n m)

m n. ¬(m n n < m)

n i. Bits.bit n i i < Bits.width n

m n. ¬(m < n) n m

m n. ¬(m n) n < m

m n. m < suc n m n

m n. suc m n m < n

f a. map f (some a) = some (f a)

() = λp q. (λf. f p q) = λf. f

apply any = case none none (λx xs. some (x, xs))

k. Bits.shiftLeft 1 k = 2 k

p. ¬(x. p x) x. ¬p x

p. ¬(x. p x) x. ¬p x

UTF8.parseChar =
  mapPartial UTF8.parseNatural
    (λn. if invariant n then some (mk n) else none)

() = λp. q. (x. p x q) q

a b. some a = some b a = b

h t. (h :: []) @ t = h :: t

t1 t2. ¬t1 ¬t2 t2 t1

h t. Bits.fromList (h :: t) = Bits.cons h (Bits.fromList t)

x y. x < y toNatural x < toNatural y

m n. m + suc n = suc (m + n)

m n. suc m + n = suc (m + n)

m n. suc m = suc n m = n

m n. suc m < suc n m < n

m n. suc m suc n m n

n k. Bits.bound (Bits.bound n k) k = Bits.bound n k

l1 l2. fromList (l1 @ l2) = append l1 (fromList l2)

h t. concat (h :: t) = h @ concat t

b f a. case b f (some a) = f a

k. Bits.width (2 k - 1) = k

f g a. case f g (left a) = f a

f g b. case f g (right b) = g b

x.
    UTF8.reencodeChar x =
    case x of left b b :: [] | right c UTF8.encodeChar c

t1 t2. ¬(t1 t2) ¬t1 ¬t2

w1 w2. and w1 w2 = fromNatural (Bits.and (toNatural w1) (toNatural w2))

w1 w2. or w1 w2 = fromNatural (Bits.or (toNatural w1) (toNatural w2))

w1 w2. Bits.compare (Bits.fromByte w1) (Bits.fromByte w2) w1 < w2

m n. min m n = if m n then m else n

m n. even (m * n) even m even n

m n. even (m + n) even m even n

m n. m * suc n = m + m * n

m n. m suc n = m * m n

m n. suc m * n = m * n + n

n k. Bits.bound n k = n Bits.width n k

l1 l2. length (l1 @ l2) = length l1 + length l2

n. invariant n Bits.width n 21

n. Bits.cons n = 1 + 2 * n

xs y ys. isProperSuffix xs (cons y ys) isSuffix xs ys

f g. (x. f x = g x) f = g

x. (a. x = left a) b. x = right b

() = λp q. r. (p r) (q r) r

m n. m n m < n m = n

n k. Bits.shiftLeft n k = 2 k * n

m n. m n n m m = n

n k. Bits.bound n k + Bits.shiftLeft (Bits.shiftRight n k) k = n

f. fn. a b. fn (a, b) = f a b

m n. m < n d. n = m + suc d

p x xs. apply p (cons x xs) = dest p x xs

UTF8.parseAscii =
  token (λb. if bit b 7 then none else some (toNatural b))

p q. p (x. q x) x. p q x

c. n. invariant n c = mk n dest c = n

n k. Bits.width n k n < 2 k

x y z. x = y y = z x = z

t1 t2 t3. (t1 t2) t3 t1 t2 t3

n k i. Bits.bit n (k + i) Bits.bit (Bits.shiftRight n k) i

m n p. m * (n * p) = n * (m * p)

m n p. m * (n * p) = m * n * p

m n p. m + (n + p) = m + n + p

n k1 k2.
    Bits.shiftRight n (k1 + k2) = Bits.shiftRight (Bits.shiftRight n k1) k2

n j k. Bits.bound (Bits.bound n j) k = Bits.bound n (min j k)

p m n. m + p = n + p m = n

m n p. n + m p + m n p

n1 n2 k. Bits.bound (n1 + Bits.shiftLeft n2 k) k = Bits.bound n1 k

m n p. m < n n p m < p

m n p. m n n p m p

l h t. (h :: t) @ l = h :: t @ l

m n. m suc n m = suc n m n

m n. m * n = 0 m = 0 n = 0

l i. Bits.bit (Bits.fromList l) i i < length l nth l i

xs ys e. toList xs = (ys, e) length xs = length ys

f h t. map f (h :: t) = f h :: map f t

p. p 0 (n. p n p (suc n)) n. p n

n k. Bits.and n (2 k - 1) = Bits.bound n k

f s. apply (fold f s) = case none none (λx xs. fold.prs f s x xs)

k w1 w2. bit (or w1 w2) k bit w1 k bit w2 k

n i k. Bits.bit (Bits.bound n k) i i < k Bits.bit n i

m n i. Bits.bit (Bits.or m n) i Bits.bit m i Bits.bit n i

m n p. m * (n + p) = m * n + m * p

m n p. m (n + p) = m n * m p

m n p. (m + n) * p = m * p + n * p

m n k.
    Bits.bound (Bits.and m n) k =
    Bits.and (Bits.bound m k) (Bits.bound n k)

m n k.
    Bits.bound (Bits.or m n) k = Bits.or (Bits.bound m k) (Bits.bound n k)

n j k. Bits.width n j + k Bits.width (Bits.shiftRight n k) j

n1 n2 k.
    Bits.shiftRight (n1 + Bits.shiftLeft n2 k) k =
    Bits.shiftRight n1 k + n2

xs. xs = error xs = eof x xt. xs = cons x xt

f.
    apply (token f) =
    case none none (λx xs. case f x of none none | some y some (y, xs))

p. (xs. (ys. isProperSuffix ys xs p ys) p xs) xs. p xs

b. UTF8.isContinuationByte b bit b 7 ¬bit b 6

p q. (x. p x q x) (x. p x) x. q x

p q. (x. p x) (x. q x) x. p x q x

p. p [] (h t. p t p (h :: t)) l. p l

h t n. n < length t nth (h :: t) (suc n) = nth t n

w1 w2. (i. i < width (bit w1 i bit w2 i)) w1 = w2

n k i. Bits.bit (Bits.shiftLeft n k) i k i Bits.bit n (i - k)

m n p. m * n = m * p m = 0 n = p

m n p. m * n m * p m = 0 n p

l n. bit (Bits.toByte l) n n < width n < length l nth l n

p1 p2 xs.
    apply (orelse p1 p2) xs =
    case apply p1 xs of none apply p2 xs | some yys some yys

m n p. m * n < m * p ¬(m = 0) n < p

h1 h2 t1 t2. h1 :: t1 = h2 :: t2 h1 = h2 t1 = t2

a b a' b'. (a, b) = (a', b') a = a' b = b'

x xs y ys. cons x xs = cons y ys x = y xs = ys

p e. inverse p e x xs. apply p (append (e x) xs) = some (x, xs)

l.
    Bits.fromByte (Bits.toByte l) =
    if length l width then l @ replicate (width - length l)
    else take width l

p c x y. p (if c then x else y) (c p x) (¬c p y)

m n j k.
    Bits.width (Bits.bound m k + Bits.shiftLeft n k) j + k
    Bits.width n j

l xs ys e. toList (append l xs) = (l @ ys, e) toList xs = (ys, e)

(xs. append [] xs = xs)
  h t xs. append (h :: t) xs = cons h (append t xs)

p xs.
    apply p xs = none
    y ys. apply p xs = some (y, ys) isProperSuffix ys xs

x xs. toList (cons x xs) = let (l, e) toList xs in (x :: l, e)

p xs.
    apply (sequence p) xs =
    case apply p xs of none none | some (y, ys) apply y ys

b n.
    UTF8.parseMultibyte.addContinuationByte b n =
    if UTF8.isContinuationByte b then
      some (toNatural (and b 63) + Bits.shiftLeft n 6)
    else none

b.
    UTF8.parseMultibyte.parse2 b =
    filter
      (foldN UTF8.parseMultibyte.addContinuationByte 0
         (toNatural (and b 31))) (λn. 128 n)

p f xs.
    apply (map p f) xs =
    case apply p xs of none none | some (y, ys) some (f y, ys)

p e.
    strongInverse p e
    inverse p e
    xs y ys. apply p xs = some (y, ys) xs = append (e y) ys

p x xs.
    parse p (cons x xs) =
    case dest p x xs of none error | some (y, ys) cons y (parse p ys)

q h1 h2 t1 t2.
    Bits.compare q (h1 :: t1) (h2 :: t2)
    Bits.compare (¬h1 h2 ¬(h1 ¬h2) q) t1 t2

p xs.
    parse p xs =
    case apply p xs of
      none case xs of error error | eof eof | cons y ys error
    | some (y, ys) cons y (parse p ys)

p f xs.
    apply (filter p f) xs =
    case apply p xs of
      none none
    | some (y, ys) if f y then some (y, ys) else none

b.
    x0 x1 x2 x3 x4 x5 x6 x7.
      b = Bits.toByte (x0 :: x1 :: x2 :: x3 :: x4 :: x5 :: x6 :: x7 :: [])

p f xs.
    apply (mapPartial p f) xs =
    case apply p xs of
      none none
    | some (y, ys) case f y of none none | some z some (z, ys)

b.
    UTF8.parseMultibyte.parse3 b =
    filter
      (foldN UTF8.parseMultibyte.addContinuationByte 1
         (toNatural (and b 15))) (λn. 2048 n)

f s x xs.
    fold.prs f s x xs =
    case f x s of
      none none
    | some y
        case y of
          left z some (z, xs)
        | right t
            case xs of
              error none
            | eof none
            | cons z zs fold.prs f t z zs

toList error = ([], ) toList eof = ([], )
  x xs. toList (cons x xs) = let (l, e) toList xs in (x :: l, e)

b.
    UTF8.parseMultibyte.parse4 b =
    filter
      (foldN UTF8.parseMultibyte.addContinuationByte 2
         (toNatural (and b 7))) (λn. 65536 n)

f n s.
    foldN f n s =
    fold
      (λx (m, t).
         map (λu. if m = 0 then left u else right (m - 1, u)) (f x t))
      (n, s)

UTF8.parseMultibyte =
  sequence
    (token
       (λb.
          if bit b 6 then
            if bit b 5 then
              if bit b 4 then
                if bit b 3 then none
                else some (UTF8.parseMultibyte.parse4 b)
              else some (UTF8.parseMultibyte.parse3 b)
            else some (UTF8.parseMultibyte.parse2 b)
          else none))

(e b f. case e b f error = e) (e b f. case e b f eof = b)
  e b f x xs. case e b f (cons x xs) = f x xs

n.
    UTF8.encodeChar.encode2 n =
    let n1 Bits.shiftRight n 6 in
    let b0 or 192 (fromNatural n1) in
    let b1 or 128 (fromNatural (Bits.bound n 6)) in
    b0 :: b1 :: []

c.
    UTF8.encodeChar c =
    let n dest c in
    if n < 128 then UTF8.encodeAscii n
    else if n < 2048 then UTF8.encodeChar.encode2 n
    else if n < 65536 then UTF8.encodeChar.encode3 n
    else UTF8.encodeChar.encode4 n

n.
    UTF8.encodeChar.encode3 n =
    let n1 Bits.shiftRight n 6 in
    let n2 Bits.shiftRight n1 6 in
    let b0 or 224 (fromNatural n2) in
    let b1 or 128 (fromNatural (Bits.bound n1 6)) in
    let b2 or 128 (fromNatural (Bits.bound n 6)) in
    b0 :: b1 :: b2 :: []

n.
    UTF8.encodeChar.encode4 n =
    let n1 Bits.shiftRight n 6 in
    let n2 Bits.shiftRight n1 6 in
    let n3 Bits.shiftRight n2 6 in
    let b0 or 240 (fromNatural n3) in
    let b1 or 128 (fromNatural (Bits.bound n2 6)) in
    let b2 or 128 (fromNatural (Bits.bound n1 6)) in
    let b3 or 128 (fromNatural (Bits.bound n 6)) in
    b0 :: b1 :: b2 :: b3 :: []