Package char-utf8: The UTF-8 encoding of Unicode characters
Information
name | char-utf8 |
version | 1.49 |
description | The UTF-8 encoding of Unicode characters |
author | Joe Hurd <joe@gilith.com> |
license | MIT |
requires | bool pair natural option list byte word16 parser char-def char-thm |
show | Data.Bool Data.Byte Data.Byte.Bits Data.Char Data.Char.UTF8 Data.List Data.Option Data.Pair Data.Word16 Data.Word16.Bits Number.Natural Parser Parser.Stream |
Files
- Package tarball char-utf8-1.49.tgz
- Theory file char-utf8.thy (included in the package tarball)
Defined Constants
- Data
- Char
- UTF8
- decode
- decodeStream
- decoder
- decoder.decode1
- decoder.decode2
- decoder.decode3
- decoder.parse
- encode
- encoder
- encoder.encode1
- encoder.encode2
- encoder.encode3
- isContinuationByte
- parseContinuationByte
- parseThreeContinuationBytes
- parseTwoContinuationBytes
- UTF8
- Char
Theorems
⊦ isParser decoder.parse
⊦ inverse decoder encoder
⊦ strongInverse decoder encoder
⊦ parseContinuationByte = parseSome isContinuationByte
⊦ decoder = mkParser decoder.parse
⊦ decodeStream = parseStream decoder
⊦ destParser decoder = decoder.parse
⊦ parseTwoContinuationBytes =
parsePair parseContinuationByte parseContinuationByte
⊦ parseThreeContinuationBytes =
parsePair parseContinuationByte parseTwoContinuationBytes
⊦ parse decoder = case none none decoder.parse
⊦ ∀cs. length cs ≤ length (encode cs)
⊦ ∀cs. decode (encode cs) = some cs
⊦ ∀bs. length (decodeStream bs) ≤ length bs
⊦ ∀bs. decode bs = toList (decodeStream (fromList bs))
⊦ ∀chs. encode chs = concat (map encoder chs)
⊦ ∀bs. case decode bs of none → T | some cs → encode cs = bs
⊦ ∀bs. case decode bs of none → T | some cs → length cs ≤ length bs
⊦ ∀b. isContinuationByte b ⇔ bit b 7 ∧ ¬bit b 6
⊦ ∀p0 p1.
encoder.encode1 p0 p1 =
let b00 ← shiftLeft p0 2 in
let b01 ← shiftRight (and p1 192) 6 in
let b0 ← or 192 (or b00 b01) in
let b10 ← and p1 63 in
let b1 ← or 128 b10 in
b0 :: b1 :: []
⊦ ∀ch.
encoder ch =
let (pl, pos) ← destChar ch in
let p ← destPlane pl in
let (p0, p1) ← toBytes (destPosition pos) in
if p = 0 then
if p0 = 0 ∧ ¬bit p1 7 then p1 :: []
else if and 248 p0 = 0 then encoder.encode1 p0 p1
else encoder.encode2 p0 p1
else encoder.encode3 p p0 p1
⊦ ∀b0 s.
decoder.parse b0 s =
if bit b0 7 then
if bit b0 6 then
if bit b0 5 then
if bit b0 4 then
if bit b0 3 then none
else
parse
(partialMap (decoder.decode3 b0)
parseThreeContinuationBytes) s
else
parse
(partialMap (decoder.decode2 b0) parseTwoContinuationBytes) s
else
parse (partialMap (decoder.decode1 b0) parseContinuationByte) s
else none
else
let pl ← mkPlane 0 in
let pos ← mkPosition (fromBytes 0 b0) in
let ch ← mkChar (pl, pos) in
some (ch, s)
⊦ ∀b0 b1.
decoder.decode1 b0 b1 =
let pl ← mkPlane 0 in
let p0 ← shiftRight (and b0 28) 2 in
let y1 ← shiftLeft (and b0 3) 6 in
let x1 ← and b1 63 in
let p1 ← or y1 x1 in
if p0 = 0 ∧ p1 < 128 then none
else
let pos ← mkPosition (fromBytes p0 p1) in
let ch ← mkChar (pl, pos) in
some ch
⊦ ∀p0 p1.
encoder.encode2 p0 p1 =
let b00 ← shiftRight (and p0 240) 4 in
let b0 ← or 224 b00 in
let b10 ← shiftLeft (and p0 15) 2 in
let b11 ← shiftRight (and p1 192) 6 in
let b1 ← or 128 (or b10 b11) in
let b20 ← and p1 63 in
let b2 ← or 128 b20 in
b0 :: b1 :: b2 :: []
⊦ ∀b0 b1 b2 b3.
decoder.decode3 b0 (b1, b2, b3) =
let w ← shiftLeft (and b0 7) 2 in
let z ← shiftRight (and b1 48) 4 in
let p ← or w z in
if p = 0 ∨ 16 < p then none
else
let pl ← mkPlane p in
let z0 ← shiftLeft (and b1 15) 4 in
let y0 ← shiftRight (and b2 60) 2 in
let p0 ← or z0 y0 in
let y1 ← shiftLeft (and b2 3) 6 in
let x1 ← and b3 63 in
let p1 ← or y1 x1 in
let pos ← mkPosition (fromBytes p0 p1) in
let ch ← mkChar (pl, pos) in
some ch
⊦ ∀b0 b1 b2.
decoder.decode2 b0 (b1, b2) =
let z0 ← shiftLeft (and b0 15) 4 in
let y0 ← shiftRight (and b1 60) 2 in
let p0 ← or z0 y0 in
if p0 < 8 ∨ 216 ≤ p0 ∧ p0 ≤ 223 then none
else
let y1 ← shiftLeft (and b1 3) 6 in
let x1 ← and b2 63 in
let p1 ← or y1 x1 in
if p0 = 255 ∧ 254 ≤ p1 then none
else
let pl ← mkPlane 0 in
let pos ← mkPosition (fromBytes p0 p1) in
let ch ← mkChar (pl, pos) in
some ch
⊦ ∀p p0 p1.
encoder.encode3 p p0 p1 =
let b00 ← shiftRight (and p 28) 2 in
let b0 ← or 240 b00 in
let b10 ← shiftLeft (and p 3) 4 in
let b11 ← shiftRight (and p0 240) 4 in
let b1 ← or 128 (or b10 b11) in
let b20 ← shiftLeft (and p0 15) 2 in
let b21 ← shiftRight (and p1 192) 6 in
let b2 ← or 128 (or b20 b21) in
let b30 ← and p1 63 in
let b3 ← or 128 b30 in
b0 :: b1 :: b2 :: b3 :: []
Input Type Operators
- →
- bool
- Data
- Byte
- byte
- Char
- char
- plane
- position
- List
- list
- Option
- option
- Pair
- ×
- Word16
- word16
- Byte
- Number
- Natural
- natural
- Natural
- Parser
- parser
- Stream
- stream
Input Constants
- =
- select
- Data
- Bool
- ∀
- ∧
- ⇒
- ∃
- ∨
- ¬
- cond
- F
- T
- Byte
- <
- ≤
- and
- bit
- fromNatural
- or
- shiftLeft
- shiftRight
- width
- Bits
- compare
- fromByte
- toByte
- Char
- destChar
- destPlane
- destPosition
- isChar
- isPlane
- mkChar
- mkPlane
- mkPosition
- List
- ::
- @
- []
- concat
- drop
- length
- map
- nth
- replicate
- take
- zipWith
- Option
- case
- none
- some
- Pair
- ,
- fst
- snd
- Word16
- <
- fromBytes
- fromNatural
- toBytes
- width
- Bits
- compare
- fromWord
- toWord
- Bool
- Number
- Natural
- *
- +
- -
- <
- ≤
- bit0
- bit1
- div
- even
- mod
- odd
- suc
- zero
- Natural
- Parser
- destParser
- inverse
- isParser
- mkParser
- parse
- parsePair
- parseSome
- parseStream
- partialMap
- strongInverse
- Stream
- append
- case
- eof
- error
- fromList
- isProperSuffix
- isSuffix
- length
- stream
- toList
Assumptions
⊦ T
⊦ ¬F ⇔ T
⊦ ¬T ⇔ F
⊦ length [] = 0
⊦ bit0 0 = 0
⊦ ∀x. x = x
⊦ ∀t. t ⇒ t
⊦ ∀n. 0 ≤ n
⊦ ∀x. isSuffix x x
⊦ F ⇔ ∀p. p
⊦ toByte [] = 0
⊦ toWord [] = 0
⊦ ∀t. t ∨ ¬t
⊦ (¬) = λp. p ⇒ F
⊦ ∀t. (∀x. t) ⇔ t
⊦ ∀t. (λx. t x) = t
⊦ (∀) = λp. p = λx. T
⊦ ∀a'. ¬(none = some a')
⊦ ∀x. replicate 0 x = []
⊦ ∀t. ¬¬t ⇔ t
⊦ ∀t. (T ⇔ t) ⇔ t
⊦ ∀t. (t ⇔ T) ⇔ t
⊦ ∀t. F ∧ t ⇔ F
⊦ ∀t. T ∧ t ⇔ t
⊦ ∀t. t ∧ F ⇔ F
⊦ ∀t. t ∧ T ⇔ t
⊦ ∀t. t ∧ t ⇔ t
⊦ ∀t. F ⇒ t ⇔ T
⊦ ∀t. T ⇒ t ⇔ t
⊦ ∀t. t ⇒ T ⇔ T
⊦ ∀t. F ∨ t ⇔ t
⊦ ∀t. T ∨ t ⇔ T
⊦ ∀t. t ∨ F ⇔ t
⊦ ∀t. t ∨ T ⇔ T
⊦ ∀r. destPosition (mkPosition r) = r
⊦ ∀n. ¬(suc n = 0)
⊦ ∀n. 0 * n = 0
⊦ ∀m. m * 0 = 0
⊦ ∀n. 0 + n = n
⊦ ∀m. m + 0 = m
⊦ ∀l. [] @ l = l
⊦ ∀l. drop 0 l = l
⊦ ∀l. take 0 l = []
⊦ ∀s. append [] s = s
⊦ ∀p. parse p eof = none
⊦ ∀p. parse p error = none
⊦ width = 8
⊦ ∀t. (F ⇔ t) ⇔ ¬t
⊦ ∀t. (t ⇔ F) ⇔ ¬t
⊦ ∀t. t ⇒ F ⇔ ¬t
⊦ ∀q. compare q [] [] ⇔ q
⊦ ∀q. compare q [] [] ⇔ q
⊦ ∀n. bit1 n = suc (bit0 n)
⊦ ∀m. m * 1 = m
⊦ ∀m. 1 * m = m
⊦ ∀l. length (fromList l) = length l
⊦ ∀l. toList (fromList l) = some l
⊦ ∀f. zipWith f [] [] = []
⊦ width = 16
⊦ (⇒) = λp q. p ∧ q ⇔ p
⊦ ∀t. (t ⇔ T) ∨ (t ⇔ F)
⊦ ∀n. even (suc n) ⇔ ¬even n
⊦ ∀n. odd (suc n) ⇔ ¬odd n
⊦ ∀m. m ≤ 0 ⇔ m = 0
⊦ ∀t1 t2. (if F then t1 else t2) = t2
⊦ ∀t1 t2. (if T then t1 else t2) = t1
⊦ ∀x y. fst (x, y) = x
⊦ ∀x y. snd (x, y) = y
⊦ ∀b f. case b f none = b
⊦ ∀p x. p x ⇒ p ((select) p)
⊦ ∀r. isPlane r ⇔ destPlane (mkPlane r) = r
⊦ ∀n. bit0 (suc n) = suc (suc (bit0 n))
⊦ ∀r. isChar r ⇔ destChar (mkChar r) = r
⊦ ∀r. isParser r ⇔ destParser (mkParser r) = r
⊦ ∀x y. x = y ⇔ y = x
⊦ ∀x y. x = y ⇒ y = x
⊦ ∀h t. nth 0 (h :: t) = h
⊦ ∀t1 t2. t1 ∨ t2 ⇔ t2 ∨ t1
⊦ ∀m n. m + n = n + m
⊦ ∀m n. m + n - n = m
⊦ ∀x y. isProperSuffix x y ⇒ isSuffix x y
⊦ ∀p s. length (parseStream p s) ≤ length s
⊦ ∀n. 2 * n = n + n
⊦ ∀h t. length (h :: t) = suc (length t)
⊦ ∀m n. ¬(m < n ∧ n ≤ m)
⊦ ∀m n. ¬(m ≤ n ∧ n < m)
⊦ ∀m n. ¬(m ≤ n) ⇔ n < m
⊦ ∀m n. suc m ≤ n ⇔ m < n
⊦ ∀x. x = none ∨ ∃a. x = some a
⊦ ∀s. case toList s of none → T | some l → length l = length s
⊦ (∧) = λp q. (λf. f p q) = λf. f T T
⊦ ∀e b f. case e b f eof = b
⊦ ∀e b f. case e b f error = e
⊦ ∀p. ¬(∀x. p x) ⇔ ∃x. ¬p x
⊦ (∃) = λp. ∀q. (∀x. p x ⇒ q) ⇒ q
⊦ ∀a a'. some a = some a' ⇔ a = a'
⊦ ∀w1 w2. fromByte w1 = fromByte w2 ⇔ w1 = w2
⊦ ∀pos. ∃w. pos = mkPosition w ∧ destPosition pos = w
⊦ ∀m n. m + suc n = suc (m + n)
⊦ ∀m n. suc m + n = suc (m + n)
⊦ ∀m n. suc m = suc n ⇔ m = n
⊦ ∀b f a. case b f (some a) = f a
⊦ ∀x n. replicate (suc n) x = x :: replicate n x
⊦ ∀w1 w2. compare F (fromByte w1) (fromByte w2) ⇔ w1 < w2
⊦ ∀w1 w2. compare T (fromByte w1) (fromByte w2) ⇔ w1 ≤ w2
⊦ ∀b0 b1. toWord (fromByte b1 @ fromByte b0) = fromBytes b0 b1
⊦ ∀w1 w2. compare F (fromWord w1) (fromWord w2) ⇔ w1 < w2
⊦ ∀m n. even (m * n) ⇔ even m ∨ even n
⊦ ∀m n. even (m + n) ⇔ even m ⇔ even n
⊦ ∀m n. m * suc n = m + m * n
⊦ ∀m n. suc m * n = m * n + n
⊦ ∀p. isPlane p ⇔ p < 17
⊦ ∀f g. (∀x. f x = g x) ⇔ f = g
⊦ (∨) = λp q. ∀r. (p ⇒ r) ⇒ (q ⇒ r) ⇒ r
⊦ ∀w1 w2. and w1 w2 = toByte (zipWith (∧) (fromByte w1) (fromByte w2))
⊦ ∀w1 w2. or w1 w2 = toByte (zipWith (∨) (fromByte w1) (fromByte w2))
⊦ ∀m n. m ≤ n ⇔ m < n ∨ m = n
⊦ ∀m n. odd (m + n) ⇔ ¬(odd m ⇔ odd n)
⊦ ∀m n. m ≤ n ∧ n ≤ m ⇔ m = n
⊦ ∀l n. shiftLeft (toByte l) n = toByte (replicate n F @ l)
⊦ ∀PAIR'. ∃fn. ∀a0 a1. fn (a0, a1) = PAIR' a0 a1
⊦ ∀p a s. parse p (stream a s) = destParser p a s
⊦ ∀p q. p ∨ (∃x. q x) ⇔ ∃x. p ∨ q x
⊦ ∀pl. ∃b. isPlane b ∧ pl = mkPlane b ∧ destPlane pl = b
⊦ ∀h t s. append (h :: t) s = stream h (append t s)
⊦ ∀t1 t2 t3. (t1 ∨ t2) ∨ t3 ⇔ t1 ∨ t2 ∨ t3
⊦ ∀m n p. m + (n + p) = m + n + p
⊦ ∀n. toByte (odd n :: fromByte (fromNatural (n div 2))) = fromNatural n
⊦ ∀n. toWord (odd n :: fromWord (fromNatural (n div 2))) = fromNatural n
⊦ ∀l h t. (h :: t) @ l = h :: t @ l
⊦ ∀m n. m ≤ suc n ⇔ m = suc n ∨ m ≤ n
⊦ ∀m n. m * n = 0 ⇔ m = 0 ∨ n = 0
⊦ ∀w. ∃b0 b1. w = fromBytes b0 b1 ∧ toBytes w = (b0, b1)
⊦ ∀P. P 0 ∧ (∀n. P n ⇒ P (suc n)) ⇒ ∀n. P n
⊦ ∀b t1 t2. (if b then t1 else t2) ⇔ (¬b ∨ t1) ∧ (b ∨ t2)
⊦ ∀m n p. m * (n + p) = m * n + m * p
⊦ ∀m n p. (m + n) * p = m * p + n * p
⊦ ∀x. x = error ∨ x = eof ∨ ∃a0 a1. x = stream a0 a1
⊦ ∀p.
parse (parseSome p) =
case none none (λa s. if p a then some (a, s) else none)
⊦ ∀b f x y. f (if b then x else y) = if b then f x else f y
⊦ ∀b f g x. (if b then f else g) x = if b then f x else g x
⊦ ∀e b f a s. case e b f (stream a s) = f a s
⊦ ∀p q. (∀x. p x ∧ q x) ⇔ (∀x. p x) ∧ ∀x. q x
⊦ ∀p q. (∃x. p x) ∨ (∃x. q x) ⇔ ∃x. p x ∨ q x
⊦ ∀p e l.
inverse p e ⇒ parseStream p (fromList (concat (map e l))) = fromList l
⊦ ∀h t n. n < length t ⇒ nth (suc n) (h :: t) = nth n t
⊦ ∀n h t. n ≤ length t ⇒ drop (suc n) (h :: t) = drop n t
⊦ ∀m n p. m * n = m * p ⇔ m = 0 ∨ n = p
⊦ ∀m n p. m * n ≤ m * p ⇔ m = 0 ∨ n ≤ p
⊦ ∀l n. bit (toByte l) n ⇔ n < width ∧ n < length l ∧ nth n l
⊦ ∀m n p. m * n < m * p ⇔ ¬(m = 0) ∧ n < p
⊦ ∀h1 h2 t1 t2. h1 :: t1 = h2 :: t2 ⇔ h1 = h2 ∧ t1 = t2
⊦ ∀x y a b. (x, y) = (a, b) ⇔ x = a ∧ y = b
⊦ ∀p e. inverse p e ⇔ ∀x s. parse p (append (e x) s) = some (x, s)
⊦ ∀n h t. n ≤ length t ⇒ take (suc n) (h :: t) = h :: take n t
⊦ ∀l.
fromByte (toByte l) =
if length l ≤ width then l @ replicate (width - length l) F
else take width l
⊦ ∀l.
fromWord (toWord l) =
if length l ≤ width then l @ replicate (width - length l) F
else take width l
⊦ ∀c.
∃pl pos.
isChar (pl, pos) ∧ c = mkChar (pl, pos) ∧ destChar c = (pl, pos)
⊦ ∀m n q r. m = q * n + r ∧ r < n ⇒ m div n = q
⊦ ∀m n q r. m = q * n + r ∧ r < n ⇒ m mod n = r
⊦ ∀p e s.
strongInverse p e ⇒
case toList (parseStream p s) of
none → T
| some l → toList s = some (concat (map e l))
⊦ ∀w.
(toByte (drop 8 (fromWord w)), toByte (take 8 (fromWord w))) =
toBytes w
⊦ ∀p s.
parse p s = none ∨
∃b s'. parse p s = some (b, s') ∧ isProperSuffix s' s
⊦ ∀p.
isParser p ⇔
∀x xs. case p x xs of none → T | some (y, xs') → isSuffix xs' xs
⊦ ∀p e.
strongInverse p e ⇔
inverse p e ∧ ∀s x s'. parse p s = some (x, s') ⇒ s = append (e x) s'
⊦ ∀f h1 h2 t1 t2.
length t1 = length t2 ⇒
zipWith f (h1 :: t1) (h2 :: t2) = f h1 h2 :: zipWith f t1 t2
⊦ ∀q h1 h2 t1 t2.
compare q (h1 :: t1) (h2 :: t2) ⇔
compare (¬h1 ∧ h2 ∨ ¬(h1 ∧ ¬h2) ∧ q) t1 t2
⊦ ∀q h1 h2 t1 t2.
compare q (h1 :: t1) (h2 :: t2) ⇔
compare (¬h1 ∧ h2 ∨ ¬(h1 ∧ ¬h2) ∧ q) t1 t2
⊦ ∀b.
∃x0 x1 x2 x3 x4 x5 x6 x7.
b = toByte (x0 :: x1 :: x2 :: x3 :: x4 :: x5 :: x6 :: x7 :: [])
⊦ ∀f p s.
parse (partialMap f p) s =
case parse p s of
none → none
| some (b, s') → case f b of none → none | some c → some (c, s')
⊦ ∀l n.
shiftRight (toByte l) n =
if length l ≤ width then
if length l ≤ n then toByte [] else toByte (drop n l)
else if width ≤ n then toByte []
else toByte (drop n (take width l))
⊦ ∀pb pc s.
parse (parsePair pb pc) s =
case parse pb s of
none → none
| some (b, s') →
case parse pc s' of
none → none
| some (c, s'') → some ((b, c), s'')
⊦ ∀pl pos.
isChar (pl, pos) ⇔
let pli ← destPlane pl in
let posi ← destPosition pos in
¬(pli = 0) ∨ posi < 55296 ∨ 57343 < posi ∧ posi < 65534