Package char-utf8-def: Definition of the UTF-8 encoding of Unicode characters
Information
name | char-utf8-def |
version | 1.96 |
description | Definition of the UTF-8 encoding of Unicode characters |
author | Joe Leslie-Hurd <joe@gilith.com> |
license | MIT |
provenance | HOL Light theory extracted on 2015-03-10 |
checksum | 89899eece90982465f9fe7797e06be02e68a53f5 |
requires | base byte char-def parser |
show | Data.Bool Data.Byte Data.Char Data.List Data.Option Data.Pair Data.Sum Number.Natural Parser Parser.Stream |
Files
- Package tarball char-utf8-def-1.96.tgz
- Theory source file char-utf8-def.thy (included in the package tarball)
Defined Constants
- Data
- Char
- UTF8
- UTF8.decode
- UTF8.encode
- UTF8.encodeAscii
- UTF8.encodeChar
- UTF8.encodeChar.encode2
- UTF8.encodeChar.encode3
- UTF8.encodeChar.encode4
- UTF8.isContinuationByte
- UTF8.parse
- UTF8.parseAscii
- UTF8.parseChar
- UTF8.parseMultibyte
- UTF8.parseMultibyte.addContinuationByte
- UTF8.parseMultibyte.parse2
- UTF8.parseMultibyte.parse3
- UTF8.parseMultibyte.parse4
- UTF8.parseNatural
- UTF8.reencode
- UTF8.reencodeChar
- UTF8
- Char
Theorems
⊦ UTF8.parseNatural = orelse UTF8.parseAscii UTF8.parseMultibyte
⊦ ∀n. UTF8.encodeAscii n = fromNatural n :: []
⊦ ∀c. UTF8.encode c = concat (map UTF8.encodeChar c)
⊦ ∀c. UTF8.reencode c = concat (map UTF8.reencodeChar c)
⊦ UTF8.parse = orelse (map UTF8.parseChar right) (map any left)
⊦ ∀b. UTF8.decode b = fst (toList (parse UTF8.parse (fromList b)))
⊦ UTF8.parseChar =
mapPartial UTF8.parseNatural
(λn. if invariant n then some (mk n) else none)
⊦ ∀x.
UTF8.reencodeChar x =
case x of left b → b :: [] | right c → UTF8.encodeChar c
⊦ UTF8.parseAscii =
token (λb. if bit b 7 then none else some (toNatural b))
⊦ ∀b. UTF8.isContinuationByte b ⇔ bit b 7 ∧ ¬bit b 6
⊦ ∀b n.
UTF8.parseMultibyte.addContinuationByte b n =
if UTF8.isContinuationByte b then
some (toNatural (and b 63) + Bits.shiftLeft n 6)
else none
⊦ ∀b.
UTF8.parseMultibyte.parse2 b =
filter
(foldN UTF8.parseMultibyte.addContinuationByte 0
(toNatural (and b 31))) (λn. 128 ≤ n)
⊦ ∀b.
UTF8.parseMultibyte.parse3 b =
filter
(foldN UTF8.parseMultibyte.addContinuationByte 1
(toNatural (and b 15))) (λn. 2048 ≤ n)
⊦ ∀b.
UTF8.parseMultibyte.parse4 b =
filter
(foldN UTF8.parseMultibyte.addContinuationByte 2
(toNatural (and b 7))) (λn. 65536 ≤ n)
⊦ UTF8.parseMultibyte =
sequence
(token
(λb.
if bit b 6 then
if bit b 5 then
if bit b 4 then
if bit b 3 then none
else some (UTF8.parseMultibyte.parse4 b)
else some (UTF8.parseMultibyte.parse3 b)
else some (UTF8.parseMultibyte.parse2 b)
else none))
⊦ ∀n.
UTF8.encodeChar.encode2 n =
let n1 ← Bits.shiftRight n 6 in
let b0 ← or 192 (fromNatural n1) in
let b1 ← or 128 (fromNatural (Bits.bound n 6)) in
b0 :: b1 :: []
⊦ ∀c.
UTF8.encodeChar c =
let n ← dest c in
if n < 128 then UTF8.encodeAscii n
else if n < 2048 then UTF8.encodeChar.encode2 n
else if n < 65536 then UTF8.encodeChar.encode3 n
else UTF8.encodeChar.encode4 n
⊦ ∀n.
UTF8.encodeChar.encode3 n =
let n1 ← Bits.shiftRight n 6 in
let n2 ← Bits.shiftRight n1 6 in
let b0 ← or 224 (fromNatural n2) in
let b1 ← or 128 (fromNatural (Bits.bound n1 6)) in
let b2 ← or 128 (fromNatural (Bits.bound n 6)) in
b0 :: b1 :: b2 :: []
⊦ ∀n.
UTF8.encodeChar.encode4 n =
let n1 ← Bits.shiftRight n 6 in
let n2 ← Bits.shiftRight n1 6 in
let n3 ← Bits.shiftRight n2 6 in
let b0 ← or 240 (fromNatural n3) in
let b1 ← or 128 (fromNatural (Bits.bound n2 6)) in
let b2 ← or 128 (fromNatural (Bits.bound n1 6)) in
let b3 ← or 128 (fromNatural (Bits.bound n 6)) in
b0 :: b1 :: b2 :: b3 :: []
External Type Operators
- →
- bool
- Data
- Byte
- byte
- Char
- char
- List
- list
- Option
- option
- Pair
- ×
- Sum
- +
- Byte
- Number
- Natural
- natural
- Natural
- Parser
- parser
- Stream
- stream
External Constants
- =
- Data
- Bool
- ∀
- ∧
- ¬
- cond
- ⊤
- Byte
- and
- bit
- fromNatural
- or
- toNatural
- Char
- dest
- invariant
- mk
- List
- ::
- []
- concat
- map
- Option
- none
- some
- Pair
- fst
- Sum
- case
- left
- right
- Bool
- Number
- Natural
- +
- <
- ≤
- bit0
- bit1
- zero
- Bits
- Bits.bound
- Bits.shiftLeft
- Bits.shiftRight
- Natural
- Parser
- any
- filter
- foldN
- map
- mapPartial
- orelse
- parse
- sequence
- token
- Stream
- fromList
- toList
Assumptions
⊦ ⊤
⊦ (∀) = λp. p = λx. ⊤