Package char-utf8-def: Definition of the UTF-8 encoding of Unicode characters
Information
name | char-utf8-def |
version | 1.55 |
description | Definition of the UTF-8 encoding of Unicode characters |
author | Joe Hurd <joe@gilith.com> |
license | MIT |
provenance | HOL Light theory extracted on 2012-05-18 |
requires | bool pair |
show | Data.Bool Data.Byte Data.Char Data.Char.UTF8 Data.List Data.Option Data.Pair Data.Word16 Number.Natural Parser Parser.Stream |
Files
- Package tarball char-utf8-def-1.55.tgz
- Theory file char-utf8-def.thy (included in the package tarball)
Defined Constants
- Data
- Char
- UTF8
- decode
- decodeStream
- decoder
- decoder.decode1
- decoder.decode2
- decoder.decode3
- decoder.parse
- encode
- encoder
- encoder.encode1
- encoder.encode2
- encoder.encode3
- isContinuationByte
- parseContinuationByte
- parseThreeContinuationBytes
- parseTwoContinuationBytes
- UTF8
- Char
Theorems
⊦ parseContinuationByte = parseSome isContinuationByte
⊦ decoder = mkParser decoder.parse
⊦ decodeStream = parseStream decoder
⊦ parseTwoContinuationBytes =
parsePair parseContinuationByte parseContinuationByte
⊦ parseThreeContinuationBytes =
parsePair parseContinuationByte parseTwoContinuationBytes
⊦ ∀bs. decode bs = toList (decodeStream (fromList bs))
⊦ ∀chs. encode chs = concat (map encoder chs)
⊦ ∀b. isContinuationByte b ⇔ bit b 7 ∧ ¬bit b 6
⊦ ∀p0 p1.
encoder.encode1 p0 p1 =
let b00 ← shiftLeft p0 2 in
let b01 ← shiftRight (and p1 192) 6 in
let b0 ← or 192 (or b00 b01) in
let b10 ← and p1 63 in
let b1 ← or 128 b10 in
b0 :: b1 :: []
⊦ ∀ch.
encoder ch =
let (pl, pos) ← destChar ch in
let p ← destPlane pl in
let (p0, p1) ← toBytes (destPosition pos) in
if p = 0 then
if p0 = 0 ∧ ¬bit p1 7 then p1 :: []
else if and 248 p0 = 0 then encoder.encode1 p0 p1
else encoder.encode2 p0 p1
else encoder.encode3 p p0 p1
⊦ ∀b0 s.
decoder.parse b0 s =
if bit b0 7 then
if bit b0 6 then
if bit b0 5 then
if bit b0 4 then
if bit b0 3 then none
else
parse
(partialMap (decoder.decode3 b0)
parseThreeContinuationBytes) s
else
parse
(partialMap (decoder.decode2 b0) parseTwoContinuationBytes) s
else
parse (partialMap (decoder.decode1 b0) parseContinuationByte) s
else none
else
let pl ← mkPlane 0 in
let pos ← mkPosition (fromBytes 0 b0) in
let ch ← mkChar (pl, pos) in
some (ch, s)
⊦ ∀b0 b1.
decoder.decode1 b0 b1 =
let pl ← mkPlane 0 in
let p0 ← shiftRight (and b0 28) 2 in
let y1 ← shiftLeft (and b0 3) 6 in
let x1 ← and b1 63 in
let p1 ← or y1 x1 in
if p0 = 0 ∧ p1 < 128 then none
else
let pos ← mkPosition (fromBytes p0 p1) in
let ch ← mkChar (pl, pos) in
some ch
⊦ ∀p0 p1.
encoder.encode2 p0 p1 =
let b00 ← shiftRight (and p0 240) 4 in
let b0 ← or 224 b00 in
let b10 ← shiftLeft (and p0 15) 2 in
let b11 ← shiftRight (and p1 192) 6 in
let b1 ← or 128 (or b10 b11) in
let b20 ← and p1 63 in
let b2 ← or 128 b20 in
b0 :: b1 :: b2 :: []
⊦ ∀b0 b1 b2 b3.
decoder.decode3 b0 (b1, b2, b3) =
let w ← shiftLeft (and b0 7) 2 in
let z ← shiftRight (and b1 48) 4 in
let p ← or w z in
if p = 0 ∨ 16 < p then none
else
let pl ← mkPlane p in
let z0 ← shiftLeft (and b1 15) 4 in
let y0 ← shiftRight (and b2 60) 2 in
let p0 ← or z0 y0 in
let y1 ← shiftLeft (and b2 3) 6 in
let x1 ← and b3 63 in
let p1 ← or y1 x1 in
let pos ← mkPosition (fromBytes p0 p1) in
let ch ← mkChar (pl, pos) in
some ch
⊦ ∀b0 b1 b2.
decoder.decode2 b0 (b1, b2) =
let z0 ← shiftLeft (and b0 15) 4 in
let y0 ← shiftRight (and b1 60) 2 in
let p0 ← or z0 y0 in
if p0 < 8 ∨ 216 ≤ p0 ∧ p0 ≤ 223 then none
else
let y1 ← shiftLeft (and b1 3) 6 in
let x1 ← and b2 63 in
let p1 ← or y1 x1 in
if p0 = 255 ∧ 254 ≤ p1 then none
else
let pl ← mkPlane 0 in
let pos ← mkPosition (fromBytes p0 p1) in
let ch ← mkChar (pl, pos) in
some ch
⊦ ∀p p0 p1.
encoder.encode3 p p0 p1 =
let b00 ← shiftRight (and p 28) 2 in
let b0 ← or 240 b00 in
let b10 ← shiftLeft (and p 3) 4 in
let b11 ← shiftRight (and p0 240) 4 in
let b1 ← or 128 (or b10 b11) in
let b20 ← shiftLeft (and p0 15) 2 in
let b21 ← shiftRight (and p1 192) 6 in
let b2 ← or 128 (or b20 b21) in
let b30 ← and p1 63 in
let b3 ← or 128 b30 in
b0 :: b1 :: b2 :: b3 :: []
Input Type Operators
- →
- bool
- Data
- Byte
- byte
- Char
- char
- plane
- position
- List
- list
- Option
- option
- Pair
- ×
- Word16
- word16
- Byte
- Number
- Natural
- natural
- Natural
- Parser
- parser
- Stream
- stream
Input Constants
- =
- select
- Data
- Bool
- ∀
- ∧
- ⇒
- ∨
- ¬
- cond
- ⊤
- Byte
- <
- ≤
- and
- bit
- fromNatural
- or
- shiftLeft
- shiftRight
- Char
- destChar
- destPlane
- destPosition
- mkChar
- mkPlane
- mkPosition
- List
- ::
- []
- concat
- map
- Option
- none
- some
- Pair
- ,
- fst
- snd
- Word16
- fromBytes
- toBytes
- Bool
- Number
- Natural
- bit0
- bit1
- zero
- Natural
- Parser
- mkParser
- parse
- parsePair
- parseSome
- parseStream
- partialMap
- Stream
- fromList
- toList
Assumptions
⊦ ⊤
⊦ (∀) = λp. p = λx. ⊤
⊦ (⇒) = λp q. p ∧ q ⇔ p
⊦ ∀x y. fst (x, y) = x
⊦ ∀x y. snd (x, y) = y
⊦ (∧) = λp q. (λf. f p q) = λf. f ⊤ ⊤