name | char-utf8-def |
version | 1.7 |
description | Definitions of UTF-8 encoders and decoders |
author | Joe Hurd <joe@gilith.com> |
license | MIT |
provenance | HOL Light theory extracted on 2011-03-20 |
show | Data.Bool Data.Byte as Byte Data.Char as Char Data.Char.UTF8 Data.List Data.Option as Option Data.Pair Data.Word16 as Word16 Number.Natural as Natural Number.Numeral Parser |
⊦ parseContinuationByte = parseSome isContinuationByte
⊦ decoder = mkParser decoder.parse
⊦ decodeStream = parseStream decoder
⊦ parseTwoContinuationBytes =
parsePair parseContinuationByte parseContinuationByte
⊦ parseThreeContinuationBytes =
parsePair parseContinuationByte parseTwoContinuationBytes
⊦ ∀bs. decode bs = Stream.toList (decodeStream (Stream.fromList bs))
⊦ ∀chs. encode chs = concat (map encoder chs)
⊦ ∀b. isContinuationByte b ⇔ Byte.bit b 7 ∧ ¬Byte.bit b 6
⊦ ∀p0 p1.
encoder.encode1 p0 p1 =
let b00 = Byte.shiftLeft p0 2 in
let b01 = Byte.shiftRight (Byte.and p1 (Byte.fromNatural 192)) 6 in
let b0 = Byte.or (Byte.fromNatural 192) (Byte.or b00 b01) in
let b10 = Byte.and p1 (Byte.fromNatural 63) in
let b1 = Byte.or (Byte.fromNatural 128) b10 in
b0 :: b1 :: []
⊦ ∀ch.
encoder ch =
let (pl, pos) = Char.destChar ch in
let p = Char.destPlane pl in
let (p0, p1) = Word16.toBytes (Char.destPosition pos) in
if p = Byte.fromNatural 0 then
if p0 = Byte.fromNatural 0 ∧ ¬Byte.bit p1 7 then p1 :: []
else if Byte.and (Byte.fromNatural 248) p0 = Byte.fromNatural 0 then
encoder.encode1 p0 p1
else encoder.encode2 p0 p1
else encoder.encode3 p p0 p1
⊦ ∀b0 s.
decoder.parse b0 s =
if Byte.bit b0 7 then
if Byte.bit b0 6 then
if Byte.bit b0 5 then
if Byte.bit b0 4 then
if Byte.bit b0 3 then Option.none
else
parse
(partialMap (decoder.decode3 b0)
parseThreeContinuationBytes) s
else
parse
(partialMap (decoder.decode2 b0) parseTwoContinuationBytes) s
else
parse (partialMap (decoder.decode1 b0) parseContinuationByte) s
else Option.none
else
let pl = Char.mkPlane (Byte.fromNatural 0) in
let pos =
Char.mkPosition (Word16.fromBytes (Byte.fromNatural 0) b0) in
let ch = Char.mkChar (pl, pos) in
Option.some (ch, s)
⊦ ∀b0 b1.
decoder.decode1 b0 b1 =
let pl = Char.mkPlane (Byte.fromNatural 0) in
let p0 = Byte.shiftRight (Byte.and b0 (Byte.fromNatural 28)) 2 in
let y1 = Byte.shiftLeft (Byte.and b0 (Byte.fromNatural 3)) 6 in
let x1 = Byte.and b1 (Byte.fromNatural 63) in
let p1 = Byte.or y1 x1 in
if p0 = Byte.fromNatural 0 ∧ Byte.< p1 (Byte.fromNatural 128) then
Option.none
else
let pos = Char.mkPosition (Word16.fromBytes p0 p1) in
let ch = Char.mkChar (pl, pos) in
Option.some ch
⊦ ∀p0 p1.
encoder.encode2 p0 p1 =
let b00 = Byte.shiftRight (Byte.and p0 (Byte.fromNatural 240)) 4 in
let b0 = Byte.or (Byte.fromNatural 224) b00 in
let b10 = Byte.shiftLeft (Byte.and p0 (Byte.fromNatural 15)) 2 in
let b11 = Byte.shiftRight (Byte.and p1 (Byte.fromNatural 192)) 6 in
let b1 = Byte.or (Byte.fromNatural 128) (Byte.or b10 b11) in
let b20 = Byte.and p1 (Byte.fromNatural 63) in
let b2 = Byte.or (Byte.fromNatural 128) b20 in
b0 :: b1 :: b2 :: []
⊦ ∀b0 b1 b2 b3.
decoder.decode3 b0 (b1, b2, b3) =
let w = Byte.shiftLeft (Byte.and b0 (Byte.fromNatural 7)) 2 in
let z = Byte.shiftRight (Byte.and b1 (Byte.fromNatural 48)) 4 in
let p = Byte.or w z in
if p = Byte.fromNatural 0 ∨ Byte.< (Byte.fromNatural 16) p then
Option.none
else
let pl = Char.mkPlane p in
let z0 = Byte.shiftLeft (Byte.and b1 (Byte.fromNatural 15)) 4 in
let y0 = Byte.shiftRight (Byte.and b2 (Byte.fromNatural 60)) 2 in
let p0 = Byte.or z0 y0 in
let y1 = Byte.shiftLeft (Byte.and b2 (Byte.fromNatural 3)) 6 in
let x1 = Byte.and b3 (Byte.fromNatural 63) in
let p1 = Byte.or y1 x1 in
let pos = Char.mkPosition (Word16.fromBytes p0 p1) in
let ch = Char.mkChar (pl, pos) in
Option.some ch
⊦ ∀b0 b1 b2.
decoder.decode2 b0 (b1, b2) =
let z0 = Byte.shiftLeft (Byte.and b0 (Byte.fromNatural 15)) 4 in
let y0 = Byte.shiftRight (Byte.and b1 (Byte.fromNatural 60)) 2 in
let p0 = Byte.or z0 y0 in
if Byte.< p0 (Byte.fromNatural 8) ∨
Byte.≤ (Byte.fromNatural 216) p0 ∧ Byte.≤ p0 (Byte.fromNatural 223)
then Option.none
else
let y1 = Byte.shiftLeft (Byte.and b1 (Byte.fromNatural 3)) 6 in
let x1 = Byte.and b2 (Byte.fromNatural 63) in
let p1 = Byte.or y1 x1 in
if p0 = Byte.fromNatural 255 ∧ Byte.≤ (Byte.fromNatural 254) p1 then
Option.none
else
let pl = Char.mkPlane (Byte.fromNatural 0) in
let pos = Char.mkPosition (Word16.fromBytes p0 p1) in
let ch = Char.mkChar (pl, pos) in
Option.some ch
⊦ ∀p p0 p1.
encoder.encode3 p p0 p1 =
let b00 = Byte.shiftRight (Byte.and p (Byte.fromNatural 28)) 2 in
let b0 = Byte.or (Byte.fromNatural 240) b00 in
let b10 = Byte.shiftLeft (Byte.and p (Byte.fromNatural 3)) 4 in
let b11 = Byte.shiftRight (Byte.and p0 (Byte.fromNatural 240)) 4 in
let b1 = Byte.or (Byte.fromNatural 128) (Byte.or b10 b11) in
let b20 = Byte.shiftLeft (Byte.and p0 (Byte.fromNatural 15)) 2 in
let b21 = Byte.shiftRight (Byte.and p1 (Byte.fromNatural 192)) 6 in
let b2 = Byte.or (Byte.fromNatural 128) (Byte.or b20 b21) in
let b30 = Byte.and p1 (Byte.fromNatural 63) in
let b3 = Byte.or (Byte.fromNatural 128) b30 in
b0 :: b1 :: b2 :: b3 :: []
⊦ T
⊦ (∀) = λP. P = λx. T
⊦ (⇒) = λp q. p ∧ q ⇔ p
⊦ ∀x y. fst (x, y) = x
⊦ ∀x y. snd (x, y) = y
⊦ (∧) = λp q. (λf. f p q) = λf. f T T