Package char-utf8-def: Definition of the UTF-8 encoding of Unicode characters

Information

namechar-utf8-def
version1.78
descriptionDefinition of the UTF-8 encoding of Unicode characters
authorJoe Leslie-Hurd <joe@gilith.com>
licenseMIT
provenanceHOL Light theory extracted on 2012-09-25
requiresbool
byte
char-def
list
natural
option
pair
parser
word16
showData.Bool
Data.Byte
Data.Char
Data.Char.UTF8
Data.List
Data.Option
Data.Pair
Data.Word16
Number.Natural
Parser
Parser.Stream

Files

Defined Constants

Theorems

parseContinuationByte = parseSome isContinuationByte

decoder = mkParser decoder.parse

decodeStream = parseStream decoder

parseTwoContinuationBytes =
  parsePair parseContinuationByte parseContinuationByte

parseThreeContinuationBytes =
  parsePair parseContinuationByte parseTwoContinuationBytes

bs. decode bs = toList (decodeStream (fromList bs))

chs. encode chs = concat (map encoder chs)

b. isContinuationByte b bit b 7 ¬bit b 6

p1 p0.
    encoder.encode1 p1 p0 =
    let b00 shiftLeft p1 2 in
    let b01 shiftRight (and p0 192) 6 in
    let b0 or 192 (or b00 b01) in
    let b10 and p0 63 in
    let b1 or 128 b10 in
    b0 :: b1 :: []

b0 b1.
    decoder.decode1 b0 b1 =
    let pl mkPlane 0 in
    let p1 shiftRight (and b0 28) 2 in
    let y0 shiftLeft (and b0 3) 6 in
    let x0 and b1 63 in
    let p0 or y0 x0 in
    if p1 = 0 ¬bit p0 7 then none
    else
      let pos mkPosition (fromBytes p0 p1) in
      let ch mkChar (pl, pos) in
      some ch

ch.
    encoder ch =
    let (pl, pos) destChar ch in
    let p destPlane pl in
    let (p0, p1) toBytes (destPosition pos) in
    if p = 0 then
      if p1 = 0 ¬bit p0 7 then p0 :: []
      else if and 248 p1 = 0 then encoder.encode1 p1 p0
      else encoder.encode2 p1 p0
    else encoder.encode3 p p1 p0

b0 s.
    decoder.parse b0 s =
    if bit b0 7 then
      if bit b0 6 then
        if bit b0 5 then
          if bit b0 4 then
            if bit b0 3 then none
            else
              parse
                (partialMap (decoder.decode3 b0)
                   parseThreeContinuationBytes) s
          else
            parse
              (partialMap (decoder.decode2 b0) parseTwoContinuationBytes) s
        else
          parse (partialMap (decoder.decode1 b0) parseContinuationByte) s
      else none
    else
      let pl mkPlane 0 in
      let pos mkPosition (fromBytes b0 0) in
      let ch mkChar (pl, pos) in
      some (ch, s)

p1 p0.
    encoder.encode2 p1 p0 =
    let b00 shiftRight (and p1 240) 4 in
    let b0 or 224 b00 in
    let b10 shiftLeft (and p1 15) 2 in
    let b11 shiftRight (and p0 192) 6 in
    let b1 or 128 (or b10 b11) in
    let b20 and p0 63 in
    let b2 or 128 b20 in
    b0 :: b1 :: b2 :: []

b0 b1 b2 b3.
    decoder.decode3 b0 (b1, b2, b3) =
    let w shiftLeft (and b0 7) 2 in
    let z shiftRight (and b1 48) 4 in
    let p or w z in
    if p = 0 16 < p then none
    else
      let pl mkPlane p in
      let z1 shiftLeft (and b1 15) 4 in
      let y1 shiftRight (and b2 60) 2 in
      let p1 or z1 y1 in
      let y0 shiftLeft (and b2 3) 6 in
      let x0 and b3 63 in
      let p0 or y0 x0 in
      let pos mkPosition (fromBytes p0 p1) in
      let ch mkChar (pl, pos) in
      some ch

b0 b1 b2.
    decoder.decode2 b0 (b1, b2) =
    let z1 shiftLeft (and b0 15) 4 in
    let y1 shiftRight (and b1 60) 2 in
    let p1 or z1 y1 in
    if p1 < 8 216 p1 p1 223 then none
    else
      let y0 shiftLeft (and b1 3) 6 in
      let x0 and b2 63 in
      let p0 or y0 x0 in
      if p1 = 255 254 p0 then none
      else
        let pl mkPlane 0 in
        let pos mkPosition (fromBytes p0 p1) in
        let ch mkChar (pl, pos) in
        some ch

p p1 p0.
    encoder.encode3 p p1 p0 =
    let b00 shiftRight (and p 28) 2 in
    let b0 or 240 b00 in
    let b10 shiftLeft (and p 3) 4 in
    let b11 shiftRight (and p1 240) 4 in
    let b1 or 128 (or b10 b11) in
    let b20 shiftLeft (and p1 15) 2 in
    let b21 shiftRight (and p0 192) 6 in
    let b2 or 128 (or b20 b21) in
    let b30 and p0 63 in
    let b3 or 128 b30 in
    b0 :: b1 :: b2 :: b3 :: []

Input Type Operators

Input Constants

Assumptions

() = λp. p = λx.

() = λp q. p q p

x y. fst (x, y) = x

x y. snd (x, y) = y

() = λp q. (λf. f p q) = λf. f