Package char-utf8-def: Definition of the UTF-8 encoding of Unicode characters

Information

namechar-utf8-def
version1.38
descriptionDefinition of the UTF-8 encoding of Unicode characters
authorJoe Hurd <joe@gilith.com>
licenseMIT
provenanceHOL Light theory extracted on 2012-01-29
requiresbool
pair
showData.Bool
Data.Byte
Data.Char
Data.Char.UTF8
Data.List
Data.Option
Data.Pair
Data.Word16
Number.Natural
Parser
Parser.Stream

Files

Defined Constants

Theorems

parseContinuationByte = parseSome isContinuationByte

decoder = mkParser decoder.parse

decodeStream = parseStream decoder

parseTwoContinuationBytes =
  parsePair parseContinuationByte parseContinuationByte

parseThreeContinuationBytes =
  parsePair parseContinuationByte parseTwoContinuationBytes

bs. decode bs = toList (decodeStream (fromList bs))

chs. encode chs = concat (map encoder chs)

b. isContinuationByte b bit b 7 ¬bit b 6

p0 p1.
    encoder.encode1 p0 p1 =
    let b00 shiftLeft p0 2 in
    let b01 shiftRight (and p1 192) 6 in
    let b0 or 192 (or b00 b01) in
    let b10 and p1 63 in
    let b1 or 128 b10 in
    b0 :: b1 :: []

ch.
    encoder ch =
    let (pl, pos) destChar ch in
    let p destPlane pl in
    let (p0, p1) toBytes (destPosition pos) in
    if p = 0 then
      if p0 = 0 ¬bit p1 7 then p1 :: []
      else if and 248 p0 = 0 then encoder.encode1 p0 p1
      else encoder.encode2 p0 p1
    else encoder.encode3 p p0 p1

b0 s.
    decoder.parse b0 s =
    if bit b0 7 then
      if bit b0 6 then
        if bit b0 5 then
          if bit b0 4 then
            if bit b0 3 then none
            else
              parse
                (partialMap (decoder.decode3 b0)
                   parseThreeContinuationBytes) s
          else
            parse
              (partialMap (decoder.decode2 b0) parseTwoContinuationBytes) s
        else
          parse (partialMap (decoder.decode1 b0) parseContinuationByte) s
      else none
    else
      let pl mkPlane 0 in
      let pos mkPosition (fromBytes 0 b0) in
      let ch mkChar (pl, pos) in
      some (ch, s)

b0 b1.
    decoder.decode1 b0 b1 =
    let pl mkPlane 0 in
    let p0 shiftRight (and b0 28) 2 in
    let y1 shiftLeft (and b0 3) 6 in
    let x1 and b1 63 in
    let p1 or y1 x1 in
    if p0 = 0 p1 < 128 then none
    else
      let pos mkPosition (fromBytes p0 p1) in
      let ch mkChar (pl, pos) in
      some ch

p0 p1.
    encoder.encode2 p0 p1 =
    let b00 shiftRight (and p0 240) 4 in
    let b0 or 224 b00 in
    let b10 shiftLeft (and p0 15) 2 in
    let b11 shiftRight (and p1 192) 6 in
    let b1 or 128 (or b10 b11) in
    let b20 and p1 63 in
    let b2 or 128 b20 in
    b0 :: b1 :: b2 :: []

b0 b1 b2 b3.
    decoder.decode3 b0 (b1, b2, b3) =
    let w shiftLeft (and b0 7) 2 in
    let z shiftRight (and b1 48) 4 in
    let p or w z in
    if p = 0 16 < p then none
    else
      let pl mkPlane p in
      let z0 shiftLeft (and b1 15) 4 in
      let y0 shiftRight (and b2 60) 2 in
      let p0 or z0 y0 in
      let y1 shiftLeft (and b2 3) 6 in
      let x1 and b3 63 in
      let p1 or y1 x1 in
      let pos mkPosition (fromBytes p0 p1) in
      let ch mkChar (pl, pos) in
      some ch

b0 b1 b2.
    decoder.decode2 b0 (b1, b2) =
    let z0 shiftLeft (and b0 15) 4 in
    let y0 shiftRight (and b1 60) 2 in
    let p0 or z0 y0 in
    if p0 < 8 216 p0 p0 223 then none
    else
      let y1 shiftLeft (and b1 3) 6 in
      let x1 and b2 63 in
      let p1 or y1 x1 in
      if p0 = 255 254 p1 then none
      else
        let pl mkPlane 0 in
        let pos mkPosition (fromBytes p0 p1) in
        let ch mkChar (pl, pos) in
        some ch

p p0 p1.
    encoder.encode3 p p0 p1 =
    let b00 shiftRight (and p 28) 2 in
    let b0 or 240 b00 in
    let b10 shiftLeft (and p 3) 4 in
    let b11 shiftRight (and p0 240) 4 in
    let b1 or 128 (or b10 b11) in
    let b20 shiftLeft (and p0 15) 2 in
    let b21 shiftRight (and p1 192) 6 in
    let b2 or 128 (or b20 b21) in
    let b30 and p1 63 in
    let b3 or 128 b30 in
    b0 :: b1 :: b2 :: b3 :: []

Input Type Operators

Input Constants

Assumptions

T

() = λp. p = λx. T

() = λp q. p q p

x y. fst (x, y) = x

x y. snd (x, y) = y

() = λp q. (λf. f p q) = λf. f T T