Package char-utf8-def: Definition of the UTF-8 encoding of Unicode characters

Information

namechar-utf8-def
version1.96
descriptionDefinition of the UTF-8 encoding of Unicode characters
authorJoe Leslie-Hurd <joe@gilith.com>
licenseMIT
provenanceHOL Light theory extracted on 2015-03-10
checksum89899eece90982465f9fe7797e06be02e68a53f5
requiresbase
byte
char-def
parser
showData.Bool
Data.Byte
Data.Char
Data.List
Data.Option
Data.Pair
Data.Sum
Number.Natural
Parser
Parser.Stream

Files

Defined Constants

Theorems

UTF8.parseNatural = orelse UTF8.parseAscii UTF8.parseMultibyte

n. UTF8.encodeAscii n = fromNatural n :: []

c. UTF8.encode c = concat (map UTF8.encodeChar c)

c. UTF8.reencode c = concat (map UTF8.reencodeChar c)

UTF8.parse = orelse (map UTF8.parseChar right) (map any left)

b. UTF8.decode b = fst (toList (parse UTF8.parse (fromList b)))

UTF8.parseChar =
  mapPartial UTF8.parseNatural
    (λn. if invariant n then some (mk n) else none)

x.
    UTF8.reencodeChar x =
    case x of left b b :: [] | right c UTF8.encodeChar c

UTF8.parseAscii =
  token (λb. if bit b 7 then none else some (toNatural b))

b. UTF8.isContinuationByte b bit b 7 ¬bit b 6

b n.
    UTF8.parseMultibyte.addContinuationByte b n =
    if UTF8.isContinuationByte b then
      some (toNatural (and b 63) + Bits.shiftLeft n 6)
    else none

b.
    UTF8.parseMultibyte.parse2 b =
    filter
      (foldN UTF8.parseMultibyte.addContinuationByte 0
         (toNatural (and b 31))) (λn. 128 n)

b.
    UTF8.parseMultibyte.parse3 b =
    filter
      (foldN UTF8.parseMultibyte.addContinuationByte 1
         (toNatural (and b 15))) (λn. 2048 n)

b.
    UTF8.parseMultibyte.parse4 b =
    filter
      (foldN UTF8.parseMultibyte.addContinuationByte 2
         (toNatural (and b 7))) (λn. 65536 n)

UTF8.parseMultibyte =
  sequence
    (token
       (λb.
          if bit b 6 then
            if bit b 5 then
              if bit b 4 then
                if bit b 3 then none
                else some (UTF8.parseMultibyte.parse4 b)
              else some (UTF8.parseMultibyte.parse3 b)
            else some (UTF8.parseMultibyte.parse2 b)
          else none))

n.
    UTF8.encodeChar.encode2 n =
    let n1 Bits.shiftRight n 6 in
    let b0 or 192 (fromNatural n1) in
    let b1 or 128 (fromNatural (Bits.bound n 6)) in
    b0 :: b1 :: []

c.
    UTF8.encodeChar c =
    let n dest c in
    if n < 128 then UTF8.encodeAscii n
    else if n < 2048 then UTF8.encodeChar.encode2 n
    else if n < 65536 then UTF8.encodeChar.encode3 n
    else UTF8.encodeChar.encode4 n

n.
    UTF8.encodeChar.encode3 n =
    let n1 Bits.shiftRight n 6 in
    let n2 Bits.shiftRight n1 6 in
    let b0 or 224 (fromNatural n2) in
    let b1 or 128 (fromNatural (Bits.bound n1 6)) in
    let b2 or 128 (fromNatural (Bits.bound n 6)) in
    b0 :: b1 :: b2 :: []

n.
    UTF8.encodeChar.encode4 n =
    let n1 Bits.shiftRight n 6 in
    let n2 Bits.shiftRight n1 6 in
    let n3 Bits.shiftRight n2 6 in
    let b0 or 240 (fromNatural n3) in
    let b1 or 128 (fromNatural (Bits.bound n2 6)) in
    let b2 or 128 (fromNatural (Bits.bound n1 6)) in
    let b3 or 128 (fromNatural (Bits.bound n 6)) in
    b0 :: b1 :: b2 :: b3 :: []

External Type Operators

External Constants

Assumptions

() = λp. p = λx.