Package char-utf8: Theory of UTF-8 encoders and decoders

Information

namechar-utf8
version1.8
descriptionTheory of UTF-8 encoders and decoders
authorJoe Hurd <joe@gilith.com>
licenseMIT
show Data.Bool
Data.Byte as Byte
Data.Char as Char
Data.Char.UTF8
Data.List
Data.Option as Option
Data.Pair
Data.Word16 as Word16
Number.Natural as Natural
Number.Numeral
Parser

Files

Defined Constants

Theorems

isParser decoder.parse

inverse decoder encoder

strongInverse decoder encoder

parseContinuationByte = parseSome isContinuationByte

decoder = mkParser decoder.parse

decodeStream = parseStream decoder

destParser decoder = decoder.parse

parseTwoContinuationBytes =
  parsePair parseContinuationByte parseContinuationByte

parseThreeContinuationBytes =
  parsePair parseContinuationByte parseTwoContinuationBytes

parse decoder = Stream.case Option.none Option.none decoder.parse

cs. Natural.≤ (length cs) (length (encode cs))

cs. decode (encode cs) = Option.some cs

bs. Natural.≤ (Stream.length (decodeStream bs)) (Stream.length bs)

bs. decode bs = Stream.toList (decodeStream (Stream.fromList bs))

chs. encode chs = concat (map encoder chs)

bs. Option.case T (λcs. encode cs = bs) (decode bs)

bs. Option.case T (λcs. Natural.≤ (length cs) (length bs)) (decode bs)

b. isContinuationByte b Byte.bit b 7 ¬Byte.bit b 6

p0 p1.
    encoder.encode1 p0 p1 =
    let b00 = Byte.shiftLeft p0 2 in
    let b01 = Byte.shiftRight (Byte.and p1 (Byte.fromNatural 192)) 6 in
    let b0 = Byte.or (Byte.fromNatural 192) (Byte.or b00 b01) in
    let b10 = Byte.and p1 (Byte.fromNatural 63) in
    let b1 = Byte.or (Byte.fromNatural 128) b10 in
    b0 :: b1 :: []

ch.
    encoder ch =
    let (pl, pos) = Char.destChar ch in
    let p = Char.destPlane pl in
    let (p0, p1) = Word16.toBytes (Char.destPosition pos) in
    if p = Byte.fromNatural 0 then
      if p0 = Byte.fromNatural 0 ¬Byte.bit p1 7 then p1 :: []
      else if Byte.and (Byte.fromNatural 248) p0 = Byte.fromNatural 0 then
        encoder.encode1 p0 p1
      else encoder.encode2 p0 p1
    else encoder.encode3 p p0 p1

b0 s.
    decoder.parse b0 s =
    if Byte.bit b0 7 then
      if Byte.bit b0 6 then
        if Byte.bit b0 5 then
          if Byte.bit b0 4 then
            if Byte.bit b0 3 then Option.none
            else
              parse
                (partialMap (decoder.decode3 b0)
                   parseThreeContinuationBytes) s
          else
            parse
              (partialMap (decoder.decode2 b0) parseTwoContinuationBytes) s
        else
          parse (partialMap (decoder.decode1 b0) parseContinuationByte) s
      else Option.none
    else
      let pl = Char.mkPlane (Byte.fromNatural 0) in
      let pos =
          Char.mkPosition (Word16.fromBytes (Byte.fromNatural 0) b0) in
      let ch = Char.mkChar (pl, pos) in
      Option.some (ch, s)

b0 b1.
    decoder.decode1 b0 b1 =
    let pl = Char.mkPlane (Byte.fromNatural 0) in
    let p0 = Byte.shiftRight (Byte.and b0 (Byte.fromNatural 28)) 2 in
    let y1 = Byte.shiftLeft (Byte.and b0 (Byte.fromNatural 3)) 6 in
    let x1 = Byte.and b1 (Byte.fromNatural 63) in
    let p1 = Byte.or y1 x1 in
    if p0 = Byte.fromNatural 0 Byte.< p1 (Byte.fromNatural 128) then
      Option.none
    else
      let pos = Char.mkPosition (Word16.fromBytes p0 p1) in
      let ch = Char.mkChar (pl, pos) in
      Option.some ch

p0 p1.
    encoder.encode2 p0 p1 =
    let b00 = Byte.shiftRight (Byte.and p0 (Byte.fromNatural 240)) 4 in
    let b0 = Byte.or (Byte.fromNatural 224) b00 in
    let b10 = Byte.shiftLeft (Byte.and p0 (Byte.fromNatural 15)) 2 in
    let b11 = Byte.shiftRight (Byte.and p1 (Byte.fromNatural 192)) 6 in
    let b1 = Byte.or (Byte.fromNatural 128) (Byte.or b10 b11) in
    let b20 = Byte.and p1 (Byte.fromNatural 63) in
    let b2 = Byte.or (Byte.fromNatural 128) b20 in
    b0 :: b1 :: b2 :: []

b0 b1 b2 b3.
    decoder.decode3 b0 (b1, b2, b3) =
    let w = Byte.shiftLeft (Byte.and b0 (Byte.fromNatural 7)) 2 in
    let z = Byte.shiftRight (Byte.and b1 (Byte.fromNatural 48)) 4 in
    let p = Byte.or w z in
    if p = Byte.fromNatural 0 Byte.< (Byte.fromNatural 16) p then
      Option.none
    else
      let pl = Char.mkPlane p in
      let z0 = Byte.shiftLeft (Byte.and b1 (Byte.fromNatural 15)) 4 in
      let y0 = Byte.shiftRight (Byte.and b2 (Byte.fromNatural 60)) 2 in
      let p0 = Byte.or z0 y0 in
      let y1 = Byte.shiftLeft (Byte.and b2 (Byte.fromNatural 3)) 6 in
      let x1 = Byte.and b3 (Byte.fromNatural 63) in
      let p1 = Byte.or y1 x1 in
      let pos = Char.mkPosition (Word16.fromBytes p0 p1) in
      let ch = Char.mkChar (pl, pos) in
      Option.some ch

b0 b1 b2.
    decoder.decode2 b0 (b1, b2) =
    let z0 = Byte.shiftLeft (Byte.and b0 (Byte.fromNatural 15)) 4 in
    let y0 = Byte.shiftRight (Byte.and b1 (Byte.fromNatural 60)) 2 in
    let p0 = Byte.or z0 y0 in
    if Byte.< p0 (Byte.fromNatural 8)
       Byte.≤ (Byte.fromNatural 216) p0 Byte.≤ p0 (Byte.fromNatural 223)
    then Option.none
    else
      let y1 = Byte.shiftLeft (Byte.and b1 (Byte.fromNatural 3)) 6 in
      let x1 = Byte.and b2 (Byte.fromNatural 63) in
      let p1 = Byte.or y1 x1 in
      if p0 = Byte.fromNatural 255 Byte.≤ (Byte.fromNatural 254) p1 then
        Option.none
      else
        let pl = Char.mkPlane (Byte.fromNatural 0) in
        let pos = Char.mkPosition (Word16.fromBytes p0 p1) in
        let ch = Char.mkChar (pl, pos) in
        Option.some ch

p p0 p1.
    encoder.encode3 p p0 p1 =
    let b00 = Byte.shiftRight (Byte.and p (Byte.fromNatural 28)) 2 in
    let b0 = Byte.or (Byte.fromNatural 240) b00 in
    let b10 = Byte.shiftLeft (Byte.and p (Byte.fromNatural 3)) 4 in
    let b11 = Byte.shiftRight (Byte.and p0 (Byte.fromNatural 240)) 4 in
    let b1 = Byte.or (Byte.fromNatural 128) (Byte.or b10 b11) in
    let b20 = Byte.shiftLeft (Byte.and p0 (Byte.fromNatural 15)) 2 in
    let b21 = Byte.shiftRight (Byte.and p1 (Byte.fromNatural 192)) 6 in
    let b2 = Byte.or (Byte.fromNatural 128) (Byte.or b20 b21) in
    let b30 = Byte.and p1 (Byte.fromNatural 63) in
    let b3 = Byte.or (Byte.fromNatural 128) b30 in
    b0 :: b1 :: b2 :: b3 :: []

Input Type Operators

Input Constants

Assumptions

T

n. Natural.≤ 0 n

x. Stream.isSuffix x x

F p. p

let = λf x. f x

t. t ¬t

p. Char.isPosition p T

(¬) = λp. p F

t. (x. t) t

t. (λx. t x) = t

() = λP. P = λx. T

a'. ¬(Option.none = Option.some a')

x. x = x T

n. ¬(Natural.suc n = 0)

Byte.width = 8

n. bit0 n = Natural.+ n n

l. Stream.length (Stream.fromList l) = length l

l. Stream.toList (Stream.fromList l) = Option.some l

Word16.width = 16

() = λp q. p q p

t. (t T) (t F)

n. bit1 n = Natural.suc (Natural.+ n n)

x y. fst (x, y) = x

x y. snd (x, y) = y

P x. P x P ((select) P)

(¬T F) (¬F T)

f y. (λx. f x) y = f y

x y. x = y y = x

x y. x = y y = x

t1 t2. t1 t2 t2 t1

m n. Natural.- (Natural.+ m n) n = m

x y. Stream.isProperSuffix x y Stream.isSuffix x y

p s. Natural.≤ (Stream.length (parseStream p s)) (Stream.length s)

n. Natural.* 2 n = Natural.+ n n

m n. ¬Natural.≤ m n Natural.< n m

m n. Natural.≤ (Natural.suc m) n Natural.< m n

x. x = Option.none a. x = Option.some a

s. Option.case T (λl. length l = Stream.length s) (Stream.toList s)

P. (b. P b) P T P F

() = λp q. (λf. f p q) = λf. f T T

() = λP. q. (x. P x q) q

a a'. Option.some a = Option.some a' a = a'

w1 w2. Byte.Bits.fromWord w1 = Byte.Bits.fromWord w2 w1 = w2

m n. Natural.suc m = Natural.suc n m = n

m n. Natural.- m n = 0 Natural.≤ m n

w1 w2.
    Byte.Bits.compare F (Byte.Bits.fromWord w1) (Byte.Bits.fromWord w2)
    Byte.< w1 w2

w1 w2.
    Byte.Bits.compare T (Byte.Bits.fromWord w1) (Byte.Bits.fromWord w2)
    Byte.≤ w1 w2

b0 b1.
    Word16.Bits.toWord (Byte.Bits.fromWord b1 @ Byte.Bits.fromWord b0) =
    Word16.fromBytes b0 b1

w1 w2.
    Word16.Bits.compare F (Word16.Bits.fromWord w1)
      (Word16.Bits.fromWord w2) Word16.< w1 w2

m n. Natural.even (Natural.* m n) Natural.even m Natural.even n

m n. Natural.even (Natural.+ m n) Natural.even m Natural.even n

p. Char.isPlane p Byte.< p (Byte.fromNatural 17)

f g. f = g x. f x = g x

() = λp q. r. (p r) (q r) r

(Natural.even 0 T) n. Natural.even (Natural.suc n) ¬Natural.even n

(Natural.odd 0 F) n. Natural.odd (Natural.suc n) ¬Natural.odd n

w1 w2.
    Byte.and w1 w2 =
    Byte.Bits.toWord
      (zipWith () (Byte.Bits.fromWord w1) (Byte.Bits.fromWord w2))

w1 w2.
    Byte.or w1 w2 =
    Byte.Bits.toWord
      (zipWith () (Byte.Bits.fromWord w1) (Byte.Bits.fromWord w2))

m n. Natural.≤ m n Natural.< m n m = n

m n. Natural.odd (Natural.+ m n) ¬(Natural.odd m Natural.odd n)

l n.
    Byte.shiftLeft (Byte.Bits.toWord l) n =
    Byte.Bits.toWord (replicate n F @ l)

PAIR'. fn. a0 a1. fn (a0, a1) = PAIR' a0 a1

pl. b. Char.isPlane b pl = Char.mkPlane b Char.destPlane pl = b

pos.
    w.
      Char.isPosition w pos = Char.mkPosition w
      Char.destPosition pos = w

t1 t2 t3. t1 t2 t3 (t1 t2) t3

n.
    Byte.Bits.toWord
      (Natural.odd n ::
       Byte.Bits.fromWord (Byte.fromNatural (Natural.div n 2))) =
    Byte.fromNatural n

n.
    Word16.Bits.toWord
      (Natural.odd n ::
       Word16.Bits.fromWord (Word16.fromNatural (Natural.div n 2))) =
    Word16.fromNatural n

t1 t2. (if T then t1 else t2) = t1 (if F then t1 else t2) = t2

m n. Natural.* m n = 0 m = 0 n = 0

length [] = 0 h t. length (h :: t) = Natural.suc (length t)

w. b0 b1. w = Word16.fromBytes b0 b1 Word16.toBytes w = (b0, b1)

(t. ¬¬t t) (¬T F) (¬F T)

(a. Char.mkChar (Char.destChar a) = a)
  r. Char.isChar r Char.destChar (Char.mkChar r) = r

(a. Char.mkPlane (Char.destPlane a) = a)
  r. Char.isPlane r Char.destPlane (Char.mkPlane r) = r

(a. Char.mkPosition (Char.destPosition a) = a)
  r. Char.isPosition r Char.destPosition (Char.mkPosition r) = r

(a. mkParser (destParser a) = a)
  r. isParser r destParser (mkParser r) = r

b t1 t2. (if b then t1 else t2) (¬b t1) (b t2)

m n p.
    Natural.* m (Natural.+ n p) = Natural.+ (Natural.* m n) (Natural.* m p)

m n p.
    Natural.* (Natural.+ m n) p = Natural.+ (Natural.* m p) (Natural.* n p)

x. x = Stream.error x = Stream.eof a0 a1. x = Stream.stream a0 a1

p.
    parse (parseSome p) =
    Stream.case Option.none Option.none
      (λa s. if p a then Option.some (a, s) else Option.none)

b f x y. f (if b then x else y) = if b then f x else f y

b f g x. (if b then f else g) x = if b then f x else g x

p e l.
    inverse p e
    parseStream p (Stream.fromList (concat (map e l))) = Stream.fromList l

m n p. Natural.* m n = Natural.* m p m = 0 n = p

m n p. Natural.≤ (Natural.* m n) (Natural.* m p) m = 0 Natural.≤ n p

l n.
    Byte.bit (Byte.Bits.toWord l) n
    Natural.< n Byte.width Natural.< n (length l) nth n l

m n p.
    Natural.< (Natural.* m n) (Natural.* m p) ¬(m = 0) Natural.< n p

(x. replicate 0 x = [])
  n x. replicate (Natural.suc n) x = x :: replicate n x

h1 h2 t1 t2. h1 :: t1 = h2 :: t2 h1 = h2 t1 = t2

x y a b. (x, y) = (a, b) x = a y = b

p e.
    inverse p e
    x s. parse p (Stream.append (e x) s) = Option.some (x, s)

l.
    Byte.Bits.fromWord (Byte.Bits.toWord l) =
    if Natural.≤ (length l) Byte.width then
      l @ replicate (Natural.- Byte.width (length l)) F
    else take Byte.width l

l.
    Word16.Bits.fromWord (Word16.Bits.toWord l) =
    if Natural.≤ (length l) Word16.width then
      l @ replicate (Natural.- Word16.width (length l)) F
    else take Word16.width l

(l. drop 0 l = l) n h t. drop (Natural.suc n) (h :: t) = drop n t

c.
    pl pos.
      Char.isChar (pl, pos) c = Char.mkChar (pl, pos)
      Char.destChar c = (pl, pos)

(b f. Option.case b f Option.none = b)
  b f a. Option.case b f (Option.some a) = f a

p e s.
    strongInverse p e
    Option.case T (λl. Stream.toList s = Option.some (concat (map e l)))
      (Stream.toList (parseStream p s))

(l. [] @ l = l) h t l. (h :: t) @ l = h :: t @ l

(s. Stream.append [] s = s)
  h t s. Stream.append (h :: t) s = Stream.stream h (Stream.append t s)

w.
    (Byte.Bits.toWord (drop 8 (Word16.Bits.fromWord w)),
     Byte.Bits.toWord (take 8 (Word16.Bits.fromWord w))) = Word16.toBytes w

p s.
    parse p s = Option.none
    b s'. parse p s = Option.some (b, s') Stream.isProperSuffix s' s

(l. take 0 l = [])
  n h t. take (Natural.suc n) (h :: t) = h :: take n t

p.
    isParser p
    x xs. Option.case T (λ(y, xs'). Stream.isSuffix xs' xs) (p x xs)

(m. Natural.≤ m 0 m = 0)
  m n. Natural.≤ m (Natural.suc n) m = Natural.suc n Natural.≤ m n

(h t. nth 0 (h :: t) = h)
  h t n. nth (Natural.suc n) (h :: t) = nth n t

t. ((T t) t) ((t T) t) ((F t) ¬t) ((t F) ¬t)

m n q r.
    m = Natural.+ (Natural.* q n) r Natural.< r n
    Natural.div m n = q Natural.mod m n = r

(p. parse p Stream.error = Option.none)
  (p. parse p Stream.eof = Option.none)
  p a s. parse p (Stream.stream a s) = destParser p a s

Byte.Bits.toWord [] = Byte.fromNatural 0
  h t.
    Byte.Bits.toWord (h :: t) =
    if h then
      Byte.+ (Byte.shiftLeft (Byte.Bits.toWord t) 1) (Byte.fromNatural 1)
    else Byte.shiftLeft (Byte.Bits.toWord t) 1

Word16.Bits.toWord [] = Word16.fromNatural 0
  h t.
    Word16.Bits.toWord (h :: t) =
    if h then
      Word16.+ (Word16.shiftLeft (Word16.Bits.toWord t) 1)
        (Word16.fromNatural 1)
    else Word16.shiftLeft (Word16.Bits.toWord t) 1

t. (T t t) (t T t) (F t F) (t F F) (t t t)

t. (T t T) (t T T) (F t t) (t F t) (t t t)

p e.
    strongInverse p e
    inverse p e
    s x s'. parse p s = Option.some (x, s') s = Stream.append (e x) s'

t. (T t t) (t T T) (F t T) (t t T) (t F ¬t)

(f. zipWith f [] [] = [])
  f h1 h2 t1 t2.
    zipWith f (h1 :: t1) (h2 :: t2) = f h1 h2 :: zipWith f t1 t2

b.
    x0 x1 x2 x3 x4 x5 x6 x7.
      b =
      Byte.Bits.toWord (x0 :: x1 :: x2 :: x3 :: x4 :: x5 :: x6 :: x7 :: [])

f p s.
    parse (partialMap f p) s =
    Option.case Option.none
      (λ(b, s'). Option.case Option.none (λc. Option.some (c, s')) (f b))
      (parse p s)

m n p.
    Natural.+ m n = Natural.+ n m
    Natural.+ (Natural.+ m n) p = Natural.+ m (Natural.+ n p)
    Natural.+ m (Natural.+ n p) = Natural.+ n (Natural.+ m p)

l n.
    Byte.shiftRight (Byte.Bits.toWord l) n =
    if Natural.≤ (length l) Byte.width then
      if Natural.≤ (length l) n then Byte.Bits.toWord []
      else Byte.Bits.toWord (drop n l)
    else if Natural.≤ Byte.width n then Byte.Bits.toWord []
    else Byte.Bits.toWord (drop n (take Byte.width l))

(n. Natural.+ 0 n = n) (m. Natural.+ m 0 = m)
  (m n. Natural.+ (Natural.suc m) n = Natural.suc (Natural.+ m n))
  m n. Natural.+ m (Natural.suc n) = Natural.suc (Natural.+ m n)

(q. Byte.Bits.compare q [] [] q)
  q h1 h2 t1 t2.
    Byte.Bits.compare q (h1 :: t1) (h2 :: t2)
    Byte.Bits.compare (¬h1 h2 ¬(h1 ¬h2) q) t1 t2

(q. Word16.Bits.compare q [] [] q)
  q h1 h2 t1 t2.
    Word16.Bits.compare q (h1 :: t1) (h2 :: t2)
    Word16.Bits.compare (¬h1 h2 ¬(h1 ¬h2) q) t1 t2

(e b f. Stream.case e b f Stream.error = e)
  (e b f. Stream.case e b f Stream.eof = b)
  e b f a s. Stream.case e b f (Stream.stream a s) = f a s

pb pc s.
    parse (parsePair pb pc) s =
    Option.case Option.none
      (λ(b, s').
         Option.case Option.none (λ(c, s''). Option.some ((b, c), s''))
           (parse pc s')) (parse pb s)

p q r.
    (p q q p) ((p q) r p q r) (p q r q p r)
    (p p p) (p p q p q)

(n. Natural.* 0 n = 0) (m. Natural.* m 0 = 0)
  (n. Natural.* 1 n = n) (m. Natural.* m 1 = m)
  (m n. Natural.* (Natural.suc m) n = Natural.+ (Natural.* m n) n)
  m n. Natural.* m (Natural.suc n) = Natural.+ m (Natural.* m n)

pl pos.
    Char.isChar (pl, pos)
    let pli = Char.destPlane pl in
    let posi = Char.destPosition pos in
    ¬(pli = Byte.fromNatural 0)
    Word16.< posi (Word16.fromNatural 55296)
    Word16.< (Word16.fromNatural 57343) posi
    Word16.< posi (Word16.fromNatural 65534)