Problem 50

(難易度 🌟🌟🌟)ハフマン符号

記号とその出現頻度の集合が、fr(S,F) という形の項のリストで与えられているとする。

例:[fr(a,45), fr(b,13), fr(c,12), fr(d,16), fr(e,9), fr(f,5)]

目的は、hc(S,C) という形の項のリストを構成することである。ここで C は記号 S に対するハフマン符号語を表す。

def orderedInsert {α : Type} [Ord α] (a : α) : List α → List α
  | [] => [a]
  | b :: l =>
    match compare a b with
    | .lt => a :: b :: l
    | _ => b :: orderedInsert a l

/-- 挿入ソート -/
def insertionSort {α : Type} [Ord α] : List α → List α
  | [] => []
  | b :: l => orderedInsert b (insertionSort l)

-- これを使ってよい
#check insertionSort


/-- ハフマン木 -/
inductive HuffTree where
  | node (left : HuffTree) (right : HuffTree) (weight : Nat)
  | leaf (c : Char) (weight : Nat)
deriving Inhabited, Repr

def HuffTree.weight : HuffTree → Nat
  | leaf _ w => w
  | node _ _ w => w

def HuffTree.compare (s s' : HuffTree) : Ordering :=
  let w := s.weight
  let w' := s'.weight
  Ord.compare w w'

instance : Ord HuffTree where
  compare := HuffTree.compare

def HuffTree.sort (trees : List HuffTree) : List HuffTree := insertionSort trees


def String.freq (s : String) (c : Char) := s.data.filter (· == c) |>.length

def String.toLeaves (s : String) : List HuffTree :=
  let allChars : List Char := s.data.eraseDups
  allChars.map fun c => HuffTree.leaf c (s.freq c)

partial def HuffTree.merge (trees : List HuffTree) : List HuffTree :=
  let sorted := HuffTree.sort trees
  match sorted with
  | [] => []
  | [tree] => [tree]
  | t1 :: t2 :: rest =>
    let t' := HuffTree.node t1 t2 (t1.weight + t2.weight)
    HuffTree.merge (t' :: rest)

abbrev Code := String

def HuffTree.encode (c : Char) : HuffTree → Option Code
  | .leaf c' _ => if c = c' then some "" else none
  | .node l r _w =>
    match l.encode c, r.encode c with
    | none, some s => some ("1" ++ s)
    | some s, none => some ("0" ++ s)
    | _, _ => none


def huffman (input : List (Char × Nat)) : List (Char × Code) :=
  let leaves : List HuffTree := input.map (fun (c, w) => HuffTree.leaf c w)
  let tree : HuffTree := HuffTree.merge leaves |>.head!
  input.map (fun (c, _) => (c, tree.encode c |>.get!))

#guard huffman [('a', 45),('b', 13),('c', 12),('d', 16),('e', 9),('f', 5)] =
  [('a', "0"),('b', "101"),('c', "100"),('d', "111"),('e', "1101"),('f', "1100")]

#guard huffman [('B', 7),('C', 6),('A', 3),('D', 1),('E', 1),] =
  [('B', "0"), ('C', "11"), ('A', "101"), ('D', "1001"), ('E', "1000")]