deep copy

OCaml

# let orig = Array.init 8 (fun _ -> Array.create 8 "x");;
val orig : string array array =
  [|[|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]|]

こんな配列を作っていて、これの複製を作りたい、かつその複製に対して変更を加える場合、Array.copyがそれらしく見えるものの、shallow copyなので複製に対する変更が元のarrayに影響する。

# let clone = Array.copy orig;;
val clone : string array array =
  [|[|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]|]
# clone.(0).(0) <- "o";;
- : unit = ()
# orig;;
- : string array array =
[|[|"o"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]|]
#

普段、RubyではMarshal.load(Marshal.dump xxx)としているので、同じことがOCamlではできまいか?と思って

# let orig = Array.init 8 (fun _ -> Array.create 8 "x");;
val orig : string array array =
  [|[|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]|]
# let clone = (Marshal.from_string (Marshal.to_string orig []) 0 : string array array);;
val clone : string array array =
  [|[|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
    [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]|]
# clone.(0).(0) <- "o";;
- : unit = ()
# orig;;
- : string array array =
[|[|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|];
  [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]|]
#

でけた

言語作成とか

このところocamlyacc/ocamllexで簡単なインタープリターを作ろうとしていたのだけど、どうも萌えてこないので、適当な所でやめておいた。

これは確固たる目的(xxxのために新しい言語が必要とか)が無いから萌えてこないのかなぁ、と思ったけれどアルゴリズム系だと特に目的が無くても面白いので、要は言語作成に向いていないのであろうか、と思った今日この頃。

B-Tree

B-Treeアルゴリズムを試してみました。

http://github.com/komamitsu/ocaml-b_tree

B-Treeというのは多分木のアルゴリズムで、木の高さを低く保つことができてキーのバランスが良いので、結構DBのインデックスにそれ系統のやつが使われていてDisk I/O減らせてウマー、とのこと。

# 恥ずかしながら、少し前まで二分木のことかと勘違いしていたのは内緒…

使い方は下記のような感じです。複数のkey-valueを登録してから、全keyについてvalueを取り出しています。木の数を変えつつ。

#use "b_tree.ml"

module IntBTree = BTreeMake(struct   
                              type k = int
                              type v = string
                              let compare a b = a - b
                              let debug = fun () -> ()
                            end)

let _ =
  let _test page_size rec_num =
    Random.self_init ();
    let rec _create_page n page cap_keys =
      if n <= 0 then (page, cap_keys)
      else 
        let key = Random.int max_int in
        let value = Printf.sprintf "_%d_" key in
        (*
        let value =
          if n = 500 then Printf.sprintf "_%d_" (key + 1)
          else Printf.sprintf "_%d_" (key + 1) in
        *)
        let new_page = IntBTree.insert page key value in
        _create_page (n - 1) new_page (key::cap_keys)
    in
    let page, cap_keys =
      _create_page rec_num (IntBTree.create_page page_size) [] in
    let rec _assert = function
      | [] -> ()
      | hd::tl ->
          let expected_value = Printf.sprintf "_%d_" hd in
          match IntBTree.find page hd with
          | Some v ->
              assert (expected_value = v);
              _assert tl
          | None -> failwith "not found"
    in
    _assert cap_keys
  in
  _test    2   1000;
  _test   20   1000;
  _test  100  10000

とはいえ、これでは肝心の木の低さが判らないので、上記のテストコードを少し変更して、findで木を降りていく度に文字列を表示させてみる。

module IntBTree = BTreeMake(struct   
                              type k = int
                              type v = string
                              let compare a b = a - b
                              let debug = fun () -> print_endline "hoge"
                            end)
    :
    :
    let key = List.nth cap_keys 5000 in
    let expected_value = Printf.sprintf "_%d_" key in
    match IntBTree.find page key with
    | Some v ->
        assert (expected_value = v)  
    | None -> failwith "not found"   
  in
  _test 80 100000

実行させてみると

komamitsu@garlic:~/git/ocaml-b_tree$ ocaml test_b_tree.ml
hoge
hoge
hoge
komamitsu@garlic:~/git/ocaml-b_tree$

80分木の場合、100000件が深さ3の木に収まっているそうな。どっとわらい。

隠れパターンマッチとアロケートのコスト

http://rwmj.wordpress.com/2009/08/06/ocaml-internals-part-3-the-minor-heap/ を見ていて興味深かったコードがあったのでコピペして試してみた。

#load "unix.cma"

let f1 (a, b) = (a, b)
let f2 (x : ('a * 'b)) = x
let f3 x = x

let rec loop n f args =
  if n <= 0 then ()
  else
    let _ = f args in
    loop (n - 1) f args

let time name f =
   let start = Unix.gettimeofday () in
   f ();
   let stop = Unix.gettimeofday () in
   Printf.printf "%s: %f sec\n" name (stop -. start)

let _ =
   let loop_count = 100000000 in
   time "f1" (fun _ -> loop loop_count f1 (1, 2));
   time "f2" (fun _ -> loop loop_count f2 (1, 2));
   time "f3" (fun _ -> loop loop_count f3 (1, 2))

関数f1はパターンマッチとタプルのアロケートが発生してしまうらしいメタボなコード、関数f2は同等の型を持つがf1のような内部的な無駄な処理が発生しないもの。関数f3は何となく追加してみたもの。

実行させてみると…

:!ocaml hoge.ml
f1: 21.974001 sec
f2: 18.987098 sec
f3: 19.007618 sec

なるほど、f1の方が多少遅い。多少ね…
まぁ、性能を支配しているコードでは注意した方が良いのかもしれない。

A beginners guide to OCaml internals

http://rwmj.wordpress.com/2009/08/04/ocaml-internals/
(http://d.hatena.ne.jp/camlspotter/20090805/1249476309 経由)

実装について基本的なところを知りたかったので助かるなぁ。

Article was good but fish face at the top scared the shit out of me!

同意。

bench! bench! bench!

Skew Binary Random-Access Lists - komamitsu.logベンチマークをとりたいなぁ、と思っていたところ、OCamlにBenchmarkというそのものズバリなライブラリがあったので試してみました。

# ちなみに件のコードは MIT License に.


aptでのインストールは以下

$sudo aptitude install libbenchmark-ocaml-dev


で、ベンチマーク用のコードは以下. List, Array, Skew binary random access listでランダムアクセスさせまくりです。

let max_size = 1000000

let rec loop f n ac =
  if n <= 0 then ac else loop f (n - 1) (f n ac)

let normal_list =
  loop (fun x l -> x::l) max_size []

module RList = SkewBinaryRandomAccessList
let skew_binary_rlist =
  loop (fun x l -> RList.cons x l) max_size RList.empty

let normal_array = 
  Array.init max_size (fun i -> i)

let _ = Random.self_init ()
let rand () = Random.int (max_size - 1)
let bench_list i = List.nth normal_list i
let bench_rlist i = RList.lookup i skew_binary_rlist
let bench_array i = normal_array.(i)

let _ =
  let res = Benchmark.latencyN 1000 [
    ("normal list   ", bench_list, rand ()); 
    ("skew bin rlist", bench_rlist, rand ());
    ("normal array  ", bench_array, rand ())
  ] in
  Benchmark.tabulate res

で、これをコンパイル.

ocamlfind c -package benchmark -linkpkg -o bench skewBinaryRandomAccessList.cmo bench.ml


さて、いよいよ実行です。

komamitsu@potato:~/git/ocaml-skew-binary-random-access-list$ ./bench 
Latencies for 1000 iterations of normal list   , skew bin rlist, normal array  :
normal list   : 83 WALL (66.46 usr +  0.22 sys = 66.68 CPU) @ 15.00/s (n=1000)
skew bin rlist:  0 WALL ( 0.01 usr +  0.00 sys =  0.01 CPU) @ 125000.00/s (n=1000)
            (warning: too few iterations for a reliable count)
normal array  :  0 WALL ( 0.00 usr +  0.00 sys =  0.00 CPU)
            (warning: too few iterations for a reliable count)
                               Rate       normal list      skew bin rlist normal array  
normal list                  15.0/s                   --            -100%          -100%
skew bin rlist             125000/s              833352%               --          -100%
normal array   999999999999999872/s 6667616699999998976% 799999999999556%             --

やはり、Array > SkewBinaryRandomAccessList > List という性能差がはっきり出ているなぁ。

Listを除いて、さらに負荷をかけてみる(n = 3000000)と…

Latencies for 1000000 iterations of skew bin rlist, normal array  :
skew bin rlist: 10 WALL ( 9.57 usr +  0.04 sys =  9.61 CPU) @ 104030.12/s (n=1000000)
normal array  :  0 WALL ( 0.02 usr +  0.00 sys =  0.02 CPU) @ 49997500.12/s (n=1000000)
            (warning: too few iterations for a reliable count)
                     Rate skew bin rlist normal array
skew bin rlist   104030/s             --          -100%
normal array   49997500/s         47961%             --

n = 3000000 なので、Array = O(1), SkewBinaryRandomAccessList = O(min(i, log(n)), List = O(n) の最悪の場合の計算量をざっくり見積もると、それぞれ 1, 15, 3000000位。

う〜ん、それよりも実測値のほうが差が大きい気がするなぁ。
まぁ、やっぱりArrayは速いし、SkewBinaryRandomAccessListもそこそこ速い。

Skew Binary Random-Access Lists

Linuxの環境整備に熱中していて、すっかり忘れていたけれど、http://www.cs.cmu.edu/~rwh/theses/okasaki.pdf の 6.4.1章について。


これは何かというと、Skew Binary Numbers - komamitsu.logBinary Random-Access List - komamitsu.logを合わせたランダムアクセス向けリストです。


もの凄くあっさりと言うと以下のような特徴を持っています。

  • 二分木の要素数(Node含む)がSkew Binary Numberとなる
  • 繰り上げ・繰り下げの際の挙動もSkew Binary Numberと同じとなる
  • ランダムアクセス時の計算量はO(min(i, log n))でBinary Random-Access Listより少し性能が良いとのこと

内部的な構造について例をあげると…

  • [100; 101] => [(1, Leaf 101); (1, Leaf 100)]
  • [100; 101; 102] => [(3, Node (102, Leaf 101, Leaf 100))]
  • [100; 101; 102; 103] => [(1, Leaf 103); (3, Node (102, Leaf 101, Leaf 100))]
  • [100; 101; 102; 103; 104; 105] => [(3, Node (105, Leaf 104, Leaf 103)); (3, Node (102, Leaf 101, Leaf 100))]

というように、要素を保持していきます(当然、詳細は上記のPDF参照)。


で、いつもの如くこれを実装してみました => http://github.com/komamitsu/ocaml-skew-binary-random-access-list


どうも元の論文(79P: line 23 & 29)のStandardMLのコードが微妙らしく、移植の際には以下のように変更しています。

if i < w div 2 then lookupTree (w div 2, t1, i - 1)
         ↓
if i - 1 < w div 2 then lookupTree (w div 2, t1, i - 1)

で、これの使い方の例はこんな感じです.

open Printf

module M = SkewBinaryRandomAccessList

let p s i = printf "%s : %d\n" s i

let _ =
  let l = [100; 101; 102; 103; 104; 105; 106; 107; 108; 109] in
  let rl = List.fold_left (fun rl x -> M.cons x rl) M.empty l in
  ignore (p "the head of rl" (M.head rl));
  ignore (p "the 2nd of rl" (M.lookup (2 - 1) rl));
  ignore (p "the 5th of rl" (M.lookup (5 - 1) rl));
  ignore (p "the 10th of rl" (M.lookup (10 - 1) rl));
  let the_tail_of_rl = M.tail rl in
  ignore (p "the 2nd of rl via tail" (M.head the_tail_of_rl));
  let rl_replaced_the_1st_with_99 = M.update (1 - 1) 99 rl in
  ignore (p "the head of updated rl" (M.head rl_replaced_the_1st_with_99));
  let rl_replaced_the_9th_with_999 = M.update (9 - 1) 999 rl in
  ignore (p "the 9th of updated rl" (M.lookup (9 - 1) rl_replaced_the_9th_with_999))

これを実行してみると

komamitsu@potato:~/git/ocaml-skew-binary-random-access-list$ ocamlc -o sample skewBinaryRandomAccessList.cmo sample.ml
komamitsu@potato:~/git/ocaml-skew-binary-random-access-list$ ./sample 
the head of rl : 109
the 2nd of rl : 108
the 5th of rl : 105
the 10th of rl : 100
the 2nd of rl via tail : 108
the head of updated rl : 99
the 9th of updated rl : 999

と、きちんと動作しているようです。

後日、暇があったらListとかと性能比較をしてみたいなぁ、なんて。