deep copy
# let orig = Array.init 8 (fun _ -> Array.create 8 "x");; val orig : string array array = [|[|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]|]
こんな配列を作っていて、これの複製を作りたい、かつその複製に対して変更を加える場合、Array.copyがそれらしく見えるものの、shallow copyなので複製に対する変更が元のarrayに影響する。
# let clone = Array.copy orig;; val clone : string array array = [|[|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]|] # clone.(0).(0) <- "o";; - : unit = () # orig;; - : string array array = [|[|"o"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]|] #
普段、RubyではMarshal.load(Marshal.dump xxx)としているので、同じことがOCamlではできまいか?と思って
# let orig = Array.init 8 (fun _ -> Array.create 8 "x");; val orig : string array array = [|[|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]|] # let clone = (Marshal.from_string (Marshal.to_string orig []) 0 : string array array);; val clone : string array array = [|[|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]|] # clone.(0).(0) <- "o";; - : unit = () # orig;; - : string array array = [|[|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]; [|"x"; "x"; "x"; "x"; "x"; "x"; "x"; "x"|]|] #
でけた
B-Tree
B-Treeアルゴリズムを試してみました。
http://github.com/komamitsu/ocaml-b_tree
B-Treeというのは多分木のアルゴリズムで、木の高さを低く保つことができてキーのバランスが良いので、結構DBのインデックスにそれ系統のやつが使われていてDisk I/O減らせてウマー、とのこと。
# 恥ずかしながら、少し前まで二分木のことかと勘違いしていたのは内緒…
使い方は下記のような感じです。複数のkey-valueを登録してから、全keyについてvalueを取り出しています。木の数を変えつつ。
#use "b_tree.ml"

(* Instantiate the B-tree functor with int keys and string values;
   the debug hook is a no-op for this test. *)
module IntBTree =
  BTreeMake (struct
    type k = int
    type v = string
    let compare a b = a - b
    let debug = fun () -> ()
  end)

let _ =
  (* Insert [rec_num] random key/value pairs into a B-tree whose pages hold
     [page_size] keys, then verify every inserted key is found again. *)
  let run_case page_size rec_num =
    Random.self_init ();
    (* Grow the tree one random key at a time, remembering each key. *)
    let rec populate remaining page seen_keys =
      if remaining <= 0 then (page, seen_keys)
      else
        let key = Random.int max_int in
        let value = Printf.sprintf "_%d_" key in
        let page' = IntBTree.insert page key value in
        populate (remaining - 1) page' (key :: seen_keys)
    in
    let page, seen_keys = populate rec_num (IntBTree.create_page page_size) [] in
    (* Look every key up and check its stored value round-trips. *)
    let rec verify = function
      | [] -> ()
      | key :: rest ->
        let expected_value = Printf.sprintf "_%d_" key in
        (match IntBTree.find page key with
         | Some v -> assert (expected_value = v); verify rest
         | None -> failwith "not found")
    in
    verify seen_keys
  in
  run_case 2 1000;
  run_case 20 1000;
  run_case 100 10000
とはいえ、これでは肝心の木の低さが判らないので、上記のテストコードを少し変更して、findで木を降りていく度に文字列を表示させてみる。
module IntBTree = BTreeMake(struct type k = int type v = string let compare a b = a - b let debug = fun () -> print_endline "hoge" end) : : let key = List.nth cap_keys 5000 in let expected_value = Printf.sprintf "_%d_" key in match IntBTree.find page key with | Some v -> assert (expected_value = v) | None -> failwith "not found" in _test 80 100000
実行させてみると
komamitsu@garlic:~/git/ocaml-b_tree$ ocaml test_b_tree.ml hoge hoge hoge komamitsu@garlic:~/git/ocaml-b_tree$
80分木の場合、100000件が深さ3の木に収まっているそうな。どっとわらい。
隠れパターンマッチとアロケートのコスト
http://rwmj.wordpress.com/2009/08/06/ocaml-internals-part-3-the-minor-heap/ を見ていて興味深かったコードがあったのでコピペして試してみた。
#load "unix.cma"

(* Tuple identity via pattern match: deconstructs the pair and allocates a
   fresh tuple on every call. *)
let f1 (a, b) = (a, b)
(* Same type as f1, but returns the argument untouched — no allocation. *)
let f2 (x : ('a * 'b)) = x
(* Fully polymorphic identity, added for comparison. *)
let f3 x = x

(* Apply [f] to [args] a total of [n] times, discarding each result. *)
let rec repeat n f args =
  if n <= 0 then ()
  else
    let _ = f args in
    repeat (n - 1) f args

(* Run [f] once and print its wall-clock duration, labelled [name]. *)
let time name f =
  let t0 = Unix.gettimeofday () in
  f ();
  let t1 = Unix.gettimeofday () in
  Printf.printf "%s: %f sec\n" name (t1 -. t0)

let _ =
  let iterations = 100000000 in
  time "f1" (fun _ -> repeat iterations f1 (1, 2));
  time "f2" (fun _ -> repeat iterations f2 (1, 2));
  time "f3" (fun _ -> repeat iterations f3 (1, 2))
関数f1はパターンマッチとタプルのアロケートが発生してしまうらしいメタボなコード、関数f2は同等の型を持つがf1のような内部的な無駄な処理が発生しないもの。関数f3は何となく追加してみたもの。
実行させてみると…
:!ocaml hoge.ml f1: 21.974001 sec f2: 18.987098 sec f3: 19.007618 sec
なるほど、f1の方が多少遅い。多少ね…
まぁ、性能を支配しているコードでは注意した方が良いのかもしれない。
A beginners guide to OCaml internals
http://rwmj.wordpress.com/2009/08/04/ocaml-internals/
(http://d.hatena.ne.jp/camlspotter/20090805/1249476309 経由)
実装について基本的なところを知りたかったので助かるなぁ。
Article was good but fish face at the top scared the shit out of me!
同意。
bench! bench! bench!
Skew Binary Random-Access Lists - komamitsu.logのベンチマークをとりたいなぁ、と思っていたところ、OCamlにBenchmarkというそのものズバリなライブラリがあったので試してみました。
# ちなみに件のコードは MIT License に.
aptでのインストールは以下
$sudo aptitude install libbenchmark-ocaml-dev
で、ベンチマーク用のコードは以下. List, Array, Skew binary random access listでランダムアクセスさせまくりです。
let max_size = 1000000

(* Right fold over the indices [n .. 1], threading an accumulator. *)
let rec build f n acc =
  if n <= 0 then acc else build f (n - 1) (f n acc)

(* Plain list holding 1 .. max_size (head is 1). *)
let normal_list = build (fun x l -> x :: l) max_size []

module RList = SkewBinaryRandomAccessList

(* Skew binary random-access list with the same contents. *)
let skew_binary_rlist = build (fun x l -> RList.cons x l) max_size RList.empty

(* Array holding 0 .. max_size - 1. *)
let normal_array = Array.init max_size (fun i -> i)

let _ = Random.self_init ()

(* A random valid index into each container. *)
let rand () = Random.int (max_size - 1)

(* One random lookup per container, each taking the index as argument. *)
let bench_list i = List.nth normal_list i
let bench_rlist i = RList.lookup i skew_binary_rlist
let bench_array i = normal_array.(i)

let _ =
  (* Measure 1000 lookups at a fresh random index per container, then
     print the comparison table. *)
  let results =
    Benchmark.latencyN 1000
      [ ("normal list ", bench_list, rand ());
        ("skew bin rlist", bench_rlist, rand ());
        ("normal array ", bench_array, rand ()) ]
  in
  Benchmark.tabulate results
で、これをコンパイル.
ocamlfind c -package benchmark -linkpkg -o bench skewBinaryRandomAccessList.cmo bench.ml
さて、いよいよ実行です。
komamitsu@potato:~/git/ocaml-skew-binary-random-access-list$ ./bench Latencies for 1000 iterations of normal list , skew bin rlist, normal array : normal list : 83 WALL (66.46 usr + 0.22 sys = 66.68 CPU) @ 15.00/s (n=1000) skew bin rlist: 0 WALL ( 0.01 usr + 0.00 sys = 0.01 CPU) @ 125000.00/s (n=1000) (warning: too few iterations for a reliable count) normal array : 0 WALL ( 0.00 usr + 0.00 sys = 0.00 CPU) (warning: too few iterations for a reliable count) Rate normal list skew bin rlist normal array normal list 15.0/s -- -100% -100% skew bin rlist 125000/s 833352% -- -100% normal array 999999999999999872/s 6667616699999998976% 799999999999556% --
やはり、Array > SkewBinaryRandomAccessList > List という性能差がはっきり出ているなぁ。
Listを除いて、さらに負荷をかけてみる(n = 3000000)と…
Latencies for 1000000 iterations of skew bin rlist, normal array : skew bin rlist: 10 WALL ( 9.57 usr + 0.04 sys = 9.61 CPU) @ 104030.12/s (n=1000000) normal array : 0 WALL ( 0.02 usr + 0.00 sys = 0.02 CPU) @ 49997500.12/s (n=1000000) (warning: too few iterations for a reliable count) Rate skew bin rlist normal array skew bin rlist 104030/s -- -100% normal array 49997500/s 47961% --
n = 3000000 なので、Array = O(1), SkewBinaryRandomAccessList = O(min(i, log(n))), List = O(n) の最悪の場合の計算量をざっくり見積もると、それぞれ 1, 15, 3000000位。
う〜ん、それよりも実測値のほうが差が大きい気がするなぁ。
まぁ、やっぱりArrayは速いし、SkewBinaryRandomAccessListもそこそこ速い。
Skew Binary Random-Access Lists
Linuxの環境整備に熱中していて、すっかり忘れていたけれど、http://www.cs.cmu.edu/~rwh/theses/okasaki.pdf の 6.4.1章について。
これは何かというと、Skew Binary Numbers - komamitsu.logとBinary Random-Access List - komamitsu.logを合わせたランダムアクセス向けリストです。
もの凄くあっさりと言うと以下のような特徴を持っています。
- 二分木の要素数(Node含む)がSkew Binary Numberとなる
- 繰り上げ・繰り下げの際の挙動もSkew Binary Numberと同じとなる
- ランダムアクセス時の計算量はO(min(i, log n))でBinary Random-Access Listより少し性能が良いとのこと
内部的な構造について例をあげると…
- [100; 101] => [(1, Leaf 101); (1, Leaf 100)]
- [100; 101; 102] => [(3, Node (102, Leaf 101, Leaf 100))]
- [100; 101; 102; 103] => [(1, Leaf 103); (3, Node (102, Leaf 101, Leaf 100))]
- [100; 101; 102; 103; 104; 105] => [(3, Node (105, Leaf 104, Leaf 103)); (3, Node (102, Leaf 101, Leaf 100))]
というように、要素を保持していきます(当然、詳細は上記のPDF参照)。
で、いつもの如くこれを実装してみました => http://github.com/komamitsu/ocaml-skew-binary-random-access-list
どうも元の論文(79P: line 23 & 29)のStandardMLのコードが微妙らしく、移植の際には以下のように変更しています。
if i < w div 2 then lookupTree (w div 2, t1, i - 1) ↓ if i - 1 < w div 2 then lookupTree (w div 2, t1, i - 1)
で、これの使い方の例はこんな感じです.
open Printf

module M = SkewBinaryRandomAccessList

(* Print a labelled integer result on its own line. *)
let p s i = printf "%s : %d\n" s i

let _ =
  let elems = [100; 101; 102; 103; 104; 105; 106; 107; 108; 109] in
  (* Cons each element in turn, so the last element ends up at the head. *)
  let rl = List.fold_left (fun acc x -> M.cons x acc) M.empty elems in
  (* Random access at several positions (lookup is 0-based). *)
  ignore (p "the head of rl" (M.head rl));
  ignore (p "the 2nd of rl" (M.lookup (2 - 1) rl));
  ignore (p "the 5th of rl" (M.lookup (5 - 1) rl));
  ignore (p "the 10th of rl" (M.lookup (10 - 1) rl));
  (* Dropping the head exposes the second element. *)
  let the_tail_of_rl = M.tail rl in
  ignore (p "the 2nd of rl via tail" (M.head the_tail_of_rl));
  (* Functional update returns a new list; the original is untouched. *)
  let rl_replaced_the_1st_with_99 = M.update (1 - 1) 99 rl in
  ignore (p "the head of updated rl" (M.head rl_replaced_the_1st_with_99));
  let rl_replaced_the_9th_with_999 = M.update (9 - 1) 999 rl in
  ignore (p "the 9th of updated rl" (M.lookup (9 - 1) rl_replaced_the_9th_with_999))
これを実行してみると
komamitsu@potato:~/git/ocaml-skew-binary-random-access-list$ ocamlc -o sample skewBinaryRandomAccessList.cmo sample.ml komamitsu@potato:~/git/ocaml-skew-binary-random-access-list$ ./sample the head of rl : 109 the 2nd of rl : 108 the 5th of rl : 105 the 10th of rl : 100 the 2nd of rl via tail : 108 the head of updated rl : 99 the 9th of updated rl : 999
と、きちんと動作しているようです。
後日、暇があったらListとかと性能比較をしてみたいなぁ、なんて。