Skip to content

Commit

Permalink
Merge remote-tracking branch 'opt/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
jfeser committed May 9, 2022
2 parents 01b5241 + 1cb4cf3 commit f950d1c
Show file tree
Hide file tree
Showing 34 changed files with 6,471 additions and 34 deletions.
24 changes: 0 additions & 24 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,31 +1,7 @@
.ipynb_checkpoints/

*.annot
*.cmo
*.cma
*.cmi
*.a
*.o
*.cmx
*.cmxs
*.cmxa

# ocamlbuild working directory
_build/

# ocamlbuild targets
*.byte
*.native

# oasis generated files
setup.data
setup.log

# Merlin configuring file for Vim and Emacs
.merlin

*.install

config-*sh
config.ini
_coverage/
Expand Down
2 changes: 1 addition & 1 deletion .ocamlformat
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
margin = 85
break-cases=fit
profile=conventional
profile=conventional
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright 2018 John K. Feser
Copyright 2022 John K. Feser

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

Expand Down
29 changes: 26 additions & 3 deletions bin/dune
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,29 @@
(pps ppx_sexp_conv ppx_let))
(modules combine))

; Local Variables:
; mode: tuareg-dune
; End:
(executable
(name opt)
(public_name opt.exe)
(libraries core core_unix.command_unix castor castor_opt logs logs.fmt fmt
fmt.tty)
(preprocess
(pps ppx_sexp_conv ppx_let ppx_sexp_conv ppx_compare ppx_hash))
(modules opt))

(executable
(name xform)
(public_name xform.exe)
(libraries core core_unix.command_unix castor castor_opt logs logs.fmt fmt
fmt.tty)
(preprocess
(pps ppx_sexp_conv ppx_let ppx_sexp_conv ppx_compare ppx_hash))
(modules xform))

(executable
(name sql)
(public_name sql.exe)
(libraries core core_unix.command_unix castor castor_opt logs logs.fmt fmt
fmt.tty)
(preprocess
(pps ppx_sexp_conv ppx_let ppx_sexp_conv ppx_compare ppx_hash))
(modules sql))
117 changes: 117 additions & 0 deletions bin/explore.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
open Core
open Castor

module Node = struct
module T = struct
type t = Abslayout.t * int [@@deriving compare, hash, sexp_of]
end

include T
include Comparator.Make (T)
end

module Edge = struct
module T = struct
type t = Ok of Node.t * Node.t * string | Err of Node.t * string
[@@deriving compare, hash, sexp_of]
end

include T
include Comparator.Make (T)
end

let choose ls = List.nth_exn ls (Random.int (List.length ls))
let choose_set ls = Set.nth ls (Random.int (Set.length ls))

let main ~params ~db ch =
let params =
List.map params ~f:(fun (n, t) -> Name.create ~type_:t n)
|> Set.of_list (module Name.Compare_no_type)
in
let module Config = struct
let conn = Db.create db
let params = params
let check_transforms = true
end in
let module A = Abslayout_db.Make (Config) in
let module T = Transform.Make (Config) (A) () in
let query_str = In_channel.input_all ch in
let query = Abslayout.of_string_exn query_str |> A.resolve ~params in
let explore ?(max_nodes = 10000) query =
let tfs =
List.filter_map T.transforms ~f:(fun (_, tf) ->
try Some (tf []) with _ -> None)
in
let edges = ref (Set.empty (module Edge)) in
let nodes = ref (Set.singleton (module Node) (query, 0)) in
let rec loop () =
if Set.length !nodes > max_nodes then ()
else
match choose_set !nodes with
| Some ((query, _) as n) ->
let tf = choose tfs in
(try
match T.run tf query with
| [] -> ()
| ls ->
let n' = (choose ls, Set.length !nodes) in
edges := Set.add !edges (Ok (n, n', tf.name));
nodes := Set.add !nodes n'
with _ -> edges := Set.add !edges (Err (n, tf.name)));
loop ()
| None -> ()
in
loop ();
printf "digraph {";
Set.to_sequence !edges
|> Sequence.iter ~f:(function
| Edge.Err ((_, i), name) ->
printf "%d -> err [label=\"%s\"];\n" i name
| Ok ((_, i1), (_, i2), name) ->
printf "%d -> %d [label=\"%s\"];\n" i1 i2 name);
printf "}"
in
explore query

let reporter ppf =
let report _ level ~over k msgf =
let k _ =
over ();
k ()
in
let with_time h _ k ppf fmt =
let time = Core.Time.now () in
Format.kfprintf k ppf
("%a [%s] @[" ^^ fmt ^^ "@]@.")
Logs.pp_header (level, h) (Core.Time.to_string time)
in
msgf @@ fun ?header ?tags fmt -> with_time header tags k ppf fmt
in
{ Logs.report }

let () =
Logs.set_reporter (reporter Format.err_formatter);
let open Command in
let open Let_syntax in
Logs.info (fun m ->
m "%s" (Sys.argv |> Array.to_list |> String.concat ~sep:" "));
basic ~summary:"Compile a query."
(let%map_open verbose =
flag "verbose" ~aliases:[ "v" ] no_arg ~doc:"increase verbosity"
and quiet = flag "quiet" ~aliases:[ "q" ] no_arg ~doc:"decrease verbosity"
and db =
flag "db" (required string) ~doc:"CONNINFO the database to connect to"
and params =
flag "param" ~aliases:[ "p" ] (listed Util.param)
~doc:"NAME:TYPE query parameters"
and ch =
anon (maybe_with_default In_channel.stdin ("query" %: Util.channel))
in
fun () ->
if verbose then Logs.set_level (Some Logs.Debug)
else if quiet then Logs.set_level (Some Logs.Error)
else Logs.set_level (Some Logs.Info);
Logs.info (fun m ->
m "%s" (Sys.argv |> Array.to_list |> String.concat ~sep:" "));
main ~params ~db ch)
|> run
170 changes: 170 additions & 0 deletions bin/opt.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
open! Core
open Castor
open Collections
open Castor_opt
open Abslayout_load
module A = Abslayout

let dump fn r =
Out_channel.with_file fn ~f:(fun ch ->
Fmt.pf (Format.formatter_of_out_channel ch) "%a" Abslayout.pp r)

(** Run a command and return its output on stdout, logging it if it fails. *)
let command_out cmd =
let open Or_error.Let_syntax in
let ch = Core_unix.open_process_in cmd in
let out = In_channel.input_all ch in
let%map () =
Core_unix.Exit_or_signal.or_error (Core_unix.close_process_in ch)
|> Or_error.tag ~tag:cmd
in
out

let system_exn cmd =
match Core_unix.system cmd with
| Ok () -> ()
| Error (`Exit_non_zero code) ->
failwith @@ sprintf "Command '%s' exited with code %d" cmd code
| Error (`Signal signal) ->
failwith
@@ sprintf "Command '%s' terminated by signal %s" cmd
(Signal.to_string signal)

let opt conn cost_conn params cost_timeout state query =
let module Config = struct
let conn = conn
let cost_conn = cost_conn
let params = params
let cost_timeout = cost_timeout
let random = state
end in
let module T = Transform.Make (Config) in
match Transform.optimize (module Config) query with
| First opt_query ->
if is_ok @@ T.is_serializable opt_query then Some opt_query
else (
Logs.warn (fun m -> m "Not serializable:@ %a" A.pp opt_query);
None)
| Second failed_subquery ->
Logs.warn (fun m ->
m "Optimization failed for subquery:@ %a" A.pp failed_subquery);
None

let eval dir params query =
let open Result.Let_syntax in
Logs.info (fun m -> m "Evaluating:@ %a" A.pp query);

(* Set up the output directory. *)
system_exn @@ sprintf "rm -rf %s" dir;
system_exn @@ sprintf "mkdir -p %s" dir;
let query_fn = sprintf "%s/query.txt" dir in
dump query_fn query;

(* Try to build the query. *)
let%bind () =
let compile_cmd =
let params =
List.map params ~f:(fun (n, t, _) ->
Fmt.str "-p %s:%a" n Prim_type.pp t)
|> String.concat ~sep:" "
in
sprintf
"$CASTOR_ROOT/../_build/default/castor/bin/compile.exe -o %s %s %s > \
%s/compile.log 2>&1"
dir params query_fn dir
in
let%map out = command_out compile_cmd in
Logs.info (fun m -> m "Compile output: %s" out)
in

(* Try to run the query. *)
let%map run_time =
let run_cmd =
let params =
List.map params ~f:(fun (_, _, v) -> sprintf "'%s'" @@ Value.to_param v)
|> String.concat ~sep:" "
in
sprintf "%s/scanner.exe -t 1 %s/data.bin %s" dir dir params
in
let%map out = command_out run_cmd in
let time, _ = String.lsplit2_exn ~on:' ' out in
String.rstrip ~drop:Char.is_alpha time |> Float.of_string
in

run_time

let trial_dir = sprintf "%s-trial"

let copy_out out_file out_dir query =
dump out_file query;
system_exn @@ sprintf "rm -rf %s" out_dir;
system_exn @@ sprintf "mv -f %s %s" (trial_dir out_dir) out_dir

let main ~params ~cost_timeout ~timeout ~out_dir ~out_file ch =
Random.init 0;

let conn = Db.create (Sys.getenv_exn "CASTOR_OPT_DB") in
let cost_conn = conn in
let params_set =
List.map params ~f:(fun (n, t, _) -> Name.create ~type_:t n)
|> Set.of_list (module Name)
in
let query =
load_string_exn ~params:params_set conn @@ In_channel.input_all ch
in

let best_cost = ref Float.infinity in
let cost state =
Fresh.reset Global.fresh;
match opt conn cost_conn params_set cost_timeout state query with
| Some query' -> (
match eval (trial_dir out_dir) params query' with
| Ok cost ->
if Float.(cost < !best_cost) then (
copy_out out_file out_dir query';
best_cost := cost);
cost
| Error err ->
Logs.warn (fun m -> m "Evaluation failed: %a" Error.pp err);
Float.infinity)
| None -> Float.infinity
in

let cost = Memo.of_comparable (module Mcmc.Random_choice.C) cost in
let max_time = Option.map ~f:Time.Span.of_sec timeout in

try Mcmc.run ?max_time cost |> ignore
with Resolve.Resolve_error r -> Fmt.epr "%a@." (Resolve.pp_err Fmt.nop) r

let spec =
let open Command.Let_syntax in
[%map_open
let () = Log.param
and () = Ops.param
and () = Db.param
and () = Type_cost.param
and () = Join_opt.param
and () = Groupby_tactics.param
and () = Type.param
and () = Simplify_tactic.param
and cost_timeout =
flag "cost-timeout" (optional float)
~doc:"SEC time to run cost estimation"
and timeout =
flag "timeout" (optional float) ~doc:"SEC time to run optimizer"
and params =
flag "param" ~aliases:[ "p" ]
(listed Util.param_and_value)
~doc:"NAME:TYPE query parameters"
and out_dir =
flag "out-dir" (required string) ~aliases:[ "o" ]
~doc:"DIR output directory"
and out_file =
flag "out-file" (required string) ~aliases:[ "f" ]
~doc:"FILE output directory"
and ch =
anon (maybe_with_default In_channel.stdin ("query" %: Util.channel))
in
fun () -> main ~params ~cost_timeout ~timeout ~out_dir ~out_file ch]

let () = Command.basic spec ~summary:"Optimize a query." |> Command_unix.run
Loading

0 comments on commit f950d1c

Please sign in to comment.