(* backend.ml *)
open! Import
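(* Each analysis backend below implements the [Data_tables] signature: it
   decides which tables to keep in memory while queries run, how a query
   result is folded into them ([update_analysis_data]), and how they are
   written to disk ([dump] and [wrap_up]). [kind] records which analysis mode
   the backend belongs to. *)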
type kind = Perf | Regr | Error_regr | Bench

module type Data_tables = sig
  type t

  val kind : kind
  val create_initial : Merlin.t -> t
  val init_cache : t -> bool

  val update_analysis_data :
    id:int ->
    responses:Merlin.Response.t list ->
    cmd:Merlin.Cmd.t ->
    file:File.t ->
    loc:Location.t ->
    query_type:Merlin.Query_type.t ->
    t ->
    unit

  val persist_logs : log:Logs.t -> t -> unit
  val dump : dump_dir:Fpath.t -> t -> unit
  val all_files : unit -> Fpath.t list

  val wrap_up :
    t -> dump_dir:Fpath.t -> proj_paths:Fpath.t list -> merlin:Merlin.t -> unit
end
module Field = struct
  (* A custom augmentation of [Fieldslib.Field] for [Data_tables.t] *)
  include Fieldslib.Field

  let to_filename field =
    let field_name = Fieldslib.Field.name field in
    Fpath.(add_ext ".json" @@ v field_name)

  let dump_single pp dump_dir tables field =
    let table_content = Fieldslib.Field.get field tables in
    let file_name = to_filename field in
    let file_path = Fpath.(to_string @@ append dump_dir file_name) in
    let oc = open_out file_path in
    Fun.protect
      ~finally:(fun () -> close_out_noerr oc)
      (fun () ->
        let ppf = Format.formatter_of_out_channel oc in
        pp ppf table_content)

  let dump pp dump_dir tables field =
    let write_json_lines ~pp ~ppf l =
      Format.pp_print_list ~pp_sep:Format.pp_print_newline pp ppf l
    in
    let table_content = Fieldslib.Field.get field tables in
    let file_name = to_filename field in
    let file_path = Fpath.(to_string @@ append dump_dir file_name) in
    let oc = open_out file_path in
    Fun.protect
      ~finally:(fun () -> close_out_noerr oc)
      (fun () ->
        let ppf = Format.formatter_of_out_channel oc in
        write_json_lines ~pp ~ppf table_content)

  let dump_opt pp dump_dir tables field =
    let write_json_lines ~pp ~ppf l =
      Format.pp_print_list ~pp_sep:Format.pp_print_newline pp ppf l
    in
    match Fieldslib.Field.get field tables with
    | Some content ->
        let file_name = to_filename field in
        let file_path = Fpath.(to_string @@ append dump_dir file_name) in
        let oc = open_out file_path in
        Fun.protect
          ~finally:(fun () -> close_out_noerr oc)
          (fun () ->
            let ppf = Format.formatter_of_out_channel oc in
            write_json_lines ~pp ~ppf content)
    | None -> ()
end
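(* One row of the performance table: for a given sample, the timing of every
   repeat of the query, the maximum of those timings, and where the query was
   run (file, location, query type). *)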
module P = struct
  type t = {
    sample_id : int;
    timings : int list;
    max_timing : int;
    file : File.t;
    query_type : Merlin.Query_type.t;
    loc : Location.t;
  }
  [@@deriving yojson_of]

  (* FIXME: print each of the sample repeats in a separate json field *)
  let pp ppf data =
    Format.fprintf ppf "%s%!" (Yojson.Safe.to_string (yojson_of_t data))
end

module Query_response = struct
  type t = {
    sample_id : int;
    cmd : Merlin.Cmd.t;
    responses : Merlin.Response.t list;
  }
  [@@deriving yojson_of]

  (* FIXME: print the sample repeats in a separate json field *)
  let pp ppf data =
    Format.fprintf ppf "%s%!" (Yojson.Safe.to_string (yojson_of_t data))
end

module Command = struct
  type t = { sample_id : int; cmd : Merlin.Cmd.t } [@@deriving yojson_of]

  let pp ppf data =
    Format.fprintf ppf "%s%!" (Yojson.Safe.to_string (yojson_of_t data))
end

module Benchmark_metric = struct
  type t = { name : string; mutable value : int list; units : string }
  [@@deriving yojson_of]
end
module StringMap = Map.Make (String)
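(* A [Benchmark_result] groups metrics by name in a [StringMap]; [update]
   appends new timing values to an existing metric rather than adding a
   duplicate entry. The intermediate [t1] (see the TODO) turns the map into a
   list so it can be serialized with [yojson_of]. *)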
module Benchmark_result = struct
  type t = { name : string; mutable metrics : Benchmark_metric.t StringMap.t }

  let update (result : t) (metric : Benchmark_metric.t) =
    let f x =
      match x with
      | Some (me : Benchmark_metric.t) ->
          Some { me with value = List.append metric.value me.value }
      | None -> Some metric
    in
    { result with metrics = StringMap.update metric.name f result.metrics }

  let create name (metric : Benchmark_metric.t) =
    { name; metrics = StringMap.add metric.name metric StringMap.empty }

  (* TODO: Figure out a way to remove intermediate type *)
  type t1 = { name : string; metrics : Benchmark_metric.t list }
  [@@deriving yojson_of]

  let convert ({ name; metrics } : t) =
    { name; metrics = StringMap.bindings metrics |> List.map snd }
end

module Benchmark_summary = struct
  type t = { mutable results : Benchmark_result.t StringMap.t }

  (* TODO: Figure out a way to remove intermediate type *)
  type t1 = { results : Benchmark_result.t1 list } [@@deriving yojson_of]

  let pp ppf data =
    let convert ({ results } : t) =
      {
        results =
          StringMap.bindings results |> List.map snd
          |> List.map Benchmark_result.convert;
      }
    in
    Format.fprintf ppf "%s%!"
      (Yojson.Safe.to_string (yojson_of_t1 (convert data)))
end
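(* The distilled view of a response used for regression comparison: only the
   return class and the query number are kept, both optional since extracting
   them from a response can fail. *)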
module Distilled_data = struct
  type t = {
    sample_id : int;
    cmd : Merlin.Cmd.t;
    return : Merlin.Response.return_class option;
    query_num : int option;
  }
  [@@deriving yojson_of]

  (* FIXME: print the sample repeats in a separate json field *)
  let pp ppf data =
    Format.fprintf ppf "%s%!" (Yojson.Safe.to_string (yojson_of_t data))
end
(* module Files = struct
     type kind = Ml | Mli

     type t = {
       file_id : string; (* could be the hash of the content *)
       name : string option; (* None, when using [--sanitize] *)
       len : int;
       functor_depth : int;
       num_first_class_modules : int;
       cmi_deps : t list; (* note: probably not a good idea to have t itself... *)
       cmt_deps : t list; (* note: probably not a good idea to have t itself... *)
       commit_sha : string option;
     }

     let get_commit_sha ~proj_path =
       let cmd = "git rev-parse HEAD" in
       try
         let cwd = Unix.getcwd () in
         Unix.chdir @@ Fpath.to_string proj_path;
         let ic = Unix.open_process_in cmd in
         Unix.chdir cwd;
         match input_line ic with
         | sha -> Ok sha
         | exception exc ->
             let err =
               Logs.Warning
                 (Format.sprintf
                    "Warning: something went wrong trying to get the commit sha \
                     of the source code project: %s"
                    (Printexc.to_string exc))
             in
             Error err
       with exc ->
         let err =
           Logs.Warning
             (Format.sprintf
                "Warning: something went wrong trying to get the commit sha of \
                 the source code project: %s"
                (Printexc.to_string exc))
         in
         Error err
   end *)
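(* The [Perf] backend: it accumulates the timings of every query (see [P]),
   the cropped responses, the exact commands that were run, and the logs, and
   dumps each field as its own JSON-lines file, plus a [metadata.json] written
   at wrap-up time. *)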
module Performance = struct
  (* TODO: add more data/restructure data, such as:
     - add a files "table" containing things like
       - the length of the file (size of AST)
       - in-file max functor depth
       - number of first-class modules shipped around in the file
       - number of cmi/cmt dependencies
       - commit sha of that file
     - have a finer-grained level than sample_id to allow dedicated fields
       for the repeats of one query
     - possibly: add a query_types "table":
       - all query types used in that `merl-an` run
       - the AST node type(s) corresponding to that query
       - the way the cache is being initialized for that query
       - the way the query is run
  *)
  type t = {
    mutable performances : P.t list;
    mutable query_responses : Query_response.t list;
    mutable commands : Command.t list;
    mutable logs : Logs.t list;
    merlin : Merlin.t;
  }
  [@@deriving fields]

  let init_cache p = Merlin.is_server p.merlin
  let kind = Perf

  let dump ~dump_dir t =
    let d = dump_dir in
    let () =
      Fields.iter ~performances:(Field.dump P.pp d t)
        ~query_responses:(Field.dump Query_response.pp d t)
        ~commands:(Field.dump Command.pp d t)
        ~logs:(Field.dump Logs.pp d t)
        ~merlin:(Field.dump_single Merlin.pp d t)
    in
    ()

  let update_analysis_data ~id ~responses ~cmd ~file ~loc ~query_type tables =
    let max_timing, timings, responses =
      (* FIXME: add json structure to the two lists *)
      let rec loop ~max_timing ~responses ~timings = function
        | [] -> (max_timing, timings, responses)
        | resp :: rest ->
            let timing = Merlin.Response.get_timing resp in
            let timings = timing :: timings in
            let responses = resp :: responses in
            let max_timing = Int.max timing max_timing in
            loop ~max_timing ~timings ~responses rest
      in
      loop ~max_timing:Int.min_int ~responses:[] ~timings:[] responses
    in
    let perf =
      { P.timings; max_timing; file; query_type; sample_id = id; loc }
    in
    let resp =
      (* TODO: make a cli-argument out of this instead of doing this always *)
      let responses =
        List.map
          (fun resp ->
            Merlin.Response.(
              crop_arbitrary_keys [ "value" ] @@ strip_location @@ resp))
          responses
      in
      { Query_response.sample_id = id; cmd; responses }
    in
    let cmd = { Command.sample_id = id; cmd } in
    tables.performances <- perf :: tables.performances;
    tables.query_responses <- resp :: tables.query_responses;
    tables.commands <- cmd :: tables.commands

  let persist_logs ~log tables = tables.logs <- log :: tables.logs

  let create_initial merlin =
    {
      performances = [];
      query_responses = [];
      commands = [];
      logs = [];
      merlin;
    }

  module Metadata = struct
    type t = {
      date : string option;
      proj : string list;
      total_time : float; (* query_time : float; *)
      merlin : Merlin.t;
    }
    [@@deriving yojson_of]

    let file_name = Fpath.v "metadata.json"

    let pp ppf data =
      Format.fprintf ppf "%s%!" (Yojson.Safe.to_string (yojson_of_t data))

    let get_date () =
      let epoch = Unix.time () |> Ptime.of_float_s |> Option.get in
      let (year, month, day), _ = Ptime.to_date_time epoch in
      Printf.sprintf "%i/%i/%i" day month year

    let produce_and_dump ~dump_dir ~proj_paths ~merlin =
      let metadata =
        let total_time = Sys.time () in
        let date = Some (get_date ()) in
        {
          date;
          proj = List.map Fpath.to_string proj_paths;
          total_time;
          merlin;
          (* query_time; *)
        }
      in
      let file_path = Fpath.(to_string @@ append dump_dir file_name) in
      let oc = open_out file_path in
      Fun.protect
        ~finally:(fun () -> close_out_noerr oc)
        (fun () ->
          let ppf = Format.formatter_of_out_channel oc in
          Format.fprintf ppf "%a" pp metadata)
  end

  let wrap_up _t ~dump_dir ~proj_paths ~merlin =
    (* TODO: check whether there's data left in memory and, if so, dump it *)
    Metadata.produce_and_dump ~dump_dir ~proj_paths ~merlin

  let all_files () =
    let f = Field.to_filename in
    Metadata.file_name
    :: Fields.to_list ~performances:f ~query_responses:f ~commands:f ~logs:f
         ~merlin:f
end
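(* The [Regr] backend is built at runtime as a first-class module so that its
   table layout can depend on the configuration: [full] enables the full (but
   stripped) query responses, [distilled_data] the compact per-query summary.
   Illustrative usage only (the actual call site lives outside this file, and
   [merlin] stands for some [Merlin.t] value):

     let (module B : Data_tables) =
       behavior { full = true; distilled_data = false }
     in
     let tables = B.create_initial merlin in
     ... *)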
type behavior_config = { full : bool; distilled_data : bool }

let behavior config =
  let module Behavior = struct
    type t = {
      mutable full_responses : Query_response.t list option;
      mutable distilled_data : Distilled_data.t list option;
      mutable commands : Command.t list;
      mutable logs : Logs.t list;
    }
    [@@deriving fields]

    let kind = Regr

    let dump ~dump_dir t =
      let d = dump_dir in
      Fields.iter
        ~full_responses:(Field.dump_opt Query_response.pp d t)
        ~distilled_data:(Field.dump_opt Distilled_data.pp d t)
        ~commands:(Field.dump Command.pp d t)
        ~logs:(Field.dump Logs.pp d t)

    let persist_logs ~log tables = tables.logs <- log :: tables.logs

    let update_analysis_data ~id ~responses ~cmd ~file:_ ~loc:_ ~query_type:_
        tables =
      let command = { Command.sample_id = id; cmd } in
      tables.commands <- command :: tables.commands;
      let () =
        match tables.full_responses with
        | None -> ()
        | Some fr ->
            tables.full_responses <-
              (let resp =
                 let responses =
                   List.map
                     (fun resp ->
                       Merlin.Response.(
                         strip_file
                         @@ crop_arbitrary_keys
                              [ "timing"; "cache"; "heap_mbytes" ]
                         @@ strip_location @@ resp))
                     responses
                 in
                 { Query_response.sample_id = id; cmd; responses }
               in
               Some (resp :: fr))
      in
      match tables.distilled_data with
      | None -> ()
      | Some rc -> (
          match responses with
          | [ resp ] -> (
              match
                ( Merlin.Response.get_return_class resp,
                  Merlin.Response.get_query_num resp )
              with
              | Ok return, Ok query_num ->
                  let new_entry =
                    {
                      Distilled_data.sample_id = id;
                      return = Some return;
                      query_num = Some query_num;
                      cmd;
                    }
                  in
                  tables.distilled_data <- Some (new_entry :: rc)
              | Error log, Ok query_num ->
                  persist_logs ~log tables;
                  let new_entry =
                    {
                      Distilled_data.sample_id = id;
                      return = None;
                      query_num = Some query_num;
                      cmd;
                    }
                  in
                  tables.distilled_data <- Some (new_entry :: rc)
              | Ok return, Error log ->
                  persist_logs ~log tables;
                  let new_entry =
                    {
                      Distilled_data.sample_id = id;
                      return = Some return;
                      query_num = None;
                      cmd;
                    }
                  in
                  tables.distilled_data <- Some (new_entry :: rc)
              | Error log1, Error log2 ->
                  persist_logs ~log:log1 tables;
                  persist_logs ~log:log2 tables)
          | _ -> (* FIXME *) ())

    let create_initial _merlin =
      let full_responses = if config.full then Some [] else None in
      let distilled_data = if config.distilled_data then Some [] else None in
      { full_responses; distilled_data; commands = []; logs = [] }

    let wrap_up _t ~dump_dir:_ ~proj_paths:_ ~merlin:_ =
      (* TODO: check whether there's data left in memory and, if so, dump it *)
      ()

    let init_cache _ = false

    let all_files () =
      let f = Field.to_filename in
      Fields.to_list ~full_responses:f ~distilled_data:f ~commands:f ~logs:f
  end in
  (module Behavior : Data_tables)
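(* The [Bench] backend aggregates the timings of all samples into one
   [Benchmark_summary]: a [Benchmark_result] per cache workflow (currently
   hardcoded to [Buffer_typed], see the TODO below) holding one metric per
   query type, with the raw timings in milliseconds as its values. *)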
module Benchmark = struct
  type t = {
    mutable bench : Benchmark_summary.t;
    mutable query_responses : Query_response.t list;
    mutable commands : Command.t list;
    mutable logs : Logs.t list;
    merlin : Merlin.t;
  }
  [@@deriving fields]

  let kind = Bench
  let init_cache b = Merlin.is_server b.merlin

  let create_initial merlin =
    {
      bench = { results = StringMap.empty };
      query_responses = [];
      commands = [];
      logs = [];
      merlin;
    }

  let persist_logs ~log tables = tables.logs <- log :: tables.logs

  let all_files () =
    let f = Field.to_filename in
    Fields.to_list ~bench:f ~query_responses:f ~commands:f ~logs:f ~merlin:f

  let dump ~dump_dir t =
    let d = dump_dir in
    let () =
      Fields.iter
        ~bench:(Field.dump_single Benchmark_summary.pp d t)
        ~query_responses:(Field.dump Query_response.pp d t)
        ~commands:(Field.dump Command.pp d t)
        ~logs:(Field.dump Logs.pp d t)
        ~merlin:(Field.dump_single Merlin.pp d t)
    in
    ()

  let update_analysis_data ~id ~responses ~cmd ~file:_file
      ~loc:(_loc : Import.location) ~query_type tables =
    let _max_timing, timings, responses =
      (* FIXME: add json structure to the two lists *)
      let rec loop ~max_timing ~responses ~timings = function
        | [] -> (max_timing, timings, responses)
        | resp :: rest ->
            let timing = Merlin.Response.get_timing resp in
            let timings = timing :: timings in
            let responses = resp :: responses in
            let max_timing = Int.max timing max_timing in
            loop ~max_timing ~timings ~responses rest
      in
      loop ~max_timing:Int.min_int ~responses:[] ~timings:[] responses
    in
    let resp =
      (* TODO: make a cli-argument out of this instead of doing this always *)
      let responses =
        List.map
          Merlin.Response.(
            fun resp ->
              crop_arbitrary_keys [ "value" ] @@ strip_location @@ resp)
          responses
      in
      { Query_response.sample_id = id; cmd; responses }
    in
    let cmd = { Command.sample_id = id; cmd } in
    let metric =
      {
        Benchmark_metric.name = Merlin.Query_type.to_string query_type;
        value = timings;
        units = "ms";
      }
    in
    (* TODO: Pass it instead of hardcoding *)
    let cache_workflow = Merlin.Cache_workflow.Buffer_typed in
    let upd = function
      | Some x -> Some (Benchmark_result.update x metric)
      | None ->
          Some
            (Benchmark_result.create
               (Merlin.Cache_workflow.to_string cache_workflow)
               metric)
    in
    let result =
      StringMap.update
        (Merlin.Cache_workflow.to_string cache_workflow)
        upd tables.bench.results
    in
    tables.bench.results <- result;
    tables.query_responses <- resp :: tables.query_responses;
    tables.commands <- cmd :: tables.commands

  let wrap_up _t ~dump_dir:_ ~proj_paths:_ ~merlin:_ = ()
end