forked from ga4gh-beacon/beacon-v2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbeacon_yaml2md.pl
executable file
·897 lines (701 loc) · 30.6 KB
/
beacon_yaml2md.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
#!/usr/bin/env perl
#
# Script to convert Beacon v2 Models schemas to Markdown tables
#
# Last Modified: May/05/2022
#
# Version 2.0.0
#
# Copyright (C) 2021-2022 Manuel Rueda ([email protected])
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>.
#
# If this program helps you in your research, please cite.
use strict;
use warnings;
use autodie;
use feature qw(say);
use Getopt::Long;
use Pod::Usage;
use File::Basename;
use File::Spec::Functions qw(catdir catfile);
my $debug = 0;
my $verbose = 0;
##### Main #####
beacon_yaml2md();
################
exit;
sub beacon_yaml2md {
# Defining a few variables
my $version = '2.0.0';
# https://github.com/ga4gh-beacon/beacon-v2-Models/tree/main/BEACON-V2-draft3-Model
my $schema_dir = './deref_schemas';
my $markdown_dir = '../docs/schemas-md';
my $obj = 'obj';
# Reading arguments
GetOptions(
'schema-dir=s' => \$schema_dir, # string
'markdown-dir=s' => \$markdown_dir, # string
'delete|D' => \my $delete, # flag
'help|?' => \my $help, # flag
'man' => \my $man, # flag
'debug=i' => \$debug, # integer
'verbose' => \$verbose, # flag
'version|v' => sub { say "$0 Version $version"; exit; }
) or pod2usage(2);
pod2usage(1) if $help;
pod2usage( -verbose => 2, -exitval => 0 ) if $man;
pod2usage(
-message => "Please specify a valid directory for --schema-dir\n",
-exitval => 1
) if ( !-d $schema_dir );
pod2usage(
-message => "Please specify a valid directory for --markdown-dir\n",
-exitval => 1
) if ( !-d $markdown_dir );
# Before anything, $delete will delete previous yaml from nested properties
my $old_yaml = catfile( $schema_dir, "$obj/*.yaml" );
say "*** Erasing $old_yaml ***" and unlink glob "$old_yaml" if $delete;
# Define a few more variables...
my @schemas =
qw (analyses biosamples cohorts datasets genomicVariations individuals runs);
#qw (genomicVariations);
my $schema_obj_dir = catdir( $schema_dir, $obj );
my $markdown_obj_dir = catdir( $markdown_dir, $obj );
my $raa_header = [qw(Term Description Type Properties Example Enum)];
# Load a hash with schema location
my %schema = ();
for my $schema (@schemas) {
$schema{$schema}{yaml} =
catfile( $schema_dir, $schema, 'defaultSchema.yaml' );
$schema{$schema}{md} =
catfile( $markdown_dir,
$schema . '_' . basename( $schema{$schema}{yaml} ) );
$schema{$schema}{md} =~ s/\.yaml/.md/;
}
# Load a variable that needs $markdown_dir
my $beacon_terms = catfile( $markdown_dir, 'beacon_terms.md' );
#########################
# START YAML CONVERSION #
#########################
my $yaml = YAML2MD->new(
{
yaml_obj => $schema_obj_dir,
md_obj => $markdown_obj_dir,
b_terms => $beacon_terms,
header => $raa_header,
obj => $obj
}
);
#for my $schema (qw(analyses)) {
for my $schema (@schemas) {
say "====================================";
say "Transforming schema => $schema";
say "IN $schema{$schema}{yaml}";
say "OUT $schema{$schema}{md}";
$yaml->yaml2md(
{
schema => $schema,
yaml => $schema{$schema}{yaml},
#yaml => './defaultSchema.yaml',
md => $schema{$schema}{md},
}
);
}
$yaml->yaml2md_obj();
#######################
# END YAML CONVERSION #
#######################
}
package YAML2MD;
use strict;
use warnings;
use autodie;
use feature qw(say);
use YAML::XS qw(LoadFile DumpFile);
use JSON::XS;
use File::Basename;
use List::MoreUtils qw/any/;
use Path::Tiny;
use Data::Dumper;
use File::Spec::Functions qw(catdir catfile);
use Mojo::JSON::Pointer;
$Data::Dumper::Sortkeys = 1;
$YAML::XS::QuoteNumericStrings = 0;
sub new {
my ( $class, $arg ) = @_;
my $self = {
yaml_obj => $arg->{yaml_obj},
md_obj => $arg->{md_obj},
b_terms => $arg->{b_terms},
header => $arg->{header},
obj => $arg->{obj}
};
bless $self, $class;
return $self;
}
sub yaml2md {
my $self = shift;
my $var = shift;
my $yo_dir = $self->{yaml_obj};
my $raa_header = $self->{header};
my $obj = $self->{obj};
my $schema = $var->{schema};
my $y_file = $var->{yaml};
my $m_file = $var->{md};
# First we load the main YAML schema
my $data = load_yaml($y_file);
# Then we slice it at the properties-level and create YAMLs for each property
my ( $yaml_properties, $ra_properties ) =
yaml_slicer( $data, $schema, $yo_dir );
say 'All properties => ', join ',', @$ra_properties;
###############################
# Processing main YAML schema #
###############################
my $header = table_header($raa_header);
my $out_str = $header;
$out_str .=
table_content( $yaml_properties, $ra_properties, $raa_header, $obj, 1 );
$out_str .= add_tabs_with_examples($schema);
write_file( $m_file, $out_str );
return 1;
}
sub yaml2md_obj {
my $self = shift;
my $yo_dir = $self->{yaml_obj};
my $mo_dir = $self->{md_obj};
my $beacon_terms = $self->{b_terms};
my $raa_header = $self->{header};
my $obj = $self->{obj};
######################################
# Processing properties YAML schemas #
######################################
my $header = table_header($raa_header);
my @yamls = ();
# Ad hoc solution to force creation of <duoDataUse.yaml> (it gives problems with <allOf>)
# + toolName.yaml, toolReferences (they are too deeply nested)
for my $yaml ( 'duoDataUse.yaml', 'toolName.yaml', 'toolReferences.yaml' ) {
write_file( catfile( $yo_dir, $yaml ), create_str_yaml($yaml) );
}
# Now process all obj
foreach my $yaml ( glob("$yo_dir/*.yaml") ) {
say "*** Processing $yaml file ***" if $debug;
my $data = load_yaml($yaml);
my $nested_properties = $data;
my $nested_ra_properties = [ keys %$data ];
my $out_str = $header;
$out_str .= table_content( $nested_properties, $nested_ra_properties,
$raa_header, $obj, 0 );
# We parse $yaml to get paths and more...
my ( $base, $dir, $ext ) = fileparse( $yaml, '.yaml' );
$ext =~ s/\.yaml/.md/;
my $file = catfile( $mo_dir, $base . $ext ); # Note -> $base.$ext
write_file( $file, $out_str );
# Finally we store values, to be used later (beacon_terms.md)
push @yamls, $base, "./$obj/$base" . $ext;
}
###############################
# Creating beacon_terms.md #
###############################
my %yaml = @yamls;
say "\n*** Creating file => $beacon_terms ***";
my $b_terms_tmp_str = '';
# Sort naturally without resorting to Sort::Naturally
for my $key ( sort { "\L$a" cmp "\L$b" } keys %yaml ) {
$b_terms_tmp_str .= "* [$key]($yaml{$key})\n";
}
# Writing file
write_file( $beacon_terms, $b_terms_tmp_str );
return 1;
}
sub yaml_slicer {
my ( $data, $schema, $yo_dir ) = @_;
my $yaml_properties = $data->{properties};
my $ra_properties = [ keys %$yaml_properties ];
print "YAML Properties\n", Dumper $yaml_properties if $debug;
print "All Properties\n", Dumper $ra_properties if $debug;
###########################
# Object properties => 1D #
###########################
# Instead of working directly with recursive hash slices, we create
# one YAML file for each property and then re-use code from the 'main' schema
##########################################
# **** Note about VRS / PHX adoption *** #
##########################################
# The adoption of those standards had technical implications. The script expects objects to have
# <key> for the object and then <properties>. VRS/PHX follow JSON schemas that include /oneOf allOf anyOf/
# plus other complex intructions such as <if:> <else:>.
# This becomes a real challenge with $ref as, for instance, in <g_v.variation> we can not find the key for
# 'MolecularVariation', 'SystemicVariation', 'LegacyVariation'
# TL;DR; WE ARE INTRODUCING A FEW AD HOC CHANGES BELOW....
for my $property (@$ra_properties) {
#next unless lc($property) eq 'variation';
$yaml_properties->{$property} =
parse_json_keywords( $property, $yaml_properties->{$property} );
my $yaml_properties_1d = $yaml_properties->{$property};
dump_yaml( $property, $yaml_properties_1d, $yo_dir );
############################################
# The objects can come from two sources: #
# #
# 1 - Object itself #
# type: object #
# properties: {foo: John, bar: Doe} #
# #
# 2 - Array of objects #
# type: array #
# items: { #
# properties: {foo: John, bar: Doe} #
# } #
############################################
my $data_type = check_data_type($yaml_properties_1d);
#################################
# Object|Array Properties => 2D #
#################################
my $yaml_properties_2d =
$data_type eq 'array'
? $yaml_properties_1d->{items}{properties}
: $yaml_properties_1d->{properties};
for my $property ( keys %$yaml_properties_2d ) {
$yaml_properties_2d->{$property} = parse_json_keywords( $property,
$yaml_properties_2d->{$property} );
dump_yaml( $property, $yaml_properties_2d->{$property}, $yo_dir );
#################################
# Object|Array Properties => 3D #
#################################
my $data_type = check_data_type( $yaml_properties_2d->{$property} );
my $yaml_properties_3d =
$data_type eq 'array'
? $yaml_properties_2d->{$property}{items}{properties}
: $yaml_properties_2d->{$property}{properties};
for my $property ( keys %$yaml_properties_3d ) {
$yaml_properties_3d->{$property} =
parse_json_keywords( $property,
$yaml_properties_3d->{$property} );
# Can't bo beyond 3D, using subroutine to add links
$yaml_properties_3d->{$property} =
add_properties_vrs( $property,
$yaml_properties_3d->{$property} );
dump_yaml( $property, $yaml_properties_3d->{$property},
$yo_dir );
}
}
}
return ( $yaml_properties, $ra_properties );
}
sub table_content {
my ( $yaml_properties, $ra_properties, $headers, $obj, $link ) = @_;
my @lc_headers = map { lc } @$headers; # Copy array uc to avoid modifying original $ref
my $out_str = '';
#---------------------------------------------------------|
# Term (Header 1) | Header 2 | Header 3 | .... | Header N |
#---------------------------------------------------------|
# Property | Value 2 | Value 3 | .... | Value N |
#---------------------------------------------------------|
# Property = identifiers
for my $property ( sort @$ra_properties ) {
# @refs will be needed below
my @refs = ();
# object ={description,examples,etc...}
for my $object ( $yaml_properties->{$property} ) {
# $object has to be defined as an object in the schema, otherwise we skip it
next unless ref $object eq 'HASH';
# Now we process all values (matching header terms)
for my $header (@lc_headers) {
# Skipping term (not needed)
next if $header eq 'term';
# General assignation
my $value_header = $object->{$header};
##################################################
# Change assignation depending of content/header #
##################################################
# Check presence of plural 'examples'
$value_header =
exists $object->{examples}
? $object->{examples}
: $object->{$header}
if $header eq 'example';
# Slice differentely if $object->{type} eq 'array'
if ($object->{type} eq 'array' ) {
for ('description', 'properties'){
$value_header = $object->{items}{$_} if $header eq $_;
}
}
# Now convert data structure to string
say "#### $property => $header ####" if $debug;
push @refs, ref2str( $property, $value_header, $obj, $link );
}
}
# Add links to cell values when needed
my $property_path =
$link ? "[$property](./$obj/$property.md)" : $property;
# Accumlate all rows in one big string
$out_str .= '| ' . ( join ' | ', $property_path, @refs ) . "|\n";
}
return $out_str;
}
sub table_header {
my $headers = shift;
my $out_str .= '|' . join ' | ', @$headers;
$out_str .= "|\n" . ( ('| ---') x scalar @$headers ) . " |\n";
return $out_str;
}
sub ref2str {
my ( $property, $data, $obj, $link ) = @_;
# We will append everything into a string ($out_str)
my $out_str = '';
# Check data types
# We can have ARRAY, HASH, STRING and undef
if ( ref $data eq 'ARRAY' ) {
# AoH
if ( ref $data->[0] eq 'HASH' ) {
$out_str = '`'
. JSON::XS->new->utf8->space_after->canonical->encode($data)
. '`'; # ->pretty (encode) does not work inside tables
}
# A
elsif ( ref $data->[0] eq 'ARRAY' ) {
$out_str = join ',<br />', map {
'`'
. JSON::XS->new->utf8->space_after->canonical->encode(
$data->[$_] )
. '`'
} ( 0 .. $#{$data} );
}
# string or undef
else {
$out_str = defined $data->[0] ? join ', ', @$data : 'NA'; # Note ', ' to allow HTML column rendering
}
}
elsif ( ref $data eq 'HASH' ) {
my $tmp_str = $link ? "./$obj" : '.';
$out_str = join ', ',
map { add_external_links( $tmp_str, $_ ) } sort keys %$data;
$out_str =~ s/(HASH|ARRAY)\(\w+\)//g;
$out_str =~ s/,,/,/g;
$out_str =~ s/,$//;
}
else {
$out_str = defined $data ? $data : 'NA';
}
# Finally remove newlines at string level
$out_str =~ s/\R//g;
return $out_str;
}
sub add_external_links {
my ( $tmp_str, $key ) = @_;
# Note: This is an ad hoc solution to fix errors with deeply-nested data
my @phx = qw( typedQuantities days weeks Quantity high low);
my @vrs = qw(_id state type CURIE Location);
my @framework = ("ontologyTerm");
return ( any { ( $_ eq $key ) } @phx )
? "[$key](https://phenopacket-schema.readthedocs.io/en/latest/building-blocks.html)"
: ( any { ( $_ eq $key ) } @vrs )
? "[$key](https://vrs.ga4gh.org/en/stable/terms_and_model.html#$key)"
: ( any { ( $_ eq $key ) } @framework )
? "[$key](https://github.com/ga4gh-beacon/beacon-v2/blob/main/framework/src/common/$key.yaml)"
: "[$key]($tmp_str/$key.md)";
}
sub load_yaml {
my $s_file = shift;
my $data = LoadFile($s_file);
return $data;
}
sub dump_yaml {
my ( $property, $data, $dir ) = @_;
########################
# IMPORTANT STEP HERE #
########################
# The slice contains the property values but it does not contain
# the property name, thus we add it here as the first dimension
my $yaml = { $property => $data };
######################
# END IMPORTANT STEP #
######################
# Now we simply print the file as YAML
my $file = catfile( $dir, "$property.yaml" );
DumpFile( $file, $yaml );
return 1;
}
sub write_file {
my ( $file, $str ) = @_;
path($file)->spew_utf8($str);
return 1;
}
sub check_data_type {
my $data = shift;
return $data->{type} // 'string';
}
sub create_str_yaml {
# This is an ad hoc solution :-(
# Changes in the repo won't be reflected here
my $file = shift;
my $str_duoDataUse = <<EOF;
---
duoDataUse:
description: duoDataUse
type: array
examples:
- id: DUO:0000007
label: disease specific research
version: 17-07-2016
items:
description: Definition of an ontology term.
properties:
id:
description: A CURIE identifier for an ontology term.
examples:
- ga4gh:GA.01234abcde
- DUO:0000004
- orcid:0000-0003-3463-0775
- PMID:15254584
type: string
label:
description: 'The text that describes the term. By default it could be the preferred text of the term, but is it acceptable to customize it for a clearer description and understanding of the term in an specific context.'
type: string
modifiers:
type: array
version:
type: string
EOF
my $str_toolName = <<EOF;
---
toolName:
description: Name of the tool.
example: Ensembl Variant Effect Predictor (VEP)
type: string
EOF
my $str_toolreferences = <<EOF;
---
toolReferences:
description: References to the tool
examples:
- bio.toolsId: https://bio.tools/vep
- url: http://www.ensembl.org/vep
type: object
EOF
## ontologyTerm.yaml is needed due to a bug with jsonref2json.js that overrided "parent" <description> field
my $str_ontologyTerm = <<EOF;
---
additionalProperties: true
description: Definition of an ontology term.
properties:
id:
description: 'A CURIE identifier, e.g. as `id` for an ontology term.'
examples:
- ga4gh:GA.01234abcde
- DUO:0000004
- orcid:0000-0003-3463-0775
- PMID:15254584
pattern: '^\\w[^:]+:.+\$'
type: string
label:
description: 'The text that describes the term. By default it could be the preferred text of the term, but is it acceptable to customize it for a clearer description and understanding of the term in an specific context.'
type: string
required:
- id
title: Ontology Term
type: object
EOF
my %yaml = (
'duoDataUse.yaml' => $str_duoDataUse,
'toolName.yaml' => $str_toolName,
'toolReferences.yaml' => $str_toolreferences,
'ontologyTerm.yaml' => $str_ontologyTerm
);
return $yaml{$file};
}
sub add_tabs_with_examples {
# This subroutine is quick workaround to embed json links directly in Markdown
# We dound two alternatives for mkdocs
# A - https://pypi.org/project/mkdocs-embed-external-markdown/
# Suboptimal - Only embed .md (not json) and issues with {{}} syntax
# B - https://pypi.org/project/mkdocs-include-markdown-plugin/
# To be tested (Apr-01-2022)
my $schema = shift;
my $dir =
catdir( '../models/json/beacon-v2-default-model', $schema, 'examples' );
my $str = <<EOF;
## Examples
These are examples extracted directly from the [GitHub repository](https://github.com/ga4gh-beacon/beacon-v2-Models).
EOF
#=== "MIN"
#
# ```
# {
# }
# ```
# Will process al JSON files but only retain those having MAX, MIN or MID
# ---- biosample-MID-example.json biosample-MIN-example.json -----
foreach my $file ( reverse sort glob("$dir/*.json") ) {
my $filename = basename($file);
my @tmp = split /\-/, $filename;
my $label = uc( $tmp[1] );
next unless ( $label eq 'MAX' || $label eq 'MID' || $label eq 'MIN' );
$str .= "=== \"$label\"\n";
$str .= "\t```json\n";
open my $fh, "<", $file;
while (<$fh>) {
$str .= "\t$_";
}
close $fh;
$str .= "\n\t```\n\n";
}
return $str;
}
sub parse_json_keywords {
# We'll be using JSON Pointer to facilitate the search
my ( $property, $data ) = @_;
my $pointer = Mojo::JSON::Pointer->new($data);
# Defining some info (for > 1D we should be able to take this info from properties/type/const)
# but we have sub nested $keywords that complicate a lot the task
my $terms = {
'variation' =>
[ 'MolecularVariation', 'SystemicVariation', 'LegacyVariation' ],
'SystemicVariation' => ['CopyNumber'],
'MolecularVariation' => [ 'Allele', 'Haplotype' ],
'location' => [ 'CURIE', 'Location' ],
'state' => [ 'SequenceState', 'SequenceExpression' ],
'Value' => [ 'Quantity', 'ontologyTerm' ]
};
# We'll be checking <oneOf allOf anyOf>
for my $keyword (qw(oneOf allOf anyOf)) {
if ( $pointer->contains("/$keyword") ) {
#say "$property has /$keyword";
my $tmp_hash;
$tmp_hash->{type} = $keyword;
my $count = 0;
for my $elements ( @{ $data->{$keyword} } ) {
#if ($pointer->contains("/$keyword/$property/$count/properties/type/const")) {
# say "Route pointer const /$keyword/$property/$count/properties/type/const";
# my $const = $pointer->get("/$keyword/$property/$count/properties/type/const");
# $tmp_hash->{properties}{$const} = $elements;
#} else{
my $tmp_term = ( $pointer->contains("/$keyword/$count/title") && $pointer->get("/$keyword/$count/title") ne 'Ontology Term' )
? $pointer->get("/$keyword/$count/title")
: @{ $terms->{$property} }[$count];
$tmp_hash->{properties}{$tmp_term} = $elements if $tmp_term; # Ad-hoc some terms appear duplicated and come empty....
#}
$count++;
}
$data = $tmp_hash; # Adding new reference
}
}
return $data;
}
sub add_properties_vrs {
my ( $property, $data ) = @_;
my %url = (
'SequenceExpression' =>
'https://raw.githubusercontent.com/ga4gh/vrs/1.2/schema/vrs.json#/definitions/',
'CopyNumber' =>
'https://raw.githubusercontent.com/ga4gh/vrs/1.2/schema/vrs.json#/definitions/'
);
if ( exists $url{$property} ) {
$data->{properties} =
"[VRS definition for $property]" . '('
. $url{$property}
. $property . ')';
}
return $data;
}
1;
=head1 NAME
beacon_yaml2md: Script to convert Beacon v2 Models schemas to Markdown tables
=head1 SYNOPSIS
beacon_yaml2md.pl
Options:
-schema-dir Directory with YAML schemas
-markdown-dir Directory where to write .md
-D|delete Delete schema-dir/obj/*yaml
-h|help Brief help message
-man Full documentation
-debug Print debugging (from 1 to 5, being 5 max)
-verbose Verbosity on
=head1 CITATION
To be defined.
=head1 SUMMARY
beacon_yaml2md: Script to convert Beacon v2 Models schemas to Markdown tables
B<NOTE:> This script is B<UNDER CONSTRUCTION> and improvements are added often.
=head1 MOTIVATION
Prior to the creation of this script (i.e.,June-2021), the Beacon documentation for the schemas was first created in a text-type document (e.g., Word) and then MANUALLY transformed into
YAMLs schemas. Each time the original MS Word document was edited, someone had to manually edit the YAML.
This script inverts the process, i.e., B<it enforces modifying the schema specification directly at the YAML/JSON level>.
Editing the schemas directly at the YAML/JSON level has two advantages, the first is that because we follow L<OpenAPI|https://swagger.io/specification/> specification (along with JSON schema), I<a priori> we could use L<SWAGGER UI|https://swagger.io/docs/open-source-tools/swagger-ui/usage/installation>. The second is that the YAML/JSON files can be converted to Markdown tables in order to create L<Markdown based documentation|http://docs.genomebeacons.org> documentation. This script B<transforms YAML/JSON to Markdown tables>, including their nested objects B<up to a third degree of hierarchy>.
The B<Markdown> format can be directly rendered as tables at the GitHub repository, and it can be used with L<MkDocs|https://www.mkdocs.org/> to create L<HTML|http://docs.genomebeacons.org> documentation.
Everytime a C<git push> is performed to the L<repo|https://github.com/ga4gh-beacon/beacon-v2> the documentation at L<Github Pages|http://docs.genomebeacons.org> gets updated. Note that only content under directory C<docs/> will make it to L<Github Pages|http://docs.genomebeacons.org>.
Before creating this tool, the author made an exhaustive search on what had been dveloped by the I<community> to automatically convert YAML/JSON to Markdown tables and found that there were many ways to go from YAML/JSON to HTML (e.g., CPAN, Python, Node.js), but not much from YAML/JSON to Markdown. Obviously, even in the case we had found something, some major tweaking will be needed in order to display things the way we want.
In the Beacon context, I<mbaudis> has developed a nice framework for L<schemablocks|https://github.com/ga4gh-schemablocks/schemablocks-tools> which creates HTML from YAML schemas to be used with L<Yekyll|https://jekyllrb.com/>. However, personalizing his tools to work in our scenario will still not solve the initial objective of creating Markdown tables from the YAML/JSON.
All of the above lead to the creation of this tool, which was written in L<Perl|https://www.perl.org> language.
=head2 ADDENDUM: How to update the Documentation
There are several steps that need to be peformed to update the documentation:
1 - Install C<nodejs>.
$ sudo apt install nodejs
The reason for installing JS is that we need it to convert JSON files with JSON references ($ref) to JSON files with no references (i.e., all references will be embeded in the file).
2 - Now run the script:
$ # cd bin # skip this step if you are already at bin directory
$ ./transform_json2md.sh
$ cd ..
$ git add docs/schemas-md
3 - Finally you need to push beacon-v2 repo to GitHub.
4 - Documentation will get automatically updated via GitHub actions.
=head1 HOW TO RUN BEACON_YAML2MD
The script is written in Perl and runs on Linux (tested on Debian-based distribution). Perl 5 is installed by default on Linux,
but you might need to install some CPAN module(s).
First we install cpanminus (with sudo privileges):
$ sudo apt-get install cpanminus
Then the module(s):
$ cpanm --sudo --installdeps .
If you prefer to have the dependencies in a "virtual environment" (i.e., install the CPAN modules in the directory of the application) we recommend using the module C<Carton>.
$ cpanm --sudo Carton
Then, we can install our dependencies:
$ carton install
The script takes YAMLs as input file and when no arguments is used it will read them from C<./deref_schemas/> directory.
B<NB:> We recommend running the script with the provided bash file C<transform_yaml2md.sh> (B<see ADDENDUM: How to update Documentation>).
B<Example 1:>
$ ./beacon_yaml2md.pl
$ carton exec -- ./beacon_yaml2md.pl # if using Carton
If the script is run directly at C<bin/> directory (default) and with no arguments, then it will create contents in:
* Markdowns at ../docs/schemas-md/
B<Example 2:>
Here we are providind arguments for --schema-dir and for markdown-dir.
$ $path/beacon_yaml2md.pl --schema-dir ./deref_schemas --markdown-dir ../docs/schemas-md --D
I<NB:> if the YAML is not well defined (e.g., wrong indentation, etc.) the script will complain about it. Thus, it indirectly serves as a YAML validator.
B<Notes:>
The argument C<-D|delete> deletes all C<./deref_schemas/obj/*yaml> files prior to doing anything else. It's harmless so you may want to use it you're unsure of who created those files.
See the directory tree below:
C<
```
beacon-v2/
|-- bin
|-- docs
| |-- img
| `-- schemas-md
| `-- obj
```
>
I<NB:> The script was built to work with the Beacon v2 Model schemas and the author deliberately chose to hard-code some variables (e.g., the I<schemas>) to avoid extra arguments or a config.file. However, this may change in future versions depending on user's adoption/feedback.
I<NB:> The decission to take YAMLs (and not JSON) as an input is deliberate and made by the author.
I<NB:> The script only processes the C<Terms> nested B<up to 3 degrees of hierarchy>. Before Adoption of VRS/PHX that limit was OK.
I<NB:> The script also includes the Beacon v2 Models examples from L<beacon-v2 repo|https://github.com/ga4gh-beacon/beacon-v2> in JSON format.
=head1 AUTHOR
Written by Manuel Rueda, PhD. Info about EGA can be found at L<https://ega-archive.org/>.
=head1 KNOWN ISSUES
* If two entities share a term/object (e.g., id, label) the text will correspond to the latest one (alphabetical order). "Ideally" all shared-objects should have a common description, shouldn't them? This way we avoid having to create sub-objects for every entity.
* As of April 2022 we're only processing 3 levels of nesting and adding external URLs for deeper levels.
* We are not processing <if: then:> statements in JSON Schema.
=head1 REPORTING BUGS
Report bugs or comments to <[email protected]>.
=head1 COPYRIGHT
This PERL file is copyrighted. See the LICENSE file included in this distribution.
=cut