diff --git a/age--1.5.0--y.y.y.sql b/age--1.5.0--y.y.y.sql index 9ec64bf8f..d3f9fe93e 100644 --- a/age--1.5.0--y.y.y.sql +++ b/age--1.5.0--y.y.y.sql @@ -28,3 +28,21 @@ -- Please add all additions, deletions, and modifications to the end of this -- file. We need to keep the order of these changes. +DROP FUNCTION IF EXISTS ag_catalog.load_labels_from_file(name, name, text, bool); +CREATE FUNCTION ag_catalog.load_labels_from_file(graph_name name, + label_name name, + file_path text, + id_field_exists bool default true, + load_as_agtype bool default false) + RETURNS void + LANGUAGE c + AS 'MODULE_PATHNAME'; + +DROP FUNCTION IF EXISTS ag_catalog.load_edges_from_file(name, name, text); +CREATE FUNCTION ag_catalog.load_edges_from_file(graph_name name, + label_name name, + file_path text, + load_as_agtype bool default false) + RETURNS void + LANGUAGE c + AS 'MODULE_PATHNAME'; diff --git a/regress/age_load/data/conversion_edges.csv b/regress/age_load/data/conversion_edges.csv new file mode 100644 index 000000000..f207cdc57 --- /dev/null +++ b/regress/age_load/data/conversion_edges.csv @@ -0,0 +1,7 @@ +start_id, start_vertex_type, end_id, end_vertex_type, string, bool, numeric, +1, Person1, 1, Person2, "John Smith", "true", 1 +1, Person1, 1, Person2, "John", "false", "-2" +1, Person1, 1, Person2, John Smith, true, 1.4 +1, Person1, 1, Person2, """John""", false, -1e10 +1, Person1, 1, Person2, null, false, 0 +1, Person1, 1, Person2, nUll, false, "3.14" diff --git a/regress/age_load/data/conversion_vertices.csv b/regress/age_load/data/conversion_vertices.csv new file mode 100644 index 000000000..8e0a9c65c --- /dev/null +++ b/regress/age_load/data/conversion_vertices.csv @@ -0,0 +1,7 @@ +id, string, bool, numeric, +1, "John Smith", "true", 1 +2, "John", "false", "-2" +3, John Smith, true, 1.4 +4, """John""", false, -1e10 +5, null, false, 0 +6, nUll, false, "3.14" diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index 5e74eaae6..8635a499b 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -233,3 +233,127 @@ NOTICE: graph "agload_test_graph" has been dropped (1 row) +-- +-- Test property type conversion +-- +SELECT create_graph('agload_conversion'); +NOTICE: graph "agload_conversion" has been created + create_graph +-------------- + +(1 row) + +-- vertex: load as agtype +SELECT create_vlabel('agload_conversion','Person1'); +NOTICE: VLabel "Person1" has been created + create_vlabel +--------------- + +(1 row) + +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv', true, true); + load_labels_from_file +----------------------- + +(1 row) + +SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person1) RETURN properties(n) $$) as (a agtype); + a +------------------------------------------------------------------------------------ + {"id": 1, "bool": true, "__id__": 1, "string": "John Smith", "numeric": 1} + {"id": 2, "bool": false, "__id__": 2, "string": "John", "numeric": -2} + {"id": 3, "bool": true, "__id__": 3, "string": "John Smith", "numeric": 1.4} + {"id": 4, "bool": false, "__id__": 4, "string": "John", "numeric": -10000000000.0} + {"id": 5, "bool": false, "__id__": 5, "string": null, "numeric": 0} + {"id": 6, "bool": false, "__id__": 6, "string": "nUll", "numeric": 3.14} +(6 rows) + +-- vertex: load as string +SELECT create_vlabel('agload_conversion','Person2'); +NOTICE: VLabel "Person2" has been created + create_vlabel +--------------- + +(1 row) + +SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv', true, false); + load_labels_from_file +----------------------- + +(1 row) + +SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person2) RETURN properties(n) $$) as (a agtype); + a +------------------------------------------------------------------------------------- + {"id": "1", "bool": "true", "__id__": 1, "string": "John Smith", "numeric": "1"} + {"id": "2", "bool": "false", "__id__": 2, "string": "John", "numeric": "-2"} + {"id": "3", "bool": "true", "__id__": 3, "string": "John Smith", "numeric": "1.4"} + {"id": "4", "bool": "false", "__id__": 4, "string": "\"John\"", "numeric": "-1e10"} + {"id": "5", "bool": "false", "__id__": 5, "string": "null", "numeric": "0"} + {"id": "6", "bool": "false", "__id__": 6, "string": "nUll", "numeric": "3.14"} +(6 rows) + +-- edge: load as agtype +SELECT create_elabel('agload_conversion','Edges1'); +NOTICE: ELabel "Edges1" has been created + create_elabel +--------------- + +(1 row) + +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.csv', true); + load_edges_from_file +---------------------- + +(1 row) + +SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges1]->() RETURN properties(e) $$) as (a agtype); + a +-------------------------------------------------------------- + {"bool": true, "string": "John Smith", "numeric": 1} + {"bool": false, "string": "John", "numeric": -2} + {"bool": true, "string": "John Smith", "numeric": 1.4} + {"bool": false, "string": "John", "numeric": -10000000000.0} + {"bool": false, "string": null, "numeric": 0} + {"bool": false, "string": "nUll", "numeric": 3.14} +(6 rows) + +-- edge: load as string +SELECT create_elabel('agload_conversion','Edges2'); +NOTICE: ELabel "Edges2" has been created + create_elabel +--------------- + +(1 row) + +SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false); + load_edges_from_file +---------------------- + +(1 row) + +SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype); + a +------------------------------------------------------------- + {"bool": "true", "string": "John Smith", "numeric": "1"} + {"bool": "false", "string": "John", "numeric": "-2"} + {"bool": "true", "string": "John Smith", "numeric": "1.4"} + {"bool": "false", "string": "\"John\"", "numeric": "-1e10"} + {"bool": "false", "string": "null", "numeric": "0"} + {"bool": "false", "string": "nUll", "numeric": "3.14"} +(6 rows) + +SELECT drop_graph('agload_conversion', true); +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table agload_conversion._ag_label_vertex +drop cascades to table agload_conversion._ag_label_edge +drop cascades to table agload_conversion."Person1" +drop cascades to table agload_conversion."Person2" +drop cascades to table agload_conversion."Edges1" +drop cascades to table agload_conversion."Edges2" +NOTICE: graph "agload_conversion" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql index 105c2fa60..cee34f59c 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -79,3 +79,30 @@ SELECT * FROM cypher('agload_test_graph', $$ $$) AS (result_1 agtype, result_2 agtype); SELECT drop_graph('agload_test_graph', true); + +-- +-- Test property type conversion +-- +SELECT create_graph('agload_conversion'); + +-- vertex: load as agtype +SELECT create_vlabel('agload_conversion','Person1'); +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv', true, true); +SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person1) RETURN properties(n) $$) as (a agtype); + +-- vertex: load as string +SELECT create_vlabel('agload_conversion','Person2'); +SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv', true, false); +SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person2) RETURN properties(n) $$) as (a agtype); + +-- edge: load as agtype +SELECT create_elabel('agload_conversion','Edges1'); +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.csv', true); +SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges1]->() RETURN properties(e) $$) as (a agtype); + +-- edge: load as string +SELECT create_elabel('agload_conversion','Edges2'); +SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false); +SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype); + +SELECT drop_graph('agload_conversion', true); diff --git a/sql/age_main.sql b/sql/age_main.sql index 51454793f..18be2a82b 100644 --- a/sql/age_main.sql +++ b/sql/age_main.sql @@ -120,17 +120,23 @@ CREATE FUNCTION ag_catalog.drop_label(graph_name name, label_name name, LANGUAGE c AS 'MODULE_PATHNAME'; +-- +-- If `load_as_agtype` is true, property values are loaded as agtype; otherwise +-- loaded as string. +-- CREATE FUNCTION ag_catalog.load_labels_from_file(graph_name name, label_name name, file_path text, - id_field_exists bool default true) + id_field_exists bool default true, + load_as_agtype bool default false) RETURNS void LANGUAGE c AS 'MODULE_PATHNAME'; CREATE FUNCTION ag_catalog.load_edges_from_file(graph_name name, label_name name, - file_path text) + file_path text, + load_as_agtype bool default false) RETURNS void LANGUAGE c AS 'MODULE_PATHNAME'; diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index e06a46966..32983d653 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -88,7 +88,6 @@ typedef enum /* type categories for datum_to_agtype */ } agt_type_category; static inline Datum agtype_from_cstring(char *str, int len); -static inline agtype_value *agtype_value_from_cstring(char *str, int len); size_t check_string_length(size_t len); static void agtype_in_agtype_annotation(void *pstate, char *annotation); static void agtype_in_object_start(void *pstate); @@ -352,7 +351,7 @@ Datum agtype_out(PG_FUNCTION_ARGS) * Uses the agtype parser (with hooks) to construct an agtype. */ -static inline agtype_value *agtype_value_from_cstring(char *str, int len) +agtype_value *agtype_value_from_cstring(char *str, int len) { agtype_lex_context *lex; agtype_in_state state; diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index 9b5868336..d6ae29ff0 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -105,7 +105,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); props = create_agtype_from_list_i(cr->header, cr->fields, - n_fields, 3); + n_fields, 4, cr->load_as_agtype); insert_edge_simple(cr->graph_id, cr->object_name, object_graph_id, start_vertex_graph_id, @@ -158,7 +158,8 @@ int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_id, char *object_name, - int object_id ) + int object_id, + bool load_as_agtype) { FILE *fp; @@ -195,6 +196,7 @@ int create_edges_from_csv_file(char *file_path, cr.graph_id = graph_id; cr.object_name = object_name; cr.object_id = object_id; + cr.load_as_agtype = load_as_agtype; while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) { diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index a7cc68e7d..6f79071df 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -92,7 +92,8 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) object_graph_id = make_graphid(cr->object_id, label_id_int); props = create_agtype_from_list(cr->header, cr->fields, - n_fields, label_id_int); + n_fields, label_id_int, + cr->load_as_agtype); insert_vertex_simple(cr->graph_id, cr->object_name, object_graph_id, props); pfree(props); @@ -145,7 +146,8 @@ int create_labels_from_csv_file(char *file_path, Oid graph_id, char *object_name, int object_id, - bool id_field_exists) + bool id_field_exists, + bool load_as_agtype) { FILE *fp; @@ -184,6 +186,7 @@ int create_labels_from_csv_file(char *file_path, cr.object_name = object_name; cr.object_id = object_id; cr.id_field_exists = id_field_exists; + cr.load_as_agtype = load_as_agtype; diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index 99c3eaee7..5fa637b6e 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -18,12 +18,16 @@ */ #include "postgres.h" +#include "utils/jsonapi.h" #include "utils/load/age_load.h" #include "utils/load/ag_load_labels.h" #include "utils/load/ag_load_edges.h" -agtype* create_empty_agtype(void) +static agtype_value *csv_value_to_agtype_value(char *csv_val); +static bool json_validate(text *json); + +agtype *create_empty_agtype(void) { agtype* out; agtype_in_state result; @@ -41,8 +45,93 @@ agtype* create_empty_agtype(void) return out; } -agtype* create_agtype_from_list(char **header, char **fields, - size_t fields_len, int64 vertex_id) +/* + * the null action object used for pure validation + * Note: borrowed from PG. + */ +static JsonSemAction nullSemAction = +{ + NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL +}; + +/* + * Validate JSON text. + * + * Note: this function is borrowed from PG16. It is simplified + * by removing two parameters as they are not used in age. + */ +static bool json_validate(text *json) +{ + bool result; + MemoryContext ccxt; + JsonLexContext *lex; + + ccxt = CurrentMemoryContext; + lex = makeJsonLexContext(json, false); + + PG_TRY(); + { + pg_parse_json(lex, &nullSemAction); + result = true; + } + PG_CATCH(); + { + (void) MemoryContextSwitchTo(ccxt); + FlushErrorState(); + result = false; + } + PG_END_TRY(); + + return result; +} + +/* + * Converts the given csv value to an agtype_value. + * + * If csv_val is not a valid json, it is wrapped by double-quotes to make it a + * string value. Because agtype is jsonb-like, the token should be a valid + * json in order to be parsed into an agtype_value of appropriate type. + * Finally, agtype_value_from_cstring() is called for parsing. + */ +static agtype_value *csv_value_to_agtype_value(char *csv_val) +{ + char *new_csv_val; + agtype_value *res; + + if (!json_validate(cstring_to_text(csv_val))) + { + // wrap the string with double-quote + int oldlen; + int newlen; + + oldlen = strlen(csv_val); + newlen = oldlen + 2; // +2 for double-quotes + new_csv_val = (char *)palloc(sizeof(char) * (newlen + 1)); + + new_csv_val[0] = '"'; + strncpy(&new_csv_val[1], csv_val, oldlen); + new_csv_val[oldlen + 1] = '"'; + new_csv_val[oldlen + 2] = '\0'; + } + else + { + new_csv_val = csv_val; + } + + res = agtype_value_from_cstring(new_csv_val, strlen(new_csv_val)); + + // extract from top-level row scalar array + if (res->type == AGTV_ARRAY && res->val.array.raw_scalar) + { + res = &res->val.array.elems[0]; + } + + return res; +} + +agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, + int64 vertex_id, bool load_as_agtype) { agtype* out; agtype_value* key_agtype; @@ -75,7 +164,15 @@ agtype* create_agtype_from_list(char **header, char **fields, WAGT_KEY, key_agtype); - value_agtype = string_to_agtype_value(fields[i]); + if (load_as_agtype) + { + value_agtype = csv_value_to_agtype_value(fields[i]); + } + else + { + value_agtype = string_to_agtype_value(fields[i]); + } + result.res = push_agtype_value(&result.parse_state, WAGT_VALUE, value_agtype); @@ -94,7 +191,8 @@ agtype* create_agtype_from_list(char **header, char **fields, } agtype* create_agtype_from_list_i(char **header, char **fields, - size_t fields_len, size_t start_index) + size_t fields_len, size_t start_index, + bool load_as_agtype) { agtype* out; agtype_value* key_agtype; @@ -117,7 +215,16 @@ agtype* create_agtype_from_list_i(char **header, char **fields, result.res = push_agtype_value(&result.parse_state, WAGT_KEY, key_agtype); - value_agtype = string_to_agtype_value(fields[i]); + + if (load_as_agtype) + { + value_agtype = csv_value_to_agtype_value(fields[i]); + } + else + { + value_agtype = string_to_agtype_value(fields[i]); + } + result.res = push_agtype_value(&result.parse_state, WAGT_VALUE, value_agtype); @@ -214,6 +321,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) Oid graph_id; int32 label_id; bool id_field_exists; + bool load_as_agtype; if (PG_ARGISNULL(0)) { @@ -237,6 +345,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) label_name = PG_GETARG_NAME(1); file_path = PG_GETARG_TEXT_P(2); id_field_exists = PG_GETARG_BOOL(3); + load_as_agtype = PG_GETARG_BOOL(4); graph_name_str = NameStr(*graph_name); @@ -246,9 +355,9 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) graph_id = get_graph_oid(graph_name_str); label_id = get_label_id(label_name_str, graph_id); - create_labels_from_csv_file(file_path_str, graph_name_str, - graph_id, label_name_str, - label_id, id_field_exists); + create_labels_from_csv_file(file_path_str, graph_name_str, graph_id, + label_name_str, label_id, id_field_exists, + load_as_agtype); PG_RETURN_VOID(); } @@ -265,6 +374,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) char* file_path_str; Oid graph_id; int32 label_id; + bool load_as_agtype; if (PG_ARGISNULL(0)) { @@ -287,6 +397,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); file_path = PG_GETARG_TEXT_P(2); + load_as_agtype = PG_GETARG_BOOL(3); graph_name_str = NameStr(*graph_name); label_name_str = NameStr(*label_name); @@ -295,8 +406,8 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) graph_id = get_graph_oid(graph_name_str); label_id = get_label_id(label_name_str, graph_id); - create_edges_from_csv_file(file_path_str, graph_name_str, - graph_id, label_name_str, label_id); + create_edges_from_csv_file(file_path_str, graph_name_str, graph_id, + label_name_str, label_id, load_as_agtype); PG_RETURN_VOID(); } diff --git a/src/include/utils/agtype.h b/src/include/utils/agtype.h index 4ce2762ba..49eeb7b9d 100644 --- a/src/include/utils/agtype.h +++ b/src/include/utils/agtype.h @@ -554,6 +554,7 @@ agtype_iterator *get_next_list_element(agtype_iterator *it, void pfree_agtype_value(agtype_value* value); void pfree_agtype_value_content(agtype_value* value); void pfree_agtype_in_state(agtype_in_state* value); +agtype_value *agtype_value_from_cstring(char *str, int len); /* Oid accessors for AGTYPE */ Oid get_AGTYPEOID(void); diff --git a/src/include/utils/load/ag_load_edges.h b/src/include/utils/load/ag_load_edges.h index e875133fc..57940d459 100644 --- a/src/include/utils/load/ag_load_edges.h +++ b/src/include/utils/load/ag_load_edges.h @@ -43,6 +43,7 @@ typedef struct { int object_id; char *start_vertex; char *end_vertex; + bool load_as_agtype; } csv_edge_reader; @@ -51,7 +52,8 @@ void edge_field_cb(void *field, size_t field_len, void *data); void edge_row_cb(int delim __attribute__((unused)), void *data); int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_id, - char *object_name, int object_id ); + char *object_name, int object_id, + bool load_as_agtype); #endif //AG_LOAD_EDGES_H diff --git a/src/include/utils/load/ag_load_labels.h b/src/include/utils/load/ag_load_labels.h index 75231372a..71fcf97dc 100644 --- a/src/include/utils/load/ag_load_labels.h +++ b/src/include/utils/load/ag_load_labels.h @@ -49,6 +49,7 @@ typedef struct { char *object_name; int object_id; bool id_field_exists; + bool load_as_agtype; } csv_vertex_reader; @@ -57,6 +58,6 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data); int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_id, char *object_name, int object_id, - bool id_field_exists); + bool id_field_exists, bool load_as_agtype); #endif //AG_LOAD_LABELS_H diff --git a/src/include/utils/load/age_load.h b/src/include/utils/load/age_load.h index de33522fa..1c77645d9 100644 --- a/src/include/utils/load/age_load.h +++ b/src/include/utils/load/age_load.h @@ -32,12 +32,14 @@ agtype* create_empty_agtype(void); agtype* create_agtype_from_list(char **header, char **fields, - size_t fields_len, int64 vertex_id); + size_t fields_len, int64 vertex_id, + bool load_as_agtype); agtype* create_agtype_from_list_i(char **header, char **fields, - size_t fields_len, size_t start_index); -void insert_vertex_simple(Oid graph_id, char* label_name, graphid vertex_id, - agtype* vertex_properties); -void insert_edge_simple(Oid graph_id, char* label_name, graphid edge_id, + size_t fields_len, size_t start_index, + bool load_as_agtype); +void insert_vertex_simple(Oid graph_id, char *label_name, graphid vertex_id, + agtype *vertex_properties); +void insert_edge_simple(Oid graph_id, char *label_name, graphid edge_id, graphid start_id, graphid end_id, agtype* end_properties);