diff --git a/cpp/include/gar/utils/writer_utils.h b/cpp/include/gar/utils/writer_utils.h new file mode 100644 index 000000000..c431eb75a --- /dev/null +++ b/cpp/include/gar/utils/writer_utils.h @@ -0,0 +1,41 @@ +/** Copyright 2022 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef GAR_UTILS_WRITER_UTILS_H_ +#define GAR_UTILS_WRITER_UTILS_H_ + +#include "gar/utils/macros.h" + +namespace GAR_NAMESPACE_INTERNAL { + +/** + * @brief The level for validating writing operations. + */ +enum class ValidateLevel : char { + /// To use the default validate level of the writer/builder. + default_validate = 0, + /// To skip the validation. + no_validate = 1, + /// Weak validation: check if the index, count, adj_list type, property group + /// and the size of the table passed to the writer/builder are valid. + weak_validate = 2, + /// Strong validation: besides the weak validation, also check if the + /// schema (including each property name and data type) of the input data + /// passed to the writer/builder is consistent with that defined in the info. + strong_validate = 3 +}; + +} // namespace GAR_NAMESPACE_INTERNAL +#endif // GAR_UTILS_WRITER_UTILS_H_ diff --git a/cpp/include/gar/writer/arrow_chunk_writer.h b/cpp/include/gar/writer/arrow_chunk_writer.h index a438c4b3e..51f8ce13b 100644 --- a/cpp/include/gar/writer/arrow_chunk_writer.h +++ b/cpp/include/gar/writer/arrow_chunk_writer.h @@ -27,6 +27,7 @@ limitations under the License. 
#include "gar/utils/result.h" #include "gar/utils/status.h" #include "gar/utils/utils.h" +#include "gar/utils/writer_utils.h" // forward declaration namespace arrow { @@ -35,19 +36,26 @@ class Table; namespace GAR_NAMESPACE_INTERNAL { -/** - * @brief The level for validating writing operations. - */ -enum class ValidateLevel : char { - default_validate = 0, - no_validate = 1, - weak_validate = 2, - strong_validate = 3 -}; - /** * @brief The writer for vertex property group chunks. * + * Notes: For each writing operation, a validate_level could be set, which will + * be used to validate the data before writing. The validate_level could be: + * + * ValidateLevel::default_validate: to use the validate_level of the writer, + * which is set through the constructor or the SetValidateLevel method; + * + * ValidateLevel::no_validate: without validation; + * + * ValidateLevel::weak_validate: to validate if the vertex count or vertex chunk + * index is non-negative, the property group exists and the size of input_table + * is not larger than the vertex chunk size; + * + * ValidateLevel::strong_validate: besides weak_validate, also validate the + * schema of input_table is consistent with that of property group; for writing + * operations without input_table, such as writing vertices number or copying + * file, the strong_validate is the same as weak_validate. + * */ class VertexPropertyWriter { public: @@ -56,7 +64,10 @@ class VertexPropertyWriter { * * @param vertex_info The vertex info that describes the vertex type. * @param prefix The absolute prefix. - * @param validate_level The validate level, with no validate by default. + * @param validate_level The global validate level for the writer, with no + * validate by default. It could be ValidateLevel::no_validate, + * ValidateLevel::weak_validate or ValidateLevel::strong_validate, but could + * not be ValidateLevel::default_validate. 
*/ VertexPropertyWriter( const VertexInfo& vertex_info, const std::string& prefix, @@ -64,6 +75,11 @@ class VertexPropertyWriter { : vertex_info_(vertex_info), prefix_(prefix), validate_level_(validate_level) { + if (validate_level_ == ValidateLevel::default_validate) { + throw std::runtime_error( + "default_validate is not allowed to be set as the global validate " + "level for VertexPropertyWriter"); + } GAR_ASSIGN_OR_RAISE_ERROR(fs_, FileSystemFromUriOrPath(prefix, &prefix_)); } @@ -73,6 +89,9 @@ class VertexPropertyWriter { * @param validate_level The validate level to set. */ inline void SetValidateLevel(const ValidateLevel& validate_level) { + if (validate_level == ValidateLevel::default_validate) { + return; + } validate_level_ = validate_level; } @@ -83,28 +102,17 @@ class VertexPropertyWriter { */ inline ValidateLevel GetValidateLevel() const { return validate_level_; } - /** - * @brief Check if the write opeartion is allowed. - * - * @param input_table The input table containing data. - * @param property_group The property group to write. - * @param chunk_index The index of the vertex chunk. - * @param validate_level The validate level for this operation, - * which is the writer's validate level by default. - * @return Status: ok or error. - */ - Status Validate(const std::shared_ptr& input_table, - const PropertyGroup& property_group, IdType chunk_index, - ValidateLevel validate_level = - ValidateLevel::default_validate) const noexcept; - /** * @brief Write the number of vertices into the file. * * @param count The number of vertices. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ - Status WriteVerticesNum(const IdType& count) const noexcept; + Status WriteVerticesNum(const IdType& count, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; /** * @brief Copy a file as a vertex property group chunk. 
@@ -112,11 +120,14 @@ class VertexPropertyWriter { * @param file_name The file to copy. * @param property_group The property group. * @param chunk_index The index of the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ Status WriteChunk(const std::string& file_name, - const PropertyGroup& property_group, - IdType chunk_index) const noexcept; + const PropertyGroup& property_group, IdType chunk_index, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; /** * @brief Validate and write a single property group for a * single vertex chunk. @@ -124,11 +135,14 @@ class VertexPropertyWriter { * @param input_table The table containing data. * @param property_group The property group. * @param chunk_index The index of the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ Status WriteChunk(const std::shared_ptr& input_table, - const PropertyGroup& property_group, - IdType chunk_index) const noexcept; + const PropertyGroup& property_group, IdType chunk_index, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; /** * @brief Write all property groups of a single vertex chunk @@ -136,10 +150,14 @@ class VertexPropertyWriter { * * @param input_table The table containing data. * @param chunk_index The index of the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. 
*/ - Status WriteChunk(const std::shared_ptr& input_table, - IdType chunk_index) const noexcept; + Status WriteChunk( + const std::shared_ptr& input_table, IdType chunk_index, + ValidateLevel validate_level = ValidateLevel::default_validate) const + noexcept; /** * @brief Write a single property group for multiple vertex chunks @@ -148,11 +166,15 @@ class VertexPropertyWriter { * @param input_table The table containing data. * @param property_group The property group. * @param start_chunk_index The start index of the vertex chunks. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ - Status WriteTable(const std::shared_ptr& input_table, - const PropertyGroup& property_group, - IdType start_chunk_index) const noexcept; + Status WriteTable( + const std::shared_ptr& input_table, + const PropertyGroup& property_group, IdType start_chunk_index, + ValidateLevel validate_level = ValidateLevel::default_validate) const + noexcept; /** * @brief Write all property groups for multiple vertex chunks @@ -160,10 +182,49 @@ class VertexPropertyWriter { * * @param input_table The table containing data. * @param start_chunk_index The start index of the vertex chunks. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ Status WriteTable(const std::shared_ptr& input_table, - IdType start_chunk_index) const noexcept; + IdType start_chunk_index, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; + + private: + /** + * @brief Check if the operation of writing vertices number is allowed. + * + * @param count The number of vertices. + * @param validate_level The validate level for this operation. + * @return Status: ok or error. 
+ */ + Status validate(const IdType& count, ValidateLevel validate_level) const + noexcept; + + /** + * @brief Check if the operation of copying a file as a chunk is allowed. + * + * @param property_group The property group to write. + * @param chunk_index The index of the vertex chunk. + * @param validate_level The validate level for this operation. + * @return Status: ok or error. + */ + Status validate(const PropertyGroup& property_group, IdType chunk_index, + ValidateLevel validate_level) const noexcept; + + /** + * @brief Check if the operation of writing a table as a chunk is allowed. + * + * @param input_table The input table containing data. + * @param property_group The property group to write. + * @param chunk_index The index of the vertex chunk. + * @param validate_level The validate level for this operation. + * @return Status: ok or error. + */ + Status validate(const std::shared_ptr& input_table, + const PropertyGroup& property_group, IdType chunk_index, + ValidateLevel validate_level) const noexcept; private: VertexInfo vertex_info_; @@ -175,6 +236,24 @@ class VertexPropertyWriter { /** * @brief The writer for edge (adj list, offset and property group) chunks. * + * Notes: For each writing operation, a validate_level could be set, which will + * be used to validate the data before writing. 
The validate_level could be: + * + * ValidateLevel::default_validate: to use the validate_level of the writer, + * which is set through the constructor or the SetValidateLevel method; + * + * ValidateLevel::no_validate: without validation; + * + * ValidateLevel::weak_validate: to validate if the vertex/edge count or + * vertex/edge chunk index is non-negative, the adj_list type is valid, the + * property group exists and the size of input_table is not larger than the + * chunk size; + * + * ValidateLevel::strong_validate: besides weak_validate, also validate the + * schema of input_table is consistent with that of property group; for writing + * operations without input_table, such as writing vertices/edges number or + * copying file, the strong_validate is the same as weak_validate. + * */ class EdgeChunkWriter { public: @@ -184,7 +263,10 @@ class EdgeChunkWriter { * @param edge_info The edge info that describes the edge type. * @param prefix The absolute prefix. * @param adj_list_type The adj list type for the edges. - * @param validate_level The validate level, with no validate by default. + * @param validate_level The global validate level for the writer, with no + * validate by default. It could be ValidateLevel::no_validate, + * ValidateLevel::weak_validate or ValidateLevel::strong_validate, but could + * not be ValidateLevel::default_validate. 
*/ EdgeChunkWriter( const EdgeInfo& edge_info, const std::string& prefix, @@ -193,6 +275,11 @@ class EdgeChunkWriter { : edge_info_(edge_info), adj_list_type_(adj_list_type), validate_level_(validate_level) { + if (validate_level_ == ValidateLevel::default_validate) { + throw std::runtime_error( + "default_validate is not allowed to be set as the global validate " + "level for EdgeChunkWriter"); + } GAR_ASSIGN_OR_RAISE_ERROR(fs_, FileSystemFromUriOrPath(prefix, &prefix_)); chunk_size_ = edge_info_.GetChunkSize(); switch (adj_list_type) { @@ -219,6 +306,9 @@ class EdgeChunkWriter { * @param validate_level The validate level to set. */ void SetValidateLevel(const ValidateLevel& validate_level) { + if (validate_level == ValidateLevel::default_validate) { + return; + } validate_level_ = validate_level; } @@ -230,89 +320,58 @@ class EdgeChunkWriter { inline ValidateLevel GetValidateLevel() const { return validate_level_; } /** - * @brief Check if the writer operation for offset is allowed. + * @brief Write the number of edges into the file. * - * @param input_table The input table containing data. * @param vertex_chunk_index The index of the vertex chunk. + * @param count The number of edges. * @param validate_level The validate level for this operation, * which is the writer's validate level by default. * @return Status: ok or error. */ - Status Validate(const std::shared_ptr& input_table, - IdType vertex_chunk_index, - ValidateLevel validate_level = - ValidateLevel::default_validate) const noexcept; + Status WriteEdgesNum(IdType vertex_chunk_index, const IdType& count, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; /** - * @brief Check if the writer operation for adj list is allowed. + * @brief Write the number of vertices into the file. * - * @param input_table The input table containing data. - * @param vertex_chunk_index The index of the vertex chunk. - * @param chunk_index The index of the edge chunk inside the vertex chunk. 
+ * @param count The number of vertices. * @param validate_level The validate level for this operation, * which is the writer's validate level by default. * @return Status: ok or error. */ - Status Validate(const std::shared_ptr& input_table, - IdType vertex_chunk_index, IdType chunk_index, - ValidateLevel validate_level = - ValidateLevel::default_validate) const noexcept; + Status WriteVerticesNum(const IdType& count, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; /** - * @brief Check if the writer operation (for property group) is allowed. + * @brief Copy a file as a offset chunk. * - * @param input_table The input table containing data. - * @param property_group The property group to write. + * @param file_name The file to copy. * @param vertex_chunk_index The index of the vertex chunk. - * @param chunk_index The index of the edge chunk inside the vertex chunk. * @param validate_level The validate level for this operation, * which is the writer's validate level by default. * @return Status: ok or error. */ - Status Validate(const std::shared_ptr& input_table, - const PropertyGroup& property_group, - IdType vertex_chunk_index, IdType chunk_index, - ValidateLevel validate_level = - ValidateLevel::default_validate) const noexcept; - - /** - * @brief Write the number of edges into the file. - * - * @param count The number of edges. - * @return Status: ok or error. - */ - Status WriteEdgesNum(IdType vertex_chunk_index, const IdType& count) const + Status WriteOffsetChunk( + const std::string& file_name, IdType vertex_chunk_index, + ValidateLevel validate_level = ValidateLevel::default_validate) const noexcept; - /** - * @brief Write the number of vertices into the file. - * - * @param count The number of vertices. - * @return Status: ok or error. - */ - Status WriteVerticesNum(const IdType& count) const noexcept; - - /** - * @brief Copy a file as a offset chunk. - * - * @param file_name The file to copy. 
- * @param vertex_chunk_index The index of the vertex chunk. - * @return Status: ok or error. - */ - Status WriteOffsetChunk(const std::string& file_name, - IdType vertex_chunk_index) const noexcept; - /** * @brief Copy a file as an adj list chunk. * * @param file_name The file to copy. * @param vertex_chunk_index The index of the vertex chunk. * @param chunk_index The index of the edge chunk inside the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ Status WriteAdjListChunk(const std::string& file_name, - IdType vertex_chunk_index, IdType chunk_index) const - noexcept; + IdType vertex_chunk_index, IdType chunk_index, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; /** * @brief Copy a file as an edge property group chunk. @@ -321,11 +380,14 @@ class EdgeChunkWriter { * @param property_group The property group to write. * @param vertex_chunk_index The index of the vertex chunk. * @param chunk_index The index of the edge chunk inside the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ - Status WritePropertyChunk(const std::string& file_name, - const PropertyGroup& property_group, - IdType vertex_chunk_index, IdType chunk_index) const + Status WritePropertyChunk( + const std::string& file_name, const PropertyGroup& property_group, + IdType vertex_chunk_index, IdType chunk_index, + ValidateLevel validate_level = ValidateLevel::default_validate) const noexcept; /** @@ -333,10 +395,14 @@ class EdgeChunkWriter { * * @param input_table The table containing data. * @param vertex_chunk_index The index of the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. 
*/ Status WriteOffsetChunk(const std::shared_ptr& input_table, - IdType vertex_chunk_index) const noexcept; + IdType vertex_chunk_index, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; /** * @brief Validate and write the adj list chunk for an edge chunk. @@ -344,11 +410,14 @@ class EdgeChunkWriter { * @param input_table The table containing data. * @param vertex_chunk_index The index of the vertex chunk. * @param chunk_index The index of the edge chunk inside the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ Status WriteAdjListChunk(const std::shared_ptr& input_table, - IdType vertex_chunk_index, IdType chunk_index) const - noexcept; + IdType vertex_chunk_index, IdType chunk_index, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; /** * @brief Validate and write a single edge property group for an edge chunk. @@ -357,12 +426,15 @@ class EdgeChunkWriter { * @param property_group The property group to write. * @param vertex_chunk_index The index of the vertex chunk. * @param chunk_index The index of the edge chunk inside the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ Status WritePropertyChunk(const std::shared_ptr& input_table, const PropertyGroup& property_group, - IdType vertex_chunk_index, IdType chunk_index) const - noexcept; + IdType vertex_chunk_index, IdType chunk_index, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; /** * @brief Write all edge property groups for an edge chunk. @@ -370,11 +442,14 @@ class EdgeChunkWriter { * @param input_table The table containing data. * @param vertex_chunk_index The index of the vertex chunk. * @param chunk_index The index of the edge chunk inside the vertex chunk. 
+ * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ Status WritePropertyChunk(const std::shared_ptr& input_table, - IdType vertex_chunk_index, IdType chunk_index) const - noexcept; + IdType vertex_chunk_index, IdType chunk_index, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; /** * @brief Write the adj list and all property groups for an edge chunk. @@ -382,11 +457,14 @@ class EdgeChunkWriter { * @param input_table The table containing data. * @param vertex_chunk_index The index of the vertex chunk. * @param chunk_index The index of the edge chunk inside the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ Status WriteChunk(const std::shared_ptr& input_table, - IdType vertex_chunk_index, IdType chunk_index) const - noexcept; + IdType vertex_chunk_index, IdType chunk_index, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; /** * @brief Write the adj list chunks for the edges of a vertex chunk. @@ -395,11 +473,15 @@ class EdgeChunkWriter { * @param vertex_chunk_index The index of the vertex chunk. * @param start_chunk_index The start index of the edge chunks inside * the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. 
*/ - Status WriteAdjListTable(const std::shared_ptr& input_table, - IdType vertex_chunk_index, - IdType start_chunk_index = 0) const noexcept; + Status WriteAdjListTable( + const std::shared_ptr& input_table, + IdType vertex_chunk_index, IdType start_chunk_index = 0, + ValidateLevel validate_level = ValidateLevel::default_validate) const + noexcept; /** * @brief Write chunks of a single property group for the edges of a @@ -410,12 +492,16 @@ class EdgeChunkWriter { * @param vertex_chunk_index The index of the vertex chunk. * @param start_chunk_index The start index of the edge chunks inside * the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ - Status WritePropertyTable(const std::shared_ptr& input_table, - const PropertyGroup& property_group, - IdType vertex_chunk_index, - IdType start_chunk_index = 0) const noexcept; + Status WritePropertyTable( + const std::shared_ptr& input_table, + const PropertyGroup& property_group, IdType vertex_chunk_index, + IdType start_chunk_index = 0, + ValidateLevel validate_level = ValidateLevel::default_validate) const + noexcept; /** * @brief Write chunks of all property groups for the edges of a vertex @@ -425,11 +511,15 @@ class EdgeChunkWriter { * @param vertex_chunk_index The index of the vertex chunk. * @param start_chunk_index The start index of the edge chunks inside * the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. 
*/ - Status WritePropertyTable(const std::shared_ptr& input_table, - IdType vertex_chunk_index, - IdType start_chunk_index = 0) const noexcept; + Status WritePropertyTable( + const std::shared_ptr& input_table, + IdType vertex_chunk_index, IdType start_chunk_index = 0, + ValidateLevel validate_level = ValidateLevel::default_validate) const + noexcept; /** * @brief Write chunks of the adj list and all property groups for the @@ -439,11 +529,14 @@ class EdgeChunkWriter { * @param vertex_chunk_index The index of the vertex chunk. * @param start_chunk_index The start index of the edge chunks inside * the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ Status WriteTable(const std::shared_ptr& input_table, - IdType vertex_chunk_index, - IdType start_chunk_index = 0) const noexcept; + IdType vertex_chunk_index, IdType start_chunk_index = 0, + ValidateLevel validate_level = + ValidateLevel::default_validate) const noexcept; /** * @brief Sort the edges, and write the adj list chunks for the edges of a @@ -453,11 +546,15 @@ class EdgeChunkWriter { * @param vertex_chunk_index The index of the vertex chunk. * @param start_chunk_index The start index of the edge chunks inside * the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ Status SortAndWriteAdjListTable( const std::shared_ptr& input_table, - IdType vertex_chunk_index, IdType start_chunk_index = 0) const noexcept; + IdType vertex_chunk_index, IdType start_chunk_index = 0, + ValidateLevel validate_level = ValidateLevel::default_validate) const + noexcept; /** * @brief Sort the edges, and write chunks of a single property group for the @@ -468,12 +565,16 @@ class EdgeChunkWriter { * @param vertex_chunk_index The index of the vertex chunk. 
* @param start_chunk_index The start index of the edge chunks inside * the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ Status SortAndWritePropertyTable( const std::shared_ptr& input_table, const PropertyGroup& property_group, IdType vertex_chunk_index, - IdType start_chunk_index = 0) const noexcept; + IdType start_chunk_index = 0, + ValidateLevel validate_level = ValidateLevel::default_validate) const + noexcept; /** * @brief Sort the edges, and write chunks of all property groups for the @@ -483,11 +584,15 @@ class EdgeChunkWriter { * @param vertex_chunk_index The index of the vertex chunk. * @param start_chunk_index The start index of the edge chunks inside * the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. */ Status SortAndWritePropertyTable( const std::shared_ptr& input_table, - IdType vertex_chunk_index, IdType start_chunk_index = 0) const noexcept; + IdType vertex_chunk_index, IdType start_chunk_index = 0, + ValidateLevel validate_level = ValidateLevel::default_validate) const + noexcept; /** * @brief Sort the edges, and write chunks of the adj list and all property @@ -497,13 +602,86 @@ class EdgeChunkWriter { * @param vertex_chunk_index The index of the vertex chunk. * @param start_chunk_index The start index of the edge chunks inside * the vertex chunk. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or error. 
*/ - Status SortAndWriteTable(const std::shared_ptr& input_table, - IdType vertex_chunk_index, - IdType start_chunk_index = 0) const noexcept; + Status SortAndWriteTable( + const std::shared_ptr& input_table, + IdType vertex_chunk_index, IdType start_chunk_index = 0, + ValidateLevel validate_level = ValidateLevel::default_validate) const + noexcept; private: + /** + * @brief Check if the operation of writing number or copying a file is + * allowed. + * + * @param count_or_index1 The first count or index used by the operation. + * @param count_or_index2 The second count or index used by the operation. + * @param validate_level The validate level for this operation. + * @return Status: ok or error. + */ + Status validate(IdType count_or_index1, IdType count_or_index2, + ValidateLevel validate_level) const noexcept; + + /** + * @brief Check if the operation of copying a file as a property chunk is + * allowed. + * + * @param property_group The property group to write. + * @param vertex_chunk_index The index of the vertex chunk. + * @param chunk_index The index of the edge chunk inside the vertex chunk. + * @param validate_level The validate level for this operation. + * @return Status: ok or error. + */ + Status validate(const PropertyGroup& property_group, + IdType vertex_chunk_index, IdType chunk_index, + ValidateLevel validate_level) const noexcept; + + /** + * @brief Check if the operation of writing a table as an offset chunk is + * allowed. + * + * @param input_table The input table containing data. + * @param vertex_chunk_index The index of the vertex chunk. + * @param validate_level The validate level for this operation. + * @return Status: ok or error. + */ + Status validate(const std::shared_ptr& input_table, + IdType vertex_chunk_index, ValidateLevel validate_level) const + noexcept; + + /** + * @brief Check if the operation of writing a table as an adj list chunk is + * allowed. + * + * @param input_table The input table containing data. 
+ * @param vertex_chunk_index The index of the vertex chunk. + * @param chunk_index The index of the edge chunk inside the vertex chunk. + * @param validate_level The validate level for this operation. + * @return Status: ok or error. + */ + Status validate(const std::shared_ptr& input_table, + IdType vertex_chunk_index, IdType chunk_index, + ValidateLevel validate_level) const noexcept; + + /** + * @brief Check if the operation of writing a table as a property chunk is + * allowed. + * + * @param input_table The input table containing data. + * @param property_group The property group to write. + * @param vertex_chunk_index The index of the vertex chunk. + * @param chunk_index The index of the edge chunk inside the vertex chunk. + * @param validate_level The validate level for this operation. + * @return Status: ok or error. + */ + Status validate(const std::shared_ptr& input_table, + const PropertyGroup& property_group, + IdType vertex_chunk_index, IdType chunk_index, + ValidateLevel validate_level) const noexcept; + /** * @brief Construct the offset table. * diff --git a/cpp/include/gar/writer/edges_builder.h b/cpp/include/gar/writer/edges_builder.h index 16eb06095..b26dccc9a 100644 --- a/cpp/include/gar/writer/edges_builder.h +++ b/cpp/include/gar/writer/edges_builder.h @@ -147,7 +147,10 @@ class EdgesBuilder { * @param prefix The absolute prefix. * @param adj_list_type The adj list type of the edges. * @param num_vertices The total number of vertices for source or destination. - * @param validate_level The validate level, with no validate by default. + * @param validate_level The global validate level for the writer, with no + * validate by default. It could be ValidateLevel::no_validate, + * ValidateLevel::weak_validate or ValidateLevel::strong_validate, but could + * not be ValidateLevel::default_validate. 
*/ explicit EdgesBuilder( const EdgeInfo edge_info, const std::string& prefix, @@ -158,6 +161,11 @@ class EdgesBuilder { adj_list_type_(adj_list_type), num_vertices_(num_vertices), validate_level_(validate_level) { + if (validate_level_ == ValidateLevel::default_validate) { + throw std::runtime_error( + "default_validate is not allowed to be set as the global validate " + "level for EdgesBuilder"); + } edges_.clear(); num_edges_ = 0; is_saved_ = false; @@ -185,6 +193,9 @@ class EdgesBuilder { * @param validate_level The validate level to set. */ inline void SetValidateLevel(const ValidateLevel& validate_level) { + if (validate_level == ValidateLevel::default_validate) { + return; + } validate_level_ = validate_level; } @@ -196,46 +207,39 @@ class EdgesBuilder { inline ValidateLevel GetValidateLevel() const { return validate_level_; } /** - * @brief Check if adding an edge is allowed. - * - * @param e The edge to add. - * @param validate_level The validate level for this operation, - * which is the writer's validate level by default. - * @return Status: ok or status::InvalidOperation error. - */ - Status Validate(const Edge& e, ValidateLevel validate_level = - ValidateLevel::default_validate) const; - - /** - * @brief Get the vertex chunk index of a given edge. - * - * @param e The edge to add. - * @return The vertex chunk index of the edge. + * @brief Clear the edges in this EdgesBuilder. 
*/ - IdType getVertexChunkIndex(const Edge& e) { - switch (adj_list_type_) { - case AdjListType::unordered_by_source: - return e.GetSource() / vertex_chunk_size_; - case AdjListType::ordered_by_source: - return e.GetSource() / vertex_chunk_size_; - case AdjListType::unordered_by_dest: - return e.GetDestination() / vertex_chunk_size_; - case AdjListType::ordered_by_dest: - return e.GetDestination() / vertex_chunk_size_; - default: - return e.GetSource() / vertex_chunk_size_; - } + inline void Clear() { + edges_.clear(); + num_edges_ = 0; + is_saved_ = false; } /** * @brief Add an edge to the collection. * + * The validate_level for this operation could be: + * + * ValidateLevel::default_validate: to use the validate_level of the builder, + * which set through the constructor or the SetValidateLevel method; + * + * ValidateLevel::no_validate: without validation; + * + * ValidateLevel::weak_validate: to validate if the adj_list type is valid, + * and the data in builder is not saved; + * + * ValidateLevel::strong_validate: besides weak_validate, also validate the + * schema of the edge is consistent with the info defined. + * * @param e The edge to add. + * @param validate_level The validate level for this operation, + * which is the builder's validate level by default. * @return Status: ok or Status::InvalidOperation error. */ - Status AddEdge(const Edge& e) { + Status AddEdge(const Edge& e, const ValidateLevel& validate_level = + ValidateLevel::default_validate) { // validate - GAR_RETURN_NOT_OK(Validate(e)); + GAR_RETURN_NOT_OK(validate(e, validate_level)); // add an edge IdType vertex_chunk_index = getVertexChunkIndex(e); edges_[vertex_chunk_index].push_back(e); @@ -313,6 +317,36 @@ class EdgesBuilder { } private: + /** + * @brief Get the vertex chunk index of a given edge. + * + * @param e The edge to add. + * @return The vertex chunk index of the edge. 
+ */ + IdType getVertexChunkIndex(const Edge& e) { + switch (adj_list_type_) { + case AdjListType::unordered_by_source: + return e.GetSource() / vertex_chunk_size_; + case AdjListType::ordered_by_source: + return e.GetSource() / vertex_chunk_size_; + case AdjListType::unordered_by_dest: + return e.GetDestination() / vertex_chunk_size_; + case AdjListType::ordered_by_dest: + return e.GetDestination() / vertex_chunk_size_; + default: + return e.GetSource() / vertex_chunk_size_; + } + } + + /** + * @brief Check if adding an edge is allowed. + * + * @param e The edge to add. + * @param validate_level The validate level for this operation. + * @return Status: ok or status::InvalidOperation error. + */ + Status validate(const Edge& e, ValidateLevel validate_level) const; + /** * @brief Construct an array for a given property. * diff --git a/cpp/include/gar/writer/vertices_builder.h b/cpp/include/gar/writer/vertices_builder.h index 97bf8d1f9..02ba9f312 100644 --- a/cpp/include/gar/writer/vertices_builder.h +++ b/cpp/include/gar/writer/vertices_builder.h @@ -129,7 +129,10 @@ class VerticesBuilder { * @param vertex_info The vertex info that describes the vertex type. * @param prefix The absolute prefix. * @param start_vertex_index The start index of the vertices collection. - * @param validate_level The validate level, with no validate by default. + * @param validate_level The global validate level for the writer, with no + * validate by default. It could be ValidateLevel::no_validate, + * ValidateLevel::weak_validate or ValidateLevel::strong_validate, but could + * not be ValidateLevel::default_validate. 
*/ explicit VerticesBuilder( const VertexInfo& vertex_info, const std::string& prefix, @@ -139,6 +142,20 @@ class VerticesBuilder { prefix_(prefix), start_vertex_index_(start_vertex_index), validate_level_(validate_level) { + if (validate_level_ == ValidateLevel::default_validate) { + throw std::runtime_error( + "default_validate is not allowed to be set as the global validate " + "level for VerticesBuilder"); + } + vertices_.clear(); + num_vertices_ = 0; + is_saved_ = false; + } + + /** + * @brief Clear the vertices in this VerticesBuilder. + */ + inline void Clear() { vertices_.clear(); num_vertices_ = 0; is_saved_ = false; @@ -150,6 +167,9 @@ class VerticesBuilder { * @param validate_level The validate level to set. */ inline void SetValidateLevel(const ValidateLevel& validate_level) { + if (validate_level == ValidateLevel::default_validate) { + return; + } validate_level_ = validate_level; } @@ -160,29 +180,33 @@ class VerticesBuilder { */ inline ValidateLevel GetValidateLevel() const { return validate_level_; } - /** - * @brief Check if adding a vertex with the given index is allowed. - * - * @param v The vertex to add. - * @param index The given index, -1 means the next unused index. - * @param validate_level The validate level for this operation, - * which is the writer's validate level by default. - * @return Status: ok or Status::InvalidOperation error. - */ - Status Validate( - const Vertex& v, IdType index = -1, - ValidateLevel validate_level = ValidateLevel::default_validate) const; - /** * @brief Add a vertex with the given index.
* + * The validate_level for this operation could be: + * + * ValidateLevel::default_validate: to use the validate_level of the builder, + * which set through the constructor or the SetValidateLevel method; + * + * ValidateLevel::no_validate: without validation; + * + * ValidateLevel::weak_validate: to validate if the start index and the vertex + * index is valid, and the data in builder is not saved; + * + * ValidateLevel::strong_validate: besides weak_validate, also validate the + * schema of the vertex is consistent with the info defined. + * * @param v The vertex to add. * @param index The given index, -1 means the next unused index. + * @param validate_level The validate level for this operation, + * which is the builder's validate level by default. * @return Status: ok or Status::InvalidOperation error. */ - Status AddVertex(Vertex& v, IdType index = -1) { // NOLINT + Status AddVertex( + Vertex& v, IdType index = -1, // NOLINT + ValidateLevel validate_level = ValidateLevel::default_validate) { // validate - GAR_RETURN_NOT_OK(Validate(v, index)); + GAR_RETURN_NOT_OK(validate(v, index, validate_level)); // add a vertex if (index == -1) { v.SetId(vertices_.size()); @@ -226,6 +250,17 @@ class VerticesBuilder { } private: + /** + * @brief Check if adding a vertex with the given index is allowed. + * + * @param v The vertex to add. + * @param index The given index, -1 means the next unused index. + * @param validate_level The validate level for this operation. + * @return Status: ok or Status::InvalidOperation error. + */ + Status validate(const Vertex& v, IdType index, + ValidateLevel validate_level) const; + /** * @brief Construct an array for a given property. 
* diff --git a/cpp/src/arrow_chunk_writer.cc b/cpp/src/arrow_chunk_writer.cc index 131830e7e..aa66916c2 100644 --- a/cpp/src/arrow_chunk_writer.cc +++ b/cpp/src/arrow_chunk_writer.cc @@ -90,7 +90,47 @@ Result> ExecutePlanAndCollectAsTable( // implementations for VertexPropertyChunkWriter -Status VertexPropertyWriter::Validate( +// Check if the operation of writing vertices number is allowed. +Status VertexPropertyWriter::validate(const IdType& count, + ValidateLevel validate_level) const + noexcept { + // use the writer's validate level + if (validate_level == ValidateLevel::default_validate) + validate_level = validate_level_; + // no validate + if (validate_level == ValidateLevel::no_validate) + return Status::OK(); + // weak & strong validate + if (count < 0) { + return Status::InvalidOperation( + "the number of vertices must be non-negative"); + } + return Status::OK(); +} + +// Check if the operation of copying a file as a chunk is allowed. +Status VertexPropertyWriter::validate(const PropertyGroup& property_group, + IdType chunk_index, + ValidateLevel validate_level) const + noexcept { + // use the writer's validate level + if (validate_level == ValidateLevel::default_validate) + validate_level = validate_level_; + // no validate + if (validate_level == ValidateLevel::no_validate) + return Status::OK(); + // weak & strong validate + if (!vertex_info_.ContainPropertyGroup(property_group)) { + return Status::InvalidOperation( + "the property group does not exist in the vertex info"); + } + if (chunk_index < 0) + return Status::InvalidOperation("invalid vertex chunk index"); + return Status::OK(); +} + +// Check if the operation of writing a table as a chunk is allowed.
+Status VertexPropertyWriter::validate( const std::shared_ptr& input_table, const PropertyGroup& property_group, IdType chunk_index, ValidateLevel validate_level) const noexcept { @@ -100,24 +140,23 @@ Status VertexPropertyWriter::Validate( // no validate if (validate_level == ValidateLevel::no_validate) return Status::OK(); - // weak validate - if (input_table->num_rows() > vertex_info_.GetChunkSize()) + // validate property_group & chunk_index + GAR_RETURN_NOT_OK(validate(property_group, chunk_index, validate_level)); + // weak validate for the input_table + if (input_table->num_rows() > vertex_info_.GetChunkSize()) { return Status::OutOfRange( "the number of rows in the input table is larger than the vertex chunk " "size"); - if (!vertex_info_.ContainPropertyGroup(property_group)) - return Status::InvalidOperation( - "the property group does not exist in the vertex info"); - if (chunk_index < 0) - return Status::InvalidOperation("invalid vertex chunk index"); - // strong validate + } + // strong validate for the input_table if (validate_level == ValidateLevel::strong_validate) { auto schema = input_table->schema(); for (auto& property : property_group.GetProperties()) { int indice = schema->GetFieldIndex(property.name); - if (indice == -1) + if (indice == -1) { return Status::InvalidOperation("property: " + property.name + " not found"); + } auto field = schema->field(indice); if (DataType::ArrowDataTypeToDataType(field->type()) != property.type) { std::string err_msg = @@ -131,8 +170,9 @@ Status VertexPropertyWriter::Validate( return Status::OK(); } -Status VertexPropertyWriter::WriteVerticesNum(const IdType& count) const - noexcept { +Status VertexPropertyWriter::WriteVerticesNum( + const IdType& count, ValidateLevel validate_level) const noexcept { + GAR_RETURN_NOT_OK(validate(count, validate_level)); GAR_ASSIGN_OR_RAISE(auto suffix, vertex_info_.GetVerticesNumFilePath()); std::string path = prefix_ + suffix; return fs_->WriteValueToFile(count, path); @@ 
-140,7 +180,10 @@ Status VertexPropertyWriter::WriteVerticesNum(const IdType& count) const Status VertexPropertyWriter::WriteChunk(const std::string& file_name, const PropertyGroup& property_group, - IdType chunk_index) const noexcept { + IdType chunk_index, + ValidateLevel validate_level) const + noexcept { + GAR_RETURN_NOT_OK(validate(property_group, chunk_index, validate_level)); GAR_ASSIGN_OR_RAISE(auto suffix, vertex_info_.GetFilePath(property_group, chunk_index)); std::string path = prefix_ + suffix; @@ -149,8 +192,10 @@ Status VertexPropertyWriter::WriteChunk(const std::string& file_name, Status VertexPropertyWriter::WriteChunk( const std::shared_ptr& input_table, - const PropertyGroup& property_group, IdType chunk_index) const noexcept { - GAR_RETURN_NOT_OK(Validate(input_table, property_group, chunk_index)); + const PropertyGroup& property_group, IdType chunk_index, + ValidateLevel validate_level) const noexcept { + GAR_RETURN_NOT_OK( + validate(input_table, property_group, chunk_index, validate_level)); auto file_type = property_group.GetFileType(); std::vector indices; @@ -174,92 +219,137 @@ Status VertexPropertyWriter::WriteChunk( } Status VertexPropertyWriter::WriteChunk( - const std::shared_ptr& input_table, IdType chunk_index) const - noexcept { + const std::shared_ptr& input_table, IdType chunk_index, + ValidateLevel validate_level) const noexcept { auto property_groups = vertex_info_.GetPropertyGroups(); for (auto& property_group : property_groups) { - GAR_RETURN_NOT_OK(WriteChunk(input_table, property_group, chunk_index)); + GAR_RETURN_NOT_OK( + WriteChunk(input_table, property_group, chunk_index, validate_level)); } return Status::OK(); } Status VertexPropertyWriter::WriteTable( const std::shared_ptr& input_table, - const PropertyGroup& property_group, IdType start_chunk_index) const - noexcept { + const PropertyGroup& property_group, IdType start_chunk_index, + ValidateLevel validate_level) const noexcept { IdType chunk_size = 
vertex_info_.GetChunkSize(); int64_t length = input_table->num_rows(); IdType chunk_index = start_chunk_index; for (int64_t offset = 0; offset < length; offset += chunk_size, chunk_index++) { auto in_chunk = input_table->Slice(offset, chunk_size); - GAR_RETURN_NOT_OK(WriteChunk(in_chunk, property_group, chunk_index)); + GAR_RETURN_NOT_OK( + WriteChunk(in_chunk, property_group, chunk_index, validate_level)); } return Status::OK(); } Status VertexPropertyWriter::WriteTable( - const std::shared_ptr& input_table, - IdType start_chunk_index) const noexcept { + const std::shared_ptr& input_table, IdType start_chunk_index, + ValidateLevel validate_level) const noexcept { auto property_groups = vertex_info_.GetPropertyGroups(); for (auto& property_group : property_groups) { - GAR_RETURN_NOT_OK( - WriteTable(input_table, property_group, start_chunk_index)); + GAR_RETURN_NOT_OK(WriteTable(input_table, property_group, start_chunk_index, + validate_level)); } return Status::OK(); } // implementations for EdgeChunkWriter -Status EdgeChunkWriter::Validate( - const std::shared_ptr& input_table, IdType vertex_chunk_index, - ValidateLevel validate_level) const noexcept { +// Check if the operation of writing number or copying a file is allowed. 
+Status EdgeChunkWriter::validate(IdType count_or_index1, IdType count_or_index2, + ValidateLevel validate_level) const noexcept { // use the writer's validate level if (validate_level == ValidateLevel::default_validate) validate_level = validate_level_; // no validate if (validate_level == ValidateLevel::no_validate) return Status::OK(); - // weak validate - if (!edge_info_.ContainAdjList(adj_list_type_)) + // weak & strong validate for adj list type + if (!edge_info_.ContainAdjList(adj_list_type_)) { return Status::InvalidOperation( "the adj list type " + std::string(AdjListTypeToString(adj_list_type_)) + " does not exist in the edge info"); + } + // weak & strong validate for count or index + if (count_or_index1 < 0 || count_or_index2 < 0) + return Status::InvalidOperation("the count or index must be non-negative"); + return Status::OK(); +} + +// Check if the operation of copying a file as a property chunk is allowed. +Status EdgeChunkWriter::validate(const PropertyGroup& property_group, + IdType vertex_chunk_index, IdType chunk_index, + ValidateLevel validate_level) const noexcept { + // use the writer's validate level + if (validate_level == ValidateLevel::default_validate) + validate_level = validate_level_; + // no validate + if (validate_level == ValidateLevel::no_validate) + return Status::OK(); + // validate for adj list type & index + GAR_RETURN_NOT_OK(validate(vertex_chunk_index, chunk_index, validate_level)); + // weak & strong validate for property group + if (!edge_info_.ContainPropertyGroup(property_group, adj_list_type_)) { + return Status::InvalidOperation( + "the property group does not exist in the edge info"); + } + return Status::OK(); +} + +// Check if the operation of writing a table as an offset chunk is allowed. 
+Status EdgeChunkWriter::validate( + const std::shared_ptr& input_table, IdType vertex_chunk_index, + ValidateLevel validate_level) const noexcept { + // use the writer's validate level + if (validate_level == ValidateLevel::default_validate) + validate_level = validate_level_; + // no validate + if (validate_level == ValidateLevel::no_validate) + return Status::OK(); + // validate for adj list type & index + GAR_RETURN_NOT_OK(validate(vertex_chunk_index, 0, validate_level)); + // weak validate for the input table if (adj_list_type_ != AdjListType::ordered_by_source && - adj_list_type_ != AdjListType::ordered_by_dest) + adj_list_type_ != AdjListType::ordered_by_dest) { return Status::InvalidOperation( "the adj list type has to be ordered_by_source or ordered_by_dest, but " "got " + std::string(AdjListTypeToString(adj_list_type_))); + } if (adj_list_type_ == AdjListType::ordered_by_source && - input_table->num_rows() > edge_info_.GetSrcChunkSize() + 1) + input_table->num_rows() > edge_info_.GetSrcChunkSize() + 1) { return Status::OutOfRange( "the number of rows in the input table is larger than the offset table " "size for a vertex chunk"); + } if (adj_list_type_ == AdjListType::ordered_by_dest && - input_table->num_rows() > edge_info_.GetDstChunkSize() + 1) + input_table->num_rows() > edge_info_.GetDstChunkSize() + 1) { return Status::OutOfRange( "the number of rows in the input table is larger than the offset table " "size for a vertex chunk"); - if (vertex_chunk_index < 0) - return Status::InvalidOperation("invalid vertex chunk index"); - // strong validate + } + // strong validate for the input_table if (validate_level == ValidateLevel::strong_validate) { auto schema = input_table->schema(); int index = schema->GetFieldIndex(GeneralParams::kOffsetCol); if (index == -1) return Status::InvalidOperation("the offset column is not provided"); auto field = schema->field(index); - if (field->type()->id() != arrow::Type::INT64) + if (field->type()->id() != 
arrow::Type::INT64) { return Status::TypeError( "the data type for offset column should be INT64, but got " + field->type()->name()); + } } return Status::OK(); } -Status EdgeChunkWriter::Validate( +// Check if the operation of writing a table as an adj list chunk is allowed. +Status EdgeChunkWriter::validate( const std::shared_ptr& input_table, IdType vertex_chunk_index, IdType chunk_index, ValidateLevel validate_level) const noexcept { // use the writer's validate level @@ -268,44 +358,41 @@ Status EdgeChunkWriter::Validate( // no validate if (validate_level == ValidateLevel::no_validate) return Status::OK(); - // weak validate - if (!edge_info_.ContainAdjList(adj_list_type_)) - return Status::InvalidOperation( - "the adj list type " + - std::string(AdjListTypeToString(adj_list_type_)) + - " does not exist in the edge info"); - if (input_table->num_rows() > edge_info_.GetChunkSize()) + // validate for adj list type & index + GAR_RETURN_NOT_OK(validate(vertex_chunk_index, chunk_index, validate_level)); + // weak validate for the input table + if (input_table->num_rows() > edge_info_.GetChunkSize()) { return Status::OutOfRange( "the number of rows in the input table is larger than the edge chunk " "size"); - if (vertex_chunk_index < 0) - return Status::InvalidOperation("invalid vertex chunk index"); - if (chunk_index < 0) - return Status::InvalidOperation("invalid edge chunk index"); - // stong validate + } + // strong validate for the input table if (validate_level == ValidateLevel::strong_validate) { auto schema = input_table->schema(); int index = schema->GetFieldIndex(GeneralParams::kSrcIndexCol); if (index == -1) return Status::InvalidOperation("the source column is not provided"); auto field = schema->field(index); - if (field->type()->id() != arrow::Type::INT64) + if (field->type()->id() != arrow::Type::INT64) { return Status::TypeError( "the data type for source column should be INT64, but got " + field->type()->name()); + } index =
schema->GetFieldIndex(GeneralParams::kDstIndexCol); if (index == -1) return Status::InvalidOperation("the destination column is not provided"); field = schema->field(index); - if (field->type()->id() != arrow::Type::INT64) + if (field->type()->id() != arrow::Type::INT64) { return Status::TypeError( - "the data type for destination column should be INT64, but got " + + "the data type for destination column should be INT64, but got " + field->type()->name()); + } } return Status::OK(); } -Status EdgeChunkWriter::Validate( +// Check if the operation of writing a table as a property chunk is allowed. +Status EdgeChunkWriter::validate( const std::shared_ptr& input_table, const PropertyGroup& property_group, IdType vertex_chunk_index, IdType chunk_index, ValidateLevel validate_level) const noexcept { @@ -315,22 +402,13 @@ Status EdgeChunkWriter::Validate( // no validate if (validate_level == ValidateLevel::no_validate) return Status::OK(); - // weak validate - if (!edge_info_.ContainPropertyGroup(property_group, adj_list_type_)) - return Status::InvalidOperation( - "the property group does not exist in the edge info"); - if (!edge_info_.ContainAdjList(adj_list_type_)) - return Status::InvalidOperation( - "the adj list type " + - std::string(AdjListTypeToString(adj_list_type_)) + - " does not exist in the edge info"); + // validate for property group, adj list type & index + GAR_RETURN_NOT_OK(validate(property_group, vertex_chunk_index, chunk_index, + validate_level)); + // weak validate for the input table if (input_table->num_rows() > edge_info_.GetChunkSize()) return Status::OutOfRange(); - if (vertex_chunk_index < 0) - return Status::InvalidOperation("invalid vertex chunk index"); - if (chunk_index < 0) - return Status::InvalidOperation("invalid edge chunk index"); - // strong validate + // strong validate for the input table if (validate_level == ValidateLevel::strong_validate) { auto schema = input_table->schema(); for (auto& property : 
property_group.GetProperties()) { @@ -352,14 +430,20 @@ Status EdgeChunkWriter::Validate( } Status EdgeChunkWriter::WriteEdgesNum(IdType vertex_chunk_index, - const IdType& count) const noexcept { + const IdType& count, + ValidateLevel validate_level) const + noexcept { + GAR_RETURN_NOT_OK(validate(vertex_chunk_index, count, validate_level)); GAR_ASSIGN_OR_RAISE(auto suffix, edge_info_.GetEdgesNumFilePath( vertex_chunk_index, adj_list_type_)); std::string path = prefix_ + suffix; return fs_->WriteValueToFile(count, path); } -Status EdgeChunkWriter::WriteVerticesNum(const IdType& count) const noexcept { +Status EdgeChunkWriter::WriteVerticesNum(const IdType& count, + ValidateLevel validate_level) const + noexcept { + GAR_RETURN_NOT_OK(validate(0, count, validate_level)); GAR_ASSIGN_OR_RAISE(auto suffix, edge_info_.GetVerticesNumFilePath(adj_list_type_)); std::string path = prefix_ + suffix; @@ -367,8 +451,10 @@ Status EdgeChunkWriter::WriteVerticesNum(const IdType& count) const noexcept { } Status EdgeChunkWriter::WriteOffsetChunk(const std::string& file_name, - IdType vertex_chunk_index) const + IdType vertex_chunk_index, + ValidateLevel validate_level) const noexcept { + GAR_RETURN_NOT_OK(validate(vertex_chunk_index, 0, validate_level)); GAR_ASSIGN_OR_RAISE(auto suffix, edge_info_.GetAdjListOffsetFilePath( vertex_chunk_index, adj_list_type_)); std::string path = prefix_ + suffix; @@ -377,7 +463,10 @@ Status EdgeChunkWriter::WriteOffsetChunk(const std::string& file_name, Status EdgeChunkWriter::WriteAdjListChunk(const std::string& file_name, IdType vertex_chunk_index, - IdType chunk_index) const noexcept { + IdType chunk_index, + ValidateLevel validate_level) const + noexcept { + GAR_RETURN_NOT_OK(validate(vertex_chunk_index, chunk_index, validate_level)); GAR_ASSIGN_OR_RAISE( auto suffix, edge_info_.GetAdjListFilePath(vertex_chunk_index, chunk_index, adj_list_type_)); @@ -388,7 +477,11 @@ Status EdgeChunkWriter::WriteAdjListChunk(const std::string& file_name, 
Status EdgeChunkWriter::WritePropertyChunk(const std::string& file_name, const PropertyGroup& property_group, IdType vertex_chunk_index, - IdType chunk_index) const noexcept { + IdType chunk_index, + ValidateLevel validate_level) const + noexcept { + GAR_RETURN_NOT_OK(validate(property_group, vertex_chunk_index, chunk_index, + validate_level)); GAR_ASSIGN_OR_RAISE(auto suffix, edge_info_.GetPropertyFilePath( property_group, adj_list_type_, vertex_chunk_index, chunk_index)); @@ -397,9 +490,9 @@ Status EdgeChunkWriter::WritePropertyChunk(const std::string& file_name, } Status EdgeChunkWriter::WriteOffsetChunk( - const std::shared_ptr& input_table, - IdType vertex_chunk_index) const noexcept { - GAR_RETURN_NOT_OK(Validate(input_table, vertex_chunk_index)); + const std::shared_ptr& input_table, IdType vertex_chunk_index, + ValidateLevel validate_level) const noexcept { + GAR_RETURN_NOT_OK(validate(input_table, vertex_chunk_index, validate_level)); GAR_ASSIGN_OR_RAISE(auto file_type, edge_info_.GetFileType(adj_list_type_)); auto schema = input_table->schema(); int index = schema->GetFieldIndex(GeneralParams::kOffsetCol); @@ -414,8 +507,9 @@ Status EdgeChunkWriter::WriteOffsetChunk( Status EdgeChunkWriter::WriteAdjListChunk( const std::shared_ptr& input_table, IdType vertex_chunk_index, - IdType chunk_index) const noexcept { - GAR_RETURN_NOT_OK(Validate(input_table, vertex_chunk_index, chunk_index)); + IdType chunk_index, ValidateLevel validate_level) const noexcept { + GAR_RETURN_NOT_OK( + validate(input_table, vertex_chunk_index, chunk_index, validate_level)); GAR_ASSIGN_OR_RAISE(auto file_type, edge_info_.GetFileType(adj_list_type_)); std::vector indices; indices.clear(); @@ -440,9 +534,9 @@ Status EdgeChunkWriter::WriteAdjListChunk( Status EdgeChunkWriter::WritePropertyChunk( const std::shared_ptr& input_table, const PropertyGroup& property_group, IdType vertex_chunk_index, - IdType chunk_index) const noexcept { - GAR_RETURN_NOT_OK( - Validate(input_table, 
property_group, vertex_chunk_index, chunk_index)); + IdType chunk_index, ValidateLevel validate_level) const noexcept { + GAR_RETURN_NOT_OK(validate(input_table, property_group, vertex_chunk_index, + chunk_index, validate_level)); auto file_type = property_group.GetFileType(); std::vector indices; @@ -467,34 +561,36 @@ Status EdgeChunkWriter::WritePropertyChunk( Status EdgeChunkWriter::WritePropertyChunk( const std::shared_ptr& input_table, IdType vertex_chunk_index, - IdType chunk_index) const noexcept { + IdType chunk_index, ValidateLevel validate_level) const noexcept { GAR_ASSIGN_OR_RAISE(auto& property_groups, edge_info_.GetPropertyGroups(adj_list_type_)); for (auto& property_group : property_groups) { GAR_RETURN_NOT_OK(WritePropertyChunk(input_table, property_group, - vertex_chunk_index, chunk_index)); + vertex_chunk_index, chunk_index, + validate_level)); } return Status::OK(); } Status EdgeChunkWriter::WriteChunk( const std::shared_ptr& input_table, IdType vertex_chunk_index, - IdType chunk_index) const noexcept { - GAR_RETURN_NOT_OK( - WriteAdjListChunk(input_table, vertex_chunk_index, chunk_index)); - return WritePropertyChunk(input_table, vertex_chunk_index, chunk_index); + IdType chunk_index, ValidateLevel validate_level) const noexcept { + GAR_RETURN_NOT_OK(WriteAdjListChunk(input_table, vertex_chunk_index, + chunk_index, validate_level)); + return WritePropertyChunk(input_table, vertex_chunk_index, chunk_index, + validate_level); } Status EdgeChunkWriter::WriteAdjListTable( const std::shared_ptr& input_table, IdType vertex_chunk_index, - IdType start_chunk_index) const noexcept { + IdType start_chunk_index, ValidateLevel validate_level) const noexcept { int64_t length = input_table->num_rows(); IdType chunk_index = start_chunk_index; for (int64_t offset = 0; offset < length; offset += chunk_size_, chunk_index++) { auto in_chunk = input_table->Slice(offset, chunk_size_); - GAR_RETURN_NOT_OK( - WriteAdjListChunk(in_chunk, vertex_chunk_index, 
chunk_index)); + GAR_RETURN_NOT_OK(WriteAdjListChunk(in_chunk, vertex_chunk_index, + chunk_index, validate_level)); } return Status::OK(); } @@ -502,48 +598,50 @@ Status EdgeChunkWriter::WriteAdjListTable( Status EdgeChunkWriter::WritePropertyTable( const std::shared_ptr& input_table, const PropertyGroup& property_group, IdType vertex_chunk_index, - IdType start_chunk_index) const noexcept { + IdType start_chunk_index, ValidateLevel validate_level) const noexcept { int64_t length = input_table->num_rows(); IdType chunk_index = start_chunk_index; for (int64_t offset = 0; offset < length; offset += chunk_size_, chunk_index++) { auto in_chunk = input_table->Slice(offset, chunk_size_); GAR_RETURN_NOT_OK(WritePropertyChunk(in_chunk, property_group, - vertex_chunk_index, chunk_index)); + vertex_chunk_index, chunk_index, + validate_level)); } return Status::OK(); } Status EdgeChunkWriter::WritePropertyTable( const std::shared_ptr& input_table, IdType vertex_chunk_index, - IdType start_chunk_index) const noexcept { + IdType start_chunk_index, ValidateLevel validate_level) const noexcept { int64_t length = input_table->num_rows(); IdType chunk_index = start_chunk_index; for (int64_t offset = 0; offset < length; offset += chunk_size_, chunk_index++) { auto in_chunk = input_table->Slice(offset, chunk_size_); - GAR_RETURN_NOT_OK( - WritePropertyChunk(in_chunk, vertex_chunk_index, chunk_index)); + GAR_RETURN_NOT_OK(WritePropertyChunk(in_chunk, vertex_chunk_index, + chunk_index, validate_level)); } return Status::OK(); } Status EdgeChunkWriter::WriteTable( const std::shared_ptr& input_table, IdType vertex_chunk_index, - IdType start_chunk_index) const noexcept { + IdType start_chunk_index, ValidateLevel validate_level) const noexcept { int64_t length = input_table->num_rows(); IdType chunk_index = start_chunk_index; for (int64_t offset = 0; offset < length; offset += chunk_size_, chunk_index++) { auto in_chunk = input_table->Slice(offset, chunk_size_); - 
GAR_RETURN_NOT_OK(WriteChunk(in_chunk, vertex_chunk_index, chunk_index)); + GAR_RETURN_NOT_OK( + WriteChunk(in_chunk, vertex_chunk_index, chunk_index, validate_level)); } return Status::OK(); } Status EdgeChunkWriter::SortAndWriteAdjListTable( const std::shared_ptr& input_table, IdType vertex_chunk_index, - IdType start_chunk_index) const noexcept { + IdType start_chunk_index, ValidateLevel validate_level) const noexcept { GAR_ASSIGN_OR_RAISE( auto response_table, sortTable(input_table, getSortColumnName(adj_list_type_))); @@ -553,36 +651,37 @@ Status EdgeChunkWriter::SortAndWriteAdjListTable( auto offset_table, getOffsetTable(response_table, getSortColumnName(adj_list_type_), vertex_chunk_index)); - GAR_RETURN_NOT_OK(WriteOffsetChunk(offset_table, vertex_chunk_index)); + GAR_RETURN_NOT_OK( + WriteOffsetChunk(offset_table, vertex_chunk_index, validate_level)); } return WriteAdjListTable(response_table, vertex_chunk_index, - start_chunk_index); + start_chunk_index, validate_level); } Status EdgeChunkWriter::SortAndWritePropertyTable( const std::shared_ptr& input_table, const PropertyGroup& property_group, IdType vertex_chunk_index, - IdType start_chunk_index) const noexcept { + IdType start_chunk_index, ValidateLevel validate_level) const noexcept { GAR_ASSIGN_OR_RAISE( auto response_table, sortTable(input_table, getSortColumnName(adj_list_type_))); return WritePropertyTable(response_table, property_group, vertex_chunk_index, - start_chunk_index); + start_chunk_index, validate_level); } Status EdgeChunkWriter::SortAndWritePropertyTable( const std::shared_ptr& input_table, IdType vertex_chunk_index, - IdType start_chunk_index) const noexcept { + IdType start_chunk_index, ValidateLevel validate_level) const noexcept { GAR_ASSIGN_OR_RAISE( auto response_table, sortTable(input_table, getSortColumnName(adj_list_type_))); return WritePropertyTable(response_table, vertex_chunk_index, - start_chunk_index); + start_chunk_index, validate_level); } Status 
EdgeChunkWriter::SortAndWriteTable( const std::shared_ptr& input_table, IdType vertex_chunk_index, - IdType start_chunk_index) const noexcept { + IdType start_chunk_index, ValidateLevel validate_level) const noexcept { GAR_ASSIGN_OR_RAISE( auto response_table, sortTable(input_table, getSortColumnName(adj_list_type_))); @@ -593,10 +692,12 @@ Status EdgeChunkWriter::SortAndWriteTable( auto offset_table, getOffsetTable(response_table, getSortColumnName(adj_list_type_), vertex_chunk_index)); - GAR_RETURN_NOT_OK(WriteOffsetChunk(offset_table, vertex_chunk_index)); + GAR_RETURN_NOT_OK( + WriteOffsetChunk(offset_table, vertex_chunk_index, validate_level)); } - return WriteTable(response_table, vertex_chunk_index, start_chunk_index); + return WriteTable(response_table, vertex_chunk_index, start_chunk_index, + validate_level); } Result> EdgeChunkWriter::getOffsetTable( diff --git a/cpp/src/edges_builder.cc b/cpp/src/edges_builder.cc index d5eff84f7..db4e22daf 100644 --- a/cpp/src/edges_builder.cc +++ b/cpp/src/edges_builder.cc @@ -20,7 +20,7 @@ limitations under the License. 
namespace GAR_NAMESPACE_INTERNAL { namespace builder { -Status EdgesBuilder::Validate(const Edge& e, +Status EdgesBuilder::validate(const Edge& e, ValidateLevel validate_level) const { // use the builder's validate level if (validate_level == ValidateLevel::default_validate) @@ -46,10 +46,11 @@ Status EdgesBuilder::Validate(const Edge& e, if (validate_level == ValidateLevel::strong_validate) { for (auto& property : e.GetProperties()) { // check if the property is contained - if (!edge_info_.ContainProperty(property.first)) + if (!edge_info_.ContainProperty(property.first)) { return Status::InvalidOperation( "invalid property name: " + property.first + ", which is not contained in the vertex info"); + } // check if the property type is correct auto type = edge_info_.GetPropertyType(property.first).value(); bool invalid_type = false; @@ -223,10 +224,11 @@ Result> EdgesBuilder::getOffsetTable( int64_t x = (adj_list_type_ == AdjListType::ordered_by_source ? edges[index].GetSource() : edges[index].GetDestination()); - if (x <= i) + if (x <= i) { index++; - else + } else { break; + } } RETURN_NOT_ARROW_OK(builder.Append(index)); } diff --git a/cpp/src/vertices_builder.cc b/cpp/src/vertices_builder.cc index 74408116b..c4650fcc1 100644 --- a/cpp/src/vertices_builder.cc +++ b/cpp/src/vertices_builder.cc @@ -19,7 +19,7 @@ limitations under the License. 
namespace GAR_NAMESPACE_INTERNAL { namespace builder { -Status VerticesBuilder::Validate(const Vertex& v, IdType index, +Status VerticesBuilder::validate(const Vertex& v, IdType index, ValidateLevel validate_level) const { // use the builder's validate level if (validate_level == ValidateLevel::default_validate) @@ -40,18 +40,20 @@ Status VerticesBuilder::Validate(const Vertex& v, IdType index, "with the chunk size"); } // the vertex index must larger than start index - if (index != -1 && index < start_vertex_index_) + if (index != -1 && index < start_vertex_index_) { return Status::InvalidOperation( "the vertex index must be larger than start index"); + } // strong validate if (validate_level == ValidateLevel::strong_validate) { for (auto& property : v.GetProperties()) { // check if the property is contained - if (!vertex_info_.ContainProperty(property.first)) + if (!vertex_info_.ContainProperty(property.first)) { return Status::InvalidOperation( "invalid property name: " + property.first + ", which is not contained in the vertex info"); + } // check if the property type is correct auto type = vertex_info_.GetPropertyType(property.first).value(); bool invalid_type = false; diff --git a/cpp/test/test_arrow_chunk_writer.cc b/cpp/test/test_arrow_chunk_writer.cc index 32d4e8d81..31bb6946f 100644 --- a/cpp/test/test_arrow_chunk_writer.cc +++ b/cpp/test/test_arrow_chunk_writer.cc @@ -63,29 +63,36 @@ TEST_CASE("test_vertex_property_writer_from_file") { std::shared_ptr table = *maybe_table; std::cout << table->num_rows() << ' ' << table->num_columns() << std::endl; + // Construct the writer std::string vertex_meta_file = root + "/ldbc_sample/parquet/" + "person.vertex.yml"; auto vertex_meta = GAR_NAMESPACE::Yaml::LoadFile(vertex_meta_file).value(); auto vertex_info = GAR_NAMESPACE::VertexInfo::Load(vertex_meta).value(); REQUIRE(vertex_info.GetLabel() == "person"); GAR_NAMESPACE::VertexPropertyWriter writer(vertex_info, "/tmp/"); - REQUIRE(writer.WriteTable(table, 
0).ok()); - REQUIRE(writer.WriteVerticesNum(table->num_rows()).ok()); - input = fs->OpenInputStream("/tmp/vertex/person/vertex_count").ValueOrDie(); - auto num = input->Read(sizeof(GAR_NAMESPACE::IdType)).ValueOrDie(); - GAR_NAMESPACE::IdType* ptr = (GAR_NAMESPACE::IdType*) num->data(); - REQUIRE((*ptr) == table->num_rows()); - - // Set validate level + // Get & set validate level REQUIRE(writer.GetValidateLevel() == GAR_NAMESPACE::ValidateLevel::no_validate); writer.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate); REQUIRE(writer.GetValidateLevel() == GAR_NAMESPACE::ValidateLevel::strong_validate); - // Validate operation + + // Valid cases + // Write the table REQUIRE(writer.WriteTable(table, 0).ok()); + // Write the number of vertices + REQUIRE(writer.WriteVerticesNum(table->num_rows()).ok()); + // Check vertex count + input = fs->OpenInputStream("/tmp/vertex/person/vertex_count").ValueOrDie(); + auto num = input->Read(sizeof(GAR_NAMESPACE::IdType)).ValueOrDie(); + GAR_NAMESPACE::IdType* ptr = (GAR_NAMESPACE::IdType*) num->data(); + REQUIRE((*ptr) == table->num_rows()); + + // Invalid cases + // Invalid vertices number + REQUIRE(writer.WriteVerticesNum(-1).IsInvalidOperation()); // Out of range REQUIRE(writer.WriteChunk(table, 0).IsOutOfRange()); // Invalid chunk id @@ -178,17 +185,30 @@ TEST_CASE("test_edge_chunk_writer") { std::cout << table->schema()->ToString() << std::endl; std::cout << table->num_rows() << ' ' << table->num_columns() << std::endl; - // Write edges of vertex chunk 0 to files + // Construct the writer std::string edge_meta_file = root + "/ldbc_sample/csv/" + "person_knows_person.edge.yml"; auto edge_meta = GAR_NAMESPACE::Yaml::LoadFile(edge_meta_file).value(); auto edge_info = GAR_NAMESPACE::EdgeInfo::Load(edge_meta).value(); auto adj_list_type = GAR_NAMESPACE::AdjListType::ordered_by_source; GAR_NAMESPACE::EdgeChunkWriter writer(edge_info, "/tmp/", adj_list_type); - REQUIRE(writer.SortAndWriteAdjListTable(table, 0, 0).ok()); 
+ // Get & set validate level + REQUIRE(writer.GetValidateLevel() == + GAR_NAMESPACE::ValidateLevel::no_validate); + writer.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate); + REQUIRE(writer.GetValidateLevel() == + GAR_NAMESPACE::ValidateLevel::strong_validate); + + // Valid cases + // Write adj list of vertex chunk 0 to files + REQUIRE(writer.SortAndWriteAdjListTable(table, 0, 0).ok()); // Write number of edges for vertex chunk 0 REQUIRE(writer.WriteEdgesNum(0, table->num_rows()).ok()); + // Write number of vertices + REQUIRE(writer.WriteVerticesNum(903).ok()); + + // Check the number of edges std::shared_ptr input2 = fs->OpenInputStream( "/tmp/edge/person_knows_person/ordered_by_source/edge_count0") @@ -198,8 +218,7 @@ TEST_CASE("test_edge_chunk_writer") { (GAR_NAMESPACE::IdType*) edge_num->data(); REQUIRE((*edge_num_ptr) == table->num_rows()); - // Write number of vertices - REQUIRE(writer.WriteVerticesNum(903).ok()); + // Check the number of vertices std::shared_ptr input3 = fs->OpenInputStream( "/tmp/edge/person_knows_person/ordered_by_source/vertex_count") @@ -209,15 +228,11 @@ TEST_CASE("test_edge_chunk_writer") { (GAR_NAMESPACE::IdType*) vertex_num->data(); REQUIRE((*vertex_num_ptr) == 903); - // Set validate level - REQUIRE(writer.GetValidateLevel() == - GAR_NAMESPACE::ValidateLevel::no_validate); - writer.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate); - REQUIRE(writer.GetValidateLevel() == - GAR_NAMESPACE::ValidateLevel::strong_validate); - // Validate operation - REQUIRE(writer.SortAndWriteAdjListTable(table, 0, 0).ok()); - + // Invalid cases + // Invalid count or index + REQUIRE(writer.WriteEdgesNum(-1, 0).IsInvalidOperation()); + REQUIRE(writer.WriteEdgesNum(0, -1).IsInvalidOperation()); + REQUIRE(writer.WriteVerticesNum(-1).IsInvalidOperation()); // Out of range REQUIRE(writer.WriteOffsetChunk(table, 0).IsOutOfRange()); // Invalid chunk id diff --git a/cpp/test/test_builder.cc b/cpp/test/test_builder.cc index 
b61973be2..f0d5920bb 100644 --- a/cpp/test/test_builder.cc +++ b/cpp/test/test_builder.cc @@ -51,7 +51,7 @@ TEST_CASE("test_vertices_builder") { GAR_NAMESPACE::builder::VerticesBuilder builder(vertex_info, "/tmp/", start_index); - // set validate level + // get & set validate level REQUIRE(builder.GetValidateLevel() == GAR_NAMESPACE::ValidateLevel::no_validate); builder.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate); @@ -62,15 +62,19 @@ TEST_CASE("test_vertices_builder") { GAR_NAMESPACE::builder::Vertex v; v.AddProperty("id", "id_of_string"); REQUIRE( - builder.Validate(v, 0, GAR_NAMESPACE::ValidateLevel::no_validate).ok()); - REQUIRE( - builder.Validate(v, 0, GAR_NAMESPACE::ValidateLevel::weak_validate).ok()); - REQUIRE(builder.Validate(v, -2, GAR_NAMESPACE::ValidateLevel::weak_validate) + builder.AddVertex(v, 0, GAR_NAMESPACE::ValidateLevel::no_validate).ok()); + REQUIRE(builder.AddVertex(v, 0, GAR_NAMESPACE::ValidateLevel::weak_validate) + .ok()); + REQUIRE(builder.AddVertex(v, -2, GAR_NAMESPACE::ValidateLevel::weak_validate) .IsInvalidOperation()); - REQUIRE(builder.Validate(v, 0, GAR_NAMESPACE::ValidateLevel::strong_validate) + REQUIRE(builder.AddVertex(v, 0, GAR_NAMESPACE::ValidateLevel::strong_validate) .IsTypeError()); v.AddProperty("invalid_name", "invalid_value"); - REQUIRE(builder.Validate(v, 0).IsInvalidOperation()); + REQUIRE(builder.AddVertex(v, 0).IsInvalidOperation()); + + // clear vertices + builder.Clear(); + REQUIRE(builder.GetNum() == 0); // add vertices std::ifstream fp(root + "/ldbc_sample/person_0_0.csv"); @@ -84,7 +88,10 @@ TEST_CASE("test_vertices_builder") { getline(readstr, name, '|'); names.push_back(name); } + + int lines = 0; while (getline(fp, line)) { + lines++; std::string val; std::istringstream readstr(line); GAR_NAMESPACE::builder::Vertex v; @@ -102,13 +109,16 @@ TEST_CASE("test_vertices_builder") { REQUIRE(builder.AddVertex(v).ok()); } - // dump + // check the number of vertices in builder + 
REQUIRE(builder.GetNum() == lines); + + // dump to files REQUIRE(builder.Dump().ok()); // can not add new vertices after dumping REQUIRE(builder.AddVertex(v).IsInvalidOperation()); - // check the number of vertices + // check the number of vertices dumped auto fs = arrow::fs::FileSystemFromUriOrPath(root).ValueOrDie(); auto input = fs->OpenInputStream("/tmp/vertex/person/vertex_count").ValueOrDie(); @@ -127,10 +137,12 @@ TEST_CASE("test_edges_builder") { root + "/ldbc_sample/parquet/" + "person_knows_person.edge.yml"; auto edge_meta = GAR_NAMESPACE::Yaml::LoadFile(edge_meta_file).value(); auto edge_info = GAR_NAMESPACE::EdgeInfo::Load(edge_meta).value(); + auto vertices_num = 903; GAR_NAMESPACE::builder::EdgesBuilder builder( - edge_info, "/tmp/", GraphArchive::AdjListType::ordered_by_dest, 903); + edge_info, "/tmp/", GraphArchive::AdjListType::ordered_by_dest, + vertices_num); - // set validate level + // get & set validate level REQUIRE(builder.GetValidateLevel() == GAR_NAMESPACE::ValidateLevel::no_validate); builder.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate); @@ -140,13 +152,16 @@ TEST_CASE("test_edges_builder") { // check different validate levels GAR_NAMESPACE::builder::Edge e(0, 1); e.AddProperty("creationDate", 2020); - REQUIRE(builder.Validate(e, GAR_NAMESPACE::ValidateLevel::no_validate).ok()); - REQUIRE( - builder.Validate(e, GAR_NAMESPACE::ValidateLevel::weak_validate).ok()); - REQUIRE(builder.Validate(e, GAR_NAMESPACE::ValidateLevel::strong_validate) + REQUIRE(builder.AddEdge(e, GAR_NAMESPACE::ValidateLevel::no_validate).ok()); + REQUIRE(builder.AddEdge(e, GAR_NAMESPACE::ValidateLevel::weak_validate).ok()); + REQUIRE(builder.AddEdge(e, GAR_NAMESPACE::ValidateLevel::strong_validate) .IsTypeError()); e.AddProperty("invalid_name", "invalid_value"); - REQUIRE(builder.Validate(e).IsInvalidOperation()); + REQUIRE(builder.AddEdge(e).IsInvalidOperation()); + + // clear edges + builder.Clear(); + REQUIRE(builder.GetNum() == 0); // add edges 
std::ifstream fp(root + "/ldbc_sample/person_knows_person_0_0.csv"); @@ -155,8 +170,10 @@ TEST_CASE("test_edges_builder") { std::vector names; std::istringstream readstr(line); std::map mapping; - int64_t cnt = 0; + int64_t cnt = 0, lines = 0; + while (getline(fp, line)) { + lines++; std::string val; std::istringstream readstr(line); int64_t s, d; @@ -178,9 +195,22 @@ TEST_CASE("test_edges_builder") { } } - // dump + // check the number of edges in builder + REQUIRE(builder.GetNum() == lines); + + // dump to files REQUIRE(builder.Dump().ok()); // can not add new edges after dumping REQUIRE(builder.AddEdge(e).IsInvalidOperation()); + + // check the number of vertices dumped + auto fs = arrow::fs::FileSystemFromUriOrPath(root).ValueOrDie(); + auto input = + fs->OpenInputStream( + "/tmp/edge/person_knows_person/ordered_by_dest/vertex_count") + .ValueOrDie(); + auto num = input->Read(sizeof(GAR_NAMESPACE::IdType)).ValueOrDie(); + GAR_NAMESPACE::IdType* ptr = (GAR_NAMESPACE::IdType*) num->data(); + REQUIRE((*ptr) == vertices_num); } diff --git a/docs/reference/api-reference-cpp.rst b/docs/reference/api-reference-cpp.rst index 10e3465a6..8066772c8 100644 --- a/docs/reference/api-reference-cpp.rst +++ b/docs/reference/api-reference-cpp.rst @@ -181,6 +181,10 @@ Adj List Type ~~~~~~~~~~~~~~~~~~~ .. doxygenenum:: GraphArchive::AdjListType +Validate Level +~~~~~~~~~~~~~~~~~~~ +.. doxygenenum:: GraphArchive::ValidateLevel + Utilities ---------