diff --git a/docs/_toc.yml b/docs/_toc.yml
index afec7a9..2e905a2 100644
--- a/docs/_toc.yml
+++ b/docs/_toc.yml
@@ -14,6 +14,8 @@ parts:
       title: Clean-Clean ER for Devs
     - file: tutorials/CleanCleanERwithoutGT.ipynb
       title: Clean-Clean ER without GT
+    - file: tutorials/Reproducibility.ipynb
+      title: Academic reproducibility
     - file: tutorials/DirtyER.ipynb
       title: Dirty ER for Devs
     - file: tutorials/Optuna.ipynb
diff --git a/docs/pyjedai/block_building.py b/docs/pyjedai/block_building.py
index 0d73c2c..cc1e2df 100644
--- a/docs/pyjedai/block_building.py
+++ b/docs/pyjedai/block_building.py
@@ -236,38 +236,42 @@ def stats(self, blocks: dict, verbose: bool = True) -> dict:
             'skewness_of_comparison_per_entity': self.skewness_of_comparison_per_entity
         }
 
-    def export_to_df(
-        self,
-        blocks: dict
-    ) -> pd.DataFrame:
-        """creates a dataframe for the evaluation report
+    def export_to_df(self, blocks: dict, tqdm_enable:bool = False) -> pd.DataFrame:
+        """Creates a dataframe for the evaluation report.
 
         Args:
-            blocks (any): Predicted blocks
-            data (Data): initial dataset
+            blocks (dict): Predicted blocks.
 
         Returns:
-            pd.DataFrame: Dataframe predicted pairs (can be exported to csv)
+            pd.DataFrame: Dataframe with the predicted pairs (can be exported to CSV).
         """
-        pairs_df = pd.DataFrame(columns=['id1', 'id2'])
-        for _, block in blocks.items():
-            if self.data.is_dirty_er:
+        pairs_list = []
+
+        is_dirty_er = self.data.is_dirty_er
+        gt_to_ids_reversed_1 = self.data._gt_to_ids_reversed_1
+        gt_to_ids_reversed_2 = self.data._gt_to_ids_reversed_2
+
+        for block in tqdm(blocks.values(), desc="Exporting to DataFrame", disable=not tqdm_enable):
+            if is_dirty_er:
                 lblock = list(block.entities_D1)
-                for i1 in range(0, len(lblock)):
-                    for i2 in range(i1+1, len(lblock)):
-                        id1 = self.data._gt_to_ids_reversed_1[lblock[i1]]
-                        id2 = self.data._gt_to_ids_reversed_1[lblock[i2]] if self.data.is_dirty_er \
-                            else self.data._gt_to_ids_reversed_2[lblock[i2]]
-                        pairs_df = pd.concat([pairs_df, pd.DataFrame([{'id1':id1, 'id2':id2}], index=[0])], ignore_index=True)
+
+                for i1 in range(len(lblock)):
+                    for i2 in range(i1 + 1, len(lblock)):
+                        id1 = gt_to_ids_reversed_1[lblock[i1]]
+                        id2 = gt_to_ids_reversed_1[lblock[i2]]
+                        pairs_list.append((id1, id2))
             else:
                 for i1 in block.entities_D1:
                     for i2 in block.entities_D2:
-                        id1 = self.data._gt_to_ids_reversed_1[i1]
-                        id2 = self.data._gt_to_ids_reversed_1[i2] if self.data.is_dirty_er \
-                            else self.data._gt_to_ids_reversed_2[i2]
-                        pairs_df = pd.concat([pairs_df, pd.DataFrame([{'id1':id1, 'id2':id2}], index=[0])], ignore_index=True)
+                        id1 = gt_to_ids_reversed_1[i1]
+                        id2 = gt_to_ids_reversed_2[i2]
+                        pairs_list.append((id1, id2))
+
+        pairs_df = pd.DataFrame(pairs_list, columns=['id1', 'id2'])
+
         return pairs_df
 
+
 class AbstractBlockBuilding(AbstractBlockProcessing):
     """Abstract class for the block building method
     """
diff --git a/docs/pyjedai/clustering.py b/docs/pyjedai/clustering.py
index 9f645e7..65560d0 100644
--- a/docs/pyjedai/clustering.py
+++ b/docs/pyjedai/clustering.py
@@ -376,31 +376,45 @@ def stats(self) -> None:
     def _configuration(self) -> dict:
         pass
 
-    def export_to_df(self, prediction: list) -> pd.DataFrame:
-        """creates a dataframe for the evaluation report
+    import pandas as pd
+
+    def export_to_df(self, prediction: list, tqdm_enable:bool = False) -> pd.DataFrame:
+        """Creates a dataframe for the evaluation report.
 
         Args:
-            prediction (any): Predicted clusters
+            prediction (list): Predicted clusters.
 
         Returns:
-            pd.DataFrame: Dataframe containg evaluation scores and stats
+            pd.DataFrame: Dataframe containing evaluation scores and stats.
         """
-        pairs_df = pd.DataFrame(columns=['id1', 'id2'])
-        for cluster in prediction:
+        pairs_list = []
+
+        dataset_limit = self.data.dataset_limit
+        is_dirty_er = self.data.is_dirty_er
+        gt_to_ids_reversed_1 = self.data._gt_to_ids_reversed_1
+        gt_to_ids_reversed_2 = self.data._gt_to_ids_reversed_2
+
+        for cluster in tqdm(prediction, desc="Exporting to DataFrame", disable=not tqdm_enable):
             lcluster = list(cluster)
-            for i1 in range(0, len(lcluster)):
-                for i2 in range(i1+1, len(lcluster)):
-                    if lcluster[i1] < self.data.dataset_limit:
-                        id1 = self.data._gt_to_ids_reversed_1[lcluster[i1]]
-                        id2 = self.data._gt_to_ids_reversed_1[lcluster[i2]] if self.data.is_dirty_er else self.data._gt_to_ids_reversed_2[lcluster[i2]]
+
+            for i1 in range(len(lcluster)):
+                for i2 in range(i1 + 1, len(lcluster)):
+                    node1 = lcluster[i1]
+                    node2 = lcluster[i2]
+
+                    if node1 < dataset_limit:
+                        id1 = gt_to_ids_reversed_1[node1]
+                        id2 = gt_to_ids_reversed_1[node2] if is_dirty_er else gt_to_ids_reversed_2[node2]
                     else:
-                        id2 = self.data._gt_to_ids_reversed_2[lcluster[i1]]
-                        id1 = self.data._gt_to_ids_reversed_1[lcluster[i2]]
-                    pairs_df = pd.concat(
-                        [pairs_df, pd.DataFrame([{'id1':id1, 'id2':id2}], index=[0])],
-                        ignore_index=True
-                    )
+                        id2 = gt_to_ids_reversed_2[node1]
+                        id1 = gt_to_ids_reversed_1[node2]
+
+                    pairs_list.append((id1, id2))
+
+        pairs_df = pd.DataFrame(pairs_list, columns=['id1', 'id2'])
+
         return pairs_df
 
+
     def sorted_indicators(self, first_indicator : int, second_indicator : int):
         return (first_indicator, second_indicator) if (first_indicator < second_indicator) else (second_indicator, first_indicator)
diff --git a/docs/pyjedai/datamodel.py b/docs/pyjedai/datamodel.py
index d7bd273..2eea542 100644
--- a/docs/pyjedai/datamodel.py
+++ b/docs/pyjedai/datamodel.py
@@ -123,12 +123,12 @@ def __init__(
         self.dataset_name_2 = dataset_name_2
 
         # Fill NaN values with empty string
-        self.dataset_1 = self.dataset_1.astype(str)
         self.dataset_1.fillna("", inplace=True)
+        self.dataset_1 = self.dataset_1.astype(str)
 
         if not self.is_dirty_er:
-            self.dataset_2 = self.dataset_2.astype(str)
             self.dataset_2.fillna("", inplace=True)
-
+            self.dataset_2 = self.dataset_2.astype(str)
+
         # Attributes
         if attributes_1 is None:
             if dataset_1.columns.values.tolist():
diff --git a/docs/pyjedai/joins.py b/docs/pyjedai/joins.py
index 23a9ade..944322c 100644
--- a/docs/pyjedai/joins.py
+++ b/docs/pyjedai/joins.py
@@ -309,24 +309,40 @@ def _configuration(self) -> dict:
             "qgrams": self.qgrams
         }
 
-    def export_to_df(self, prediction) -> pd.DataFrame:
+    def export_to_df(self, prediction, tqdm_enable=False) -> pd.DataFrame:
         """creates a dataframe with the predicted pairs
 
         Args:
-            prediction (any): Predicted candidate pairs
+            prediction (any): Predicted candidate pairs.
+            tqdm_enable (bool, optional): Enable tqdm. Defaults to False.
 
         Returns:
             pd.DataFrame: Dataframe with the predicted pairs
         """
-        if self.data.ground_truth is None:
-            raise AttributeError("Can not proceed to evaluation without a ground-truth file. \
-                    Data object mush have initialized with the ground-truth file")
-        pairs_df = pd.DataFrame(columns=['id1', 'id2'])
-        for edge in prediction.edges:
-            id1 = self.data._gt_to_ids_reversed_1[edge[0]]
-            id2 = self.data._gt_to_ids_reversed_1[edge[1]] if self.data.is_dirty_er \
-                else self.data._gt_to_ids_reversed_2[edge[1]]
-            pairs_df = pd.concat([pairs_df, pd.DataFrame([{'id1':id1, 'id2':id2}], index=[0])], ignore_index=True)
+        pairs_list = []
+
+        is_dirty_er = self.data.is_dirty_er
+        dataset_limit = self.data.dataset_limit
+        gt_to_ids_reversed_1 = self.data._gt_to_ids_reversed_1
+        gt_to_ids_reversed_2 = self.data._gt_to_ids_reversed_2
+
+        for edge in tqdm(prediction.edges, disable=not tqdm_enable, desc="Exporting to DataFrame"):
+            node1, node2 = edge
+
+            if not is_dirty_er:
+                if node1 < dataset_limit:
+                    id1 = gt_to_ids_reversed_1[node1]
+                    id2 = gt_to_ids_reversed_2[node2]
+                else:
+                    id1 = gt_to_ids_reversed_2[node1]
+                    id2 = gt_to_ids_reversed_1[node2]
+            else:
+                id1 = gt_to_ids_reversed_1[node1]
+                id2 = gt_to_ids_reversed_1[node2]
+
+            pairs_list.append((id1, id2))
+
+        pairs_df = pd.DataFrame(pairs_list, columns=['id1', 'id2'])
 
         return pairs_df
 
@@ -416,8 +432,7 @@ def _configuration(self) -> dict:
             "tokenization" : self.tokenization,
             "qgrams": self.qgrams
         }
-
-
+
 class PETopKJoin(TopKJoin):
     """Progressive Entity Resolution Top-K class of Joins module
     """
diff --git a/docs/pyjedai/matching.py b/docs/pyjedai/matching.py
index 9955fec..b0ed0a4 100644
--- a/docs/pyjedai/matching.py
+++ b/docs/pyjedai/matching.py
@@ -155,7 +155,7 @@ def get_weights_median(self) -> float:
     def get_weights_standard_deviation(self) -> float:
         return statistics.stdev([w for _, _, w in self.pairs.edges(data='weight')])
 
-    def plot_distribution_of_all_weights(self) -> None:
+    def plot_distribution_of_all_weights(self, save_figure_path=None) -> None:
         title = "Distribution of scores with " + self.metric + " metric in graph from entity matching"
         plt.figure(figsize=(10, 6))
         all_weights = [w for _, _, w in self.pairs.edges(data='weight')]
@@ -168,9 +168,11 @@ def plot_distribution_of_all_weights(self) -> None:
         plt.axvline(x = self.get_weights_median(), color = 'black', label = 'Median weight')
         plt.axvline(x = self.get_weights_avg()+self.get_weights_standard_deviation(), color = 'green', label = 'Average + SD weight')
         plt.legend()
+        if save_figure_path:
+            plt.savefig(save_figure_path)
         plt.show()
 
-    def plot_distribution_of_all_weights_2d(self) -> None:
+    def plot_distribution_of_all_weights_2d(self, save_figure_path=None) -> None:
         title = "Distribution of scores with " + self.metric + " metric in graph from entity matching"
         plt.figure(figsize=(10, 6))
         all_weights = [w for _, _, w in self.pairs.edges(data='weight')]
@@ -182,9 +184,11 @@ def plot_distribution_of_all_weights_2d(self) -> None:
         plt.axvline(x = self.get_weights_median(), color = 'black', label = 'Median weight')
         plt.axvline(x = self.get_weights_avg()+self.get_weights_standard_deviation(), color = 'green', label = 'Average + SD weight')
         plt.legend()
+        if save_figure_path:
+            plt.savefig(save_figure_path)
         plt.show()
 
-    def plot_distribution_of_scores(self) -> None:
+    def plot_distribution_of_scores(self, save_figure_path=None) -> None:
         title = "Distribution of scores with " + self.metric + " metric in graph from entity matching"
         def weight_distribution(G):
             bins = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
@@ -221,9 +225,11 @@ def weight_distribution(G):
         plt.axvline(x = self.get_weights_median()*10, color = 'black', label = 'Median weight')
         plt.axvline(x = self.get_weights_avg()*10+self.get_weights_standard_deviation()*10, color = 'green', label = 'Average + SD weight')
         plt.legend()
+        if save_figure_path:
+            plt.savefig(save_figure_path)
         plt.show()
 
-    def plot_gt_distribution_of_scores(self) -> None:
+    def plot_gt_distribution_of_scores(self, save_figure_path=None) -> None:
         title = "Distribution of scores with " + self.metric + " metric on ground truth pairs"
         def weight_distribution():
             bins = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
@@ -257,6 +263,8 @@ def weight_distribution():
         ax.set_title(title)
         ax.set_xlabel('Similarity score range')
         fig.tight_layout()
+        if save_figure_path:
+            plt.savefig(save_figure_path)
         plt.show()
 
     def evaluate(self,
@@ -294,43 +302,43 @@ def evaluate(self,
 
     def stats(self) -> None:
         pass
-
-    def export_pairs_to_csv(self, filename: str, with_similarity: bool = True) -> None:
-        if self.pairs is None:
-            raise AttributeError("Pairs have not been initialized yet. " +
-                                 "Please run the method `run` first.")
-
-        with open(filename, 'w') as f:
-            for e1, e2, similarity in self.pairs.edges(data='weight'):
-                e1 = self.data._ids_mapping_1[e1] if e1 < self.data.dataset_limit else self.data._ids_mapping_2[e1]
-                e2 = self.data._ids_mapping_1[e2] if e2 < self.data.dataset_limit else self.data._ids_mapping_2[e2]
-                if with_similarity:
-                    f.write(f"{e1}, {e2}, {similarity}\n")
-                else:
-                    f.write(f"{e1}, {e2}\n")
-            f.close()
-
-    def export_to_df(self, prediction: Graph) -> pd.DataFrame:
-        """creates a dataframe with the predicted pairs
+
+    def export_to_df(self, prediction: Graph, tqdm_enable=False) -> pd.DataFrame:
+        """Creates a dataframe with the predicted pairs.
 
         Args:
-            prediction (any): Predicted graph
+            prediction (Graph): Predicted graph
+            tqdm_enable (bool): Whether to enable tqdm progress bar
 
         Returns:
             pd.DataFrame: Dataframe with the predicted pairs
         """
-        if self.data.ground_truth is None:
-            raise AttributeError("Can not proceed to evaluation without a ground-truth file. \
-                    Data object mush have initialized with the ground-truth file")
-        pairs_df = pd.DataFrame(columns=['id1', 'id2'])
-        for edge in prediction.edges:
-            id1 = self.data._gt_to_ids_reversed_1[edge[0]]
-            id2 = self.data._gt_to_ids_reversed_1[edge[1]] if self.data.is_dirty_er \
-                else self.data._gt_to_ids_reversed_2[edge[1]]
-            pairs_df = pd.concat([pairs_df, pd.DataFrame([{'id1':id1, 'id2':id2}], index=[0])], ignore_index=True)
+        pairs_list = []
 
-        return pairs_df
+        is_dirty_er = self.data.is_dirty_er
+        dataset_limit = self.data.dataset_limit
+        gt_to_ids_reversed_1 = self.data._gt_to_ids_reversed_1
+        gt_to_ids_reversed_2 = self.data._gt_to_ids_reversed_2
 
+        for edge in tqdm(prediction.edges, disable=not tqdm_enable, desc="Exporting to DataFrame"):
+            node1, node2 = edge
+
+            if not is_dirty_er:
+                if node1 < dataset_limit:
+                    id1 = gt_to_ids_reversed_1[node1]
+                    id2 = gt_to_ids_reversed_2[node2]
+                else:
+                    id1 = gt_to_ids_reversed_2[node1]
+                    id2 = gt_to_ids_reversed_1[node2]
+            else:
+                id1 = gt_to_ids_reversed_1[node1]
+                id2 = gt_to_ids_reversed_1[node2]
+
+            pairs_list.append((id1, id2))
+
+        pairs_df = pd.DataFrame(pairs_list, columns=['id1', 'id2'])
+
+        return pairs_df
 
 class EntityMatching(AbstractEntityMatching):
     """Calculates similarity from 0.0 to 1.0 for all blocks
     """
@@ -345,7 +353,7 @@ def __init__(
             tokenizer: str = 'white_space_tokenizer',
             vectorizer : str = None,
             qgram : int = 1,
-            similarity_threshold: float = 0.5,
+            similarity_threshold: float = 0.0,
             tokenizer_return_unique_values = False, # unique values or not,
             attributes: any = None,
     ) -> None:
diff --git a/docs/pyjedai/utils.py b/docs/pyjedai/utils.py
index f95d9f3..6dd56b0 100644
--- a/docs/pyjedai/utils.py
+++ b/docs/pyjedai/utils.py
@@ -1258,3 +1258,51 @@ def predict(self, id1 : int, id2 : int) -> float:
         _id2 = (id2 - self._entities_d1_num)
 
         return self.distance_matrix[_id1][_id2]
+
+def read_data_from_json(json_path, base_dir, verbose=True):
+    """
+    Reads dataset details from a JSON file and returns a Data object.
+
+    Parameters:
+    - json_path (str): Path to the JSON configuration file.
+    - verbose (bool): Whether to print information about the loaded datasets.
+
+    Returns:
+    - Data: A pyjedai Data object initialized with the dataset details.
+ """ + # Load JSON configuration + with open(json_path, 'r') as f: + config = json.load(f) + + # Extract common settings + separator = config.get("separator", ",") + engine = config.get("engine", "python") + na_filter = config.get("na_filter", False) + dataset_dir = config.get("dir", "") + + # Construct file paths + d1_path = f"{base_dir}{dataset_dir}/{config['d1']}.{config.get('format', 'csv')}" + d2_path = f"{base_dir}{dataset_dir}/{config['d2']}.{config.get('format', 'csv')}" if "d2" in config else None + gt_path = f"{base_dir}{dataset_dir}/{config['gt']}.{config.get('format', 'csv')}" if "gt" in config else None + + # Load datasets + d1 = pd.read_csv(d1_path, sep=separator, engine=engine, na_filter=na_filter) + d2 = pd.read_csv(d2_path, sep=separator, engine=engine, na_filter=na_filter) if d2_path else None + gt = pd.read_csv(gt_path, sep=separator, engine=engine) if gt_path else None + + # Initialize Data object + data = Data( + dataset_1=d1, + id_column_name_1=config["d1_id"], + dataset_name_1=config.get("d1", None), + dataset_2=d2, + id_column_name_2=config.get("d2_id", None), + dataset_name_2=config.get("d2", None), + ground_truth=gt, + skip_ground_truth_processing=config.get("skip_ground_truth_processing", False) + ) + + if verbose: + data.print_specs() + + return data diff --git a/docs/pyjedai/vector_based_blocking.py b/docs/pyjedai/vector_based_blocking.py index 3584966..f9ebec1 100644 --- a/docs/pyjedai/vector_based_blocking.py +++ b/docs/pyjedai/vector_based_blocking.py @@ -138,7 +138,6 @@ def build_blocks(self, if self.similarity_search != 'faiss': raise AttributeError("Only FAISS is available for now.") - print('Building blocks via Embeddings-NN Block Building [' + self.vectorizer + ', ' + self.similarity_search + ']') _start_time = time() self.blocks = dict() self.verbose = verbose @@ -226,7 +225,7 @@ def build_blocks(self, if verbose: print(f"{p2} -> Loaded Successfully") else: if verbose: print("Embeddings not found for D2. Creating new ones.") - if not self._d1_loaded or not self._d2_loaded: + if not self._d1_loaded or (not data.is_dirty_er and not self._d2_loaded): if self.vectorizer in ['word2vec', 'fasttext', 'doc2vec', 'glove']: self.vectors_1, self.vectors_2 = self._create_gensim_embeddings() elif self.vectorizer in ['bert', 'distilbert', 'roberta', 'xlnet', 'albert']: @@ -368,7 +367,6 @@ def _create_pretrained_sentence_embeddings(self): vectors_2 = [] if not self.data.is_dirty_er and not self._d2_loaded: for e2 in self._entities_d2: - # print("e2: ", e2) vector = model.encode(e2) vectors_2.append(vector) self._progress_bar.update(1) @@ -420,7 +418,7 @@ def _similarity_search_with_FAISS(self): self.blocks = dict() if self.verbose: print("Building blocks...") - print("disable", not self.verbose) + for _entity in tqdm(range(0, self.neighbors.shape[0]), desc="Building blocks", disable=not self.verbose): _entity_id = self._si.d1_retained_ids[_entity] if self.data.is_dirty_er else self._si.d2_retained_ids[_entity] @@ -510,23 +508,33 @@ def stats(self) -> None: "\n\tIndices shape returned after search: " + str(self.neighbors.shape) ) print(u'\u2500' * 123) - - - def export_to_df(self, prediction: dict) -> pd.DataFrame: - """creates a dataframe with the predicted pairs + + def export_to_df(self, prediction: dict, tqdm_enable:bool = False) -> pd.DataFrame: + """Creates a dataframe with the predicted pairs. Args: - prediction (any): Predicted candidate pairs + prediction (dict): Predicted candidate pairs. 
 
         Returns:
-            pd.DataFrame: Dataframe with the predicted pairs
+            pd.DataFrame: Dataframe with the predicted pairs.
         """
-        pairs_df = pd.DataFrame(columns=['id1', 'id2'])
-        for entity_id, candidates in prediction.items():
-            id1 = self.data._gt_to_ids_reversed_1[entity_id]
-            for candiadate_id in candidates:
-                id2 = self.data._gt_to_ids_reversed_1[candiadate_id] if self.data.is_dirty_er \
-                    else self.data._gt_to_ids_reversed_2[candiadate_id]
-                pairs_df = pd.concat([pairs_df, pd.DataFrame([{'id1':id1, 'id2':id2}], index=[0])], ignore_index=True)
-
-        return pairs_df
\ No newline at end of file
+        pairs_list = []
+
+        is_dirty_er = self.data.is_dirty_er
+        gt_to_ids_reversed_1 = self.data._gt_to_ids_reversed_1
+        gt_to_ids_reversed_2 = self.data._gt_to_ids_reversed_2
+
+        for entity_id, candidates in tqdm(prediction.items(), desc="Exporting to DataFrame", disable=not tqdm_enable):
+            id1 = gt_to_ids_reversed_1[entity_id]
+
+            for candidate_id in candidates:
+                if is_dirty_er:
+                    id2 = gt_to_ids_reversed_1[candidate_id]
+                else:
+                    id2 = gt_to_ids_reversed_2[candidate_id]
+
+                pairs_list.append((id1, id2))
+
+        pairs_df = pd.DataFrame(pairs_list, columns=['id1', 'id2'])
+
+        return pairs_df
diff --git a/docs/pyjedai/workflow.py b/docs/pyjedai/workflow.py
index b2a9f3e..4a0d393 100644
--- a/docs/pyjedai/workflow.py
+++ b/docs/pyjedai/workflow.py
@@ -192,7 +192,7 @@ def export_pairs(self) -> pd.DataFrame:
         Returns:
             pd.DataFrame: pairs as a DataFrame
         """
-        return write(self.final_pairs, self.data)
+        return self.final_step_method.export_to_df(self.final_pairs)
 
     def _save_step(self, results: dict, configuration: dict) -> None:
         self.f1.append(results['F1 %'])
@@ -580,6 +580,7 @@ def run(self,
                                     if "attributes_2" in self.block_building else None,
                                     tqdm_disable=workflow_step_tqdm_disable)
         self.final_pairs = block_building_blocks
+        self.final_step_method = block_building_method
         if data.ground_truth is not None:
             res = block_building_method.evaluate(block_building_blocks,
                                                  export_to_dict=True,
@@ -604,6 +605,7 @@ def run(self,
                                         tqdm_disable=workflow_step_tqdm_disable)
 
             self.final_pairs = bblocks = block_cleaning_blocks
+            # self.final_pairs = block_cleaning_method.export_to_df(self.final_pairs)
             if data.ground_truth is not None:
                 res = block_cleaning_method.evaluate(bblocks,
                                                      export_to_dict=True,
@@ -625,6 +627,8 @@ def run(self,
                                             else block_building_blocks,
                                             data,
                                             tqdm_disable=workflow_step_tqdm_disable)
+        self.final_step_method = comparison_cleaning_method
+
         if data.ground_truth is not None:
             res = comparison_cleaning_method.evaluate(comparison_cleaning_blocks,
                                                       export_to_dict=True,
@@ -653,6 +657,8 @@ def run(self,
                                                       tqdm_disable=workflow_step_tqdm_disable,
                                                       **self.entity_matching["exec_params"])
 
+        self.final_step_method = entity_matching_method
+
         if data.ground_truth is not None:
             res = entity_matching_method.evaluate(em_graph,
                                                   export_to_dict=True,
@@ -671,7 +677,8 @@ def run(self,
             self.final_pairs = components = clustering_method.process(em_graph, data)
         else:
             self.final_pairs = components = clustering_method.process(em_graph, data, **self.clustering["exec_params"])
-
+
+        self.final_step_method = clustering_method
         self.clusters = components
         if data.ground_truth is not None:
             res = clustering_method.evaluate(components,
@@ -859,6 +866,8 @@ def run(self,
                                                 **self.block_building["exec_params"])
 
         self.final_pairs = block_building_blocks
+        self.final_pairs = block_building_method.export_to_df(self.final_pairs)
+
         if data.ground_truth is not None:
             res = block_building_method.evaluate(block_building_blocks,
                                                  export_to_dict=True,
@@ -874,6 +883,8 @@ def run(self,
                             if "params" in self.clustering \
                                 else self.clustering['method']()
         self.final_pairs = components = clustering_method.process(em_graph, data)
+        self.final_pairs = clustering_method.export_to_df(self.final_pairs)
+
         if data.ground_truth is not None:
             res = clustering_method.evaluate(components,
                                              export_to_dict=True,
diff --git a/pyproject.toml b/pyproject.toml
index e6453c8..2e0f044 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "pyjedai"
-version = "0.2.0"
+version = "0.2.1"
 description = "An open-source library that builds powerful end-to-end Entity Resolution workflows."
 readme = "README.md"
 authors = [
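
Reviewer note: a minimal usage sketch of the surface this patch touches — the JSON-driven loader added to utils.py, the tqdm_enable flag on export_to_df, and the save_figure_path option on the plotting helpers. The config file path, the base directory, and the StandardBlocking / EntityMatching steps are illustrative assumptions drawn from the existing pyjedai API; they are not changes introduced by this diff.

# Sketch only: file names and the blocking/matching steps below are assumed,
# not part of this patch.
from pyjedai.utils import read_data_from_json
from pyjedai.block_building import StandardBlocking
from pyjedai.matching import EntityMatching

# New helper: builds a Data object from a JSON config with keys such as
# "d1", "d1_id", "gt", "dir", "separator" (hypothetical example file).
data = read_data_from_json("configs/abt_buy.json", base_dir="data/", verbose=True)

# Usual pyjedai steps (unchanged by this patch).
blocks = StandardBlocking().build_blocks(data)
em = EntityMatching(metric='jaccard')  # similarity_threshold now defaults to 0.0
pairs_graph = em.predict(blocks, data)

# New in this patch: optional progress bar while exporting pairs,
# and optional saving of the score-distribution figure.
pairs_df = em.export_to_df(pairs_graph, tqdm_enable=True)
pairs_df.to_csv("matched_pairs.csv", index=False)
em.plot_distribution_of_scores(save_figure_path="score_distribution.png")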