diff --git a/docs/_toc.yml b/docs/_toc.yml
index afec7a9..2e905a2 100644
--- a/docs/_toc.yml
+++ b/docs/_toc.yml
@@ -14,6 +14,8 @@ parts:
       title: Clean-Clean ER for Devs
     - file: tutorials/CleanCleanERwithoutGT.ipynb
       title: Clean-Clean ER without GT
+    - file: tutorials/Reproducibility.ipynb
+      title: Academic reproducibility
     - file: tutorials/DirtyER.ipynb
       title: Dirty ER for Devs
     - file: tutorials/Optuna.ipynb
diff --git a/docs/pyjedai/block_building.py b/docs/pyjedai/block_building.py
index 0d73c2c..cc1e2df 100644
--- a/docs/pyjedai/block_building.py
+++ b/docs/pyjedai/block_building.py
@@ -236,38 +236,42 @@ def stats(self, blocks: dict, verbose: bool = True) -> dict:
             'skewness_of_comparison_per_entity': self.skewness_of_comparison_per_entity
         }
 
-    def export_to_df(
-        self,
-        blocks: dict
-    ) -> pd.DataFrame:
-        """creates a dataframe for the evaluation report
+    def export_to_df(self, blocks: dict, tqdm_enable:bool = False) -> pd.DataFrame:
+        """Creates a dataframe for the evaluation report.
 
         Args:
-            blocks (any): Predicted blocks
-            data (Data): initial dataset
+            blocks (dict): Predicted blocks.
 
         Returns:
-            pd.DataFrame: Dataframe predicted pairs (can be exported to csv)
+            pd.DataFrame: Dataframe with the predicted pairs (can be exported to CSV).
         """
-        pairs_df = pd.DataFrame(columns=['id1', 'id2'])
-        for _, block in blocks.items():
-            if self.data.is_dirty_er:
+        pairs_list = []
+
+        is_dirty_er = self.data.is_dirty_er
+        gt_to_ids_reversed_1 = self.data._gt_to_ids_reversed_1
+        gt_to_ids_reversed_2 = self.data._gt_to_ids_reversed_2
+
+        for block in tqdm(blocks.values(), desc="Exporting to DataFrame", disable=not tqdm_enable):
+            if is_dirty_er:
                 lblock = list(block.entities_D1)
-                for i1 in range(0, len(lblock)):
-                    for i2 in range(i1+1, len(lblock)):
-                        id1 = self.data._gt_to_ids_reversed_1[lblock[i1]]
-                        id2 = self.data._gt_to_ids_reversed_1[lblock[i2]] if self.data.is_dirty_er \
-                            else self.data._gt_to_ids_reversed_2[lblock[i2]]
-                        pairs_df = pd.concat([pairs_df, pd.DataFrame([{'id1':id1, 'id2':id2}], index=[0])], ignore_index=True)
+
+                for i1 in range(len(lblock)):
+                    for i2 in range(i1 + 1, len(lblock)):
+                        id1 = gt_to_ids_reversed_1[lblock[i1]]
+                        id2 = gt_to_ids_reversed_1[lblock[i2]]
+                        pairs_list.append((id1, id2))
             else:
                 for i1 in block.entities_D1:
                     for i2 in block.entities_D2:
-                        id1 = self.data._gt_to_ids_reversed_1[i1]
-                        id2 = self.data._gt_to_ids_reversed_1[i2] if self.data.is_dirty_er \
-                            else self.data._gt_to_ids_reversed_2[i2]
-                        pairs_df = pd.concat([pairs_df, pd.DataFrame([{'id1':id1, 'id2':id2}], index=[0])], ignore_index=True)
+                        id1 = gt_to_ids_reversed_1[i1]
+                        id2 = gt_to_ids_reversed_2[i2]
+                        pairs_list.append((id1, id2))
+
+        pairs_df = pd.DataFrame(pairs_list, columns=['id1', 'id2'])
+
         return pairs_df
 
+
 class AbstractBlockBuilding(AbstractBlockProcessing):
     """Abstract class for the block building method
     """
diff --git a/docs/pyjedai/clustering.py b/docs/pyjedai/clustering.py
index 9f645e7..65560d0 100644
--- a/docs/pyjedai/clustering.py
+++ b/docs/pyjedai/clustering.py
@@ -376,31 +376,45 @@ def stats(self) -> None:
     def _configuration(self) -> dict:
         pass
 
-    def export_to_df(self, prediction: list) -> pd.DataFrame:
-        """creates a dataframe for the evaluation report
+    import pandas as pd
+
+    def export_to_df(self, prediction: list, tqdm_enable:bool = False) -> pd.DataFrame:
+        """Creates a dataframe for the evaluation report.
 
         Args:
-            prediction (any): Predicted clusters
+            prediction (list): Predicted clusters.
 
         Returns:
-            pd.DataFrame: Dataframe containg evaluation scores and stats
+            pd.DataFrame: Dataframe containing evaluation scores and stats.
         """
-        pairs_df = pd.DataFrame(columns=['id1', 'id2'])
-        for cluster in prediction:
+        pairs_list = []
+
+        dataset_limit = self.data.dataset_limit
+        is_dirty_er = self.data.is_dirty_er
+        gt_to_ids_reversed_1 = self.data._gt_to_ids_reversed_1
+        gt_to_ids_reversed_2 = self.data._gt_to_ids_reversed_2
+
+        for cluster in tqdm(prediction, desc="Exporting to DataFrame", disable=not tqdm_enable):
             lcluster = list(cluster)
-            for i1 in range(0, len(lcluster)):
-                for i2 in range(i1+1, len(lcluster)):
-                    if lcluster[i1] < self.data.dataset_limit:
-                        id1 = self.data._gt_to_ids_reversed_1[lcluster[i1]]
-                        id2 = self.data._gt_to_ids_reversed_1[lcluster[i2]] if self.data.is_dirty_er else self.data._gt_to_ids_reversed_2[lcluster[i2]]
+
+            for i1 in range(len(lcluster)):
+                for i2 in range(i1 + 1, len(lcluster)):
+                    node1 = lcluster[i1]
+                    node2 = lcluster[i2]
+
+                    if node1 < dataset_limit:
+                        id1 = gt_to_ids_reversed_1[node1]
+                        id2 = gt_to_ids_reversed_1[node2] if is_dirty_er else gt_to_ids_reversed_2[node2]
                     else:
-                        id2 = self.data._gt_to_ids_reversed_2[lcluster[i1]]
-                        id1 = self.data._gt_to_ids_reversed_1[lcluster[i2]]
-                    pairs_df = pd.concat(
-                        [pairs_df, pd.DataFrame([{'id1':id1, 'id2':id2}], index=[0])],
-                        ignore_index=True
-                    )
+                        id2 = gt_to_ids_reversed_2[node1]
+                        id1 = gt_to_ids_reversed_1[node2]
+
+                    pairs_list.append((id1, id2))
+
+        pairs_df = pd.DataFrame(pairs_list, columns=['id1', 'id2'])
+
         return pairs_df
 
+
     def sorted_indicators(self, first_indicator : int, second_indicator : int):
         return (first_indicator, second_indicator) if (first_indicator < second_indicator) else (second_indicator, first_indicator)
diff --git a/docs/pyjedai/datamodel.py b/docs/pyjedai/datamodel.py
index d7bd273..2eea542 100644
--- a/docs/pyjedai/datamodel.py
+++ b/docs/pyjedai/datamodel.py
@@ -123,12 +123,12 @@ def __init__(
         self.dataset_name_2 = dataset_name_2
 
         # Fill NaN values with empty string
-        self.dataset_1 = self.dataset_1.astype(str)
         self.dataset_1.fillna("", inplace=True)
+        self.dataset_1 = self.dataset_1.astype(str)
 
         if not self.is_dirty_er:
-            self.dataset_2 = self.dataset_2.astype(str)
             self.dataset_2.fillna("", inplace=True)
-
+            self.dataset_2 = self.dataset_2.astype(str)
+
         # Attributes
         if attributes_1 is None:
             if dataset_1.columns.values.tolist():
diff --git a/docs/pyjedai/joins.py b/docs/pyjedai/joins.py
index 23a9ade..944322c 100644
--- a/docs/pyjedai/joins.py
+++ b/docs/pyjedai/joins.py
@@ -309,24 +309,40 @@ def _configuration(self) -> dict:
             "qgrams": self.qgrams
         }
 
-    def export_to_df(self, prediction) -> pd.DataFrame:
+    def export_to_df(self, prediction, tqdm_enable=False) -> pd.DataFrame:
         """creates a dataframe with the predicted pairs
 
         Args:
-            prediction (any): Predicted candidate pairs
+            prediction (any): Predicted candidate pairs.
+            tqdm_enable (bool, optional): Enable tqdm. Defaults to False.
 
         Returns:
             pd.DataFrame: Dataframe with the predicted pairs
         """
-        if self.data.ground_truth is None:
-            raise AttributeError("Can not proceed to evaluation without a ground-truth file. \
-                    Data object mush have initialized with the ground-truth file")
-        pairs_df = pd.DataFrame(columns=['id1', 'id2'])
-        for edge in prediction.edges:
-            id1 = self.data._gt_to_ids_reversed_1[edge[0]]
-            id2 = self.data._gt_to_ids_reversed_1[edge[1]] if self.data.is_dirty_er \
-                else self.data._gt_to_ids_reversed_2[edge[1]]
-            pairs_df = pd.concat([pairs_df, pd.DataFrame([{'id1':id1, 'id2':id2}], index=[0])], ignore_index=True)
+        pairs_list = []
+
+        is_dirty_er = self.data.is_dirty_er
+        dataset_limit = self.data.dataset_limit
+        gt_to_ids_reversed_1 = self.data._gt_to_ids_reversed_1
+        gt_to_ids_reversed_2 = self.data._gt_to_ids_reversed_2
+
+        for edge in tqdm(prediction.edges, disable=not tqdm_enable, desc="Exporting to DataFrame"):
+            node1, node2 = edge
+
+            if not is_dirty_er:
+                if node1 < dataset_limit:
+                    id1 = gt_to_ids_reversed_1[node1]
+                    id2 = gt_to_ids_reversed_2[node2]
+                else:
+                    id1 = gt_to_ids_reversed_2[node1]
+                    id2 = gt_to_ids_reversed_1[node2]
+            else:
+                id1 = gt_to_ids_reversed_1[node1]
+                id2 = gt_to_ids_reversed_1[node2]
+
+            pairs_list.append((id1, id2))
+
+        pairs_df = pd.DataFrame(pairs_list, columns=['id1', 'id2'])
 
         return pairs_df
 
@@ -416,8 +432,7 @@ def _configuration(self) -> dict:
             "tokenization" : self.tokenization,
             "qgrams": self.qgrams
         }
-
-
+
 class PETopKJoin(TopKJoin):
     """Progressive Entity Resolution Top-K class of Joins module
     """
diff --git a/docs/pyjedai/matching.py b/docs/pyjedai/matching.py
index 9955fec..b0ed0a4 100644
--- a/docs/pyjedai/matching.py
+++ b/docs/pyjedai/matching.py
@@ -155,7 +155,7 @@ def get_weights_median(self) -> float:
     def get_weights_standard_deviation(self) -> float:
         return statistics.stdev([w for _, _, w in self.pairs.edges(data='weight')])
 
-    def plot_distribution_of_all_weights(self) -> None:
+    def plot_distribution_of_all_weights(self, save_figure_path=None) -> None:
         title = "Distribution of scores with " + self.metric + " metric in graph from entity matching"
         plt.figure(figsize=(10, 6))
         all_weights = [w for _, _, w in self.pairs.edges(data='weight')]
@@ -168,9 +168,11 @@ def plot_distribution_of_all_weights(self) -> None:
         plt.axvline(x = self.get_weights_median(), color = 'black', label = 'Median weight')
         plt.axvline(x = self.get_weights_avg()+self.get_weights_standard_deviation(), color = 'green', label = 'Average + SD weight')
         plt.legend()
+        if save_figure_path:
+            plt.savefig(save_figure_path)
         plt.show()
 
-    def plot_distribution_of_all_weights_2d(self) -> None:
+    def plot_distribution_of_all_weights_2d(self, save_figure_path=None) -> None:
         title = "Distribution of scores with " + self.metric + " metric in graph from entity matching"
         plt.figure(figsize=(10, 6))
         all_weights = [w for _, _, w in self.pairs.edges(data='weight')]
@@ -182,9 +184,11 @@ def plot_distribution_of_all_weights_2d(self) -> None:
         plt.axvline(x = self.get_weights_median(), color = 'black', label = 'Median weight')
         plt.axvline(x = self.get_weights_avg()+self.get_weights_standard_deviation(), color = 'green', label = 'Average + SD weight')
         plt.legend()
+        if save_figure_path:
+            plt.savefig(save_figure_path)
         plt.show()
 
-    def plot_distribution_of_scores(self) -> None:
+    def plot_distribution_of_scores(self, save_figure_path=None) -> None:
         title = "Distribution of scores with " + self.metric + " metric in graph from entity matching"
         def weight_distribution(G):
             bins = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
@@ -221,9 +225,11 @@ def weight_distribution(G):
         plt.axvline(x = self.get_weights_median()*10, color = 'black', label = 'Median weight')
         plt.axvline(x = self.get_weights_avg()*10+self.get_weights_standard_deviation()*10, color = 'green', label = 'Average + SD weight')
         plt.legend()
+        if save_figure_path:
+            plt.savefig(save_figure_path)
         plt.show()
 
-    def plot_gt_distribution_of_scores(self) -> None:
+    def plot_gt_distribution_of_scores(self, save_figure_path=None) -> None:
         title = "Distribution of scores with " + self.metric + " metric on ground truth pairs"
         def weight_distribution():
             bins = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
@@ -257,6 +263,8 @@ def weight_distribution():
         ax.set_title(title)
         ax.set_xlabel('Similarity score range')
         fig.tight_layout()
+        if save_figure_path:
+            plt.savefig(save_figure_path)
         plt.show()
 
     def evaluate(self,
@@ -294,43 +302,43 @@ def evaluate(self,
 
     def stats(self) -> None:
         pass
-
-    def export_pairs_to_csv(self, filename: str, with_similarity: bool = True) -> None:
-        if self.pairs is None:
-            raise AttributeError("Pairs have not been initialized yet. " +
-                                 "Please run the method `run` first.")
-
-        with open(filename, 'w') as f:
-            for e1, e2, similarity in self.pairs.edges(data='weight'):
-                e1 = self.data._ids_mapping_1[e1] if e1 < self.data.dataset_limit else self.data._ids_mapping_2[e1]
-                e2 = self.data._ids_mapping_1[e2] if e2 < self.data.dataset_limit else self.data._ids_mapping_2[e2]
-                if with_similarity:
-                    f.write(f"{e1}, {e2}, {similarity}\n")
-                else:
-                    f.write(f"{e1}, {e2}\n")
-            f.close()
-
-    def export_to_df(self, prediction: Graph) -> pd.DataFrame:
-        """creates a dataframe with the predicted pairs
+
+    def export_to_df(self, prediction: Graph, tqdm_enable=False) -> pd.DataFrame:
+        """Creates a dataframe with the predicted pairs.
 
         Args:
-            prediction (any): Predicted graph
+            prediction (Graph): Predicted graph
+            tqdm_enable (bool): Whether to enable tqdm progress bar
 
         Returns:
             pd.DataFrame: Dataframe with the predicted pairs
         """
-        if self.data.ground_truth is None:
-            raise AttributeError("Can not proceed to evaluation without a ground-truth file. \
-                    Data object mush have initialized with the ground-truth file")
-        pairs_df = pd.DataFrame(columns=['id1', 'id2'])
-        for edge in prediction.edges:
-            id1 = self.data._gt_to_ids_reversed_1[edge[0]]
-            id2 = self.data._gt_to_ids_reversed_1[edge[1]] if self.data.is_dirty_er \
-                else self.data._gt_to_ids_reversed_2[edge[1]]
-            pairs_df = pd.concat([pairs_df, pd.DataFrame([{'id1':id1, 'id2':id2}], index=[0])], ignore_index=True)
+        pairs_list = []
 
-        return pairs_df
+        is_dirty_er = self.data.is_dirty_er
+        dataset_limit = self.data.dataset_limit
+        gt_to_ids_reversed_1 = self.data._gt_to_ids_reversed_1
+        gt_to_ids_reversed_2 = self.data._gt_to_ids_reversed_2
 
+        for edge in tqdm(prediction.edges, disable=not tqdm_enable, desc="Exporting to DataFrame"):
+            node1, node2 = edge
+
+            if not is_dirty_er:
+                if node1 < dataset_limit:
+                    id1 = gt_to_ids_reversed_1[node1]
+                    id2 = gt_to_ids_reversed_2[node2]
+                else:
+                    id1 = gt_to_ids_reversed_2[node1]
+                    id2 = gt_to_ids_reversed_1[node2]
+            else:
+                id1 = gt_to_ids_reversed_1[node1]
+                id2 = gt_to_ids_reversed_1[node2]
+
+            pairs_list.append((id1, id2))
+
+        pairs_df = pd.DataFrame(pairs_list, columns=['id1', 'id2'])
+
+        return pairs_df
 
 class EntityMatching(AbstractEntityMatching):
     """Calculates similarity from 0.0 to 1.0 for all blocks
     """
@@ -345,7 +353,7 @@ def __init__(
             tokenizer: str = 'white_space_tokenizer',
             vectorizer : str = None,
             qgram : int = 1,
-            similarity_threshold: float = 0.5,
+            similarity_threshold: float = 0.0,
             tokenizer_return_unique_values = False, # unique values or not,
             attributes: any = None,
     ) -> None:
diff --git a/docs/pyjedai/utils.py b/docs/pyjedai/utils.py
index f95d9f3..6dd56b0 100644
--- a/docs/pyjedai/utils.py
+++ b/docs/pyjedai/utils.py
@@ -1258,3 +1258,51 @@ def predict(self, id1 : int, id2 : int) -> float:
         _id2 = (id2 - self._entities_d1_num)
 
         return self.distance_matrix[_id1][_id2]
+
+def read_data_from_json(json_path, base_dir, verbose=True):
+    """
+    Reads dataset details from a JSON file and returns a Data object.
+
+    Parameters:
+    - json_path (str): Path to the JSON configuration file.
+    - verbose (bool): Whether to print information about the loaded datasets.
+
+    Returns:
+    - Data: A pyjedai Data object initialized with the dataset details.
+ """ + # Load JSON configuration + with open(json_path, 'r') as f: + config = json.load(f) + + # Extract common settings + separator = config.get("separator", ",") + engine = config.get("engine", "python") + na_filter = config.get("na_filter", False) + dataset_dir = config.get("dir", "") + + # Construct file paths + d1_path = f"{base_dir}{dataset_dir}/{config['d1']}.{config.get('format', 'csv')}" + d2_path = f"{base_dir}{dataset_dir}/{config['d2']}.{config.get('format', 'csv')}" if "d2" in config else None + gt_path = f"{base_dir}{dataset_dir}/{config['gt']}.{config.get('format', 'csv')}" if "gt" in config else None + + # Load datasets + d1 = pd.read_csv(d1_path, sep=separator, engine=engine, na_filter=na_filter) + d2 = pd.read_csv(d2_path, sep=separator, engine=engine, na_filter=na_filter) if d2_path else None + gt = pd.read_csv(gt_path, sep=separator, engine=engine) if gt_path else None + + # Initialize Data object + data = Data( + dataset_1=d1, + id_column_name_1=config["d1_id"], + dataset_name_1=config.get("d1", None), + dataset_2=d2, + id_column_name_2=config.get("d2_id", None), + dataset_name_2=config.get("d2", None), + ground_truth=gt, + skip_ground_truth_processing=config.get("skip_ground_truth_processing", False) + ) + + if verbose: + data.print_specs() + + return data diff --git a/docs/pyjedai/vector_based_blocking.py b/docs/pyjedai/vector_based_blocking.py index 3584966..f9ebec1 100644 --- a/docs/pyjedai/vector_based_blocking.py +++ b/docs/pyjedai/vector_based_blocking.py @@ -138,7 +138,6 @@ def build_blocks(self, if self.similarity_search != 'faiss': raise AttributeError("Only FAISS is available for now.") - print('Building blocks via Embeddings-NN Block Building [' + self.vectorizer + ', ' + self.similarity_search + ']') _start_time = time() self.blocks = dict() self.verbose = verbose @@ -226,7 +225,7 @@ def build_blocks(self, if verbose: print(f"{p2} -> Loaded Successfully") else: if verbose: print("Embeddings not found for D2. Creating new ones.") - if not self._d1_loaded or not self._d2_loaded: + if not self._d1_loaded or (not data.is_dirty_er and not self._d2_loaded): if self.vectorizer in ['word2vec', 'fasttext', 'doc2vec', 'glove']: self.vectors_1, self.vectors_2 = self._create_gensim_embeddings() elif self.vectorizer in ['bert', 'distilbert', 'roberta', 'xlnet', 'albert']: @@ -368,7 +367,6 @@ def _create_pretrained_sentence_embeddings(self): vectors_2 = [] if not self.data.is_dirty_er and not self._d2_loaded: for e2 in self._entities_d2: - # print("e2: ", e2) vector = model.encode(e2) vectors_2.append(vector) self._progress_bar.update(1) @@ -420,7 +418,7 @@ def _similarity_search_with_FAISS(self): self.blocks = dict() if self.verbose: print("Building blocks...") - print("disable", not self.verbose) + for _entity in tqdm(range(0, self.neighbors.shape[0]), desc="Building blocks", disable=not self.verbose): _entity_id = self._si.d1_retained_ids[_entity] if self.data.is_dirty_er else self._si.d2_retained_ids[_entity] @@ -510,23 +508,33 @@ def stats(self) -> None: "\n\tIndices shape returned after search: " + str(self.neighbors.shape) ) print(u'\u2500' * 123) - - - def export_to_df(self, prediction: dict) -> pd.DataFrame: - """creates a dataframe with the predicted pairs + + def export_to_df(self, prediction: dict, tqdm_enable:bool = False) -> pd.DataFrame: + """Creates a dataframe with the predicted pairs. Args: - prediction (any): Predicted candidate pairs + prediction (dict): Predicted candidate pairs. 
 
         Returns:
-            pd.DataFrame: Dataframe with the predicted pairs
+            pd.DataFrame: Dataframe with the predicted pairs.
         """
-        pairs_df = pd.DataFrame(columns=['id1', 'id2'])
-        for entity_id, candidates in prediction.items():
-            id1 = self.data._gt_to_ids_reversed_1[entity_id]
-            for candiadate_id in candidates:
-                id2 = self.data._gt_to_ids_reversed_1[candiadate_id] if self.data.is_dirty_er \
-                    else self.data._gt_to_ids_reversed_2[candiadate_id]
-                pairs_df = pd.concat([pairs_df, pd.DataFrame([{'id1':id1, 'id2':id2}], index=[0])], ignore_index=True)
-
-        return pairs_df
\ No newline at end of file
+        pairs_list = []
+
+        is_dirty_er = self.data.is_dirty_er
+        gt_to_ids_reversed_1 = self.data._gt_to_ids_reversed_1
+        gt_to_ids_reversed_2 = self.data._gt_to_ids_reversed_2
+
+        for entity_id, candidates in tqdm(prediction.items(), desc="Exporting to DataFrame", disable=not tqdm_enable):
+            id1 = gt_to_ids_reversed_1[entity_id]
+
+            for candidate_id in candidates:
+                if is_dirty_er:
+                    id2 = gt_to_ids_reversed_1[candidate_id]
+                else:
+                    id2 = gt_to_ids_reversed_2[candidate_id]
+
+                pairs_list.append((id1, id2))
+
+        pairs_df = pd.DataFrame(pairs_list, columns=['id1', 'id2'])
+
+        return pairs_df
diff --git a/docs/pyjedai/workflow.py b/docs/pyjedai/workflow.py
index b2a9f3e..4a0d393 100644
--- a/docs/pyjedai/workflow.py
+++ b/docs/pyjedai/workflow.py
@@ -192,7 +192,7 @@ def export_pairs(self) -> pd.DataFrame:
         Returns:
             pd.DataFrame: pairs as a DataFrame
         """
-        return write(self.final_pairs, self.data)
+        return self.final_step_method.export_to_df(self.final_pairs)
 
     def _save_step(self, results: dict, configuration: dict) -> None:
         self.f1.append(results['F1 %'])
@@ -580,6 +580,7 @@ def run(self,
                                     if "attributes_2" in self.block_building else None,
                                     tqdm_disable=workflow_step_tqdm_disable)
         self.final_pairs = block_building_blocks
+        self.final_step_method = block_building_method
         if data.ground_truth is not None:
             res = block_building_method.evaluate(block_building_blocks,
                                                  export_to_dict=True,
@@ -604,6 +605,7 @@ def run(self,
                                         tqdm_disable=workflow_step_tqdm_disable)
 
             self.final_pairs = bblocks = block_cleaning_blocks
+            # self.final_pairs = block_cleaning_method.export_to_df(self.final_pairs)
             if data.ground_truth is not None:
                 res = block_cleaning_method.evaluate(bblocks,
                                                      export_to_dict=True,
@@ -625,6 +627,8 @@ def run(self,
                                             else block_building_blocks,
                                             data,
                                             tqdm_disable=workflow_step_tqdm_disable)
+        self.final_step_method = comparison_cleaning_method
+
         if data.ground_truth is not None:
             res = comparison_cleaning_method.evaluate(comparison_cleaning_blocks,
                                                       export_to_dict=True,
@@ -653,6 +657,8 @@ def run(self,
                                                       tqdm_disable=workflow_step_tqdm_disable,
                                                       **self.entity_matching["exec_params"])
 
+        self.final_step_method = entity_matching_method
+
         if data.ground_truth is not None:
             res = entity_matching_method.evaluate(em_graph,
                                                   export_to_dict=True,
@@ -671,7 +677,8 @@ def run(self,
             self.final_pairs = components = clustering_method.process(em_graph, data)
         else:
             self.final_pairs = components = clustering_method.process(em_graph, data, **self.clustering["exec_params"])
-
+
+        self.final_step_method = clustering_method
         self.clusters = components
         if data.ground_truth is not None:
             res = clustering_method.evaluate(components,
@@ -859,6 +866,8 @@ def run(self,
                                                 **self.block_building["exec_params"])
 
         self.final_pairs = block_building_blocks
+        self.final_pairs = block_building_method.export_to_df(self.final_pairs)
+
         if data.ground_truth is not None:
             res = block_building_method.evaluate(block_building_blocks,
                                                  export_to_dict=True,
@@ -874,6 +883,8 @@ def run(self,
                             if "params" in self.clustering \
                                 else self.clustering['method']()
         self.final_pairs = components = clustering_method.process(em_graph, data)
+        self.final_pairs = clustering_method.export_to_df(self.final_pairs)
+
         if data.ground_truth is not None:
             res = clustering_method.evaluate(components,
                                              export_to_dict=True,
diff --git a/pyproject.toml b/pyproject.toml
index e6453c8..2e0f044 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "pyjedai"
-version = "0.2.0"
+version = "0.2.1"
 description = "An open-source library that builds powerful end-to-end Entity Resolution workflows."
 readme = "README.md"
 authors = [
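
Reviewer note: a minimal usage sketch of the surface this patch touches — the JSON-driven loader added to utils.py, the tqdm_enable flag on export_to_df, and the save_figure_path option on the plotting helpers. The config file path, the base directory, and the StandardBlocking / EntityMatching steps are illustrative assumptions drawn from the existing pyjedai API; they are not changes introduced by this diff.

# Sketch only: file names and the blocking/matching steps below are assumed,
# not part of this patch.
from pyjedai.utils import read_data_from_json
from pyjedai.block_building import StandardBlocking
from pyjedai.matching import EntityMatching

# New helper: builds a Data object from a JSON config with keys such as
# "d1", "d1_id", "gt", "dir", "separator" (hypothetical example file).
data = read_data_from_json("configs/abt_buy.json", base_dir="data/", verbose=True)

# Usual pyjedai steps (unchanged by this patch).
blocks = StandardBlocking().build_blocks(data)
em = EntityMatching(metric='jaccard')  # similarity_threshold now defaults to 0.0
pairs_graph = em.predict(blocks, data)

# New in this patch: optional progress bar while exporting pairs,
# and optional saving of the score-distribution figure.
pairs_df = em.export_to_df(pairs_graph, tqdm_enable=True)
pairs_df.to_csv("matched_pairs.csv", index=False)
em.plot_distribution_of_scores(save_figure_path="score_distribution.png")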