diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 54fffe4b..cb1e04f9 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -3,11 +3,7 @@ name: Python package -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] +on: push jobs: build: diff --git a/pairtools/__init__.py b/pairtools/__init__.py index c534813d..e4aa6788 100644 --- a/pairtools/__init__.py +++ b/pairtools/__init__.py @@ -12,4 +12,4 @@ __version__ = "1.0.0-dev1" -# from . import lib \ No newline at end of file +# from . import lib diff --git a/pairtools/cli/__init__.py b/pairtools/cli/__init__.py index a1d85d81..9f42e44e 100644 --- a/pairtools/cli/__init__.py +++ b/pairtools/cli/__init__.py @@ -142,6 +142,7 @@ def _excepthook(exc_type, value, tb): sys.excepthook = _excepthook + def common_io_options(func): @click.option( "--nproc-in", @@ -200,5 +201,5 @@ def wrapper(*args, **kwargs): sample, filterbycov, header, - scaling + scaling, ) diff --git a/pairtools/cli/dedup.py b/pairtools/cli/dedup.py index 55a3348c..74bfe29a 100644 --- a/pairtools/cli/dedup.py +++ b/pairtools/cli/dedup.py @@ -5,8 +5,8 @@ import ast import pathlib -# from distutils.log import warn -# import warnings +from .._logging import get_logger +logger = get_logger() from ..lib import fileio, pairsam_format, headerops from . import cli, common_io_options @@ -60,7 +60,16 @@ " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, statistics are not printed.", ) - +@click.option( + "--output-bytile-stats", + type=str, + default="", + help="output file for duplicate statistics." + " If file exists, it will be open in the append mode." + " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." + " By default, by-tile duplicate statistics are not printed." + " Note that the readID should be provided and contain tile information for this option. 
", +) ### Set the dedup method: @click.option( "--max-mismatch", @@ -223,6 +232,7 @@ def dedup( output_dups, output_unmapped, output_stats, + output_bytile_stats, chunksize, carryover, max_mismatch, @@ -260,6 +270,7 @@ def dedup( output_dups, output_unmapped, output_stats, + output_bytile_stats, chunksize, carryover, max_mismatch, @@ -293,6 +304,7 @@ def dedup_py( output_dups, output_unmapped, output_stats, + output_bytile_stats, chunksize, carryover, max_mismatch, @@ -350,8 +362,21 @@ def dedup_py( else None ) + bytile_dups = False + if output_bytile_stats: + out_bytile_stats_stream = fileio.auto_open( + output_bytile_stats, + mode="w", + nproc=kwargs.get("nproc_out"), + command=kwargs.get("cmd_out", None), + ) + bytile_dups = True + if not keep_parent_id: + logger.warning("Force output --parent-readID because --output-bytile-stats provided.") + keep_parent_id = True + # generate empty PairCounter if stats output is requested: - out_stat = PairCounter() if output_stats else None + out_stat = PairCounter(bytile_dups=bytile_dups) if output_stats else None if not output_dups: outstream_dups = None @@ -465,6 +490,9 @@ def dedup_py( if out_stat: out_stat.save(out_stats_stream) + if bytile_dups: + out_stat.save_bytile_dups(out_bytile_stats_stream) + if instream != sys.stdin: instream.close() diff --git a/pairtools/cli/stats.py b/pairtools/cli/stats.py index f1162805..890988b0 100644 --- a/pairtools/cli/stats.py +++ b/pairtools/cli/stats.py @@ -10,6 +10,8 @@ from ..lib.stats import PairCounter, do_merge +from .._logging import get_logger +logger = get_logger() UTIL_NAME = "pairtools_stats" @@ -40,8 +42,26 @@ default=False, help="Output stats in yaml format instead of table. ", ) +@click.option( + "--bytile-dups/--no-bytile-dups", + default=False, + help="If enabled, will analyse by-tile duplication statistics to estimate" + " library complexity more accurately." + " Requires parent_readID column to be saved by dedup (will be ignored otherwise)" + " Saves by-tile stats into --output_bytile-stats stream, or regular output if --output_bytile-stats is not provided.", +) +@click.option( + "--output-bytile-stats", + default="", + required=False, + help="output file for tile duplicate statistics." + " If file exists, it will be open in the append mode." + " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." + " By default, by-tile duplicate statistics are not printed." + " Note that the readID and parent_readID should be provided and contain tile information for this option.", +) @common_io_options -def stats(input_path, output, merge, **kwargs): +def stats(input_path, output, merge, bytile_dups, output_bytile_stats, **kwargs): """Calculate pairs statistics. INPUT_PATH : by default, a .pairs/.pairsam file to calculate statistics. @@ -51,10 +71,15 @@ def stats(input_path, output, merge, **kwargs): The files with paths ending with .gz/.lz4 are decompressed by bgzip/lz4c. 
""" - stats_py(input_path, output, merge, **kwargs) + + stats_py( + input_path, output, merge, bytile_dups, output_bytile_stats, **kwargs, + ) -def stats_py(input_path, output, merge, **kwargs): +def stats_py( + input_path, output, merge, bytile_dups, output_bytile_stats, **kwargs +): if merge: do_merge(output, input_path, **kwargs) return @@ -74,14 +99,25 @@ def stats_py(input_path, output, merge, **kwargs): nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) + if bytile_dups and not output_bytile_stats: + output_bytile_stats = outstream + if output_bytile_stats: + bytile_dups = True header, body_stream = headerops.get_header(instream) cols = headerops.extract_column_names(header) + # Check necessary columns for reporting by-tile stats: + if bytile_dups and "parent_readID" not in cols: + logger.warning( + "No 'parent_readID' column in the file, not generating duplicate stats." + ) + bytile_dups = False + # new stats class stuff would come here ... - stats = PairCounter() + stats = PairCounter(bytile_dups=bytile_dups) - # collecting statistics + # Collecting statistics for chunk in pd.read_table(body_stream, names=cols, chunksize=100_000): stats.add_pairs_from_dataframe(chunk) @@ -89,6 +125,9 @@ def stats_py(input_path, output, merge, **kwargs): chromsizes = headerops.extract_chromsizes(header) stats.add_chromsizes(chromsizes) + if bytile_dups: + stats.save_bytile_dups(output_bytile_stats) + # save statistics to file ... stats.save(outstream, yaml=kwargs.get("yaml", False)) diff --git a/pairtools/lib/dedup.py b/pairtools/lib/dedup.py index 29cccbbd..a7aac0a9 100644 --- a/pairtools/lib/dedup.py +++ b/pairtools/lib/dedup.py @@ -1,6 +1,5 @@ import numpy as np import pandas as pd -import warnings import scipy.spatial from scipy.sparse import coo_matrix @@ -380,7 +379,7 @@ def streaming_dedup_cython( # take care of empty lines not at the end of the file separately if rawline and (not stripline): - warnings.warn("Empty line detected not at the end of the file") + logger.warning("Empty line detected not at the end of the file") continue if stripline: diff --git a/pairtools/lib/stats.py b/pairtools/lib/stats.py index c42370c9..167ce491 100644 --- a/pairtools/lib/stats.py +++ b/pairtools/lib/stats.py @@ -1,39 +1,12 @@ import numpy as np +import pandas as pd +from scipy import special from collections.abc import Mapping import sys from . import fileio -def do_merge(output, files_to_merge, **kwargs): - # Parse all stats files. - stats = [] - for stat_file in files_to_merge: - f = fileio.auto_open( - stat_file, - mode="r", - nproc=kwargs.get("nproc_in"), - command=kwargs.get("cmd_in", None), - ) - # use a factory method to instanciate PairCounter - stat = PairCounter.from_file(f) - stats.append(stat) - f.close() - - # combine stats from several files (files_to_merge): - out_stat = sum(stats) - - # Save merged stats. - outstream = fileio.auto_open( - output, - mode="w", - nproc=kwargs.get("nproc_out"), - command=kwargs.get("cmd_out", None), - ) - - # save statistics to file ... 
- out_stat.save(outstream) - - if outstream != sys.stdout: - outstream.close() +from .._logging import get_logger +logger = get_logger() class PairCounter(Mapping): @@ -52,7 +25,7 @@ class PairCounter(Mapping): _SEP = "\t" _KEY_SEP = "/" - def __init__(self, min_log10_dist=0, max_log10_dist=9, log10_dist_bin_step=0.25): + def __init__(self, min_log10_dist=0, max_log10_dist=9, log10_dist_bin_step=0.25, bytile_dups=False): self._stat = {} # some variables used for initialization: # genomic distance bining for the ++/--/-+/+- distribution @@ -99,6 +72,29 @@ def __init__(self, min_log10_dist=0, max_log10_dist=9, log10_dist_bin_step=0.25) "++": np.zeros(len(self._dist_bins), dtype=np.int), } + # Summaries are derived from other stats and are recalculated on merge + self._stat["summary"] = dict( + [ + ("frac_cis", 0), + ("frac_cis_1kb+", 0), + ("frac_cis_2kb+", 0), + ("frac_cis_4kb+", 0), + ("frac_cis_10kb+", 0), + ("frac_cis_20kb+", 0), + ("frac_cis_40kb+", 0), + ("frac_dups", 0), + ("complexity_naive", 0), + ] + ) + self._save_bytile_dups = bytile_dups + if self._save_bytile_dups: + self._bytile_dups = pd.DataFrame( + index=pd.MultiIndex( + levels=[[], []], codes=[[], []], names=["tile", "parent_tile"] + ) + ) + self._summaries_calculated = False + def __getitem__(self, key): if isinstance(key, str): # let's strip any unintentional '/' @@ -179,6 +175,54 @@ def __iter__(self): def __len__(self): return len(self._stat) + def calculate_summaries(self): + """calculate summary statistics (fraction of cis pairs at different cutoffs, + complexity estimate) based on accumulated counts. Results are saved into + self._stat['summary'] + """ + + self._stat["summary"]["frac_dups"] = ( + (self._stat["total_dups"] / self._stat["total_mapped"]) + if self._stat["total_mapped"] > 0 + else 0 + ) + + for cis_count in ( + "cis", + "cis_1kb+", + "cis_2kb+", + "cis_4kb+", + "cis_10kb+", + "cis_20kb+", + "cis_40kb+", + ): + self._stat["summary"][f"frac_{cis_count}"] = ( + (self._stat[cis_count] / self._stat["total_nodups"]) + if self._stat["total_nodups"] > 0 + else 0 + ) + + self._stat["summary"]["complexity_naive"] = estimate_library_complexity( + self._stat["total_mapped"], self._stat["total_dups"], 0 + ) + + if self._save_bytile_dups: + # Estimate library complexity with information by tile, if provided: + if self._bytile_dups.shape[0] > 0: + self._stat["dups_by_tile_median"] = ( + self._bytile_dups["dup_count"].median() * self._bytile_dups.shape[0] + ) + if "dups_by_tile_median" in self._stat.keys(): + self._stat["summary"][ + "complexity_dups_by_tile_median" + ] = estimate_library_complexity( + self._stat["total_mapped"], + self._stat["total_dups"], + self._stat["dups_by_tile_median"], + ) + + self._summaries_calculated = True + @classmethod def from_file(cls, file_handle): """create instance of PairCounter from file @@ -346,7 +390,7 @@ def add_pair(self, chrom1, pos1, strand1, chrom2, pos2, strand2, pair_type): def add_pairs_from_dataframe(self, df, unmapped_chrom="!"): """Gather statistics for Hi-C pairs in a dataframe and add to the PairCounter. 
- + Parameters ---------- df: pd.DataFrame @@ -385,22 +429,25 @@ def add_pairs_from_dataframe(self, df, unmapped_chrom="!"): mask_dups = df_mapped["duplicate"] else: mask_dups = df_mapped["pair_type"] == "DD" - dups_count = mask_dups.sum() + df_dups = df_mapped[mask_dups] + dups_count = df_dups.shape[0] self._stat["total_dups"] += int(dups_count) self._stat["total_nodups"] += int(mapped_count - dups_count) + df_nodups = df_mapped.loc[~mask_dups, :] + mask_cis = df_nodups["chrom1"] == df_nodups["chrom2"] + df_cis = df_nodups.loc[mask_cis, :].copy() + # Count pairs per chromosome: for (chrom1, chrom2), chrom_count in ( - df_mapped[["chrom1", "chrom2"]].value_counts().items() + df_nodups[["chrom1", "chrom2"]].value_counts().items() ): self._stat["chrom_freq"][(chrom1, chrom2)] = ( self._stat["chrom_freq"].get((chrom1, chrom2), 0) + chrom_count ) # Count cis-trans by pairs: - df_nodups = df_mapped.loc[~mask_dups, :] - mask_cis = df_nodups["chrom1"] == df_nodups["chrom2"] - df_cis = df_nodups.loc[mask_cis, :].copy() + self._stat["cis"] += df_cis.shape[0] self._stat["trans"] += df_nodups.shape[0] - df_cis.shape[0] dist = np.abs(df_cis["pos2"].values - df_cis["pos1"].values) @@ -417,6 +464,13 @@ def add_pairs_from_dataframe(self, df, unmapped_chrom="!"): self._stat["cis_20kb+"] += int(np.sum(dist >= 20000)) self._stat["cis_40kb+"] += int(np.sum(dist >= 40000)) + ### Add by-tile dups + if self._save_bytile_dups and (df_dups.shape[0] > 0): + bytile_dups = analyse_bytile_duplicate_stats(df_dups) + self._bytile_dups = self._bytile_dups.add( + bytile_dups, fill_value=0 + ).astype(int) + def add_chromsizes(self, chromsizes): """ Add chromsizes field to the output stats @@ -524,6 +578,13 @@ def flatten(self): ) # store key,value pair: flat_stat[formatted_key] = freq + elif (k == "summary") and v: + for key, frac in v.items(): + formatted_key = self._KEY_SEP.join(["{}", "{}"]).format( + k, key + ) + # store key,value pair: + flat_stat[formatted_key] = frac # return flattened dict return flat_stat @@ -551,7 +612,9 @@ def format(self): for dirs, freqs in v.items(): # last bin is treated differently: "100000+" vs "1200-3000": if i != len(self._dist_bins) - 1: - dist = "{}-{}".format(self._dist_bins[i], self._dist_bins[i + 1]) + dist = "{}-{}".format( + self._dist_bins[i], self._dist_bins[i + 1] + ) else: dist = "{}+".format(self._dist_bins[i]) if dist not in freqs_dct.keys(): @@ -568,14 +631,20 @@ def format(self): elif (k == "chrom_freq") and v: freqs = {} for (chrom1, chrom2), freq in v.items(): - freqs[self._KEY_SEP.join(["{}", "{}"]).format(chrom1, chrom2)] = freq + freqs[ + self._KEY_SEP.join(["{}", "{}"]).format(chrom1, chrom2) + ] = freq # store key,value pair: formatted_stat[k] = deepcopy(freqs) + elif (k == "summary") and v: + summary_stats = {} + for key, frac in v.items(): + summary_stats[key] = frac + formatted_stat[k] = deepcopy(summary_stats) # return formatted dict return formatted_stat - def save(self, outstream, yaml=False): """save PairCounter to tab-delimited text file. Flattened version of PairCounter is stored in the file. 
@@ -595,11 +664,152 @@ def save(self, outstream, yaml=False): sort(merge(A,merge(B,C))) == sort(merge(merge(A,B),C)) """ + if not self._summaries_calculated: + self.calculate_summaries() + # write flattened version of the PairCounter to outstream if yaml: import yaml + data = self.format() yaml.dump(data, outstream, default_flow_style=False) else: for k, v in self.flatten().items(): outstream.write("{}{}{}\n".format(k, self._SEP, v)) + + def save_bytile_dups(self, outstream): + """save bytile duplication counts to a tab-delimited text file. + Parameters + ---------- + outstream: file handle + """ + if self._save_bytile_dups: + self._bytile_dups.reset_index().to_csv(outstream, sep="\t", index=False) + else: + logger.error("Bytile dups are not calculated, cannot save.") + + +################## +# Other functions: + +def do_merge(output, files_to_merge, **kwargs): + # Parse all stats files. + stats = [] + for stat_file in files_to_merge: + f = fileio.auto_open( + stat_file, + mode="r", + nproc=kwargs.get("nproc_in"), + command=kwargs.get("cmd_in", None), + ) + # use a factory method to instanciate PairCounter + stat = PairCounter.from_file(f) + stats.append(stat) + f.close() + + # combine stats from several files (files_to_merge): + out_stat = sum(stats) + + # Save merged stats. + outstream = fileio.auto_open( + output, + mode="w", + nproc=kwargs.get("nproc_out"), + command=kwargs.get("cmd_out", None), + ) + + # save statistics to file ... + out_stat.save(outstream) + + if outstream != sys.stdout: + outstream.close() + + +def estimate_library_complexity(nseq, ndup, nopticaldup=0): + """Estimate library complexity accounting for optical/clustering duplicates + Parameters + ---------- + nseq : int + Total number of sequences + ndup : int + Total number of duplicates + nopticaldup : int, optional + Number of non-PCR duplicates, by default 0 + Returns + ------- + float + Estimated complexity + """ + nseq = nseq - nopticaldup + if nseq == 0: + logger.warning("Empty of fully duplicated library, can't estimate complexity") + return 0 + ndup = ndup - nopticaldup + u = (nseq - ndup) / nseq + if u==0: + logger.warning("All the sequences are duplicates. 
Did you run complexity estimation on a duplicates file?")
+        return 0
+    seq_to_complexity = special.lambertw(-np.exp(-1 / u) / u).real + 1 / u
+    complexity = float(nseq / seq_to_complexity)  # clean np.int64 data type
+    return complexity
+
+
+def analyse_bytile_duplicate_stats(df_dups, tile_dup_regex=False):
+    """Count by-tile duplicates
+    Parameters
+    ----------
+    df_dups : pd.DataFrame
+        Dataframe with duplicates that contains paired read IDs
+    tile_dup_regex : bool, optional
+        See extract_tile_info for details, by default False
+    Returns
+    -------
+    pd.DataFrame
+        Grouped multi-indexed dataframe of pairwise by-tile duplication counts
+    """
+
+    df_dups = df_dups.copy()
+
+    df_dups["tile"] = extract_tile_info(df_dups["readID"], regex=tile_dup_regex)
+    df_dups["parent_tile"] = extract_tile_info(df_dups["parent_readID"], regex=tile_dup_regex)
+
+    df_dups["same_tile"] = (df_dups["tile"] == df_dups["parent_tile"])
+    bytile_dups = (
+        df_dups.groupby(["tile", "parent_tile"])
+        .size()
+        .reset_index(name="dup_count")
+        .sort_values(["tile", "parent_tile"])
+    )
+    bytile_dups[["tile", "parent_tile"]] = np.sort(
+        bytile_dups[["tile", "parent_tile"]].values, axis=1
+    )
+    bytile_dups = bytile_dups.groupby(["tile", "parent_tile"]).sum()
+    return bytile_dups
+
+
+def extract_tile_info(series, regex=False):
+    """Extract the name of the tile for each read name in the series
+    Parameters
+    ----------
+    series : pd.Series
+        Series containing read IDs
+    regex : str or bool, optional
+        Regex to extract fields from the read IDs that correspond to tile IDs.
+        By default False, uses a faster predefined approach for typical Illumina
+        read names
+        Example: r"(?:\w+):(?:\w+):(\w+):(\w+):(\w+):(?:\w+):(?:\w+)"
+    Returns
+    -------
+    Series
+        Series containing tile IDs as strings
+    """
+    if regex:
+        split = series.str.extractall(regex).unstack().droplevel(1, axis=1)
+        if split.shape[1] < 4:
+            raise ValueError(f"Unable to convert tile names, does your readID have the tile information?\nHint: SRA removes tile information from readID.\nSample of your readIDs:\n{series.head()}")
+        return split[0] + ":" + split[1] + ":" + split[2]
+    else:
+        split = series.str.split(":", expand=True)
+        if split.shape[1] < 5:
+            raise ValueError(f"Unable to convert tile names, does your readID have the tile information?\nHint: SRA removes tile information from readID.\nSample of your readIDs:\n{series.head()}")
+        return split[2] + ":" + split[3] + ":" + split[4]
diff --git a/tests/data/mock.4stats.pairs b/tests/data/mock.4stats.pairs
new file mode 100644
index 00000000..73c46673
--- /dev/null
+++ b/tests/data/mock.4stats.pairs
@@ -0,0 +1,21 @@
+## pairs format v1.0.0
+#shape: upper triangle
+#genome_assembly: unknown
+#samheader: @SQ	SN:chr1	LN:100
+#samheader: @SQ	SN:chr2	LN:100
+#samheader: @SQ	SN:chr3	LN:100
+#samheader: @PG	ID:bwa	PN:bwa	VN:0.7.15-r1140	CL:bwa mem -SP /path/ucsc.hg19.fasta.gz /path/1.fastq.gz /path/2.fastq.gz
+#chromosomes: chr2 chr3 chr1
+#chromsize: chr2 100
+#chromsize: chr3 100
+#chromsize: chr1 100
+#columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type
+readid01	chr1	1	chr1	50	+	+	UU
+readid02	chr1	1	chr1	50	+	+	DD
+readid03	chr1	1	chr1	2	+	+	UU
+readid04	chr1	1	chr1	3	+	+	UR
+readid05	chr1	1	chr2	20	+	+	UU
+readid06	chr2	1	chr3	2	+	+	UU
+readid07	!	0	chr1	3	-	+	NU
+readid08	!	0	chr1	3	-	+	MU
+readid09	!	0	!	0	-	-	WW
diff --git a/tests/test_stats.py b/tests/test_stats.py
index 344ef56f..d3d6c985 100644
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@@ -3,12 +3,13 @@
 import sys
 import subprocess
 from nose.tools import assert_raises
+import numpy as np
 
 testdir = os.path.dirname(os.path.realpath(__file__))
 
 
 def test_mock_pairsam():
-    mock_pairsam_path = os.path.join(testdir, "data", "mock.pairsam")
+    mock_pairsam_path = os.path.join(testdir, "data", "mock.4stats.pairs")
     try:
         result = subprocess.check_output(
             ["python", "-m", "pairtools", "stats", mock_pairsam_path],
@@ -25,12 +26,16 @@
         )
 
     for k in stats:
-        stats[k] = int(stats[k])
+        try:
+            stats[k] = int(stats[k])
+        except ValueError:
+            stats[k] = float(stats[k])
 
     print(stats)
-    assert stats["total"] == 8
+    assert stats["total"] == 9
     assert stats["total_single_sided_mapped"] == 2
-    assert stats["total_mapped"] == 5
+    assert stats["total_mapped"] == 6
+    assert stats["total_dups"] == 1
     assert stats["cis"] == 3
     assert stats["trans"] == 2
     assert stats["pair_types/UU"] == 4
@@ -38,6 +43,7 @@
     assert stats["pair_types/WW"] == 1
     assert stats["pair_types/UR"] == 1
     assert stats["pair_types/MU"] == 1
+    assert stats["pair_types/DD"] == 1
     assert stats["chrom_freq/chr1/chr2"] == 1
     assert stats["chrom_freq/chr1/chr1"] == 3
     assert stats["chrom_freq/chr2/chr3"] == 1
@@ -47,7 +53,17 @@
         if k.startswith("dist_freq")
         and k not in ["dist_freq/1-2/++", "dist_freq/2-3/++", "dist_freq/32-56/++"]
     )
-
+    assert stats["dist_freq/1-2/++"] == 1
+    assert stats["dist_freq/2-3/++"] == 1
+    assert stats["dist_freq/32-56/++"] == 1
+    assert stats["summary/frac_cis"] == 0.6
+    assert stats["summary/frac_cis_1kb+"] == 0
+    assert stats["summary/frac_cis_2kb+"] == 0
+    assert stats["summary/frac_cis_4kb+"] == 0
+    assert stats["summary/frac_cis_10kb+"] == 0
+    assert stats["summary/frac_cis_20kb+"] == 0
+    assert stats["summary/frac_cis_40kb+"] == 0
+    assert np.isclose(stats["summary/frac_dups"], 1 / 6)
     assert stats["dist_freq/1-2/++"] == 1
     assert stats["dist_freq/2-3/++"] == 1
     assert stats["dist_freq/32-56/++"] == 1
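
Note (not part of the diff above): a minimal, self-contained sketch of the saturation model that the new estimate_library_complexity() inverts with scipy's Lambert W function, checked against the counts in the new mock.4stats.pairs fixture (6 mapped pairs, 1 duplicate). The helper name complexity_from_counts is made up for illustration; only the math mirrors the diff.

import numpy as np
from scipy import special

def complexity_from_counts(total_mapped, total_dups):
    # Library-size model: unique = C * (1 - exp(-total / C)), where C is complexity.
    u = (total_mapped - total_dups) / total_mapped  # fraction of unique pairs
    # Solve u * x = 1 - exp(-x) for x = total / C, as estimate_library_complexity()
    # does (with the optical-duplicate correction set to 0).
    x = special.lambertw(-np.exp(-1 / u) / u).real + 1 / u
    return total_mapped / x

C = complexity_from_counts(6, 1)
# Sanity check: the estimate reproduces the observed number of unique pairs.
assert np.isclose(6 - 1, C * (1 - np.exp(-6 / C)))
print(round(C, 2))  # roughly 16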
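
Also outside the diff: a quick illustration of what the fast, non-regex branch of the new extract_tile_info() returns for a typical colon-separated Illumina read name (the read name below is invented).

import pandas as pd

# instrument:run:flowcell:lane:tile:x:y
read_ids = pd.Series(["A00123:456:HXYZABCXX:1:1101:10000:20000"])
split = read_ids.str.split(":", expand=True)
# Fields 2-4 (flowcell, lane, tile) identify the tile, as in extract_tile_info():
print(split[2] + ":" + split[3] + ":" + split[4])  # -> HXYZABCXX:1:1101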