+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/0.7.1/html/_images/DL-Flowchart.png b/docs/0.7.1/html/_images/DL-Flowchart.png
new file mode 100644
index 000000000..696eeb5dc
Binary files /dev/null and b/docs/0.7.1/html/_images/DL-Flowchart.png differ
diff --git a/docs/0.7.1/html/_images/histogram_example_0.png b/docs/0.7.1/html/_images/histogram_example_0.png
new file mode 100644
index 000000000..9b8301363
Binary files /dev/null and b/docs/0.7.1/html/_images/histogram_example_0.png differ
diff --git a/docs/0.7.1/html/_images/histogram_example_1.png b/docs/0.7.1/html/_images/histogram_example_1.png
new file mode 100644
index 000000000..062dfdbb9
Binary files /dev/null and b/docs/0.7.1/html/_images/histogram_example_1.png differ
diff --git a/docs/0.7.1/html/_images/histogram_example_2.png b/docs/0.7.1/html/_images/histogram_example_2.png
new file mode 100644
index 000000000..1aedf7549
Binary files /dev/null and b/docs/0.7.1/html/_images/histogram_example_2.png differ
diff --git a/docs/0.7.1/html/_sources/API.rst.txt b/docs/0.7.1/html/_sources/API.rst.txt
new file mode 100644
index 000000000..fdbf2242b
--- /dev/null
+++ b/docs/0.7.1/html/_sources/API.rst.txt
@@ -0,0 +1,16 @@
+.. _API:
+
+API
+***
+
+The API is split into 4 main components: Profilers, Labelers, Data Readers, and
+Validators.
+
+.. toctree::
+ :maxdepth: 1
+ :caption: Contents:
+
+ dataprofiler.data_readers
+ dataprofiler.profilers
+ dataprofiler.labelers
+ dataprofiler.validators
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/add_new_model_to_data_labeler.nblink.txt b/docs/0.7.1/html/_sources/add_new_model_to_data_labeler.nblink.txt
new file mode 100644
index 000000000..130e413fc
--- /dev/null
+++ b/docs/0.7.1/html/_sources/add_new_model_to_data_labeler.nblink.txt
@@ -0,0 +1,3 @@
+{
+ "path": "../../feature_branch/examples/add_new_model_to_data_labeler.ipynb"
+}
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/data_labeling.rst.txt b/docs/0.7.1/html/_sources/data_labeling.rst.txt
new file mode 100644
index 000000000..db76fe791
--- /dev/null
+++ b/docs/0.7.1/html/_sources/data_labeling.rst.txt
@@ -0,0 +1,365 @@
+.. _data_labeling:
+
+Labeler (Sensitive Data)
+************************
+
+In this library, the term *data labeling* refers to entity recognition.
+
+Builtin to the data profiler is a classifier which evaluates the complex data types of the dataset.
+For structured data, it determines the complex data type of each column. When
+running the data profile, it uses the default data labeling model builtin to the
+library. However, the data labeler allows users to train their own data labeler
+as well.
+
+*Data Labels* are determined per cell for structured data (column/row when
+the *profiler* is used) or at the character level for unstructured data. This
+is a list of the default labels.
+
+* UNKNOWN
+* ADDRESS
+* BAN (bank account number, 10-18 digits)
+* CREDIT_CARD
+* EMAIL_ADDRESS
+* UUID
+* HASH_OR_KEY (md5, sha1, sha256, random hash, etc.)
+* IPV4
+* IPV6
+* MAC_ADDRESS
+* PERSON
+* PHONE_NUMBER
+* SSN
+* URL
+* US_STATE
+* DRIVERS_LICENSE
+* DATE
+* TIME
+* DATETIME
+* INTEGER
+* FLOAT
+* QUANTITY
+* ORDINAL
+
+
+Identify Entities in Structured Data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Making predictions and identifying labels:
+
+.. code-block:: python
+
+ import dataprofiler as dp
+
+ # load data and data labeler
+ data = dp.Data("your_data.csv")
+ data_labeler = dp.DataLabeler(labeler_type='structured')
+
+ # make predictions and get labels per cell
+ predictions = data_labeler.predict(data)
+
+Identify Entities in Unstructured Data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Predict which class characters belong to in unstructured text:
+
+.. code-block:: python
+
+ import dataprofiler as dp
+
+ data_labeler = dp.DataLabeler(labeler_type='unstructured')
+
+ # Example sample string, must be in an array (multiple arrays can be passed)
+ sample = ["Help\tJohn Macklemore\tneeds\tfood.\tPlease\tCall\t555-301-1234."
+ "\tHis\tssn\tis\tnot\t334-97-1234. I'm a BAN: 000043219499392912.\n"]
+
+    # Predict what class each character belongs to
+ model_predictions = data_labeler.predict(
+ sample, predict_options=dict(show_confidences=True))
+
+ # Predictions / confidences are at the character level
+ final_results = model_predictions["pred"]
+ final_confidences = model_predictions["conf"]
+
+It's also possible to change output formats, output similar to a **SpaCy** format:
+
+.. code-block:: python
+
+ import dataprofiler as dp
+
+ data_labeler = dp.DataLabeler(labeler_type='unstructured', trainable=True)
+
+ # Example sample string, must be in an array (multiple arrays can be passed)
+ sample = ["Help\tJohn Macklemore\tneeds\tfood.\tPlease\tCall\t555-301-1234."
+ "\tHis\tssn\tis\tnot\t334-97-1234. I'm a BAN: 000043219499392912.\n"]
+
+ # Set the output to the NER format (start position, end position, label)
+ data_labeler.set_params(
+ { 'postprocessor': { 'output_format':'ner', 'use_word_level_argmax':True } }
+ )
+
+ results = data_labeler.predict(sample)
+
+ print(results)
+
+Train a New Data Labeler
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Mechanism for training your own data labeler on your own set of structured data
+(tabular):
+
+.. code-block:: python
+
+ import dataprofiler as dp
+
+ # Will need one column with a default label of UNKNOWN
+ data = dp.Data("your_file.csv")
+
+ data_labeler = dp.train_structured_labeler(
+ data=data,
+ save_dirpath="/path/to/save/labeler",
+ epochs=2
+ )
+
+ data_labeler.save_to_disk("my/save/path") # Saves the data labeler for reuse
+
+Load an Existing Data Labeler
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Mechanism for loading an existing data_labeler:
+
+.. code-block:: python
+
+ import dataprofiler as dp
+
+ data_labeler = dp.DataLabeler(
+ labeler_type='structured', dirpath="/path/to/my/labeler")
+
+ # get information about the parameters/inputs/output formats for the DataLabeler
+ data_labeler.help()
+
+Extending a Data Labeler with Transfer Learning
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Extending or changing labels of a data labeler w/ transfer learning:
+Note: By default, **a labeler loaded will not be trainable**. In order to load a
+trainable DataLabeler, the user must set `trainable=True` or load a labeler
+using the `TrainableDataLabeler` class.
+
+The following illustrates how to change the labels:
+
+.. code-block:: python
+
+ import dataprofiler as dp
+
+ labels = ['label1', 'label2', ...] # new label set can also be an encoding dict
+ data = dp.Data("your_file.csv") # contains data with new labels
+
+ # load default structured Data Labeler w/ trainable set to True
+ data_labeler = dp.DataLabeler(labeler_type='structured', trainable=True)
+
+ # this will use transfer learning to retrain the data labeler on your new
+ # dataset and labels.
+ # NOTE: data must be in an acceptable format for the preprocessor to interpret.
+ # please refer to the preprocessor/model for the expected data format.
+ # Currently, the DataLabeler cannot take in Tabular data, but requires
+ # data to be ingested with two columns [X, y] where X is the samples and
+ # y is the labels.
+ model_results = data_labeler.fit(x=data['samples'], y=data['labels'],
+ validation_split=0.2, epochs=2, labels=labels)
+
+ # final_results, final_confidences are a list of results for each epoch
+ epoch_id = 0
+ final_results = model_results[epoch_id]["pred"]
+ final_confidences = model_results[epoch_id]["conf"]
+
+The following illustrates how to extend the labels:
+
+.. code-block:: python
+
+ import dataprofiler as dp
+
+ new_labels = ['label1', 'label2', ...]
+ data = dp.Data("your_file.csv") # contains data with new labels
+
+ # load default structured Data Labeler w/ trainable set to True
+ data_labeler = dp.DataLabeler(labeler_type='structured', trainable=True)
+
+ # this will maintain current labels and model weights, but extend the model's
+ # labels
+ for label in new_labels:
+ data_labeler.add_label(label)
+
+ # NOTE: a user can also add a label which maps to the same index as an existing
+ # label
+ # data_labeler.add_label(label, same_as='')
+
+ # For a trainable model, the user must then train the model to be able to
+ # continue using the labeler since the model's graph has likely changed
+ # NOTE: data must be in an acceptable format for the preprocessor to interpret.
+ # please refer to the preprocessor/model for the expected data format.
+ # Currently, the DataLabeler cannot take in Tabular data, but requires
+ # data to be ingested with two columns [X, y] where X is the samples and
+ # y is the labels.
+ model_results = data_labeler.fit(x=data['samples'], y=data['labels'],
+ validation_split=0.2, epochs=2)
+
+ # final_results, final_confidences are a list of results for each epoch
+ epoch_id = 0
+ final_results = model_results[epoch_id]["pred"]
+ final_confidences = model_results[epoch_id]["conf"]
+
+
+Changing pipeline parameters:
+
+.. code-block:: python
+
+ import dataprofiler as dp
+
+ # load default Data Labeler
+ data_labeler = dp.DataLabeler(labeler_type='structured')
+
+ # change parameters of specific component
+ data_labeler.preprocessor.set_params({'param1': 'value1'})
+
+ # change multiple simultaneously.
+ data_labeler.set_params({
+ 'preprocessor': {'param1': 'value1'},
+ 'model': {'param2': 'value2'},
+ 'postprocessor': {'param3': 'value3'}
+ })
+
+
+Build Your Own Data Labeler
+===========================
+
+The DataLabeler has 3 main components: preprocessor, model, and postprocessor.
+To create your own DataLabeler, each one would have to be created or an
+existing component can be reused.
+
+Given a set of the 3 components, you can construct your own DataLabeler:
+
+.. code-block:: python
+ from dataprofiler.labelers.base_data_labeler import BaseDataLabeler, \
+ TrainableDataLabeler
+ from dataprofiler.labelers.character_level_cnn_model import CharacterLevelCnnModel
+ from dataprofiler.labelers.data_processing import \
+ StructCharPreprocessor, StructCharPostprocessor
+
+ # load a non-trainable data labeler
+ model = CharacterLevelCnnModel(...)
+ preprocessor = StructCharPreprocessor(...)
+ postprocessor = StructCharPostprocessor(...)
+
+ data_labeler = BaseDataLabeler.load_with_components(
+ preprocessor=preprocessor, model=model, postprocessor=postprocessor)
+
+ # check for basic compatibility between the processors and the model
+ data_labeler.check_pipeline()
+
+
+ # load trainable data labeler
+ data_labeler = TrainableDataLabeler.load_with_components(
+ preprocessor=preprocessor, model=model, postprocessor=postprocessor)
+
+ # check for basic compatibility between the processors and the model
+ data_labeler.check_pipeline()
+
+Option for swapping out specific components of an existing labeler.
+
+.. code-block:: python
+
+ import dataprofiler as dp
+ from dataprofiler.labelers.character_level_cnn_model import \
+ CharacterLevelCnnModel
+ from dataprofiler.labelers.data_processing import \
+ StructCharPreprocessor, StructCharPostprocessor
+
+ model = CharacterLevelCnnModel(...)
+ preprocessor = StructCharPreprocessor(...)
+ postprocessor = StructCharPostprocessor(...)
+
+ data_labeler = dp.DataLabeler(labeler_type='structured')
+ data_labeler.set_preprocessor(preprocessor)
+ data_labeler.set_model(model)
+ data_labeler.set_postprocessor(postprocessor)
+
+ # check for basic compatibility between the processors and the model
+ data_labeler.check_pipeline()
+
+
+Model Component
+~~~~~~~~~~~~~~~
+
+In order to create your own model component for data labeling, you can utilize
+the `BaseModel` class from `dataprofiler.labelers.base_model` and
+overriding the abstract class methods.
+
+Reviewing `CharacterLevelCnnModel` from
+`dataprofiler.labelers.character_level_cnn_model` illustrates the functions
+which need an override.
+
+#. `__init__`: specifying default parameters and calling base `__init__`
+#. `_validate_parameters`: validating parameters given by user during setting
+#. `_need_to_reconstruct_model`: flag for when to reconstruct a model (i.e.
+ parameters change or labels change require a model reconstruction)
+#. `_construct_model`: initial construction of the model given the parameters
+#. `_reconstruct_model`: updates model architecture for new label set while
+ maintaining current model weights
+#. `fit`: mechanism for the model to learn given training data
+#. `predict`: mechanism for model to make predictions on data
+#. `details`: prints a summary of the model construction
+#. `save_to_disk`: saves model and model parameters to disk
+#. `load_from_disk`: loads model given a path on disk
+
+
+Preprocessor Component
+~~~~~~~~~~~~~~~~~~~~~~
+
+In order to create your own preprocessor component for data labeling, you can
+utilize the `BaseDataPreprocessor` class
+from `dataprofiler.labelers.data_processing` and override the abstract class
+methods.
+
+Reviewing `StructCharPreprocessor` from
+`dataprofiler.labelers.data_processing` illustrates the functions which
+need an override.
+
+#. `__init__`: passing parameters to the base class and executing any
+ extraneous calculations to be saved as parameters
+#. `_validate_parameters`: validating parameters given by user during
+ setting
+#. `process`: takes in the user data and converts it into a digestible,
+ iterable format for the model
+#. `set_params` (optional): if a parameter requires processing before setting,
+ a user can override this function to assist with setting the parameter
+#. `_save_processor` (optional): if a parameter is not JSON serializable, a
+ user can override this function to assist in saving the processor and its
+ parameters
+#. `load_from_disk` (optional): if a parameter(s) is not JSON serializable, a
+ user can override this function to assist in loading the processor
+
+Postprocessor Component
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The postprocessor is nearly identical to the preprocessor except it handles
+the output of the model for processing. In order to create your own
+postprocessor component for data labeling, you can utilize the
+`BaseDataPostprocessor` class from `dataprofiler.labelers.data_processing`
+and override the abstract class methods.
+
+Reviewing `StructCharPostprocessor` from
+`dataprofiler.labelers.data_processing` illustrates the functions which
+need an override.
+
+#. `__init__`: passing parameters to the base class and executing any
+ extraneous calculations to be saved as parameters
+#. `_validate_parameters`: validating parameters given by user during
+ setting
+#. `process`: takes in the output of the model and processes for output to
+ the user
+#. `set_params` (optional): if a parameter requires processing before setting,
+ a user can override this function to assist with setting the parameter
+#. `_save_processor` (optional): if a parameter is not JSON serializable, a
+ user can override this function to assist in saving the processor and its
+ parameters
+#. `load_from_disk` (optional): if a parameter(s) is not JSON serializable, a
+ user can override this function to assist in loading the processor
diff --git a/docs/0.7.1/html/_sources/data_reader.nblink.txt b/docs/0.7.1/html/_sources/data_reader.nblink.txt
new file mode 100644
index 000000000..4722970da
--- /dev/null
+++ b/docs/0.7.1/html/_sources/data_reader.nblink.txt
@@ -0,0 +1,3 @@
+{
+ "path": "../../feature_branch/examples/data_readers.ipynb"
+}
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/data_readers.rst.txt b/docs/0.7.1/html/_sources/data_readers.rst.txt
new file mode 100644
index 000000000..45e815b63
--- /dev/null
+++ b/docs/0.7.1/html/_sources/data_readers.rst.txt
@@ -0,0 +1,139 @@
+.. _data_readers:
+
+Data Readers
+************
+
+The `Data` class itself will identify then output one of the following `Data` class types.
+Using the data reader is easy, just pass it through the Data object.
+
+.. code-block:: python
+
+ import dataprofiler as dp
+ data = dp.Data("your_file.csv")
+
+The supported file types are:
+
+* CSV file (or any delimited file)
+* JSON object
+* Avro file
+* Parquet file
+* Text file
+* Pandas DataFrame
+* A URL that points to one of the supported file types above
+
+It's also possible to specifically call one of the data classes such as the following command:
+
+.. code-block:: python
+
+ from dataprofiler.data_readers.csv_data import CSVData
+ data = CSVData("your_file.csv", options={"delimiter": ","})
+
+Additionally any of the data classes can be loaded using a URL:
+
+.. code-block:: python
+
+ import dataprofiler as dp
+    data = dp.Data("https://your_website.com/your_file.file", options={"verify_ssl": "True"})
+
+Below are descriptions of the various `Data` classes and the available options.
+
+CSVData
+=======
+
+Data class for loading datasets of type CSV. Can be specified by passing
+in memory data or via a file path. Options pertaining to the CSV may also
+be specified using the options dict parameter.
+
+`CSVData(input_file_path=None, data=None, options=None)`
+
+Possible `options`:
+
+* delimiter - Must be a string, for example `"delimiter": ","`
+* data_format - must be a string, possible choices: "dataframe", "records"
+* selected_columns - columns being selected from the entire dataset, must be a
+ list `["column 1", "ssn"]`
+* header - Define the header, for example
+
+ * `"header": 'auto'` for auto detection
+ * `"header": None` for no header
+  * `"header": <row number>` to specify the header row (0-based index)
+
+JSONData
+========
+
+Data class for loading datasets of type JSON. Can be specified by
+passing in memory data or via a file path. Options pertaining to the JSON
+may also be specified using the options dict parameter. JSON data can be
+accessed via the "data" property, the "metadata" property, and the
+"data_and_metadata" property.
+
+`JSONData(input_file_path=None, data=None, options=None)`
+
+Possible `options`:
+
+* data_format - must be a string, choices: "dataframe", "records", "json", "flattened_dataframe"
+
+ * "flattened_dataframe" is best used for JSON structure typically found in data streams that contain
+ nested lists of dictionaries and a payload. For example: `{"data": [ columns ], "response": 200}`
+* selected_keys - columns being selected from the entire dataset, must be a list `["column 1", "ssn"]`
+* payload_keys - The dictionary keys for the payload of the JSON, typically called "data"
+ or "payload". Defaults to ["data", "payload", "response"].
+
+
+AVROData
+========
+
+Data class for loading datasets of type AVRO. Can be specified by
+passing in memory data or via a file path. Options pertaining to the AVRO
+may also be specified using the options dict parameter.
+
+`AVROData(input_file_path=None, data=None, options=None)`
+
+Possible `options`:
+
+* data_format - must be a string, choices: "dataframe", "records", "avro", "json", "flattened_dataframe"
+
+ * "flattened_dataframe" is best used for AVROs with a JSON structure typically found in data streams that contain
+ nested lists of dictionaries and a payload. For example: `{"data": [ columns ], "response": 200}`
+* selected_keys - columns being selected from the entire dataset, must be a list `["column 1", "ssn"]`
+
+ParquetData
+===========
+
+Data class for loading datasets of type PARQUET. Can be specified by
+passing in memory data or via a file path. Options pertaining to the
+PARQUET may also be specified using the options dict parameter.
+
+`ParquetData(input_file_path=None, data=None, options=None)`
+
+Possible `options`:
+
+* data_format - must be a string, choices: "dataframe", "records", "json"
+* selected_keys - columns being selected from the entire dataset, must be a list `["column 1", "ssn"]`
+
+TextData
+========
+
+Data class for loading datasets of type TEXT. Can be specified by
+passing in memory data or via a file path. Options pertaining to the TEXT
+may also be specified using the options dict parameter.
+
+`TextData(input_file_path=None, data=None, options=None)`
+
+Possible `options`:
+
+* data_format: user selected format in which to return data. Currently only supports "text".
+* samples_per_line - chunks by which to read in the specified dataset
+
+Data Using a URL
+================
+
+Data class for loading datasets of any type using a URL. Specified by passing in
+any valid URL that points to one of the valid data types. Options pertaining to the
+URL may also be specified using the options dict parameter.
+
+`Data(input_file_path=None, data=None, options=None)`
+
+Possible `options`:
+
+* verify_ssl: must be a boolean string, choices: "True", "False". Set to "True" by default.
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.data_readers.avro_data.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.data_readers.avro_data.rst.txt
new file mode 100644
index 000000000..d3227df29
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.data_readers.avro_data.rst.txt
@@ -0,0 +1,7 @@
+Avro Data
+=========
+
+.. automodule:: dataprofiler.data_readers.avro_data
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.data_readers.base_data.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.data_readers.base_data.rst.txt
new file mode 100644
index 000000000..b82883cb9
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.data_readers.base_data.rst.txt
@@ -0,0 +1,7 @@
+Base Data
+=========
+
+.. automodule:: dataprofiler.data_readers.base_data
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.data_readers.csv_data.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.data_readers.csv_data.rst.txt
new file mode 100644
index 000000000..85a625d69
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.data_readers.csv_data.rst.txt
@@ -0,0 +1,7 @@
+CSV Data
+========
+
+.. automodule:: dataprofiler.data_readers.csv_data
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.data_readers.data.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.data_readers.data.rst.txt
new file mode 100644
index 000000000..813e81805
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.data_readers.data.rst.txt
@@ -0,0 +1,7 @@
+Data
+====
+
+.. automodule:: dataprofiler.data_readers.data
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.data_readers.data_utils.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.data_readers.data_utils.rst.txt
new file mode 100644
index 000000000..309208b73
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.data_readers.data_utils.rst.txt
@@ -0,0 +1,7 @@
+Data Utils
+==========
+
+.. automodule:: dataprofiler.data_readers.data_utils
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.data_readers.filepath_or_buffer.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.data_readers.filepath_or_buffer.rst.txt
new file mode 100644
index 000000000..89e78cc2d
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.data_readers.filepath_or_buffer.rst.txt
@@ -0,0 +1,7 @@
+Filepath Or Buffer
+==================
+
+.. automodule:: dataprofiler.data_readers.filepath_or_buffer
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.data_readers.json_data.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.data_readers.json_data.rst.txt
new file mode 100644
index 000000000..ae0a51d13
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.data_readers.json_data.rst.txt
@@ -0,0 +1,7 @@
+JSON Data
+=========
+
+.. automodule:: dataprofiler.data_readers.json_data
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.data_readers.parquet_data.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.data_readers.parquet_data.rst.txt
new file mode 100644
index 000000000..dfdcbe4bb
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.data_readers.parquet_data.rst.txt
@@ -0,0 +1,7 @@
+Parquet Data
+============
+
+.. automodule:: dataprofiler.data_readers.parquet_data
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.data_readers.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.data_readers.rst.txt
new file mode 100644
index 000000000..80324c993
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.data_readers.rst.txt
@@ -0,0 +1,29 @@
+Data Readers
+============
+
+
+Modules
+-------
+
+.. toctree::
+ :maxdepth: 4
+
+
+.. toctree::
+ :maxdepth: 4
+
+ dataprofiler.data_readers.avro_data
+ dataprofiler.data_readers.base_data
+ dataprofiler.data_readers.csv_data
+ dataprofiler.data_readers.data
+ dataprofiler.data_readers.data_utils
+ dataprofiler.data_readers.filepath_or_buffer
+ dataprofiler.data_readers.json_data
+ dataprofiler.data_readers.parquet_data
+ dataprofiler.data_readers.structured_mixins
+ dataprofiler.data_readers.text_data
+
+.. automodule:: dataprofiler.data_readers
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/0.7.1/html/_sources/dataprofiler.data_readers.structured_mixins.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.data_readers.structured_mixins.rst.txt
new file mode 100644
index 000000000..157e03d2c
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.data_readers.structured_mixins.rst.txt
@@ -0,0 +1,7 @@
+Structured Mixins
+=================
+
+.. automodule:: dataprofiler.data_readers.structured_mixins
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.data_readers.text_data.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.data_readers.text_data.rst.txt
new file mode 100644
index 000000000..6ac6b9648
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.data_readers.text_data.rst.txt
@@ -0,0 +1,7 @@
+Text Data
+=========
+
+.. automodule:: dataprofiler.data_readers.text_data
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.dp_logging.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.dp_logging.rst.txt
new file mode 100644
index 000000000..d1c6c910d
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.dp_logging.rst.txt
@@ -0,0 +1,7 @@
+Dp Logging
+==========
+
+.. automodule:: dataprofiler.dp_logging
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.labelers.base_data_labeler.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.labelers.base_data_labeler.rst.txt
new file mode 100644
index 000000000..839a74157
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.labelers.base_data_labeler.rst.txt
@@ -0,0 +1,7 @@
+Base Data Labeler
+=================
+
+.. automodule:: dataprofiler.labelers.base_data_labeler
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.labelers.base_model.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.labelers.base_model.rst.txt
new file mode 100644
index 000000000..c4bc9b08b
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.labelers.base_model.rst.txt
@@ -0,0 +1,7 @@
+Base Model
+==========
+
+.. automodule:: dataprofiler.labelers.base_model
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.labelers.character_level_cnn_model.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.labelers.character_level_cnn_model.rst.txt
new file mode 100644
index 000000000..80113a935
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.labelers.character_level_cnn_model.rst.txt
@@ -0,0 +1,7 @@
+Character Level Cnn Model
+=========================
+
+.. automodule:: dataprofiler.labelers.character_level_cnn_model
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.labelers.classification_report_utils.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.labelers.classification_report_utils.rst.txt
new file mode 100644
index 000000000..4c3624869
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.labelers.classification_report_utils.rst.txt
@@ -0,0 +1,7 @@
+Classification Report Utils
+===========================
+
+.. automodule:: dataprofiler.labelers.classification_report_utils
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.labelers.data_labelers.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.labelers.data_labelers.rst.txt
new file mode 100644
index 000000000..6ac45d9e6
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.labelers.data_labelers.rst.txt
@@ -0,0 +1,7 @@
+Data Labelers
+=============
+
+.. automodule:: dataprofiler.labelers.data_labelers
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.labelers.data_processing.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.labelers.data_processing.rst.txt
new file mode 100644
index 000000000..58d572a29
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.labelers.data_processing.rst.txt
@@ -0,0 +1,7 @@
+Data Processing
+===============
+
+.. automodule:: dataprofiler.labelers.data_processing
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.labelers.labeler_utils.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.labelers.labeler_utils.rst.txt
new file mode 100644
index 000000000..f14cabd5c
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.labelers.labeler_utils.rst.txt
@@ -0,0 +1,7 @@
+Labeler Utils
+=============
+
+.. automodule:: dataprofiler.labelers.labeler_utils
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.labelers.regex_model.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.labelers.regex_model.rst.txt
new file mode 100644
index 000000000..e85772ad2
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.labelers.regex_model.rst.txt
@@ -0,0 +1,7 @@
+Regex Model
+===========
+
+.. automodule:: dataprofiler.labelers.regex_model
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.labelers.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.labelers.rst.txt
new file mode 100644
index 000000000..86eab391e
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.labelers.rst.txt
@@ -0,0 +1,27 @@
+Labelers
+========
+
+
+Modules
+-------
+
+.. toctree::
+ :maxdepth: 4
+
+
+.. toctree::
+ :maxdepth: 4
+
+ dataprofiler.labelers.base_data_labeler
+ dataprofiler.labelers.base_model
+ dataprofiler.labelers.character_level_cnn_model
+ dataprofiler.labelers.classification_report_utils
+ dataprofiler.labelers.data_labelers
+ dataprofiler.labelers.data_processing
+ dataprofiler.labelers.labeler_utils
+ dataprofiler.labelers.regex_model
+
+.. automodule:: dataprofiler.labelers
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.base_column_profilers.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.base_column_profilers.rst.txt
new file mode 100644
index 000000000..13cab9ff4
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.base_column_profilers.rst.txt
@@ -0,0 +1,7 @@
+Base Column Profilers
+=====================
+
+.. automodule:: dataprofiler.profilers.base_column_profilers
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.categorical_column_profile.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.categorical_column_profile.rst.txt
new file mode 100644
index 000000000..a525d86c8
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.categorical_column_profile.rst.txt
@@ -0,0 +1,7 @@
+Categorical Column Profile
+==========================
+
+.. automodule:: dataprofiler.profilers.categorical_column_profile
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.column_profile_compilers.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.column_profile_compilers.rst.txt
new file mode 100644
index 000000000..9599deb9a
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.column_profile_compilers.rst.txt
@@ -0,0 +1,7 @@
+Column Profile Compilers
+========================
+
+.. automodule:: dataprofiler.profilers.column_profile_compilers
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.data_labeler_column_profile.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.data_labeler_column_profile.rst.txt
new file mode 100644
index 000000000..282408931
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.data_labeler_column_profile.rst.txt
@@ -0,0 +1,7 @@
+Data Labeler Column Profile
+===========================
+
+.. automodule:: dataprofiler.profilers.data_labeler_column_profile
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.datetime_column_profile.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.datetime_column_profile.rst.txt
new file mode 100644
index 000000000..d4467634f
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.datetime_column_profile.rst.txt
@@ -0,0 +1,7 @@
+Datetime Column Profile
+=======================
+
+.. automodule:: dataprofiler.profilers.datetime_column_profile
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.float_column_profile.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.float_column_profile.rst.txt
new file mode 100644
index 000000000..d23bb4336
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.float_column_profile.rst.txt
@@ -0,0 +1,7 @@
+Float Column Profile
+====================
+
+.. automodule:: dataprofiler.profilers.float_column_profile
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.helpers.report_helpers.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.helpers.report_helpers.rst.txt
new file mode 100644
index 000000000..c20d1f391
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.helpers.report_helpers.rst.txt
@@ -0,0 +1,7 @@
+Report Helpers
+==============
+
+.. automodule:: dataprofiler.profilers.helpers.report_helpers
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.helpers.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.helpers.rst.txt
new file mode 100644
index 000000000..b82f660b5
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.helpers.rst.txt
@@ -0,0 +1,20 @@
+Helpers
+=======
+
+
+Modules
+-------
+
+.. toctree::
+ :maxdepth: 4
+
+
+.. toctree::
+ :maxdepth: 4
+
+ dataprofiler.profilers.helpers.report_helpers
+
+.. automodule:: dataprofiler.profilers.helpers
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.histogram_utils.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.histogram_utils.rst.txt
new file mode 100644
index 000000000..039bb08d8
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.histogram_utils.rst.txt
@@ -0,0 +1,7 @@
+Histogram Utils
+===============
+
+.. automodule:: dataprofiler.profilers.histogram_utils
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.int_column_profile.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.int_column_profile.rst.txt
new file mode 100644
index 000000000..b6d8d7921
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.int_column_profile.rst.txt
@@ -0,0 +1,7 @@
+Int Column Profile
+==================
+
+.. automodule:: dataprofiler.profilers.int_column_profile
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.numerical_column_stats.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.numerical_column_stats.rst.txt
new file mode 100644
index 000000000..a8b6ac226
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.numerical_column_stats.rst.txt
@@ -0,0 +1,7 @@
+Numerical Column Stats
+======================
+
+.. automodule:: dataprofiler.profilers.numerical_column_stats
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.order_column_profile.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.order_column_profile.rst.txt
new file mode 100644
index 000000000..7b1605659
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.order_column_profile.rst.txt
@@ -0,0 +1,7 @@
+Order Column Profile
+====================
+
+.. automodule:: dataprofiler.profilers.order_column_profile
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.profile_builder.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.profile_builder.rst.txt
new file mode 100644
index 000000000..d73e6d9a2
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.profile_builder.rst.txt
@@ -0,0 +1,7 @@
+Profile Builder
+===============
+
+.. automodule:: dataprofiler.profilers.profile_builder
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.profiler_options.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.profiler_options.rst.txt
new file mode 100644
index 000000000..127e8e1ad
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.profiler_options.rst.txt
@@ -0,0 +1,7 @@
+Profiler Options
+================
+
+.. automodule:: dataprofiler.profilers.profiler_options
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.rst.txt
new file mode 100644
index 000000000..476df4cec
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.rst.txt
@@ -0,0 +1,36 @@
+Profilers
+=========
+
+
+Modules
+-------
+
+.. toctree::
+ :maxdepth: 4
+
+ dataprofiler.profilers.helpers
+
+.. toctree::
+ :maxdepth: 4
+
+ dataprofiler.profilers.base_column_profilers
+ dataprofiler.profilers.categorical_column_profile
+ dataprofiler.profilers.column_profile_compilers
+ dataprofiler.profilers.data_labeler_column_profile
+ dataprofiler.profilers.datetime_column_profile
+ dataprofiler.profilers.float_column_profile
+ dataprofiler.profilers.histogram_utils
+ dataprofiler.profilers.int_column_profile
+ dataprofiler.profilers.numerical_column_stats
+ dataprofiler.profilers.order_column_profile
+ dataprofiler.profilers.profile_builder
+ dataprofiler.profilers.profiler_options
+ dataprofiler.profilers.text_column_profile
+ dataprofiler.profilers.unstructured_labeler_profile
+ dataprofiler.profilers.unstructured_text_profile
+ dataprofiler.profilers.utils
+
+.. automodule:: dataprofiler.profilers
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.text_column_profile.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.text_column_profile.rst.txt
new file mode 100644
index 000000000..097e6e02c
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.text_column_profile.rst.txt
@@ -0,0 +1,7 @@
+Text Column Profile
+===================
+
+.. automodule:: dataprofiler.profilers.text_column_profile
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.unstructured_data_labeler_column_profile.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.unstructured_data_labeler_column_profile.rst.txt
new file mode 100644
index 000000000..412a5224e
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.unstructured_data_labeler_column_profile.rst.txt
@@ -0,0 +1,7 @@
+Unstructured Data Labeler Column Profile
+========================================
+
+.. automodule:: dataprofiler.profilers.unstructured_data_labeler_column_profile
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.unstructured_labeler_profile.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.unstructured_labeler_profile.rst.txt
new file mode 100644
index 000000000..c49f68004
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.unstructured_labeler_profile.rst.txt
@@ -0,0 +1,7 @@
+Unstructured Labeler Profile
+============================
+
+.. automodule:: dataprofiler.profilers.unstructured_labeler_profile
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.unstructured_text_profile.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.unstructured_text_profile.rst.txt
new file mode 100644
index 000000000..27b56ea93
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.unstructured_text_profile.rst.txt
@@ -0,0 +1,7 @@
+Unstructured Text Profile
+=========================
+
+.. automodule:: dataprofiler.profilers.unstructured_text_profile
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.profilers.utils.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.profilers.utils.rst.txt
new file mode 100644
index 000000000..ddae85e63
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.profilers.utils.rst.txt
@@ -0,0 +1,7 @@
+Utils
+=====
+
+.. automodule:: dataprofiler.profilers.utils
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.reports.graphs.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.reports.graphs.rst.txt
new file mode 100644
index 000000000..3a7adf900
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.reports.graphs.rst.txt
@@ -0,0 +1,7 @@
+Graphs
+======
+
+.. automodule:: dataprofiler.reports.graphs
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.reports.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.reports.rst.txt
new file mode 100644
index 000000000..2b4c679c5
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.reports.rst.txt
@@ -0,0 +1,20 @@
+Reports
+=======
+
+
+Modules
+-------
+
+.. toctree::
+ :maxdepth: 4
+
+
+.. toctree::
+ :maxdepth: 4
+
+ dataprofiler.reports.graphs
+
+.. automodule:: dataprofiler.reports
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/0.7.1/html/_sources/dataprofiler.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.rst.txt
new file mode 100644
index 000000000..7a57e0427
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.rst.txt
@@ -0,0 +1,27 @@
+Dataprofiler
+============
+
+
+Modules
+-------
+
+.. toctree::
+ :maxdepth: 4
+
+ dataprofiler.data_readers
+ dataprofiler.labelers
+ dataprofiler.profilers
+ dataprofiler.reports
+ dataprofiler.validators
+
+.. toctree::
+ :maxdepth: 4
+
+ dataprofiler.dp_logging
+ dataprofiler.settings
+ dataprofiler.version
+
+.. automodule:: dataprofiler
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/0.7.1/html/_sources/dataprofiler.settings.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.settings.rst.txt
new file mode 100644
index 000000000..81c664c07
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.settings.rst.txt
@@ -0,0 +1,7 @@
+Settings
+========
+
+.. automodule:: dataprofiler.settings
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.validators.base_validators.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.validators.base_validators.rst.txt
new file mode 100644
index 000000000..b8f328736
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.validators.base_validators.rst.txt
@@ -0,0 +1,7 @@
+Base Validators
+===============
+
+.. automodule:: dataprofiler.validators.base_validators
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/dataprofiler.validators.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.validators.rst.txt
new file mode 100644
index 000000000..704dfee21
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.validators.rst.txt
@@ -0,0 +1,20 @@
+Validators
+==========
+
+
+Modules
+-------
+
+.. toctree::
+ :maxdepth: 4
+
+
+.. toctree::
+ :maxdepth: 4
+
+ dataprofiler.validators.base_validators
+
+.. automodule:: dataprofiler.validators
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/0.7.1/html/_sources/dataprofiler.version.rst.txt b/docs/0.7.1/html/_sources/dataprofiler.version.rst.txt
new file mode 100644
index 000000000..3977b6379
--- /dev/null
+++ b/docs/0.7.1/html/_sources/dataprofiler.version.rst.txt
@@ -0,0 +1,7 @@
+Version
+=======
+
+.. automodule:: dataprofiler.version
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/examples.rst.txt b/docs/0.7.1/html/_sources/examples.rst.txt
new file mode 100644
index 000000000..458677b5f
--- /dev/null
+++ b/docs/0.7.1/html/_sources/examples.rst.txt
@@ -0,0 +1,19 @@
+.. _examples:
+
+Examples
+********
+
+These examples provide a more in-depth look into the details of the ``Data Profiler`` library.
+
+Basics
+------
+
+.. toctree::
+ :maxdepth: 0
+
+ Overview of Data Profiler
+ Data Reader
+ Structured Profiler
+ Unstructured Profiler
+ Labeler
+ Adding Models to a Labeler Pipeline
diff --git a/docs/0.7.1/html/_sources/graphs.rst.txt b/docs/0.7.1/html/_sources/graphs.rst.txt
new file mode 100644
index 000000000..08d7a8063
--- /dev/null
+++ b/docs/0.7.1/html/_sources/graphs.rst.txt
@@ -0,0 +1,112 @@
+.. _reports:
+
+Graphs
+******
+
+Graph Your Data
+===============
+
+We can plot some of our data as seaborn histogram plots. Below will demonstrate how to do so and provide examples.
+
+The following plots are currently available to work directly with your profilers:
+
+ * histogram (numeric columns only)
+
+Below shows how to do so with examples.
+
+What we need to import
+~~~~~~~~~~~~~~~~~~~~~~
+.. code-block:: python
+
+ from dataprofiler.reports import graphs
+
+The main functions that are used to plot histograms are in graphs. **You will also need the `dataprofiler[reports]` requirement to be installed**:
+
+.. code-block:: console
+
+ pip install 'dataprofiler[reports]'
+
+Plotting from a StructuredProfiler class
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+With a StructuredProfiler class variable, we can specify what columns we want to be plotted, and plot them into histograms.
+
+.. code-block:: python
+
+ graphs.plot_histograms(profiler, columns)
+
+These are what the variables mean:
+
+ * **profiler** - StructuredProfiler class variable that contains the data we want
+ * **columns** - (Optional) The list of IntColumn or FloatColumn we want to specifically plot.
+
+Plotting an individual IntColumn or FloatColumn
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This example uses a CSV file, but JSON, Avro or Parquet should also work.
+
+.. code-block:: python
+
+ graphs.plot_col_histogram(column, axes, title)
+
+These are what the variables mean:
+
+ * **column** - The IntColumn or FloatColumn we want to plot
+ * **axes** - (Optional) The axes we want to specify.
+ * **title** - (Optional) The title of the plot we want to define.
+
+Examples
+~~~~~~~~
+
+1. This example demonstrates how we can take a StructuredProfiler class and plot histograms of the specified columns.
+
+.. code-block:: python
+
+ import dataprofiler as dp
+ from dataprofiler.reports import graphs
+
+
+ data = [[1, 'a', 1.0],
+ [2, 'b', 2.2],
+ [3, 'c', 3.5],
+ [None, 'd', 10.0]]
+ profiler = dp.StructuredProfiler(data)
+
+ # This will plot all IntColumn and FloatColumn as histograms (The first and last column).
+ fig = graphs.plot_histograms(profiler)
+ fig.show()
+
+ # This will only plot the specified column, 0.
+ columns = [0]
+ fig = graphs.plot_histograms(profiler, columns)
+ fig.show()
+
+.. image:: _static/images/histogram_example_0.png
+ :alt: First Histogram Example Image
+
+.. image:: _static/images/histogram_example_1.png
+ :alt: Second Histogram Example Image
+
+2. This example demonstrates how we can plot a low level profiler.
+
+.. code-block:: python
+
+ import pandas as pd
+
+ from dataprofiler.profilers import IntColumn
+ from dataprofiler.reports import graphs
+
+
+ data = pd.Series([1, 2, 3], dtype=str)
+ profiler = IntColumn('example')
+ profiler.update(data)
+
+ # Plot the axes
+ ax = graphs.plot_col_histogram(profiler)
+
+ # get and show the figure of the plotted histogram
+ fig = ax.get_figure()
+ fig.show()
+
+.. image:: _static/images/histogram_example_2.png
+ :alt: Histogram Column Only Example Image
diff --git a/docs/0.7.1/html/_sources/index.rst.txt b/docs/0.7.1/html/_sources/index.rst.txt
new file mode 100644
index 000000000..aaefa381b
--- /dev/null
+++ b/docs/0.7.1/html/_sources/index.rst.txt
@@ -0,0 +1,460 @@
+.. _Data Profiler:
+
+====================================
+Data Profiler | What's in your data?
+====================================
+
+Purpose
+=======
+
+The DataProfiler is a Python library designed to make data analysis, monitoring and **sensitive data detection** easy.
+
+Loading **Data** with a single command, the library automatically formats & loads files into a DataFrame. **Profiling** the Data, the library identifies the schema, statistics, entities and more. Data Profiles can then be used in downstream applications or reports.
+
+The Data Profiler comes with a cutting edge pre-trained deep learning model, used to efficiently identify **sensitive data** (or **PII**). If customization is needed, it's easy to add new entities to the existing pre-trained model or insert a new pipeline for entity recognition.
+
+The best part? Getting started only takes a few lines of code (`Example CSV`_):
+
+.. code-block:: python
+
+ import json
+ from dataprofiler import Data, Profiler
+
+ data = Data("your_file.csv") # Auto-Detect & Load: CSV, AVRO, Parquet, JSON, Text
+ print(data.data.head(5)) # Access data directly via a compatible Pandas DataFrame
+
+ profile = Profiler(data) # Calculate Statistics, Entity Recognition, etc
+ readable_report = profile.report(report_options={"output_format":"pretty"})
+ print(json.dumps(readable_report, indent=4))
+
+
+To install the full package from pypi:
+
+.. code-block:: console
+
+ pip install DataProfiler[ml]
+
+If the ML requirements are too strict (say, you don't want to install tensorflow), you can install a slimmer package. The slimmer package disables the default sensitive data detection / entity recognition (labeler)
+
+Install from pypi:
+
+.. code-block:: console
+
+ pip install DataProfiler
+
+If you have suggestions or find a bug, please open an `issue`_.
+
+Visit the :ref:`API` to explore Data Profiler's terminology.
+
+
+What is a Data Profile?
+=======================
+
+In the case of this library, a data profile is a dictionary containing statistics and predictions about the underlying dataset. There are "global statistics" or `global_stats`, which contain dataset level data and there are "column/row level statistics" or `data_stats` (each column is a new key-value entry).
+
+The format for a structured profile is below:
+
+.. code-block:: python
+
+ "global_stats": {
+ "samples_used": int,
+ "column_count": int,
+ "row_count": int,
+ "row_has_null_ratio": float,
+ "row_is_null_ratio": float,
+ "unique_row_ratio": float,
+ "duplicate_row_count": int,
+ "file_type": string,
+ "encoding": string,
+ "correlation_matrix": list(list(int)), (*)
+ "profile_schema": dict[string, list(int)]
+ },
+ "data_stats": [
+ {
+ "column_name": string,
+ "data_type": string,
+ "data_label": string,
+ "categorical": bool,
+ "order": string,
+ "samples": list(str),
+ "statistics": {
+ "sample_size": int,
+ "null_count": int,
+ "null_types": list(string),
+ "null_types_index": dict[string, list(int)],
+ "data_type_representation": dict[string, list(string)],
+ "min": [null, float],
+ "max": [null, float],
+ "sum": float,
+ "mean": float,
+ "variance": float,
+ "stddev": float,
+ "skewness": float,
+ "kurtosis": float,
+ "num_zeros": int,
+ "num_negatives": int,
+ "histogram": {
+ "bin_counts": list(int),
+ "bin_edges": list(float),
+ },
+ "quantiles": {
+ int: float
+ },
+ "vocab": list(char),
+ "avg_predictions": dict[string, float],
+ "data_label_representation": dict[string, float],
+ "categories": list(str),
+ "unique_count": int,
+ "unique_ratio": float,
+ "categorical_count": dict[string, int],
+ "gini_impurity": float,
+ "unalikeability": float,
+ "precision": {
+ 'min': int,
+ 'max': int,
+ 'mean': float,
+ 'var': float,
+ 'std': float,
+ 'sample_size': int,
+ 'margin_of_error': float,
+ 'confidence_level': float
+ },
+ "times": dict[string, float],
+ "format": string
+ }
+ }
+ ]
+
+(*) Currently the correlation matrix update is toggled off. It will be reset in a later update. Users can still use it as desired with the is_enabled option set to True.
+
+The format for an unstructured profile is below:
+
+.. code-block:: python
+
+ "global_stats": {
+ "samples_used": int,
+ "empty_line_count": int,
+ "file_type": string,
+ "encoding": string,
+ "memory_size": float, # in MB
+ },
+ "data_stats": {
+ "data_label": {
+ "entity_counts": {
+ "word_level": dict[string, int],
+ "true_char_level": dict[string, int],
+ "postprocess_char_level": dict[string, int]
+ },
+ "entity_percentages": {
+ "word_level": dict[string, float],
+ "true_char_level": dict[string, float],
+ "postprocess_char_level": dict[string, float]
+ },
+ "times": dict[string, float]
+ },
+ "statistics": {
+ "vocab": list(char),
+ "vocab_count": dict[string, int],
+ "words": list(string),
+ "word_count": dict[string, int],
+ "times": dict[string, float]
+ }
+ }
+
+
+Supported Data Formats
+~~~~~~~~~~~~~~~~~~~~~~
+
+* Any delimited file (CSV, TSV, etc.)
+* JSON object
+* Avro file
+* Parquet file
+* Text file
+* Pandas DataFrame
+* A URL that points to one of the supported file types above
+
+
+Data Labels
+~~~~~~~~~~~
+
+*Data Labels* are determined per cell for structured data (column/row when the *profiler* is used) or at the character level for unstructured data.
+
+* UNKNOWN
+* ADDRESS
+* BAN (bank account number, 10-18 digits)
+* CREDIT_CARD
+* EMAIL_ADDRESS
+* UUID
+* HASH_OR_KEY (md5, sha1, sha256, random hash, etc.)
+* IPV4
+* IPV6
+* MAC_ADDRESS
+* PERSON
+* PHONE_NUMBER
+* SSN
+* URL
+* US_STATE
+* DRIVERS_LICENSE
+* DATE
+* TIME
+* DATETIME
+* INTEGER
+* FLOAT
+* QUANTITY
+* ORDINAL
+
+
+Get Started
+===========
+
+Load a File
+~~~~~~~~~~~
+
+The profiler should automatically identify the file type and load the data into a `Data Class`.
+
+Along with other attributes, the `Data class` enables structured data to be accessed via a valid Pandas DataFrame.
+
+.. code-block:: python
+
+ # Load a csv file, return a CSVData object
+ csv_data = Data('your_file.csv')
+
+ # Print the first 10 rows of the csv file
+ print(csv_data.data.head(10))
+
+ # Load a parquet file, return a ParquetData object
+ parquet_data = Data('your_file.parquet')
+
+ # Sort the data by the name column
+ parquet_data.data.sort_values(by='name', inplace=True)
+
+ # Print the sorted first 10 rows of the parquet data
+ print(parquet_data.data.head(10))
+
+
+If the file type is not automatically identified (rare), you can specify them
+specifically, see section Data Readers.
+
+Profile a File
+~~~~~~~~~~~~~~
+
+This example uses a CSV file, but JSON, Avro, Parquet or Text should also work.
+
+.. code-block:: python
+
+ import json
+ from dataprofiler import Data, Profiler
+
+ # Load file (CSV should be automatically identified)
+ data = Data("your_file.csv")
+
+ # Profile the dataset
+ profile = Profiler(data)
+
+ # Generate a report and use json to prettify.
+ report = profile.report(report_options={"output_format":"pretty"})
+
+ # Print the report
+ print(json.dumps(report, indent=4))
+
+Updating Profiles
+~~~~~~~~~~~~~~~~~
+
+Currently, the data profiler is equipped to update its profile in batches.
+
+.. code-block:: python
+
+ import json
+ from dataprofiler import Data, Profiler
+
+ # Load and profile a CSV file
+ data = Data("your_file.csv")
+ profile = Profiler(data)
+
+ # Update the profile with new data:
+ new_data = Data("new_data.csv")
+ profile.update_profile(new_data)
+
+ # Print the report using json to prettify.
+ report = profile.report(report_options={"output_format":"pretty"})
+ print(json.dumps(report, indent=4))
+
+
+Merging Profiles
+~~~~~~~~~~~~~~~~
+
+If you have two files with the same schema (but different data), it is possible to merge the two profiles together via an addition operator.
+
+This also enables profiles to be determined in a distributed manner.
+
+.. code-block:: python
+
+ import json
+ from dataprofiler import Data, Profiler
+
+ # Load a CSV file with a schema
+ data1 = Data("file_a.csv")
+ profile1 = Profiler(data1)
+
+ # Load another CSV file with the same schema
+ data2 = Data("file_b.csv")
+ profile2 = Profiler(data2)
+
+ profile3 = profile1 + profile2
+
+ # Print the report using json to prettify.
+ report = profile3.report(report_options={"output_format":"pretty"})
+ print(json.dumps(report, indent=4))
+
+Profile a Pandas DataFrame
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+ import pandas as pd
+ import dataprofiler as dp
+ import json
+
+ my_dataframe = pd.DataFrame([[1, 2.0],[1, 2.2],[-1, 3]])
+ profile = dp.Profiler(my_dataframe)
+
+ # print the report using json to prettify.
+ report = profile.report(report_options={"output_format":"pretty"})
+ print(json.dumps(report, indent=4))
+
+ # read a specified column, in this case it is labeled 0:
+ print(json.dumps(report["data stats"][0], indent=4))
+
+
+Unstructured Profiler
+~~~~~~~~~~~~~~~~~~~~~
+
+In addition to the structured profiler, the Data Profiler provides unstructured
+profiling for the TextData object or string. Unstructured profiling also works
+with list(string), pd.Series(string) or pd.DataFrame(string) when the
+profiler_type option is specified as `unstructured`. Below is an example of an
+unstructured profile with a text file.
+
+.. code-block:: python
+
+ import dataprofiler as dp
+ import json
+ my_text = dp.Data('text_file.txt')
+ profile = dp.Profiler(my_text)
+
+ # print the report using json to prettify.
+ report = profile.report(report_options={"output_format":"pretty"})
+ print(json.dumps(report, indent=4))
+
+Another example of unstructured profile with pd.Series of string is given as below
+
+.. code-block:: python
+
+ import dataprofiler as dp
+ import pandas as pd
+ import json
+
+ text_data = pd.Series(['first string', 'second string'])
+ profile = dp.Profiler(text_data, profiler_type="unstructured")
+
+ # print the report using json to prettify.
+ report = profile.report(report_options={"output_format":"pretty"})
+ print(json.dumps(report, indent=4))
+
+Specifying a Filetype or Delimiter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Example of specifying a CSV data type, with a `,` delimiter.
+In addition, it utilizes only the first 10,000 rows.
+
+.. code-block:: python
+
+ import json
+ import os
+ from dataprofiler import Data, Profiler
+ from dataprofiler.data_readers.csv_data import CSVData
+
+ # Load a CSV file, with "," as the delimiter
+ data = CSVData("your_file.csv", options={"delimiter": ","})
+
+ # Split the data, such that only the first 10,000 rows are used
+ data = data.data[0:10000]
+
+ # Read in profile and print results
+ profile = Profiler(data)
+ print(json.dumps(profile.report(report_options={"output_format":"pretty"}), indent=4))
+
+
+.. toctree::
+ :maxdepth: 2
+ :hidden:
+ :caption: Getting Started:
+
+ Intro
+ install.rst
+ data_readers.rst
+ profiler.rst
+ data_labeling.rst
+ graphs.rst
+
+.. toctree::
+ :maxdepth: 2
+ :hidden:
+ :caption: User Guide:
+
+ examples.rst
+ API.rst
+
+.. toctree::
+ :maxdepth: 2
+ :hidden:
+ :caption: Community:
+
+ Changelog
+ Feedback
+ GitHub
+ Contributing
+
+.. _Example CSV: https://raw.githubusercontent.com/capitalone/DataProfiler/main/dataprofiler/tests/data/csv/aws_honeypot_marx_geo.csv
+.. _issue: https://github.com/capitalone/DataProfiler/issues/new/choose
+
+Versions
+========
+* `0.7.1`_
+* `0.7.0`_
+* `0.6.0`_
+* `0.5.3`_
+* `0.5.2`_
+* `0.5.1`_
+* `0.5.0`_
+* `0.4.7`_
+* `0.4.6`_
+* `0.4.5`_
+* `0.4.4`_
+* `0.4.3`_
+* `0.3.0`_
+
+.. _0.3.0: ../../v0.3/html/index.html
+.. _0.4.3: ../../0.4.3/html/index.html
+
+.. _0.4.4: ../../0.4.4/html/index.html
+
+.. _0.4.5: ../../0.4.5/html/index.html
+
+.. _0.4.6: ../../0.4.6/html/index.html
+
+.. _0.4.7: ../../0.4.7/html/index.html
+
+.. _0.5.0: ../../0.5.0/html/index.html
+
+.. _0.5.1: ../../0.5.1/html/index.html
+
+.. _0.5.2: ../../0.5.2/html/index.html
+
+.. _0.5.3: ../../0.5.3/html/index.html
+.. _0.6.0: ../../0.6.0/html/index.html
+
+.. _0.7.0: ../../0.7.0/html/index.html
+
+.. _0.7.1: ../../0.7.1/html/index.html
+
diff --git a/docs/0.7.1/html/_sources/install.rst.txt b/docs/0.7.1/html/_sources/install.rst.txt
new file mode 100644
index 000000000..70cdb1a2a
--- /dev/null
+++ b/docs/0.7.1/html/_sources/install.rst.txt
@@ -0,0 +1,138 @@
+.. _install:
+
+Install
+*******
+
+To install the full package from pypi:
+
+.. code-block:: console
+
+ pip install DataProfiler[ml]
+
+If the ML requirements are too strict (say, you don't want to install
+tensorflow), you can install a slimmer package. The slimmer package disables
+the default sensitive data detection / entity recognition (labeler)
+
+Install from pypi:
+
+.. code-block:: console
+
+ pip install DataProfiler
+
+Snappy Installation
+===================
+
+This is required to profile parquet/avro datasets
+
+MacOS with homebrew:
+
+.. code-block:: console
+
+ brew install snappy
+
+
+Linux install:
+
+.. code-block:: console
+
+ sudo apt-get -y install libsnappy-dev
+
+
+Build From Scratch
+==================
+
+NOTE: Installation for python3
+
+virtualenv install:
+
+.. code-block:: console
+
+ python3 -m pip install virtualenv
+
+
+Setup virtual env:
+
+.. code-block:: console
+
+ python3 -m virtualenv --python=python3 venv3
+ source venv3/bin/activate
+
+
+Install requirements:
+
+.. code-block:: console
+
+ pip3 install -r requirements.txt
+
+Install labeler dependencies:
+
+.. code-block:: console
+
+ pip3 install -r requirements-ml.txt
+
+
+Install via the repo -- Build setup.py and install locally:
+
+.. code-block:: console
+
+ python3 setup.py sdist bdist bdist_wheel
+ pip3 install dist/DataProfiler*-py3-none-any.whl
+
+
+If you see:
+
+.. code-block:: console
+
+ ERROR: Double requirement given:dataprofiler==X.Y.Z from dataprofiler/dist/DataProfiler-X.Y.Z-py3-none-any.whl (already in dataprofiler==X2.Y2.Z2 from dataprofiler/dist/DataProfiler-X2.Y2.Z2-py3-none-any.whl, name='dataprofiler')
+
+This means that you have multiple versions of the DataProfiler distribution
+in the dist folder.
+To resolve, either remove the older one or delete the folder and rerun the steps
+above.
+
+Install via github:
+
+.. code-block:: console
+
+ pip3 install git+https://github.com/capitalone/dataprofiler.git#egg=dataprofiler
+
+
+
+Testing
+=======
+
+For testing, install test requirements:
+
+.. code-block:: console
+
+ pip3 install -r requirements-test.txt
+
+
+To run all unit tests, use:
+
+.. code-block:: console
+
+ DATAPROFILER_SEED=0 python3 -m unittest discover -p "test*.py"
+
+
+To run a file of unit tests, use:
+
+.. code-block:: console
+
+ DATAPROFILER_SEED=0 python3 -m unittest discover -p test_profile_builder.py
+
+
+To run a file with Pytest use:
+
+.. code-block:: console
+
+ DATAPROFILER_SEED=0 pytest dataprofiler/tests/data_readers/test_csv_data.py -v
+
+
+To run an individual unit test, use:
+
+.. code-block:: console
+
+ DATAPROFILER_SEED=0 python3 -m unittest dataprofiler.tests.profilers.test_profile_builder.TestProfiler
+
+
diff --git a/docs/0.7.1/html/_sources/labeler.nblink.txt b/docs/0.7.1/html/_sources/labeler.nblink.txt
new file mode 100644
index 000000000..bed6517bf
--- /dev/null
+++ b/docs/0.7.1/html/_sources/labeler.nblink.txt
@@ -0,0 +1,6 @@
+{
+ "path": "../../feature_branch/examples/labeler.ipynb",
+ "extra-media": [
+ "../../feature_branch/examples/DL-Flowchart.png"
+ ]
+}
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/modules.rst.txt b/docs/0.7.1/html/_sources/modules.rst.txt
new file mode 100644
index 000000000..0593459df
--- /dev/null
+++ b/docs/0.7.1/html/_sources/modules.rst.txt
@@ -0,0 +1,7 @@
+dataprofiler
+============
+
+.. toctree::
+ :maxdepth: 4
+
+ dataprofiler
diff --git a/docs/0.7.1/html/_sources/overview.nblink.txt b/docs/0.7.1/html/_sources/overview.nblink.txt
new file mode 100644
index 000000000..3d9f89d3d
--- /dev/null
+++ b/docs/0.7.1/html/_sources/overview.nblink.txt
@@ -0,0 +1,3 @@
+{
+ "path": "../../feature_branch/examples/intro_data_profiler.ipynb"
+}
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/profiler.rst.txt b/docs/0.7.1/html/_sources/profiler.rst.txt
new file mode 100644
index 000000000..0fbfc5923
--- /dev/null
+++ b/docs/0.7.1/html/_sources/profiler.rst.txt
@@ -0,0 +1,678 @@
+.. _profiler:
+
+Profiler
+********
+
+Profile Your Data
+=================
+
+Profiling your data is easy. Just use the data reader, send the data to the
+profiler, and print out the report.
+
+.. code-block:: python
+
+ import json
+ from dataprofiler import Data, Profiler
+
+ data = Data("your_file.csv") # Auto-Detect & Load: CSV, AVRO, Parquet, JSON, Text
+
+ profile = Profiler(data) # Calculate Statistics, Entity Recognition, etc
+
+ readable_report = profile.report(report_options={"output_format": "pretty"})
+ print(json.dumps(readable_report, indent=4))
+
+If the data is structured, the profile will return global statistics as well as
+column by column statistics. The vast amount of statistics are listed on the
+intro page.
+
+Load a File
+~~~~~~~~~~~
+
+The profiler should automatically identify the file type and load the data into a `Data Class`.
+
+Along with other attributes, the `Data class` enables structured data to be accessed via a valid Pandas DataFrame.
+
+.. code-block:: python
+
+ # Load a csv file, return a CSVData object
+ csv_data = Data('your_file.csv')
+
+ # Print the first 10 rows of the csv file
+ print(csv_data.data.head(10))
+
+ # Load a parquet file, return a ParquetData object
+ parquet_data = Data('your_file.parquet')
+
+ # Sort the data by the name column
+ parquet_data.data.sort_values(by='name', inplace=True)
+
+ # Print the sorted first 10 rows of the parquet data
+ print(parquet_data.data.head(10))
+
+
+If the file type is not automatically identified (rare), you can specify them
+specifically, see section Data Readers.
+
+Profile a File
+~~~~~~~~~~~~~~
+
+This example uses a CSV file, but JSON, Avro or Parquet files should also work.
+
+.. code-block:: python
+
+ import json
+ from dataprofiler import Data, Profiler
+
+ # Load file (CSV should be automatically identified)
+ data = Data("your_file.csv")
+
+ # Profile the dataset
+ profile = Profiler(data)
+
+ # Generate a report and use json to prettify.
+ report = profile.report(report_options={"output_format": "pretty"})
+
+ # Print the report
+ print(json.dumps(report, indent=4))
+
+Updating Profiles
+~~~~~~~~~~~~~~~~~
+
+Currently, the data profiler is equipped to update its profile in batches.
+
+.. code-block:: python
+
+ import json
+ from dataprofiler import Data, Profiler
+
+ # Load and profile a CSV file
+ data = Data("your_file.csv")
+ profile = Profiler(data)
+
+ # Update the profile with new data:
+ new_data = Data("new_data.csv")
+ profile.update_profile(new_data)
+
+ # Print the report using json to prettify.
+ report = profile.report(report_options={"output_format": "pretty"})
+ print(json.dumps(report, indent=4))
+
+
+Merging Profiles
+~~~~~~~~~~~~~~~~
+
+If you have two files with the same schema (but different data), it is possible to merge the two profiles together via an addition operator.
+
+This also enables profiles to be determined in a distributed manner.
+
+.. code-block:: python
+
+ import json
+ from dataprofiler import Data, Profiler
+
+    # Load a CSV file with a schema
+    data1 = Data("file_a.csv")
+    profile1 = Profiler(data1)
+
+    # Load another CSV file with the same schema
+    data2 = Data("file_b.csv")
+    profile2 = Profiler(data2)
+
+ profile3 = profile1 + profile2
+
+ # Print the report using json to prettify.
+ report = profile3.report(report_options={"output_format": "pretty"})
+ print(json.dumps(report, indent=4))
+
+
+Profile Differences
+~~~~~~~~~~~~~~~~~~~
+
+Profile differences take two profiles and find the differences
+between them. Create the difference report like this:
+
+.. code-block:: python
+
+ from dataprofiler import Data, Profiler
+
+    # Load a CSV file
+    data1 = Data("file_a.csv")
+    profile1 = Profiler(data1)
+
+    # Load another CSV file
+    data2 = Data("file_b.csv")
+    profile2 = Profiler(data2)
+
+ diff_report = profile1.diff(profile2)
+ print(diff_report)
+
+The difference report contains a dictionary that mirrors the profile report.
+Each data type has its own difference:
+
+* **Int/Float** - One profile subtracts the value from the other.
+
+* **String** - The strings will be shown in a list:
+
+ - [profile1 str, profile2 str]
+* **List** - A list of 3 will be returned showing the unique values of
+ each profile and the shared values:
+
+ - [profile 1 unique values, shared values, profile 2 unique values]
+* **Dict** - Some dictionaries with varied keys will also return a list
+ of three in the format:
+
+ - [profile 1 unique key-values, shared key differences, profile 2 unique key-values]
+
+Otherwise, when no differences occur:
+
+* **Any Type No Differences** - A string will report: "unchanged".
+
+Below is the structured difference report:
+
+.. code-block:: python
+
+ {
+ 'global_stats': {
+ 'file_type': [str, str],
+ 'encoding': [str, str],
+ 'samples_used': int,
+ 'column_count': int,
+ 'row_count': int,
+ 'row_has_null_ratio': float,
+ 'row_is_null_ratio': float,
+ 'unique_row_ratio': float,
+ 'duplicate_row_count': int,
+ 'correlation_matrix': list[list[float]],
+ 'profile_schema': list[dict[str, int]]
+ },
+ 'data_stats': [{
+ 'column_name': str,
+ 'data_type': [str, str],
+ 'data_label': [list[str], list[str], list[str]],
+ 'categorical': [str, str],
+ 'order': [str, str],
+ 'statistics': {
+ 'min': float,
+ 'max': float,
+ 'sum': float,
+ 'mean': float,
+ 'variance': float,
+ 'stddev': float,
+ 't-test': {
+ 't-statistic': float,
+ 'conservative': {'df': int,
+ 'p-value': float},
+ 'welch': {'df': float,
+ 'p-value': float}},
+ "chi2-test": {
+ "chi2-statistic": float,
+ "df": int,
+ "p-value": float
+ },
+ 'unique_count': int,
+ 'unique_ratio': float,
+ 'categories': [list[str], list[str], list[str]],
+ 'gini_impurity': float,
+ 'unalikeability': float,
+ 'categorical_count': [dict[str, int], dict[str, int], dict[str, int]],
+ 'avg_predictions': [dict[str, float]],
+ 'label_representation': [dict[str, float]],
+ 'sample_size': int,
+ 'null_count': int,
+ 'null_types': [list[str], list[str], list[str]],
+ 'null_types_index': [dict[str, int], dict[str, int], dict[str, int]],
+ 'data_type_representation': [dict[str, float]]
+ }
+ }
+
+Below is the unstructured difference report:
+
+.. code-block:: python
+
+ {
+ 'global_stats': {
+ 'file_type': [str, str],
+ 'encoding': [str, str],
+ 'samples_used': int,
+ 'empty_line_count': int,
+ 'memory_size': float
+ },
+ 'data_stats': {
+ 'data_label': {
+ 'entity_counts': {
+ 'word_level': dict[str, int],
+ 'true_char_level': dict[str, int],
+ 'postprocess_char_level': dict[str, int]
+ },
+ 'entity_percentages': {
+ 'word_level': dict[str, float],
+ 'true_char_level': dict[str, float],
+ 'postprocess_char_level': dict[str, float]
+ }
+ },
+ 'statistics': {
+ 'vocab': [list[str], list[str], list[str]],
+ 'vocab_count': [dict[str, int], dict[str, int], dict[str, int]],
+ 'words': [list[str], list[str], list[str]],
+ 'word_count': [dict[str, int], dict[str, int], dict[str, int]]
+ }
+ }
+ }
+
+
+Saving and Loading a Profile
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The profiles can easily be saved and loaded as shown below:
+
+.. code-block:: python
+
+ import json
+ from dataprofiler import Data, Profiler
+
+ # Load a CSV file, with "," as the delimiter
+ data = Data("your_file.csv")
+
+ # Read in profile and print results
+ profile = Profiler(data)
+ profile.save(filepath="my_profile.pkl")
+
+    loaded_profile = Profiler.load("my_profile.pkl")
+ print(json.dumps(loaded_profile.report(report_options={"output_format": "compact"}),
+ indent=4))
+
+
+Structured vs Unstructured Profiles
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When using the profiler, the data profiler will automatically infer whether to
+create the structured profile or the unstructured profile. However, you can be
+explicit as shown below:
+
+.. code-block:: python
+
+ import json
+ from dataprofiler import Data, Profiler
+
+ # Creating a structured profile
+ data1 = Data("normal_csv_file.csv")
+ structured_profile = Profiler(data1, profiler_type="structured")
+
+ structured_report = structured_profile.report(report_options={"output_format": "pretty"})
+ print(json.dumps(structured_report, indent=4))
+
+ # Creating an unstructured profile
+ data2 = Data("normal_text_file.txt")
+ unstructured_profile = Profiler(data2, profiler_type="unstructured")
+
+ unstructured_report = unstructured_profile.report(report_options={"output_format": "pretty"})
+ print(json.dumps(unstructured_report, indent=4))
+
+
+Setting the Sample Size
+~~~~~~~~~~~~~~~~~~~~~~~
+
+There are two ways to set sample size in a profile: samples_per_update and
+min_true_samples. Samples_per_update takes an integer as the exact amount that
+will be sampled. Min_true_samples will set the minimum amount of samples that
+are not null. For example:
+
+.. code-block:: python
+
+ from dataprofiler import Profiler
+
+ sample_array = [1.0, NULL, 2.0]
+    profile = Profiler(sample_array, samples_per_update=2)
+
+The first two samples (1.0 and NULL) are used for the statistical analysis.
+
+In contrast, if we also set min_true_samples to 2 then the Data Reader will
+continue to read until the minimum number of true samples is found for the given column.
+For example:
+
+.. code-block:: python
+
+ from dataprofiler import Profiler
+
+ sample_array = [1.0, NULL, 2.0]
+    profile = Profiler(sample_array, samples_per_update=2, min_true_samples=2)
+
+This will use all samples in the statistical analysis until the number of "true"
+(non-NULL) values are reached. Both min_true_samples and
+samples_per_update conditions must be met. In this case, the profile will grab
+the first two samples (1.0 and NULL) to satisfy the samples_per_update, and then
+it will grab the first two VALID samples (1.0 and 2.0) to satisfy the
+min_true_samples.
+
+Profile a Pandas DataFrame
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+ import pandas as pd
+ import dataprofiler as dp
+ import json
+
+ my_dataframe = pd.DataFrame([[1, 2.0],[1, 2.2],[-1, 3]])
+ profile = dp.Profiler(my_dataframe)
+
+ # print the report using json to prettify.
+ report = profile.report(report_options={"output_format": "pretty"})
+ print(json.dumps(report, indent=4))
+
+ # read a specified column, in this case it is labeled 0:
+ print(json.dumps(report["data stats"][0], indent=4))
+
+
+Specifying a Filetype or Delimiter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Example of specifying a CSV data type, with a `,` delimiter.
+In addition, it utilizes only the first 10,000 rows.
+
+.. code-block:: python
+
+ import json
+ from dataprofiler import Data, Profiler
+ from dataprofiler.data_readers.csv_data import CSVData
+
+ # Load a CSV file, with "," as the delimiter
+ data = CSVData("your_file.csv", options={"delimiter": ","})
+
+ # Split the data, such that only the first 10,000 rows are used
+ data = data.data[0:10000]
+
+ # Read in profile and print results
+ profile = Profiler(data)
+ print(json.dumps(profile.report(report_options={"output_format": "pretty"}), indent=4))
+
+Setting Profiler Seed
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Example of specifying a seed for reproducibility.
+
+.. code-block:: python
+
+ import dataprofiler as dp
+
+ # Set seed to non-negative integer value or None
+ dp.set_seed(0)
+
+
+
+Profile Options
+===============
+
+The data profiler accepts several options to toggle on and off
+features. The seven option groups (int options, float options, datetime options,
+text options, order options, category options, data labeler options) can be
+enabled or disabled. By default, all options are toggled on. Below is an example
+of how to alter these options. Options shared by the structured and unstructured profilers
+must be specified as structured or unstructured when being set (i.e. data labeler options).
+
+.. code-block:: python
+
+ import json
+ from dataprofiler import Data, Profiler, ProfilerOptions
+
+ # Load and profile a CSV file
+ data = Data("your_file.csv")
+ profile_options = ProfilerOptions()
+
+ #All of these are different examples of adjusting the profile options
+
+ # Options can be toggled directly like this:
+ profile_options.structured_options.text.is_enabled = False
+ profile_options.structured_options.text.vocab.is_enabled = True
+ profile_options.structured_options.int.variance.is_enabled = True
+ profile_options.structured_options.data_labeler.data_labeler_dirpath = \
+ "Wheres/My/Datalabeler"
+ profile_options.structured_options.data_labeler.is_enabled = False
+
+ # A dictionary can be sent in to set the properties for all the options
+ profile_options.set({"structured_options.data_labeler.is_enabled": False, "min.is_enabled": False})
+
+ # Specific columns can be set/disabled/enabled in the same way
+ profile_options.structured_options.text.set({"max.is_enabled":True,
+ "variance.is_enabled": True})
+
+ # numeric stats can be turned off/on entirely
+ profile_options.set({"is_numeric_stats_enabled": False})
+ profile_options.set({"int.is_numeric_stats_enabled": False})
+
+ profile = Profiler(data, options=profile_options)
+
+ # Print the report using json to prettify.
+ report = profile.report(report_options={"output_format": "pretty"})
+ print(json.dumps(report, indent=4))
+
+
+Below is a breakdown of all the options.
+
+* **ProfilerOptions** - The top-level options class that contains options for the Profiler class
+
+ * **structured_options** - Options responsible for all structured data
+
+ * **multiprocess** - Option to enable multiprocessing. Automatically selects the optimal number of processes to utilize based on system constraints.
+
+ * is_enabled - (Boolean) Enables or disables multiprocessing
+ * **int** - Options for the integer columns
+
+ * is_enabled - (Boolean) Enables or disables the integer operations
+ * min - Finds minimum value in a column
+
+ * is_enabled - (Boolean) Enables or disables min
+ * max - Finds maximum value in a column
+
+ * is_enabled - (Boolean) Enables or disables max
+ * sum - Finds sum of all values in a column
+
+ * is_enabled - (Boolean) Enables or disables sum
+
+ * variance - Finds variance of all values in a column
+
+ * is_enabled - (Boolean) Enables or disables variance
+ * skewness - Finds skewness of all values in a column
+
+ * is_enabled - (Boolean) Enables or disables skewness
+ * kurtosis - Finds kurtosis of all values in a column
+
+ * is_enabled - (Boolean) Enables or disables kurtosis
+ * num_zeros - Finds the count of zeros in a column
+
+ * is_enabled - (Boolean) Enables or disables num_zeros
+ * num_negatives - Finds the count of negative numbers in a column
+
+ * is_enabled - (Boolean) Enables or disables num_negatives
+ * bias_correction - Applies bias correction to variance, skewness, and kurtosis calculations
+
+ * is_enabled - (Boolean) Enables or disables bias correction
+ * histogram_and_quantiles - Generates a histogram and quantiles
+ from the column values
+
+ * bin_count_or_method - (String/List[String]) Designates preferred method for calculating histogram bins or the number of bins to use.
+ If left unspecified (None) the optimal method will be chosen by attempting all methods.
+ If multiple specified (list) the optimal method will be chosen by attempting the provided ones.
+ methods: 'auto', 'fd', 'doane', 'scott', 'rice', 'sturges', 'sqrt'
+ Note: 'auto' is used to choose optimally between 'fd' and 'sturges'
+ * is_enabled - (Boolean) Enables or disables histogram and quantiles
+ * **float** - Options for the float columns
+
+ * is_enabled - (Boolean) Enables or disables the float operations
+ * precision - Finds the precision (significant figures) within the column
+
+ * is_enabled - (Boolean) Enables or disables precision
+ * sample_ratio - (Float) The ratio of 0 to 1 how much data (identified as floats) to utilize as samples in determining precision
+
+ * min - Finds minimum value in a column
+
+ * is_enabled - (Boolean) Enables or disables min
+ * max - Finds maximum value in a column
+
+ * is_enabled - (Boolean) Enables or disables max
+ * sum - Finds sum of all values in a column
+
+ * is_enabled - (Boolean) Enables or disables sum
+ * variance - Finds variance of all values in a column
+
+ * is_enabled - (Boolean) Enables or disables variance
+ * skewness - Finds skewness of all values in a column
+
+ * is_enabled - (Boolean) Enables or disables skewness
+ * kurtosis - Finds kurtosis of all values in a column
+
+ * is_enabled - (Boolean) Enables or disables kurtosis
+ * num_zeros - Finds the count of zeros in a column
+
+ * is_enabled - (Boolean) Enables or disables num_zeros
+ * num_negatives - Finds the count of negative numbers in a column
+
+ * is_enabled - (Boolean) Enables or disables num_negatives
+ * bias_correction - Applies bias correction to variance, skewness, and kurtosis calculations
+
+ * is_enabled - (Boolean) Enables or disables bias correction
+ * histogram_and_quantiles - Generates a histogram and quantiles
+ from the column values
+
+ * bin_count_or_method - (String/List[String]) Designates preferred method for calculating histogram bins or the number of bins to use.
+ If left unspecified (None) the optimal method will be chosen by attempting all methods.
+ If multiple specified (list) the optimal method will be chosen by attempting the provided ones.
+ methods: 'auto', 'fd', 'doane', 'scott', 'rice', 'sturges', 'sqrt'
+ Note: 'auto' is used to choose optimally between 'fd' and 'sturges'
+ * is_enabled - (Boolean) Enables or disables histogram and quantiles
+ * **text** - Options for the text columns
+
+ * is_enabled - (Boolean) Enables or disables the text operations
+ * vocab - Finds all the unique characters used in a column
+
+ * is_enabled - (Boolean) Enables or disables vocab
+ * min - Finds minimum value in a column
+
+ * is_enabled - (Boolean) Enables or disables min
+ * max - Finds maximum value in a column
+
+ * is_enabled - (Boolean) Enables or disables max
+ * sum - Finds sum of all values in a column
+
+ * is_enabled - (Boolean) Enables or disables sum
+ * variance - Finds variance of all values in a column
+
+ * is_enabled - (Boolean) Enables or disables variance
+ * skewness - Finds skewness of all values in a column
+
+ * is_enabled - (Boolean) Enables or disables skewness
+ * kurtosis - Finds kurtosis of all values in a column
+
+ * is_enabled - (Boolean) Enables or disables kurtosis
+ * bias_correction - Applies bias correction to variance, skewness, and kurtosis calculations
+
+ * is_enabled - (Boolean) Enables or disables bias correction
+ * histogram_and_quantiles - Generates a histogram and quantiles
+ from the column values
+
+ * bin_count_or_method - (String/List[String]) Designates preferred method for calculating histogram bins or the number of bins to use.
+ If left unspecified (None) the optimal method will be chosen by attempting all methods.
+ If multiple specified (list) the optimal method will be chosen by attempting the provided ones.
+ methods: 'auto', 'fd', 'doane', 'scott', 'rice', 'sturges', 'sqrt'
+ Note: 'auto' is used to choose optimally between 'fd' and 'sturges'
+ * is_enabled - (Boolean) Enables or disables histogram and quantiles
+ * **datetime** - Options for the datetime columns
+
+ * is_enabled - (Boolean) Enables or disables the datetime operations
+ * **order** - Options for the order columns
+
+ * is_enabled - (Boolean) Enables or disables the order operations
+ * **category** - Options for the category columns
+
+ * is_enabled - (Boolean) Enables or disables the category operations
+ * **data_labeler** - Options for the data labeler columns
+
+ * is_enabled - (Boolean) Enables or disables the data labeler operations
+ * data_labeler_dirpath - (String) Directory path to data labeler
+ * data_labeler_object - (BaseDataLabeler) Datalabeler to replace
+ the default labeler
+ * max_sample_size - (Int) The max number of samples for the data
+ labeler
+ * **unstructured_options** - Options responsible for all unstructured data
+
+ * **text** - Options for the text profile
+
+ * is_case_sensitive - (Boolean) Specify whether the profile is case sensitive
+ * stop_words - (List of Strings) List of stop words to be removed when profiling
+ * top_k_chars - (Int) Number of top characters to be retrieved when profiling
+ * top_k_words - (Int) Number of top words to be retrieved when profiling
+ * vocab - Options for vocab count
+
+ * is_enabled - (Boolean) Enables or disables the vocab stats
+ * words - Options for word count
+
+ * is_enabled - (Boolean) Enables or disables the word stats
+ * **data_labeler** - Options for the data labeler
+
+ * is_enabled - (Boolean) Enables or disables the data labeler operations
+ * data_labeler_dirpath - (String) Directory path to data labeler
+ * data_labeler_object - (BaseDataLabeler) Datalabeler to replace
+ the default labeler
+ * max_sample_size - (Int) The max number of samples for the data
+ labeler
+
+
+
+Statistical Dependency on Order of Updates
+==========================================
+
+Some profile features/statistics are dependent on the order in which the profiler
+is updated with new data.
+
+Order Profile
+~~~~~~~~~~~~~
+
+The order profiler utilizes the last value in the previous data batch to ensure
+the subsequent dataset is above/below/equal to that value when predicting
+non-random order.
+
+For instance, a dataset to be predicted as ascending would require the following
+batch data update to be ascending and its first value `>=` than that of the
+previous batch of data.
+
+Ex. of ascending:
+
+.. code-block:: python
+
+ batch_1 = [0, 1, 2]
+ batch_2 = [3, 4, 5]
+
+Ex. of random:
+
+.. code-block:: python
+
+ batch_1 = [0, 1, 2]
+ batch_2 = [1, 2, 3] # notice how the first value is less than the last value in the previous batch
+
+
+Reporting Structure
+===================
+
+For every profile, we can provide a report and customize it with a couple of optional parameters:
+
+* output_format (string)
+
+ * This will allow the user to decide the output format for report.
+
+ * Options are one of [pretty, compact, serializable, flat]:
+
+ * Pretty: floats are rounded to four decimal places, and lists are shortened.
+ * Compact: Similar to pretty, but removes detailed statistics such as runtimes, label probabilities, index locations of null types, etc.
+ * Serializable: Output is json serializable and not prettified
+ * Flat: Nested output is returned as a flattened dictionary
+* num_quantile_groups (int)
+
+ * You can sample your data as you like! With a minimum of one and a maximum of 1000, you can decide the number of quantile groups!
+
+.. code-block:: python
+
+ report = profile.report(report_options={"output_format": "pretty"})
+ report = profile.report(report_options={"output_format": "compact"})
+ report = profile.report(report_options={"output_format": "serializable"})
+ report = profile.report(report_options={"output_format": "flat"})
+
diff --git a/docs/0.7.1/html/_sources/profiler_example.nblink.txt b/docs/0.7.1/html/_sources/profiler_example.nblink.txt
new file mode 100644
index 000000000..8b1612784
--- /dev/null
+++ b/docs/0.7.1/html/_sources/profiler_example.nblink.txt
@@ -0,0 +1,3 @@
+{
+ "path": "../../feature_branch/examples/structured_profilers.ipynb"
+}
\ No newline at end of file
diff --git a/docs/0.7.1/html/_sources/unstructured_profiler_example.nblink.txt b/docs/0.7.1/html/_sources/unstructured_profiler_example.nblink.txt
new file mode 100644
index 000000000..1589c41d4
--- /dev/null
+++ b/docs/0.7.1/html/_sources/unstructured_profiler_example.nblink.txt
@@ -0,0 +1,3 @@
+{
+ "path": "../../feature_branch/examples/unstructured_profilers.ipynb"
+}
\ No newline at end of file
diff --git a/docs/0.7.1/html/_static/DataProfilerLogoLightTheme.png b/docs/0.7.1/html/_static/DataProfilerLogoLightTheme.png
new file mode 100644
index 000000000..35e59c349
Binary files /dev/null and b/docs/0.7.1/html/_static/DataProfilerLogoLightTheme.png differ
diff --git a/docs/0.7.1/html/_static/basic.css b/docs/0.7.1/html/_static/basic.css
new file mode 100644
index 000000000..912859b55
--- /dev/null
+++ b/docs/0.7.1/html/_static/basic.css
@@ -0,0 +1,904 @@
+/*
+ * basic.css
+ * ~~~~~~~~~
+ *
+ * Sphinx stylesheet -- basic theme.
+ *
+ * :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+/* -- main layout ----------------------------------------------------------- */
+
+div.clearer {
+ clear: both;
+}
+
+div.section::after {
+ display: block;
+ content: '';
+ clear: left;
+}
+
+/* -- relbar ---------------------------------------------------------------- */
+
+div.related {
+ width: 100%;
+ font-size: 90%;
+}
+
+div.related h3 {
+ display: none;
+}
+
+div.related ul {
+ margin: 0;
+ padding: 0 0 0 10px;
+ list-style: none;
+}
+
+div.related li {
+ display: inline;
+}
+
+div.related li.right {
+ float: right;
+ margin-right: 5px;
+}
+
+/* -- sidebar --------------------------------------------------------------- */
+
+div.sphinxsidebarwrapper {
+ padding: 10px 5px 0 10px;
+}
+
+div.sphinxsidebar {
+ float: left;
+ width: 230px;
+ margin-left: -100%;
+ font-size: 90%;
+ word-wrap: break-word;
+ overflow-wrap : break-word;
+}
+
+div.sphinxsidebar ul {
+ list-style: none;
+}
+
+div.sphinxsidebar ul ul,
+div.sphinxsidebar ul.want-points {
+ margin-left: 20px;
+ list-style: square;
+}
+
+div.sphinxsidebar ul ul {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+
+div.sphinxsidebar form {
+ margin-top: 10px;
+}
+
+div.sphinxsidebar input {
+ border: 1px solid #98dbcc;
+ font-family: sans-serif;
+ font-size: 1em;
+}
+
+div.sphinxsidebar #searchbox form.search {
+ overflow: hidden;
+}
+
+div.sphinxsidebar #searchbox input[type="text"] {
+ float: left;
+ width: 80%;
+ padding: 0.25em;
+ box-sizing: border-box;
+}
+
+div.sphinxsidebar #searchbox input[type="submit"] {
+ float: left;
+ width: 20%;
+ border-left: none;
+ padding: 0.25em;
+ box-sizing: border-box;
+}
+
+
+img {
+ border: 0;
+ max-width: 100%;
+}
+
+/* -- search page ----------------------------------------------------------- */
+
+ul.search {
+ margin: 10px 0 0 20px;
+ padding: 0;
+}
+
+ul.search li {
+ padding: 5px 0 5px 20px;
+ background-image: url(file.png);
+ background-repeat: no-repeat;
+ background-position: 0 7px;
+}
+
+ul.search li a {
+ font-weight: bold;
+}
+
+ul.search li p.context {
+ color: #888;
+ margin: 2px 0 0 30px;
+ text-align: left;
+}
+
+ul.keywordmatches li.goodmatch a {
+ font-weight: bold;
+}
+
+/* -- index page ------------------------------------------------------------ */
+
+table.contentstable {
+ width: 90%;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+table.contentstable p.biglink {
+ line-height: 150%;
+}
+
+a.biglink {
+ font-size: 1.3em;
+}
+
+span.linkdescr {
+ font-style: italic;
+ padding-top: 5px;
+ font-size: 90%;
+}
+
+/* -- general index --------------------------------------------------------- */
+
+table.indextable {
+ width: 100%;
+}
+
+table.indextable td {
+ text-align: left;
+ vertical-align: top;
+}
+
+table.indextable ul {
+ margin-top: 0;
+ margin-bottom: 0;
+ list-style-type: none;
+}
+
+table.indextable > tbody > tr > td > ul {
+ padding-left: 0em;
+}
+
+table.indextable tr.pcap {
+ height: 10px;
+}
+
+table.indextable tr.cap {
+ margin-top: 10px;
+ background-color: #f2f2f2;
+}
+
+img.toggler {
+ margin-right: 3px;
+ margin-top: 3px;
+ cursor: pointer;
+}
+
+div.modindex-jumpbox {
+ border-top: 1px solid #ddd;
+ border-bottom: 1px solid #ddd;
+ margin: 1em 0 1em 0;
+ padding: 0.4em;
+}
+
+div.genindex-jumpbox {
+ border-top: 1px solid #ddd;
+ border-bottom: 1px solid #ddd;
+ margin: 1em 0 1em 0;
+ padding: 0.4em;
+}
+
+/* -- domain module index --------------------------------------------------- */
+
+table.modindextable td {
+ padding: 2px;
+ border-collapse: collapse;
+}
+
+/* -- general body styles --------------------------------------------------- */
+
+div.body {
+ min-width: 450px;
+ max-width: 800px;
+}
+
+div.body p, div.body dd, div.body li, div.body blockquote {
+ -moz-hyphens: auto;
+ -ms-hyphens: auto;
+ -webkit-hyphens: auto;
+ hyphens: auto;
+}
+
+a.headerlink {
+ visibility: hidden;
+}
+
+a.brackets:before,
+span.brackets > a:before{
+ content: "[";
+}
+
+a.brackets:after,
+span.brackets > a:after {
+ content: "]";
+}
+
+h1:hover > a.headerlink,
+h2:hover > a.headerlink,
+h3:hover > a.headerlink,
+h4:hover > a.headerlink,
+h5:hover > a.headerlink,
+h6:hover > a.headerlink,
+dt:hover > a.headerlink,
+caption:hover > a.headerlink,
+p.caption:hover > a.headerlink,
+div.code-block-caption:hover > a.headerlink {
+ visibility: visible;
+}
+
+div.body p.caption {
+ text-align: inherit;
+}
+
+div.body td {
+ text-align: left;
+}
+
+.first {
+ margin-top: 0 !important;
+}
+
+p.rubric {
+ margin-top: 30px;
+ font-weight: bold;
+}
+
+img.align-left, figure.align-left, .figure.align-left, object.align-left {
+ clear: left;
+ float: left;
+ margin-right: 1em;
+}
+
+img.align-right, figure.align-right, .figure.align-right, object.align-right {
+ clear: right;
+ float: right;
+ margin-left: 1em;
+}
+
+img.align-center, figure.align-center, .figure.align-center, object.align-center {
+ display: block;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+img.align-default, figure.align-default, .figure.align-default {
+ display: block;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+.align-left {
+ text-align: left;
+}
+
+.align-center {
+ text-align: center;
+}
+
+.align-default {
+ text-align: center;
+}
+
+.align-right {
+ text-align: right;
+}
+
+/* -- sidebars -------------------------------------------------------------- */
+
+div.sidebar,
+aside.sidebar {
+ margin: 0 0 0.5em 1em;
+ border: 1px solid #ddb;
+ padding: 7px;
+ background-color: #ffe;
+ width: 40%;
+ float: right;
+ clear: right;
+ overflow-x: auto;
+}
+
+p.sidebar-title {
+ font-weight: bold;
+}
+
+div.admonition, div.topic, blockquote {
+ clear: left;
+}
+
+/* -- topics ---------------------------------------------------------------- */
+
+div.topic {
+ border: 1px solid #ccc;
+ padding: 7px;
+ margin: 10px 0 10px 0;
+}
+
+p.topic-title {
+ font-size: 1.1em;
+ font-weight: bold;
+ margin-top: 10px;
+}
+
+/* -- admonitions ----------------------------------------------------------- */
+
+div.admonition {
+ margin-top: 10px;
+ margin-bottom: 10px;
+ padding: 7px;
+}
+
+div.admonition dt {
+ font-weight: bold;
+}
+
+p.admonition-title {
+ margin: 0px 10px 5px 0px;
+ font-weight: bold;
+}
+
+div.body p.centered {
+ text-align: center;
+ margin-top: 25px;
+}
+
+/* -- content of sidebars/topics/admonitions -------------------------------- */
+
+div.sidebar > :last-child,
+aside.sidebar > :last-child,
+div.topic > :last-child,
+div.admonition > :last-child {
+ margin-bottom: 0;
+}
+
+div.sidebar::after,
+aside.sidebar::after,
+div.topic::after,
+div.admonition::after,
+blockquote::after {
+ display: block;
+ content: '';
+ clear: both;
+}
+
+/* -- tables ---------------------------------------------------------------- */
+
+table.docutils {
+ margin-top: 10px;
+ margin-bottom: 10px;
+ border: 0;
+ border-collapse: collapse;
+}
+
+table.align-center {
+ margin-left: auto;
+ margin-right: auto;
+}
+
+table.align-default {
+ margin-left: auto;
+ margin-right: auto;
+}
+
+table caption span.caption-number {
+ font-style: italic;
+}
+
+table caption span.caption-text {
+}
+
+table.docutils td, table.docutils th {
+ padding: 1px 8px 1px 5px;
+ border-top: 0;
+ border-left: 0;
+ border-right: 0;
+ border-bottom: 1px solid #aaa;
+}
+
+table.footnote td, table.footnote th {
+ border: 0 !important;
+}
+
+th {
+ text-align: left;
+ padding-right: 5px;
+}
+
+table.citation {
+ border-left: solid 1px gray;
+ margin-left: 1px;
+}
+
+table.citation td {
+ border-bottom: none;
+}
+
+th > :first-child,
+td > :first-child {
+ margin-top: 0px;
+}
+
+th > :last-child,
+td > :last-child {
+ margin-bottom: 0px;
+}
+
+/* -- figures --------------------------------------------------------------- */
+
+div.figure, figure {
+ margin: 0.5em;
+ padding: 0.5em;
+}
+
+div.figure p.caption, figcaption {
+ padding: 0.3em;
+}
+
+div.figure p.caption span.caption-number,
+figcaption span.caption-number {
+ font-style: italic;
+}
+
+div.figure p.caption span.caption-text,
+figcaption span.caption-text {
+}
+
+/* -- field list styles ----------------------------------------------------- */
+
+table.field-list td, table.field-list th {
+ border: 0 !important;
+}
+
+.field-list ul {
+ margin: 0;
+ padding-left: 1em;
+}
+
+.field-list p {
+ margin: 0;
+}
+
+.field-name {
+ -moz-hyphens: manual;
+ -ms-hyphens: manual;
+ -webkit-hyphens: manual;
+ hyphens: manual;
+}
+
+/* -- hlist styles ---------------------------------------------------------- */
+
+table.hlist {
+ margin: 1em 0;
+}
+
+table.hlist td {
+ vertical-align: top;
+}
+
+/* -- object description styles --------------------------------------------- */
+
+.sig {
+ font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
+}
+
+.sig-name, code.descname {
+ background-color: transparent;
+ font-weight: bold;
+}
+
+.sig-name {
+ font-size: 1.1em;
+}
+
+code.descname {
+ font-size: 1.2em;
+}
+
+.sig-prename, code.descclassname {
+ background-color: transparent;
+}
+
+.optional {
+ font-size: 1.3em;
+}
+
+.sig-paren {
+ font-size: larger;
+}
+
+.sig-param.n {
+ font-style: italic;
+}
+
+/* C++ specific styling */
+
+.sig-inline.c-texpr,
+.sig-inline.cpp-texpr {
+ font-family: unset;
+}
+
+.sig.c .k, .sig.c .kt,
+.sig.cpp .k, .sig.cpp .kt {
+ color: #0033B3;
+}
+
+.sig.c .m,
+.sig.cpp .m {
+ color: #1750EB;
+}
+
+.sig.c .s, .sig.c .sc,
+.sig.cpp .s, .sig.cpp .sc {
+ color: #067D17;
+}
+
+
+/* -- other body styles ----------------------------------------------------- */
+
+ol.arabic {
+ list-style: decimal;
+}
+
+ol.loweralpha {
+ list-style: lower-alpha;
+}
+
+ol.upperalpha {
+ list-style: upper-alpha;
+}
+
+ol.lowerroman {
+ list-style: lower-roman;
+}
+
+ol.upperroman {
+ list-style: upper-roman;
+}
+
+:not(li) > ol > li:first-child > :first-child,
+:not(li) > ul > li:first-child > :first-child {
+ margin-top: 0px;
+}
+
+:not(li) > ol > li:last-child > :last-child,
+:not(li) > ul > li:last-child > :last-child {
+ margin-bottom: 0px;
+}
+
+ol.simple ol p,
+ol.simple ul p,
+ul.simple ol p,
+ul.simple ul p {
+ margin-top: 0;
+}
+
+ol.simple > li:not(:first-child) > p,
+ul.simple > li:not(:first-child) > p {
+ margin-top: 0;
+}
+
+ol.simple p,
+ul.simple p {
+ margin-bottom: 0;
+}
+
+dl.footnote > dt,
+dl.citation > dt {
+ float: left;
+ margin-right: 0.5em;
+}
+
+dl.footnote > dd,
+dl.citation > dd {
+ margin-bottom: 0em;
+}
+
+dl.footnote > dd:after,
+dl.citation > dd:after {
+ content: "";
+ clear: both;
+}
+
+dl.field-list {
+ display: grid;
+ grid-template-columns: fit-content(30%) auto;
+}
+
+dl.field-list > dt {
+ font-weight: bold;
+ word-break: break-word;
+ padding-left: 0.5em;
+ padding-right: 5px;
+}
+
+dl.field-list > dt:after {
+ content: ":";
+}
+
+dl.field-list > dd {
+ padding-left: 0.5em;
+ margin-top: 0em;
+ margin-left: 0em;
+ margin-bottom: 0em;
+}
+
+dl {
+ margin-bottom: 15px;
+}
+
+dd > :first-child {
+ margin-top: 0px;
+}
+
+dd ul, dd table {
+ margin-bottom: 10px;
+}
+
+dd {
+ margin-top: 3px;
+ margin-bottom: 10px;
+ margin-left: 30px;
+}
+
+dl > dd:last-child,
+dl > dd:last-child > :last-child {
+ margin-bottom: 0;
+}
+
+dt:target, span.highlighted {
+ background-color: #fbe54e;
+}
+
+rect.highlighted {
+ fill: #fbe54e;
+}
+
+dl.glossary dt {
+ font-weight: bold;
+ font-size: 1.1em;
+}
+
+.versionmodified {
+ font-style: italic;
+}
+
+.system-message {
+ background-color: #fda;
+ padding: 5px;
+ border: 3px solid red;
+}
+
+.footnote:target {
+ background-color: #ffa;
+}
+
+.line-block {
+ display: block;
+ margin-top: 1em;
+ margin-bottom: 1em;
+}
+
+.line-block .line-block {
+ margin-top: 0;
+ margin-bottom: 0;
+ margin-left: 1.5em;
+}
+
+.guilabel, .menuselection {
+ font-family: sans-serif;
+}
+
+.accelerator {
+ text-decoration: underline;
+}
+
+.classifier {
+ font-style: oblique;
+}
+
+.classifier:before {
+ font-style: normal;
+ margin: 0.5em;
+ content: ":";
+}
+
+abbr, acronym {
+ border-bottom: dotted 1px;
+ cursor: help;
+}
+
+/* -- code displays --------------------------------------------------------- */
+
+pre {
+ overflow: auto;
+ overflow-y: hidden; /* fixes display issues on Chrome browsers */
+}
+
+pre, div[class*="highlight-"] {
+ clear: both;
+}
+
+span.pre {
+ -moz-hyphens: none;
+ -ms-hyphens: none;
+ -webkit-hyphens: none;
+ hyphens: none;
+}
+
+div[class*="highlight-"] {
+ margin: 1em 0;
+}
+
+td.linenos pre {
+ border: 0;
+ background-color: transparent;
+ color: #aaa;
+}
+
+table.highlighttable {
+ display: block;
+}
+
+table.highlighttable tbody {
+ display: block;
+}
+
+table.highlighttable tr {
+ display: flex;
+}
+
+table.highlighttable td {
+ margin: 0;
+ padding: 0;
+}
+
+table.highlighttable td.linenos {
+ padding-right: 0.5em;
+}
+
+table.highlighttable td.code {
+ flex: 1;
+ overflow: hidden;
+}
+
+.highlight .hll {
+ display: block;
+}
+
+div.highlight pre,
+table.highlighttable pre {
+ margin: 0;
+}
+
+div.code-block-caption + div {
+ margin-top: 0;
+}
+
+div.code-block-caption {
+ margin-top: 1em;
+ padding: 2px 5px;
+ font-size: small;
+}
+
+div.code-block-caption code {
+ background-color: transparent;
+}
+
+table.highlighttable td.linenos,
+span.linenos,
+div.highlight span.gp { /* gp: Generic.Prompt */
+ user-select: none;
+ -webkit-user-select: text; /* Safari fallback only */
+ -webkit-user-select: none; /* Chrome/Safari */
+ -moz-user-select: none; /* Firefox */
+ -ms-user-select: none; /* IE10+ */
+}
+
+div.code-block-caption span.caption-number {
+ padding: 0.1em 0.3em;
+ font-style: italic;
+}
+
+div.code-block-caption span.caption-text {
+}
+
+div.literal-block-wrapper {
+ margin: 1em 0;
+}
+
+code.xref, a code {
+ background-color: transparent;
+ font-weight: bold;
+}
+
+h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
+ background-color: transparent;
+}
+
+.viewcode-link {
+ float: right;
+}
+
+.viewcode-back {
+ float: right;
+ font-family: sans-serif;
+}
+
+div.viewcode-block:target {
+ margin: -1px -10px;
+ padding: 0 10px;
+}
+
+/* -- math display ---------------------------------------------------------- */
+
+img.math {
+ vertical-align: middle;
+}
+
+div.body div.math p {
+ text-align: center;
+}
+
+span.eqno {
+ float: right;
+}
+
+span.eqno a.headerlink {
+ position: absolute;
+ z-index: 1;
+}
+
+div.math:hover a.headerlink {
+ visibility: visible;
+}
+
+/* -- printout stylesheet --------------------------------------------------- */
+
+@media print {
+ div.document,
+ div.documentwrapper,
+ div.bodywrapper {
+ margin: 0 !important;
+ width: 100%;
+ }
+
+ div.sphinxsidebar,
+ div.related,
+ div.footer,
+ #top-link {
+ display: none;
+ }
+}
\ No newline at end of file
diff --git a/docs/0.7.1/html/_static/custom.css b/docs/0.7.1/html/_static/custom.css
new file mode 100644
index 000000000..8a7c7cb54
--- /dev/null
+++ b/docs/0.7.1/html/_static/custom.css
@@ -0,0 +1,50 @@
+/*
+ the ipython3 code blocks coming from the notebooks
+ were not getting the dark theme styles applied, so
+ manually overriding them
+*/
+@media (prefers-color-scheme: dark) {
+ .highlight-ipython3 {
+ border: none !important;
+ border-radius: 2px !important;
+ background: #202020 !important;
+ color: #d0d0d0 !important;
+ }
+}
+
+@media (prefers-color-scheme: dark) {
+ tr:nth-child(odd) {
+ background-color: #202020 !important;
+ }
+}
+
+@media (prefers-color-scheme: dark) {
+ .dataframe {
+ color: white !important;
+ }
+}
+
+.hidden {
+ display: none;
+}
+
+.version {
+ text-align: right;
+ font-size: 24px;
+ margin-top: -47px;
+ margin-right: 3px;
+}
+
+.sidebar-brand {
+ margin-bottom: -10px;
+ margin-top: 10px;
+}
+
+/* unknown warning was showing, manually hiding */
+#Visualizing-Logged-Dataframes .admonition.warning {
+ display: none;
+}
+
+div.output_area.stderr {
+ display: none;
+}
diff --git a/docs/0.7.1/html/_static/doctools.js b/docs/0.7.1/html/_static/doctools.js
new file mode 100644
index 000000000..8cbf1b161
--- /dev/null
+++ b/docs/0.7.1/html/_static/doctools.js
@@ -0,0 +1,323 @@
+/*
+ * doctools.js
+ * ~~~~~~~~~~~
+ *
+ * Sphinx JavaScript utilities for all documentation.
+ *
+ * :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+/**
+ * select a different prefix for underscore
+ */
+$u = _.noConflict();
+
+/**
+ * make the code below compatible with browsers without
+ * an installed firebug like debugger
+if (!window.console || !console.firebug) {
+ var names = ["log", "debug", "info", "warn", "error", "assert", "dir",
+ "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace",
+ "profile", "profileEnd"];
+ window.console = {};
+ for (var i = 0; i < names.length; ++i)
+ window.console[names[i]] = function() {};
+}
+ */
+
+/**
+ * small helper function to urldecode strings
+ *
+ * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL
+ */
+jQuery.urldecode = function(x) {
+ if (!x) {
+ return x
+ }
+ return decodeURIComponent(x.replace(/\+/g, ' '));
+};
+
+/**
+ * small helper function to urlencode strings
+ */
+jQuery.urlencode = encodeURIComponent;
+
+/**
+ * This function returns the parsed url parameters of the
+ * current request. Multiple values per key are supported,
+ * it will always return arrays of strings for the value parts.
+ */
+jQuery.getQueryParameters = function(s) {
+ if (typeof s === 'undefined')
+ s = document.location.search;
+ var parts = s.substr(s.indexOf('?') + 1).split('&');
+ var result = {};
+ for (var i = 0; i < parts.length; i++) {
+ var tmp = parts[i].split('=', 2);
+ var key = jQuery.urldecode(tmp[0]);
+ var value = jQuery.urldecode(tmp[1]);
+ if (key in result)
+ result[key].push(value);
+ else
+ result[key] = [value];
+ }
+ return result;
+};
+
+/**
+ * highlight a given string on a jquery object by wrapping it in
+ * span elements with the given class name.
+ */
+jQuery.fn.highlightText = function(text, className) {
+ function highlight(node, addItems) {
+ if (node.nodeType === 3) {
+ var val = node.nodeValue;
+ var pos = val.toLowerCase().indexOf(text);
+ if (pos >= 0 &&
+ !jQuery(node.parentNode).hasClass(className) &&
+ !jQuery(node.parentNode).hasClass("nohighlight")) {
+ var span;
+ var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg");
+ if (isInSVG) {
+ span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
+ } else {
+ span = document.createElement("span");
+ span.className = className;
+ }
+ span.appendChild(document.createTextNode(val.substr(pos, text.length)));
+ node.parentNode.insertBefore(span, node.parentNode.insertBefore(
+ document.createTextNode(val.substr(pos + text.length)),
+ node.nextSibling));
+ node.nodeValue = val.substr(0, pos);
+ if (isInSVG) {
+ var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
+ var bbox = node.parentElement.getBBox();
+ rect.x.baseVal.value = bbox.x;
+ rect.y.baseVal.value = bbox.y;
+ rect.width.baseVal.value = bbox.width;
+ rect.height.baseVal.value = bbox.height;
+ rect.setAttribute('class', className);
+ addItems.push({
+ "parent": node.parentNode,
+ "target": rect});
+ }
+ }
+ }
+ else if (!jQuery(node).is("button, select, textarea")) {
+ jQuery.each(node.childNodes, function() {
+ highlight(this, addItems);
+ });
+ }
+ }
+ var addItems = [];
+ var result = this.each(function() {
+ highlight(this, addItems);
+ });
+ for (var i = 0; i < addItems.length; ++i) {
+ jQuery(addItems[i].parent).before(addItems[i].target);
+ }
+ return result;
+};
+
+/*
+ * backward compatibility for jQuery.browser
+ * This will be supported until firefox bug is fixed.
+ */
+if (!jQuery.browser) {
+ jQuery.uaMatch = function(ua) {
+ ua = ua.toLowerCase();
+
+ var match = /(chrome)[ \/]([\w.]+)/.exec(ua) ||
+ /(webkit)[ \/]([\w.]+)/.exec(ua) ||
+ /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) ||
+ /(msie) ([\w.]+)/.exec(ua) ||
+ ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) ||
+ [];
+
+ return {
+ browser: match[ 1 ] || "",
+ version: match[ 2 ] || "0"
+ };
+ };
+ jQuery.browser = {};
+ jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
+}
+
+/**
+ * Small JavaScript module for the documentation.
+ */
+var Documentation = {
+
+ init : function() {
+ this.fixFirefoxAnchorBug();
+ this.highlightSearchWords();
+ this.initIndexTable();
+ if (DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) {
+ this.initOnKeyListeners();
+ }
+ },
+
+ /**
+ * i18n support
+ */
+ TRANSLATIONS : {},
+ PLURAL_EXPR : function(n) { return n === 1 ? 0 : 1; },
+ LOCALE : 'unknown',
+
+ // gettext and ngettext don't access this so that the functions
+ // can safely bound to a different name (_ = Documentation.gettext)
+ gettext : function(string) {
+ var translated = Documentation.TRANSLATIONS[string];
+ if (typeof translated === 'undefined')
+ return string;
+ return (typeof translated === 'string') ? translated : translated[0];
+ },
+
+ ngettext : function(singular, plural, n) {
+ var translated = Documentation.TRANSLATIONS[singular];
+ if (typeof translated === 'undefined')
+ return (n == 1) ? singular : plural;
+ return translated[Documentation.PLURALEXPR(n)];
+ },
+
+ addTranslations : function(catalog) {
+ for (var key in catalog.messages)
+ this.TRANSLATIONS[key] = catalog.messages[key];
+ this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')');
+ this.LOCALE = catalog.locale;
+ },
+
+ /**
+ * add context elements like header anchor links
+ */
+ addContextElements : function() {
+ $('div[id] > :header:first').each(function() {
+ $('\u00B6').
+ attr('href', '#' + this.id).
+ attr('title', _('Permalink to this headline')).
+ appendTo(this);
+ });
+ $('dt[id]').each(function() {
+ $('\u00B6').
+ attr('href', '#' + this.id).
+ attr('title', _('Permalink to this definition')).
+ appendTo(this);
+ });
+ },
+
+ /**
+ * workaround a firefox stupidity
+ * see: https://bugzilla.mozilla.org/show_bug.cgi?id=645075
+ */
+ fixFirefoxAnchorBug : function() {
+ if (document.location.hash && $.browser.mozilla)
+ window.setTimeout(function() {
+ document.location.href += '';
+ }, 10);
+ },
+
+ /**
+ * highlight the search words provided in the url in the text
+ */
+ highlightSearchWords : function() {
+ var params = $.getQueryParameters();
+ var terms = (params.highlight) ? params.highlight[0].split(/\s+/) : [];
+ if (terms.length) {
+ var body = $('div.body');
+ if (!body.length) {
+ body = $('body');
+ }
+ window.setTimeout(function() {
+ $.each(terms, function() {
+ body.highlightText(this.toLowerCase(), 'highlighted');
+ });
+ }, 10);
+ $('