Skip to content

Commit

Permalink
Making orjson optional. Adding PrefixOrSuffixOperator
Browse files Browse the repository at this point in the history
  • Loading branch information
seperman committed Feb 6, 2023
1 parent fdb08d4 commit df0c984
Show file tree
Hide file tree
Showing 14 changed files with 139 additions and 27 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ If you want to use DeepDiff from commandline:

`pip install "deepdiff[cli]"`

If you want to improve the performance of DeepDiff with certain processes such as json serialization:

`pip install "deepdiff[optimize]"`

### Importing

```python
Expand Down
13 changes: 12 additions & 1 deletion deepdiff/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
from deepdiff import Delta, DeepSearch, extract as deep_extract
from deepdiff.serialization import load_path_content, save_content_to_path

try:
import orjson
except ImportError:
orjson = None


@click.group()
def cli():
Expand Down Expand Up @@ -105,7 +110,13 @@ def diff(
# printing into stdout
sys.stdout.buffer.write(delta.dumps())
else:
pprint(diff, indent=2)
try:
if orjson:
print(diff.to_json(option=orjson.OPT_INDENT_2))
else:
print(diff.to_json(indent=2))
except Exception:
pprint(diff, indent=2)


@cli.command()
Expand Down
11 changes: 11 additions & 0 deletions deepdiff/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,14 @@ def match(self, level) -> bool:

def give_up_diffing(self, level, diff_instance) -> bool:
    """
    Hook that concrete operators are expected to override.

    This base version has no diffing logic of its own and always raises
    NotImplementedError.
    """
    message = 'Please implement the diff function.'
    raise NotImplementedError(message)


class PrefixOrSuffixOperator:
    """
    Custom operator that stops diffing two strings when one starts with the other.

    ``match`` selects levels where both sides are non-empty strings.
    ``give_up_diffing`` then returns True when either string is a prefix of
    the other. Only ``str.startswith`` is used: a string that merely *ends*
    with the other one is still diffed.
    """

    def match(self, level) -> bool:
        """
        Return True only when ``level.t1`` and ``level.t2`` are both non-empty strings.

        The type check runs before any truthiness test so that objects whose
        truth value is ambiguous or raises (numpy arrays, for example) are
        rejected instead of blowing up, and the result is always a real bool.
        """
        t1, t2 = level.t1, level.t2
        if not (isinstance(t1, str) and isinstance(t2, str)):
            return False
        # Empty strings are excluded: '' is a prefix of every string.
        return bool(t1) and bool(t2)

    def give_up_diffing(self, level, diff_instance) -> bool:
        """
        Return True (skip the pair) when either string starts with the other.

        ``diff_instance`` is not used here, so nothing is reported for a
        skipped pair.
        """
        t1 = level.t1
        t2 = level.t2
        return t1.startswith(t2) or t2.startswith(t1)
24 changes: 15 additions & 9 deletions deepdiff/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import io
import os
import json
import orjson
import uuid
import logging
import re # NOQA
Expand All @@ -26,6 +25,11 @@
except ImportError: # pragma: no cover.
import csv
clevercsv = None # pragma: no cover.
try:
import orjson
except ImportError: # pragma: no cover.
orjson = None

from copy import deepcopy
from functools import partial
from collections.abc import Mapping
Expand Down Expand Up @@ -556,15 +560,17 @@ def object_hook(self, obj):
def json_dumps(item, default_mapping=None, **kwargs):
    """
    Dump json with extra details that are not normally json serializable.

    orjson is used when it could be imported (the module-level ``orjson``
    name is None otherwise); its bytes output is decoded to ``str`` so the
    return type is the same for both backends. Without orjson the built-in
    ``json`` module is used.

    ``default_mapping`` is forwarded to ``json_convertor_default`` to build
    the ``default=`` callback. ``**kwargs`` are passed unchanged to whichever
    backend is selected, so they must be keyword arguments that backend
    accepts.
    """
    default = json_convertor_default(default_mapping=default_mapping)
    if orjson:
        serialized = orjson.dumps(item, default=default, **kwargs)
        return serialized.decode(encoding='utf-8')
    return json.dumps(item, default=default, **kwargs)


# json.loads with cls=JSONDecoder pre-bound, so callers get that decoder class without passing it themselves.
json_loads = partial(json.loads, cls=JSONDecoder)
50 changes: 45 additions & 5 deletions docs/custom.rst
Original file line number Diff line number Diff line change
Expand Up @@ -128,21 +128,61 @@ For example you could use the level object to further determine if the 2 objects
Custom Operators
----------------

Whether two objects are different or not are largely depend on the context. For example, apple and banana are the same
Whether two objects are different or not largely depends on the context. For example, apples and bananas are the same
if you are considering whether they are fruits or not.

In that case, you can pass a *custom_operators* for the job.

In fact, custom operators give you a lot of power. In the following examples we explore use cases from making DeepDiff
report the L2 Distance of items, to only include certain paths in diffing all the way to making DeepDiff stop diffing
as soon as the first diff is reported.
Custom operators give you a lot of power. In the following examples, we explore various use cases such as:

- Making DeepDiff report the L2 Distance of items
- Only include specific paths in diffing
- Making DeepDiff stop diffing once we find the first diff.

You can use one of the predefined custom operators that come with DeepDiff. Or you can define one yourself.


Built-In Custom Operators
-------------------------


PrefixOrSuffixOperator
......................


This operator will skip pairs of strings where one string is a suffix or prefix of the other.

For example, when this operator is used, the two strings "joe" and "joe's car" will not be reported as different.

>>> from deepdiff import DeepDiff
>>> from deepdiff.operator import PrefixOrSuffixOperator
>>> t1 = {
... "key1": ["foo", "bar's food", "jack", "joe"]
... }
>>> t2 = {
... "key1": ["foo", "bar", "jill", "joe'car"]
... }
>>>
>>> DeepDiff(t1, t2)
{'values_changed': {"root['key1'][1]": {'new_value': 'bar', 'old_value': "bar's food"}, "root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'}, "root['key1'][3]": {'new_value': "joe'car", 'old_value': 'joe'}}}
>>> DeepDiff(t1, t2, custom_operators=[
... PrefixOrSuffixOperator()
... ])
{'values_changed': {"root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'}}}
>>>




Define A Custom Operator
------------------------


To define a custom operator, you just need to inherit from *BaseOperator* and

* implement a give_up_diffing method
* give_up_diffing(level: DiffLevel, diff_instance: DeepDiff) -> boolean

If it returns True, then we will give up diffing the 2 objects.
If it returns True, then we will give up diffing the two objects.
You may or may not use the diff_instance.custom_report_result within this function
to report any diff. If you decide not to report anything, and this
function returns True, then the objects are basically skipped in the results.
Expand Down
4 changes: 4 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ If you want to use DeepDiff from commandline::

pip install "deepdiff[cli]"

If you want to improve the performance of DeepDiff with certain processes such as json serialization::

pip install "deepdiff[optimize]"

Read about DeepDiff optimizations at :ref:`optimizations_label`

Importing
Expand Down
9 changes: 9 additions & 0 deletions docs/optimizations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@ Optimizations
If you are dealing with large nested objects and ignore_order=True, chances are DeepDiff takes a while to calculate the diff. Here are some tips that may help you with optimizations and progress report.


Optimized Libraries
-------------------

If you dump DeepDiff or Delta objects as json, you can improve the performance by installing orjson.
DeepDiff will automatically use orjson instead of Python's built-in json library to do json serialization.

pip install "deepdiff[optimize]"


Max Passes
----------

Expand Down
2 changes: 1 addition & 1 deletion requirements-dev-3.7.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
wheel==0.38.1
-r requirements.txt
-r requirements-cli.txt
bump2version==1.0.1
Expand All @@ -8,3 +7,4 @@ numpy==1.21.6
pytest==7.1.2
python-dotenv==0.20.0
python-dateutil==2.8.2
wheel==0.38.1
3 changes: 2 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
wheel==0.38.1
-r requirements.txt
-r requirements-cli.txt
bump2version==1.0.1
Expand All @@ -14,3 +13,5 @@ Sphinx==5.3.0
sphinx-sitemap==2.2.1
flake8==6.0.0
python-dateutil==2.8.2
orjson==3.8.3
wheel==0.38.1
1 change: 1 addition & 0 deletions requirements-optimize.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
orjson
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
ordered-set>=4.0.2,<4.2.0
orjson
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def get_reqs(filename):

reqs = get_reqs("requirements.txt")
cli_reqs = get_reqs("requirements-cli.txt")
optimize_reqs = get_reqs("requirements-optimize.txt")

with open('README.md') as file:
long_description = file.read()
Expand All @@ -45,6 +46,7 @@ def get_reqs(filename):
python_requires='>=3.7',
extras_require={
"cli": cli_reqs,
"optimize": optimize_reqs,
},
classifiers=[
"Intended Audience :: Developers",
Expand All @@ -54,6 +56,7 @@ def get_reqs(filename):
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: Implementation :: PyPy",
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: MIT License"
Expand Down
16 changes: 8 additions & 8 deletions tests/test_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
class TestCommands:

@pytest.mark.parametrize('t1, t2, expected_in_stdout, expected_exit_code', [
('t1.json', 't2.json', "'dictionary_item_added\': [root[0]", 0),
('t1.json', 't2.json', '"dictionary_item_added": [\n "root[0]', 0),
('t1_corrupt.json', 't2.json', "Expecting property name enclosed in double quotes", 1),
('t1.json', 't2_json.csv', "'old_value\': \'value2\'", 0),
('t2_json.csv', 't1.json', "'old_value\': \'value3\'", 0),
('t1.csv', 't2.csv', "\'new_value\': \'James\'", 0),
('t1.json', 't2_json.csv', '"old_value": "value2"', 0),
('t2_json.csv', 't1.json', '"old_value": "value3"', 0),
('t1.csv', 't2.csv', '"new_value": "James"', 0),
('t1.toml', 't2.toml', "10.0.0.2", 0),
('t1.pickle', 't2.pickle', "'new_value': 5, 'old_value': 1", 0),
('t1.yaml', 't2.yaml', "'new_value': 61, 'old_value': 65", 0),
('t1.pickle', 't2.pickle', '"new_value": 5,\n "old_value": 1', 0),
('t1.yaml', 't2.yaml', '"new_value": 61,\n "old_value": 65', 0),
])
def test_diff_command(self, t1, t2, expected_in_stdout, expected_exit_code):
t1 = os.path.join(FIXTURES_DIR, t1)
Expand Down Expand Up @@ -74,7 +74,7 @@ def test_command_group_by(self):
diffed = runner.invoke(diff, [t1, t2, '--group-by', 'id'])
assert 0 == diffed.exit_code
assert 'values_changed' in diffed.output
assert '\'new_value\': \'Chicken\'' in diffed.output
assert '"new_value": "Chicken"' in diffed.output

def test_command_math_epsilon(self):
t1 = os.path.join(FIXTURES_DIR, 'd_t1.yaml')
Expand All @@ -86,7 +86,7 @@ def test_command_math_epsilon(self):

diffed2 = runner.invoke(diff, [t1, t2, '--math-epsilon', '0.001'])
assert 0 == diffed2.exit_code
assert "{'values_changed': {'root[2][2]': {'new_value': 0.289, 'old_value': 0.288}}}\n" == diffed2.output
assert '{\n "values_changed": {\n "root[2][2]": {\n "new_value": 0.289,\n "old_value": 0.288\n }\n }\n}\n' == diffed2.output

def test_command_grep(self):
path = os.path.join(FIXTURES_DIR, 'd_t1.yaml')
Expand Down
25 changes: 24 additions & 1 deletion tests/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import List
from deepdiff import DeepDiff
from deepdiff.operator import BaseOperator
from deepdiff.operator import BaseOperator, PrefixOrSuffixOperator


class TestOperators:
Expand Down Expand Up @@ -217,3 +217,26 @@ def give_up_diffing(self, level, diff_instance) -> bool:

expected = {'values_changed': {'root[0][1]': {'new_value': 3, 'old_value': 2}}}
assert expected == ddiff

def test_prefix_or_suffix_diff(self):
    """Pairs where one string starts with the other are skipped, with and without ignore_order."""
    before = {"key1": ["foo", "bar's food", "jack", "joe"]}
    after = {"key1": ["foo", "bar", "jill", "joe'car"]}

    # Only 'jack' -> 'jill' should survive: every other changed pair shares a prefix.
    ordered_result = DeepDiff(before, after, custom_operators=[PrefixOrSuffixOperator()])
    ordered_expected = {
        'values_changed': {
            "root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'},
        },
    }
    assert ordered_expected == ordered_result

    unordered_result = DeepDiff(
        before,
        after,
        ignore_order=True,
        custom_operators=[PrefixOrSuffixOperator()],
    )
    unordered_expected = {
        'values_changed': {
            "root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'},
        },
    }
    assert unordered_expected == unordered_result

0 comments on commit df0c984

Please sign in to comment.