Skip to content

Commit

Permalink
3 ant 1 (#9)
Browse files Browse the repository at this point in the history
* #3 - V4 code snippets

* #3 - Updates

* #3 - Updates
  • Loading branch information
antaenc authored Jan 6, 2025
1 parent 56cc344 commit 4853af6
Show file tree
Hide file tree
Showing 62 changed files with 369,642 additions and 65 deletions.
5 changes: 3 additions & 2 deletions .pylintrc
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
[pylint]
disable=
line-too-long,
good-names=
code-snippets-v4
missing-function-docstring,
missing-module-docstring,
protected-access
ignore=
__init__.py,
notes=
Expand Down
26 changes: 26 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"terminal.integrated.env.linux": {
"PYTHONPATH": ".:/home/ant/Work/Senzing/git/sz-sdk-python/src:/home/ant/Work/Senzing/git/sz-sdk-python/src/senzing:/home/ant/Work/Senzing/git/sz-sdk-python-core/src:/home/ant/Work/Senzing/git/sz-sdk-python-core/src/senzing",
// "PYTHONPATH": ".:/home/ant/Work/Senzing/git/sz-sdk-python-core/src:/home/ant/Work/Senzing/git/sz-sdk-python-core/src/senzing:./sz_tools",
"LD_LIBRARY_PATH": "/opt/senzing/er/lib/",
// "SENZING_ENGINE_CONFIGURATION_JSON": "{\"PIPELINE\":{\"CONFIGPATH\":\"/etc/opt/senzing\",\"RESOURCEPATH\":\"/opt/senzing/er/resources\",\"SUPPORTPATH\":\"/opt/senzing/data\"},\"SQL\":{\"CONNECTION\":\"sqlite3://na:na@/tmp/sqlite/G2C.db\"}}"
"SENZING_ENGINE_CONFIGURATION_JSON": "{\"PIPELINE\":{\"CONFIGPATH\":\"/etc/opt/senzing\",\"RESOURCEPATH\":\"/opt/senzing/er/resources\",\"SUPPORTPATH\":\"/opt/senzing/data\"},\"SQL\":{\"CONNECTION\":\"sqlite3://na:na@/tmp/sqlite/G2C.db\"}, \"LOGGING\":{\"CONFIG\":\"console://stdout/?style=jsonl *.TRCE;*.CRIT;*.ERR\"}}"
// "SENZING_ENGINE_CONFIGURATION_JSON": "{\"PIPELINE\":{\"CONFIGPATH\":\"/etc/opt/senzing\",\"LICENSESTRINGBASE64\": \"AQAAADgCAAAAAAAAU2VuemluZyBJbnRlcm5hbAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAU2VuemluZyBJbnRlcm5hbAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADIwMjQtMDUtMDIAAAAAAAAAAAAARVZBTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFNUQU5EQVJEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC4mH8AAEBCDwAAAAAAMjAyNS0wNS0wMgAAAAAAAAAAAABZRUFSTFkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFXwDOdVd1TL+0dJRXnE9ykJJyJYnGhUN1QqoS8ASfNaDioankisRviWuB3I5uQ20EEh9tjNzzOszGf1+khWl5cb+XqE+GoMMW0rrSi6ScZmgrfh2oHrRpEbnfb4uejMrl3XGdTPdHUGNSkTKDgEQrlimVt04W5gsFVcBHBiUbKoZCghI+qaYGocsNZLh1yWOklu8Fh02CWkXXQZSKvq/PsXpkHtbsxPbActcMmZRYPZNiRXq0BK3ChyCRM0zbl4mZCPBfNL9zAx6v2HLUmDp4lNEVIyS86T9/enSrsK1udnJq09jnP8gBzY6kBxpoYyxr5o2u1VX3DC9ySHiwtio6NQMo0ckGultNqYpSBejXm10YCYH6eCsnnC5z49Gp+2NYIRcgRz/N93uLd7PrkLyLreayF8HCQOg7CBZeUGcFsufdf0304eJHCsoRy1w2dUT8N2auYJxuzjwzAMvZIYrYamjiG6Mc4Wdcpuktlcht+pjhqk9vwqQI0AzjMq2oXDGYL6KlFcOAojAIZu8bl30pZGGkq2n9NFuuO4gMiRjIwYkBpwHNmBq3QT21owPb4urlidmQelmXtzk9+BNMZL34bUK7R509Rt3GTmjb2c5TDqyIatGfnBsh3658ce8ohnBJ/ZmUgJifcorgLDawDqr8spClKfwLtcwzbkNPDKHJ/e\",\"RESOURCEPATH\":\"/opt/senzing/g2/resources\",\"SUPPORTPATH\":\"/opt/senzing/data\"},\"SQL\":{\"CONNECTION\":\"sqlite3://na:na@/tmp/sqlite/G2C.db\"}}"
},
"python.testing.pytestArgs": [],
"python.autoComplete.extraPaths": [
"/home/ant/Work/Senzing/git/sz-sdk-python-core/src",
"/home/ant/Work/Senzing/git/sz-sdk-python-core/src/senzing",
"/home/ant/Work/Senzing/git/sz-sdk-python/src",
"/home/ant/Work/Senzing/git/sz-sdk-python/src/senzing"
],
"python.analysis.extraPaths": [
"/home/ant/Work/Senzing/git/sz-sdk-python-core/src",
"/home/ant/Work/Senzing/git/sz-sdk-python-core/src/senzing",
"/home/ant/Work/Senzing/git/sz-sdk-python/src",
"/home/ant/Work/Senzing/git/sz-sdk-python/src/senzing"
],
"pylint.importStrategy": "useBundled",
}
6 changes: 2 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog], [markdownlint],
and this project adheres to [Semantic Versioning].

## [1.0.0] - 2024-11-28
### Added to 0.0.1

### Added to 1.0.0

- Initial
- Initial for V4

[Keep a Changelog]: https://keepachangelog.com/en/1.0.0/
[markdownlint]: https://dlaa.me/markdownlint/
Expand Down
72 changes: 13 additions & 59 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ To find the Senzing API V3 version of this repository, visit [code-snippets-v3].

## Overview

Succinct examples of how you might use the Senzing APIs for operational tasks.
Succinct examples of how you might use the Senzing SDK for operational tasks.

## Contents

Expand Down Expand Up @@ -63,15 +63,15 @@ The JSON configuration string is set via the environment variable `SENZING_ENGIN

## Senzing APIs Bare Metal Usage

You may already have installed the Senzing APIs and created a Senzing project by following the [Quickstart Guide]. If not, and you would like to install the Senzing APIs directly on a machine, follow the steps in the [Quickstart Guide]. Be sure to review the API [Quickstart Roadmap], especially the [System Requirements].
You may already have installed Senzing and created a Senzing project by following the [Quickstart Guide]. If not, and you would like to install Senzing directly on a machine, follow the steps in the [Quickstart Guide]. Be sure to review the [Quickstart Roadmap], especially the [System Requirements].

### Configuration

When using a bare metal install, the initialization parameters used by the Senzing Python utilities are maintained within `<project_path>/etc/G2Module.ini`.

🤔To convert an existing Senzing project G2Module.ini file to a JSON string use one of the following methods:

- [G2ModuleIniToJson.py]
- [g2_module_ini_to_json.py]

- Modify the path to your project's G2Module.ini file.

Expand All @@ -87,12 +87,6 @@ When using a bare metal install, the initialization parameters used by the Senzi
python3 -c $'import configparser; ini_file_name = "<project_path>/etc/G2Module.ini";engine_config_json = {};cfgp = configparser.ConfigParser();cfgp.optionxform = str;cfgp.read(ini_file_name)\nfor section in cfgp.sections(): engine_config_json[section] = dict(cfgp.items(section))\nprint(engine_config_json)'
```

- [SenzingGo.py]

- ```console
<project_path>/python/SenzingGo.py --iniToJson
```

:pencil2: `<project_path>` in the above example should point to your project.

### Usage
Expand Down Expand Up @@ -120,57 +114,17 @@ The included Dockerfile leverages the [Senzing API runtime] image to provide an

### Configuration for Docker usage

When used with a container, the JSON configuration is relative to the paths within the container. The JSON configuration should look like:

```json
{
"PIPELINE": {
"CONFIGPATH": "/etc/opt/senzing",
"RESOURCEPATH": "/opt/senzing/g2/resources",
"SUPPORTPATH": "/opt/senzing/data"
},
"SQL": {
"CONNECTION": "postgresql://senzing:password@myhost:5432:g2"
}
}
```

✏️You only need to modify the `CONNECTION` string to point to your Senzing database.
Coming soon...

### Usage for Dccker usage

1. Clone this repository
1. Export the engine configuration environment variable

```console
export SENZING_ENGINE_CONFIGURATION_JSON='{"PIPELINE": {"CONFIGPATH": "/etc/opt/senzing", "RESOURCEPATH": "/opt/senzing/g2/resources", "SUPPORTPATH": "/opt/senzing/data"}, "SQL": {"CONNECTION": "postgresql://user:password@host:5432:g2"}}'
```

1. Build the Docker image

```console
cd <repository_dir>
docker build --tag senzing/code-snippets-v4 .
```

1. Run a container

```console
docker run \
--env SENZING_ENGINE_CONFIGURATION_JSON \
--interactive \
--tty \
--rm \
senzing/code-snippets-v4
```
### Usage for Docker

✏️You only need to modify the `CONNECTION` string to point to your Senzing database.
Coming soon...

## Items of Note

### With Info
### SZ_WITH_INFO flag

A feature of Senzing is the capability to pass changes from data manipulation API calls to downstream systems for analysis, consolidation and replication. Any API that can change the outcome of entity resolution have a "WithInfo" version of the API. For example, addRecord and addRecordWithInfo. The "WithInfo" version of the API returns a response message detailing any entities that were affected by the API. In the following example (from addRecordWithInfo) a single entity with the ID 7903 was affected.
A feature of Senzing is the capability to pass changes from data manipulation SDK calls to downstream systems for analysis, consolidation and replication. SDK methods `add_record()`, `delete_record()` and `process_redo_record()` accept a `flags=` argument that, when set to `SzEngineFlags.SZ_WITH_INFO`, will return a response message detailing any entities affected by the method. In the following example (from `add_record("TEST", "10945", flags=SzEngineFlags.SZ_WITH_INFO)`) a single entity with the ID 7903 was affected.

```json
{
Expand All @@ -190,7 +144,7 @@ The AFFECTED_ENTITIES object contains a list of all entity IDs affected. Separat

### Parallel Processing

Many of the example tasks demonstrate concurrent execution with threads. The entity resolution process involves IO operations, the use of concurrent processes and threads when calling the Senzing APIs provides scalability and performance. If using multiple processes, each process should have its own instance of a Senzing engine, for example G2Engine. Each engine object can support multiple threads.
Many of the example tasks demonstrate concurrent execution with threads. Because the entity resolution process involves I/O operations, using concurrent processes and threads when calling the Senzing APIs provides scalability and performance.

### Scalability

Expand All @@ -212,19 +166,19 @@ To run the same example again and see representative performance, first [purge]

### Input Load File Sizes

There are different sized load files within the [Data] path that can be used to decrease or increase the volume of data loaded depending on the specification of your hardware. The files are named loadx.json, where the x specifies the number of records in the file.
There are different sized load files within the [data] path that can be used to increase the volume of data loaded depending on the specification of your hardware. Note: Senzing V4 comes with a default license that allows up to 500 source records to be loaded; without a larger license you will not be able to load these larger files.

[code-snippets-v3]: https://github.com/Senzing/code-snippets-v3
[Configuration]: #configuration
[Data]: Resources/Data/
[data]: resources/data/
[Docker Usage]: #docker-usage
[G2ModuleIniToJson.py]: Python/Tasks/Initialization/
[g2_module_ini_to_json.py]: python/initialization/g2_module_ini_to_json.py
[Input Load File Sizes]: #input-load-file-sizes
[Items of Note]: #items-of-note
[jc]: https://github.com/kellyjonbrazil/jc
[Legend]: #legend
[Parallel Processing]: #parallel-processing
[purge]: Python/Tasks/Initialization/PurgeRepository.py
[purge]: python/initialization/purge_repository.py
[Purging Senzing Repository Between Examples]: #purging-senzing-repository-between-examples
[Quickstart Guide]: https://senzing.zendesk.com/hc/en-us/articles/115002408867-Quickstart-Guide
[Quickstart Roadmap]: https://senzing.zendesk.com/hc/en-us/articles/115001579954-API-Quickstart-Roadmap
Expand Down
55 changes: 55 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
[build-system]
requires = [
"setuptools>=42",
"wheel"
]
build-backend = "setuptools.build_meta"

[dependency-groups]
test = ["pytest", "coverage"]

[tool.bandit]
skips = ["B101"]

[tool.black]
line-length = 120

[tool.flake8]
extend-ignore = ["E203", "E501", "E704", "W503"]
max-line-length = 120

[tool.isort]
profile = "black"
src_paths = ["examples", "src", "tests"]

[[tool.mypy.overrides]]
module = "senzing_abstract.szengineflags.*"
ignore_missing_imports = "true"
warn_unused_ignores = "false"

[[tool.mypy.overrides]]
module = "pytest_schema.*"
ignore_missing_imports = "true"

[tool.pylint]
ignored-argument-names = "args|kwargs"
disable = [
"broad-except",
"consider-using-f-string",
"line-too-long",
"missing-function-docstring",
"missing-module-docstring",
"protected-access",
"too-many-branches",
"too-many-locals",
]
good-names = [
"template-python"
]
ignore = [
"__init__.py",
"docs/source/conf.py"
]
notes = [
"FIXME"
]
1 change: 1 addition & 0 deletions python/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Python Snippets
14 changes: 14 additions & 0 deletions python/configuration/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Deleting Data
The deletion snippets outline deleting previously added source records. Deleting source records removes the previously added source record from the system, completes the entity resolution process and persists outcomes in the Senzing repository.

Deleting a record only requires the data source code and record ID for the record to be deleted.

## Snippets
* **DeleteFutures.py**
* Read and delete source records from a file using multiple threads
* **DeleteLoop.py**
* Basic read and delete source records from a file
* **DeleteWithInfoFutures.py**
* Read and delete source records from a file using multiple threads
* Collect the response from the [with info](../../../README.md#with-info) version of the API and write it to a file

33 changes: 33 additions & 0 deletions python/configuration/add_data_sources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#! /usr/bin/env python3

import os
import sys
from pathlib import Path

from senzing_core import SzAbstractFactory, SzError

# Engine settings come from the environment; an empty JSON object is used when
# the variable is not set.
ENGINE_CONFIG_JSON = os.getenv("SENZING_ENGINE_CONFIGURATION_JSON", "{}")
# The script's own file name (without extension) is used both as the factory
# instance name and as the comment stored with the new configuration.
INSTANCE_NAME = Path(__file__).stem


try:
    sz_factory = SzAbstractFactory(INSTANCE_NAME, ENGINE_CONFIG_JSON, verbose_logging=False)
    sz_config = sz_factory.create_config()
    sz_configmanager = sz_factory.create_configmanager()

    # Load the current default configuration into a working handle.
    config_id = sz_configmanager.get_default_config_id()
    config_definition = sz_configmanager.get_config(config_id)
    config_handle = sz_config.import_config(config_definition)

    try:
        for data_source in ("CUSTOMERS", "REFERENCE", "WATCHLIST"):
            sz_config.add_data_source(config_handle, data_source)

        # Export the modified configuration, register it and make it the default.
        config_definition = sz_config.export_config(config_handle)
        config_id = sz_configmanager.add_config(config_definition, INSTANCE_NAME)
        sz_configmanager.set_default_config_id(config_id)

        data_sources = sz_config.get_data_sources(config_handle)
    finally:
        # Always release the handle, even if one of the calls above raised.
        sz_config.close_config(config_handle)

    print(data_sources)
except SzError as err:
    print(f"{err.__class__.__name__} - {err}", file=sys.stderr)
15 changes: 15 additions & 0 deletions python/deleting/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Deleting Data

The deletion snippets outline deleting previously added source records. Deleting source records removes the previously added source record from the system, completes the entity resolution process and persists outcomes in the Senzing repository.

Deleting a record only requires the data source code and record ID for the record to be deleted.

## Snippets

- **delete_futures.py**
- Read and delete source records from a file using multiple threads
- **delete_loop.py**
- Basic read and delete source records from a file
- **delete_with_info_futures.py**
- Read and delete source records from a file using multiple threads
- Collect the response using the [SZ_WITH_INFO flag](../../README.md#sz_with_info-flag) on the `delete_record()` method and write it to a file
80 changes: 80 additions & 0 deletions python/deleting/delete_futures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#! /usr/bin/env python3

import concurrent.futures
import itertools
import json
import os
import sys
from pathlib import Path

from senzing_core import (
SzAbstractFactory,
SzBadInputError,
SzError,
SzRetryableError,
SzUnrecoverableError,
)

# Engine settings come from the environment; an empty JSON object is used when
# the variable is not set.
ENGINE_CONFIG_JSON = os.getenv("SENZING_ENGINE_CONFIGURATION_JSON", "{}")
# Resolve the data file relative to this script rather than the current working
# directory, so the snippet can be launched from anywhere. When run from the
# script's own directory this yields the same path as before.
INPUT_FILE = (Path(__file__).resolve().parent / "../../resources/data/del-500.jsonl").resolve()
# The script's own file name (without extension) identifies this instance.
INSTANCE_NAME = Path(__file__).stem


def mock_logger(level, error, error_record=None):
    """Write a log-style message for *error* to standard error.

    Args:
        level: Severity label printed at the start of the message.
        error: The exception being reported; its class name and text are shown.
        error_record: Optional record associated with the error. When truthy it
            is printed on its own line after the message.
    """
    headline = f"\n{level}: {error.__class__.__name__} - {error}"
    print(headline, file=sys.stderr)
    if not error_record:
        return
    print(f"{error_record}", file=sys.stderr)


def delete_record(engine, record_to_delete):
    """Parse one JSON record and ask *engine* to delete it.

    Args:
        engine: Object exposing ``delete_record(data_source, record_id)``.
        record_to_delete: JSON text of the record. ``DATA_SOURCE`` and
            ``RECORD_ID`` are read from it; a missing key is passed on as an
            empty string.

    Raises:
        json.JSONDecodeError: If *record_to_delete* is not valid JSON.
    """
    parsed = json.loads(record_to_delete)
    engine.delete_record(
        parsed.get("DATA_SOURCE", ""),
        parsed.get("RECORD_ID", ""),
    )


def futures_del(engine, input_file):
    """Delete the records listed in a JSON-lines file using a thread pool.

    One line of *input_file* is submitted per worker to start with. Each time a
    task finishes, successfully or not, the next line is read and submitted, so
    the pool stays full until the file is exhausted.

    Args:
        engine: Engine object handed to ``delete_record`` for every line.
        input_file: Path of the UTF-8 file holding one JSON record per line.

    Raises:
        SzError: Re-raised after logging when a task fails with an error that
            is neither a bad-input nor a retryable error.
    """
    success_recs = 0
    error_recs = 0

    with open(input_file, "r", encoding="utf-8") as in_file:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Prime the pool with one record per worker thread.
            futures = {
                executor.submit(delete_record, engine, record): record
                for record in itertools.islice(in_file, executor._max_workers)
            }

            while futures:
                done, _ = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
                for f in done:
                    # Take the finished task out of the in-flight map up front
                    # so it is removed on every path, including a re-raise.
                    finished_record = futures.pop(f)
                    try:
                        f.result()
                    except (SzBadInputError, json.JSONDecodeError) as err:
                        mock_logger("ERROR", err, finished_record)
                        error_recs += 1
                    except SzRetryableError as err:
                        mock_logger("WARN", err, finished_record)
                        error_recs += 1
                    except (SzUnrecoverableError, SzError) as err:
                        mock_logger("CRITICAL", err, finished_record)
                        raise
                    else:
                        success_recs += 1
                        if success_recs % 100 == 0:
                            print(f"Processed {success_recs:,} deletes, with {error_recs:,} errors", flush=True)

                    # Refill the pool after successes AND non-fatal failures.
                    # Refilling only on success let every failed record shrink
                    # the pool, leaving the rest of the file unprocessed once
                    # failures reached the worker count.
                    record = in_file.readline()
                    if record:
                        futures[executor.submit(delete_record, engine, record)] = record

    print(f"\nSuccessfully deleted {success_recs:,} records, with {error_recs:,} errors")


# Script entry point: build the factory and engine from the environment-supplied
# configuration, then delete every record listed in INPUT_FILE.
try:
    sz_factory = SzAbstractFactory(INSTANCE_NAME, ENGINE_CONFIG_JSON, verbose_logging=False)
    sz_engine = sz_factory.create_engine()
    futures_del(sz_engine, INPUT_FILE)
except SzError as err:
    # Covers failures creating the factory/engine as well as errors re-raised
    # by futures_del; only SzError is handled here, anything else propagates.
    mock_logger("CRITICAL", err)
Loading

0 comments on commit 4853af6

Please sign in to comment.