Skip to content

Commit

Permalink
sqlite-utils bulk command
Browse files Browse the repository at this point in the history
* sqlite-utils bulk command, closes #375
* Refactor import_options and insert_upsert_options, refs #377
* Tests for sqlite-utils bulk, refs #377
* Documentation for sqlite-utils bulk, refs #377
  • Loading branch information
simonw authored Jan 11, 2022
1 parent 1b84c17 commit 1291415
Show file tree
Hide file tree
Showing 3 changed files with 211 additions and 43 deletions.
30 changes: 30 additions & 0 deletions docs/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1078,6 +1078,36 @@ The command will fail if you reference columns that do not exist on the table. T
.. note::
``upsert`` in sqlite-utils 1.x worked like ``insert ... --replace`` does in 2.x. See `issue #66 <https://github.com/simonw/sqlite-utils/issues/66>`__ for details of this change.


.. _cli_bulk:

Executing SQL in bulk
=====================

If you have a JSON, newline-delimited JSON, CSV or TSV file you can use the ``sqlite-utils bulk`` command to execute a SQL query once for each record in that file.

The command takes the database file, the SQL to be executed and the file containing records to be used when evaluating the SQL query.

The SQL query should include ``:named`` parameters that match the keys in the records.

For example, given a ``chickens.csv`` CSV file containing the following::

id,name
1,Blue
2,Snowy
3,Azi
4,Lila
5,Suna
6,Cardi

You could insert those rows into a pre-created ``chickens`` table like so::

$ sqlite-utils bulk chickens.db \
'insert into chickens (id, name) values (:id, :name)' \
chickens.csv --csv

This command accepts the same input format options as the ``sqlite-utils insert`` command - so it defaults to expecting JSON but can accept other formats using ``--csv`` or ``--tsv`` or ``--nl`` or other options described above.

.. _cli_insert_files:

Inserting data from files
Expand Down
167 changes: 124 additions & 43 deletions sqlite_utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,50 @@ def reset_counts(path, load_extension):
db.reset_counts()


# Click options that control how the input file is parsed. They are kept in
# one tuple so that more than one command can share them:
# insert_upsert_options() concatenates this tuple into its own option list,
# and import_options() applies it on its own (as the "bulk" command does).
# The order of entries here is the order in which the options are declared.
_import_options = (
click.option(
"--flatten",
is_flag=True,
help='Flatten nested JSON objects, so {"a": {"b": 1}} becomes {"a_b": 1}',
),
click.option("--nl", is_flag=True, help="Expect newline-delimited JSON"),
click.option("-c", "--csv", is_flag=True, help="Expect CSV input"),
click.option("--tsv", is_flag=True, help="Expect TSV input"),
click.option(
"--lines",
is_flag=True,
help="Treat each line as a single value called 'line'",
),
click.option(
"--text",
is_flag=True,
help="Treat input as a single value called 'text'",
),
click.option("--convert", help="Python code to convert each item"),
# "--import" is exposed to the command function as the "imports" parameter
# and may be passed more than once.
click.option(
"--import",
"imports",
type=str,
multiple=True,
help="Python modules to import",
),
click.option("--delimiter", help="Delimiter to use for CSV files"),
click.option("--quotechar", help="Quote character to use for CSV/TSV"),
click.option("--sniff", is_flag=True, help="Detect delimiter and quote character"),
click.option("--no-headers", is_flag=True, help="CSV file has no header row"),
click.option(
"--encoding",
help="Character encoding for input, defaults to utf-8",
),
)


def import_options(fn):
    """Decorate ``fn`` with every option listed in ``_import_options``.

    The options are applied from the last entry to the first, which mirrors
    how a stack of ``@decorator`` lines is applied bottom-up.

    Returns the fully decorated callable.
    """
    decorated = fn
    for option in _import_options[::-1]:
        decorated = option(decorated)
    return decorated


def insert_upsert_options(fn):
for decorator in reversed(
(
Expand All @@ -673,40 +717,9 @@ def insert_upsert_options(fn):
click.option(
"--pk", help="Columns to use as the primary key, e.g. id", multiple=True
),
click.option(
"--flatten",
is_flag=True,
help='Flatten nested JSON objects, so {"a": {"b": 1}} becomes {"a_b": 1}',
),
click.option("--nl", is_flag=True, help="Expect newline-delimited JSON"),
click.option("-c", "--csv", is_flag=True, help="Expect CSV input"),
click.option("--tsv", is_flag=True, help="Expect TSV input"),
click.option(
"--lines",
is_flag=True,
help="Treat each line as a single value called 'line'",
),
click.option(
"--text",
is_flag=True,
help="Treat input as a single value called 'text'",
),
click.option("--convert", help="Python code to convert each item"),
click.option(
"--import",
"imports",
type=str,
multiple=True,
help="Python modules to import",
),
click.option("--delimiter", help="Delimiter to use for CSV files"),
click.option("--quotechar", help="Quote character to use for CSV/TSV"),
click.option(
"--sniff", is_flag=True, help="Detect delimiter and quote character"
),
click.option(
"--no-headers", is_flag=True, help="CSV file has no header row"
),
)
+ _import_options
+ (
click.option(
"--batch-size", type=int, default=100, help="Commit every X records"
),
Expand All @@ -726,10 +739,6 @@ def insert_upsert_options(fn):
type=(str, str),
help="Default value that should be set for a column",
),
click.option(
"--encoding",
help="Character encoding for input, defaults to utf-8",
),
click.option(
"-d",
"--detect-types",
Expand Down Expand Up @@ -767,6 +776,7 @@ def insert_upsert_implementation(
quotechar,
sniff,
no_headers,
encoding,
batch_size,
alter,
upsert,
Expand All @@ -775,11 +785,11 @@ def insert_upsert_implementation(
truncate=False,
not_null=None,
default=None,
encoding=None,
detect_types=None,
analyze=False,
load_extension=None,
silent=False,
bulk_sql=None,
):
db = sqlite_utils.Database(path)
_load_extensions(db, load_extension)
Expand Down Expand Up @@ -886,6 +896,12 @@ def insert_upsert_implementation(
# Apply {"$base64": true, ...} decoding, if needed
docs = (decode_base64_values(doc) for doc in docs)

# For bulk_sql= we use cursor.executemany() instead
if bulk_sql:
with db.conn:
db.conn.cursor().executemany(bulk_sql, docs)
return

try:
db[table].insert_all(
docs, pk=pk, batch_size=batch_size, alter=alter, **extra_kwargs
Expand Down Expand Up @@ -968,9 +984,9 @@ def insert(
quotechar,
sniff,
no_headers,
encoding,
batch_size,
alter,
encoding,
detect_types,
analyze,
load_extension,
Expand Down Expand Up @@ -1020,13 +1036,13 @@ def insert(
quotechar,
sniff,
no_headers,
encoding,
batch_size,
alter=alter,
upsert=False,
ignore=ignore,
replace=replace,
truncate=truncate,
encoding=encoding,
detect_types=detect_types,
analyze=analyze,
load_extension=load_extension,
Expand Down Expand Up @@ -1058,10 +1074,10 @@ def upsert(
quotechar,
sniff,
no_headers,
encoding,
alter,
not_null,
default,
encoding,
detect_types,
analyze,
load_extension,
Expand Down Expand Up @@ -1090,12 +1106,12 @@ def upsert(
quotechar,
sniff,
no_headers,
encoding,
batch_size,
alter=alter,
upsert=True,
not_null=not_null,
default=default,
encoding=encoding,
detect_types=detect_types,
analyze=analyze,
load_extension=load_extension,
Expand All @@ -1105,6 +1121,71 @@ def upsert(
raise click.ClickException(UNICODE_ERROR.format(ex))


# "bulk" command: run one parameterized SQL statement for each record parsed
# from FILE. Parsing is delegated to insert_upsert_implementation(), which
# switches to cursor.executemany() when bulk_sql= is supplied.
#
# NOTE: the function docstring below is left terse on purpose; Click uses it
# as the command's help text.
@cli.command()
@click.argument(
    "path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("sql")
@click.argument("file", type=click.File("rb"), required=True)
@import_options
@load_extension_option
def bulk(
    path,
    file,
    sql,
    flatten,
    nl,
    csv,
    tsv,
    lines,
    text,
    convert,
    imports,
    delimiter,
    quotechar,
    sniff,
    no_headers,
    encoding,
    load_extension,
):
    """
    Execute parameterized SQL against the provided list of documents.
    """
    try:
        # table/pk/batch_size/alter/upsert/not_null/default/detect_types are
        # insert-specific arguments; they are filled with neutral values here.
        # NOTE(review): presumably they are ignored once bulk_sql is set,
        # since that branch returns before insert_all() - confirm against the
        # full body of insert_upsert_implementation().
        insert_upsert_implementation(
            path=path,
            table=None,
            file=file,
            pk=None,
            flatten=flatten,
            nl=nl,
            csv=csv,
            tsv=tsv,
            lines=lines,
            text=text,
            convert=convert,
            imports=imports,
            delimiter=delimiter,
            quotechar=quotechar,
            sniff=sniff,
            no_headers=no_headers,
            encoding=encoding,
            batch_size=1,
            alter=False,
            upsert=False,
            not_null=set(),
            default={},
            detect_types=False,
            load_extension=load_extension,
            silent=False,
            bulk_sql=sql,
        )
    except UnicodeDecodeError as ex:
        # Same handling as the upsert command: report badly-encoded input as
        # a clean CLI error instead of a traceback.
        raise click.ClickException(UNICODE_ERROR.format(ex))
    except (sqlite3.OperationalError, sqlite3.IntegrityError) as e:
        # Surface SQL syntax errors and constraint violations as CLI errors.
        raise click.ClickException(str(e))


@cli.command(name="create-database")
@click.argument(
"path",
Expand Down
57 changes: 57 additions & 0 deletions tests/test_cli_bulk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from click.testing import CliRunner
from sqlite_utils import cli, Database
import pathlib
import pytest


@pytest.fixture
def test_db_and_path(tmpdir):
    """Create ``data.db`` under ``tmpdir`` with a two-row ``example`` table.

    Returns a ``(Database, path_string)`` tuple.
    """
    path = str(pathlib.Path(tmpdir) / "data.db")
    database = Database(path)
    seed_rows = [
        {"id": 1, "name": "One"},
        {"id": 2, "name": "Two"},
    ]
    database["example"].insert_all(seed_rows, pk="id")
    return database, path


def test_cli_bulk(test_db_and_path):
    """``bulk`` inserts one row per newline-delimited JSON record on stdin."""
    db, db_path = test_db_and_path
    runner = CliRunner()
    args = [
        "bulk",
        db_path,
        "insert into example (id, name) values (:id, :name)",
        "-",
        "--nl",
    ]
    stdin = '{"id": 3, "name": "Three"}\n{"id": 4, "name": "Four"}\n'
    result = runner.invoke(cli.cli, args, input=stdin)
    assert result.exit_code == 0, result.output
    # The two seeded rows plus the two rows supplied on stdin.
    expected_rows = [
        {"id": 1, "name": "One"},
        {"id": 2, "name": "Two"},
        {"id": 3, "name": "Three"},
        {"id": 4, "name": "Four"},
    ]
    assert expected_rows == list(db["example"].rows)


def test_cli_bulk_error(test_db_and_path):
    """Invalid SQL is reported as a Click error with exit code 1."""
    _, db_path = test_db_and_path
    runner = CliRunner()
    # "value" (instead of "values") is a deliberate SQL syntax error.
    args = [
        "bulk",
        db_path,
        "insert into example (id, name) value (:id, :name)",
        "-",
        "--nl",
    ]
    stdin = '{"id": 3, "name": "Three"}'
    result = runner.invoke(cli.cli, args, input=stdin)
    assert result.exit_code == 1
    assert result.output == 'Error: near "value": syntax error\n'

0 comments on commit 1291415

Please sign in to comment.