Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add "fixed-column" option #81

Merged
merged 12 commits into from
Nov 18, 2021
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@ They will be populated with IDs that reference the new derived tables.
csvcol:dbcol(TYPE),...
--filename-column TEXT Add a column with this name and populate with
CSV file name
--fixed-column <TEXT TEXT>... Populate column with a fixed string
--fixed-column-int <TEXT INTEGER>...
Populate column with a fixed integer
--fixed-column-float <TEXT FLOAT>...
Populate column with a fixed float
--no-index-fks Skip adding index to foreign key columns
created using --extract-column (default is to
add them)
Expand Down
42 changes: 42 additions & 0 deletions csvs_to_sqlite/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,30 @@
help="Add a column with this name and populate with CSV file name",
default=None,
)
@click.option(
"fixed_columns",
"--fixed-column",
type=(str, str),
multiple=True,
help="Populate column with a fixed string",
default=None,
)
@click.option(
"fixed_columns_int",
"--fixed-column-int",
type=(str, int),
multiple=True,
help="Populate column with a fixed integer",
default=None,
)
@click.option(
"fixed_columns_float",
"--fixed-column-float",
type=(str, float),
multiple=True,
help="Populate column with a fixed float",
default=None,
)
@click.option(
"--no-index-fks",
"no_index_fks",
Expand Down Expand Up @@ -139,6 +163,9 @@ def cli(
index,
shape,
filename_column,
fixed_columns,
fixed_columns_int,
fixed_columns_float,
no_index_fks,
no_fulltext_fks,
just_strings,
Expand Down Expand Up @@ -176,6 +203,21 @@ def cli(
df[filename_column] = name
if shape:
shape += ",{}".format(filename_column)
if fixed_columns:
for colname, value in fixed_columns:
df[colname] = value
if shape:
shape += ",{}".format(colname)
if fixed_columns_int:
for colname, value in fixed_columns_int:
df[colname] = value
if shape:
shape += ",{}".format(colname)
if fixed_columns_float:
for colname, value in fixed_columns_float:
df[colname] = value
if shape:
shape += ",{}".format(colname)
sql_type_overrides = apply_shape(df, shape)
apply_dates_and_datetimes(df, date, datetime, datetime_format)
dataframes.append(df)
Expand Down
82 changes: 82 additions & 0 deletions tests/test_csvs_to_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,88 @@ def test_filename_column_with_shape():
).fetchall()


def test_fixed_column():
    """
    Tests that all three fixed_column options are handled correctly.

    Invokes the CLI with two ``--fixed-column`` options, one
    ``--fixed-column-int`` and one ``--fixed-column-float``, then checks
    that the resulting table has the four extra columns (typed TEXT, TEXT,
    INTEGER and REAL) after the CSV's own columns and that every row
    carries the fixed values.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        # Close the CSV explicitly so its contents are flushed to disk
        # before the CLI reads it, rather than relying on garbage collection.
        with open("test.csv", "w") as csv_file:
            csv_file.write(CSV)
        result = runner.invoke(
            cli.cli,
            [
                "test.csv",
                "test.db",
                "--fixed-column",
                "col1",
                "foo",
                "--fixed-column",
                "col2",
                "bar",
                "--fixed-column-int",
                "col3",
                "1",
                "--fixed-column-float",
                "col4",
                "1.1",
            ],
        )
        assert result.exit_code == 0
        assert result.output.strip().endswith("Created test.db from 1 CSV file")
        conn = sqlite3.connect("test.db")
        try:
            assert [
                (0, "county", "TEXT", 0, None, 0),
                (1, "precinct", "INTEGER", 0, None, 0),
                (2, "office", "TEXT", 0, None, 0),
                (3, "district", "INTEGER", 0, None, 0),
                (4, "party", "TEXT", 0, None, 0),
                (5, "candidate", "TEXT", 0, None, 0),
                (6, "votes", "INTEGER", 0, None, 0),
                (7, "col1", "TEXT", 0, None, 0),
                (8, "col2", "TEXT", 0, None, 0),
                (9, "col3", "INTEGER", 0, None, 0),
                (10, "col4", "REAL", 0, None, 0),
            ] == list(conn.execute("PRAGMA table_info(test)"))
            rows = conn.execute("select * from test").fetchall()
            assert [
                ("Yolo", 100001, "President", None, "LIB", "Gary Johnson", 41, "foo", "bar", 1, 1.1),
                ("Yolo", 100001, "President", None, "PAF", "Gloria Estela La Riva", 8, "foo", "bar", 1, 1.1),
                ("Yolo", 100001, "Proposition 51", None, None, "No", 398, "foo", "bar", 1, 1.1),
                ("Yolo", 100001, "Proposition 51", None, None, "Yes", 460, "foo", "bar", 1, 1.1),
                ("Yolo", 100001, "State Assembly", 7, "DEM", "Kevin McCarty", 572, "foo", "bar", 1, 1.1),
                ("Yolo", 100001, "State Assembly", 7, "REP", "Ryan K. Brown", 291, "foo", "bar", 1, 1.1),
            ] == rows
        finally:
            # Release the database file before the isolated directory is
            # cleaned up on exit from the ``with`` block.
            conn.close()


def test_fixed_column_with_shape():
    """
    Test that fixed_column works with shape.

    Combines two ``--fixed-column`` options with a ``--shape`` that renames
    ``county`` to ``Cty`` and ``votes`` to ``Vts``, then checks that the
    fixed columns are still present alongside the reshaped ones.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        # Close the CSV explicitly so its contents are flushed to disk
        # before the CLI reads it, rather than relying on garbage collection.
        with open("test.csv", "w") as csv_file:
            csv_file.write(CSV)
        result = runner.invoke(
            cli.cli,
            [
                "test.csv",
                "test.db",
                "--fixed-column",
                "col1",
                "foo",
                "--fixed-column",
                "col2",
                "bar",
                "--shape",
                "county:Cty,votes:Vts",
            ],
        )
        assert result.exit_code == 0
        conn = sqlite3.connect("test.db")
        try:
            assert [("Yolo", 41, "foo", "bar")] == conn.execute(
                "select Cty, Vts, col1, col2 from test limit 1"
            ).fetchall()
        finally:
            # Release the database file before the isolated directory is
            # cleaned up on exit from the ``with`` block.
            conn.close()


def test_shape_with_extract_columns():
runner = CliRunner()
with runner.isolated_filesystem():
Expand Down