diff --git a/README.md b/README.md
index 31776b9..f3551ae 100644
--- a/README.md
+++ b/README.md
@@ -148,6 +148,11 @@ They will be populated with IDs that reference the new derived tables.
                                   csvcol:dbcol(TYPE),...
   --filename-column TEXT          Add a column with this name and populate with
                                   CSV file name
+  --fixed-column <TEXT TEXT>...   Populate column with a fixed string
+  --fixed-column-int <TEXT INTEGER>...
+                                  Populate column with a fixed integer
+  --fixed-column-float <TEXT FLOAT>...
+                                  Populate column with a fixed float
   --no-index-fks                  Skip adding index to foreign key columns
                                   created using --extract-column (default is to
                                   add them)
diff --git a/csvs_to_sqlite/cli.py b/csvs_to_sqlite/cli.py
index 10b7d30..310f0f3 100644
--- a/csvs_to_sqlite/cli.py
+++ b/csvs_to_sqlite/cli.py
@@ -104,6 +104,30 @@
     help="Add a column with this name and populate with CSV file name",
     default=None,
 )
+@click.option(
+    "fixed_columns",
+    "--fixed-column",
+    type=(str, str),
+    multiple=True,
+    help="Populate column with a fixed string",
+    default=None,
+)
+@click.option(
+    "fixed_columns_int",
+    "--fixed-column-int",
+    type=(str, int),
+    multiple=True,
+    help="Populate column with a fixed integer",
+    default=None,
+)
+@click.option(
+    "fixed_columns_float",
+    "--fixed-column-float",
+    type=(str, float),
+    multiple=True,
+    help="Populate column with a fixed float",
+    default=None,
+)
 @click.option(
     "--no-index-fks",
     "no_index_fks",
@@ -139,6 +163,9 @@ def cli(
     index,
     shape,
     filename_column,
+    fixed_columns,
+    fixed_columns_int,
+    fixed_columns_float,
     no_index_fks,
     no_fulltext_fks,
     just_strings,
@@ -176,6 +203,14 @@ def cli(
                 df[filename_column] = name
                 if shape:
                     shape += ",{}".format(filename_column)
+            # The three --fixed-column* options differ only in how click
+            # parses the value, so they share one code path. "or ()" keeps
+            # an unset (None) option from breaking the iteration.
+            for pairs in (fixed_columns, fixed_columns_int, fixed_columns_float):
+                for colname, value in pairs or ():
+                    df[colname] = value
+                    if shape:
+                        shape += ",{}".format(colname)
             sql_type_overrides = apply_shape(df, shape)
             apply_dates_and_datetimes(df, date, datetime, datetime_format)
             dataframes.append(df)
diff --git a/tests/test_csvs_to_sqlite.py b/tests/test_csvs_to_sqlite.py
index e4c5c44..2d685ff 100644
--- a/tests/test_csvs_to_sqlite.py
+++ b/tests/test_csvs_to_sqlite.py
@@ -357,6 +357,90 @@ def test_filename_column_with_shape():
         ).fetchall()
 
 
+def test_fixed_column():
+    """
+    Tests that all three fixed_column options are handled correctly.
+    """
+    runner = CliRunner()
+    with runner.isolated_filesystem():
+        with open("test.csv", "w") as fp:
+            fp.write(CSV)
+        result = runner.invoke(
+            cli.cli,
+            [
+                "test.csv",
+                "test.db",
+                "--fixed-column",
+                "col1",
+                "foo",
+                "--fixed-column",
+                "col2",
+                "bar",
+                "--fixed-column-int",
+                "col3",
+                "1",
+                "--fixed-column-float",
+                "col4",
+                "1.1",
+            ],
+        )
+        assert result.exit_code == 0
+        assert result.output.strip().endswith("Created test.db from 1 CSV file")
+        conn = sqlite3.connect("test.db")
+        assert [
+            (0, "county", "TEXT", 0, None, 0),
+            (1, "precinct", "INTEGER", 0, None, 0),
+            (2, "office", "TEXT", 0, None, 0),
+            (3, "district", "INTEGER", 0, None, 0),
+            (4, "party", "TEXT", 0, None, 0),
+            (5, "candidate", "TEXT", 0, None, 0),
+            (6, "votes", "INTEGER", 0, None, 0),
+            (7, "col1", "TEXT", 0, None, 0),
+            (8, "col2", "TEXT", 0, None, 0),
+            (9, "col3", "INTEGER", 0, None, 0),
+            (10, "col4", "REAL", 0, None, 0),
+        ] == list(conn.execute("PRAGMA table_info(test)"))
+        rows = conn.execute("select * from test").fetchall()
+        assert [
+            ("Yolo", 100001, "President", None, "LIB", "Gary Johnson", 41, "foo", "bar", 1, 1.1),
+            ("Yolo", 100001, "President", None, "PAF", "Gloria Estela La Riva", 8, "foo", "bar", 1, 1.1),
+            ("Yolo", 100001, "Proposition 51", None, None, "No", 398, "foo", "bar", 1, 1.1),
+            ("Yolo", 100001, "Proposition 51", None, None, "Yes", 460, "foo", "bar", 1, 1.1),
+            ("Yolo", 100001, "State Assembly", 7, "DEM", "Kevin McCarty", 572, "foo", "bar", 1, 1.1),
+            ("Yolo", 100001, "State Assembly", 7, "REP", "Ryan K. Brown", 291, "foo", "bar", 1, 1.1),
+        ] == rows
+
+
+def test_fixed_column_with_shape():
+    """
+    Test that fixed_column works with shape.
+    """
+    runner = CliRunner()
+    with runner.isolated_filesystem():
+        with open("test.csv", "w") as fp:
+            fp.write(CSV)
+        result = runner.invoke(
+            cli.cli,
+            [
+                "test.csv",
+                "test.db",
+                "--fixed-column",
+                "col1",
+                "foo",
+                "--fixed-column",
+                "col2",
+                "bar",
+                "--shape",
+                "county:Cty,votes:Vts",
+            ],
+        )
+        assert result.exit_code == 0
+        conn = sqlite3.connect("test.db")
+        assert [("Yolo", 41, "foo", "bar")] == conn.execute(
+            "select Cty, Vts, col1, col2 from test limit 1"
+        ).fetchall()
+
+
 def test_shape_with_extract_columns():
     runner = CliRunner()
     with runner.isolated_filesystem():