-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* DRAFT: new datasets, analysis * data: add palmerspenguins data * restore analysis doc * fix: siuba data getattr should use AttributeError * feat(data): add band_members and related data * docs: fix siuba data readme * ci: update bigquery test versions
- Loading branch information
Showing
12 changed files
with
86 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
include siuba/data/*.csv | ||
include siuba/data/*.csv.gz | ||
include siuba/spec/series.yml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# siuba datasets | ||
|
||
| name | R package | | ||
| ---- | --------- | | ||
| mtcars | [datasets] | | ||
| penguins | [palmerpenguins] | | ||
| penguins_raw | [palmerpenguins] | | ||
| band_members | [dplyr] | | ||
| band_instruments | [dplyr] | | ||
| band_instruments2 | [dplyr] | | ||
|
||
[datasets]: https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html | ||
[dplyr]: https://dplyr.tidyverse.org/ | ||
[palmerpenguins]: https://github.com/allisonhorst/palmerpenguins/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,57 @@ | ||
import pandas as pd | ||
import pkg_resources | ||
# Names of the datasets this module exposes. The order is kept as-is because
# it is what `__dir__` reports and what `__getattr__` validates against.
__all__ = [
    "mtcars", "cars",
    "penguins", "penguins_raw",
    "cars_sql",
    "band_members", "band_instruments", "band_instruments2",
]
|
||
# mtcars ---------------------------------------------------------------------- | ||
_fname = pkg_resources.resource_filename("siuba.data", "mtcars.csv") | ||
def __dir__():
    """Return the dataset names listed in ``__all__`` for ``dir()`` on this module."""
    return __all__
|
||
mtcars = pd.read_csv(_fname) | ||
mtcars.__doc__ = """ | ||
mtcars data. | ||
|
||
Source: Henderson and Velleman (1981), Building multiple regression models interactively. Biometrics, 37, 391–411. | ||
def _load_data_csv_gz(name): | ||
import pandas as pd | ||
import pkg_resources | ||
|
||
--- Original DataFrame docs below --- | ||
""" + mtcars.__doc__ | ||
fname = pkg_resources.resource_filename("siuba.data", f"{name}.csv.gz") | ||
return pd.read_csv(fname) | ||
|
||
|
||
# cars ------------------------------------------------------------------------ | ||
cars = mtcars[["cyl", "mpg", "hp"]] | ||
def _load_data_csv(name):
    """Read the uncompressed dataset ``<name>.csv`` shipped in ``siuba.data``.

    Parameters
    ----------
    name:
        Dataset name without the ``.csv`` extension.

    Returns
    -------
    The result of ``pandas.read_csv`` on the packaged file.
    """
    # Imported at call time rather than at module import time.
    import pandas
    from pkg_resources import resource_filename

    csv_path = resource_filename("siuba.data", f"{name}.csv")
    return pandas.read_csv(csv_path)
|
||
# cars_sql -------------------------------------------------------------------- | ||
import siuba.sql.utils as _sql_utils | ||
from siuba.sql import LazyTbl as _LazyTbl | ||
cars_sql = _LazyTbl( | ||
_sql_utils.mock_sqlalchemy_engine("postgresql"), | ||
"cars", | ||
["cyl", "mpg", "hp"] | ||
) | ||
|
||
def _load_data_cars_sql():
    """Build and return the ``cars_sql`` example table.

    The table is a ``siuba.sql.LazyTbl`` created over a mock "postgresql"
    SQLAlchemy engine, with the arguments ``"cars"`` and
    ``["cyl", "mpg", "hp"]`` (presumably the table name and its columns --
    confirm against ``LazyTbl``'s signature).

    Returns
    -------
    The constructed ``LazyTbl`` instance.
    """
    # siuba's SQL pieces are imported at call time rather than at module
    # import time.
    import siuba.sql.utils as _sql_utils
    from siuba.sql import LazyTbl as _LazyTbl

    cars_sql = _LazyTbl(
        _sql_utils.mock_sqlalchemy_engine("postgresql"),
        "cars",
        ["cyl", "mpg", "hp"],
    )

    # Without this return the function yields None, so accessing
    # `cars_sql` through the module-level __getattr__ would give None.
    return cars_sql
|
||
|
||
def __getattr__(name):
    """Load a dataset when it is accessed as a module attribute (PEP 562).

    Parameters
    ----------
    name:
        Attribute being looked up on the module.

    Returns
    -------
    The loaded dataset. Nothing is cached here, so every lookup builds a
    fresh object.

    Raises
    ------
    AttributeError
        If ``name`` is not listed in ``__all__``.
    """
    if name not in __all__:
        raise AttributeError(f"No dataset named: {name}")

    if name == "cars":
        # `cars` is a three-column view of the mtcars data.
        mtcars_full = _load_data_csv_gz("mtcars")
        return mtcars_full[["cyl", "mpg", "hp"]]

    if name == "cars_sql":
        return _load_data_cars_sql()

    # The band_* tables are stored as plain .csv files; every remaining
    # dataset is stored gzip-compressed.
    plain_csv_names = ("band_members", "band_instruments", "band_instruments2")
    if name in plain_csv_names:
        return _load_data_csv(name)

    return _load_data_csv_gz(name)
|
||
# cars_sql -------------------------------------------------------------------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
name,plays | ||
John,guitar | ||
Paul,bass | ||
Keith,guitar |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
artist,plays | ||
John,guitar | ||
Paul,bass | ||
Keith,guitar |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
name,band | ||
Mick,Stones | ||
John,Beatles | ||
Paul,Beatles |
This file was deleted.
Oops, something went wrong.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import importlib | ||
|
||
def test_data_imports():
    """Smoke test: every name in ``siuba.data.__all__`` can be fetched without error."""
    import siuba.data

    # `import *` is not allowed inside a function, so look each dataset up
    # by name instead.
    for dataset_name in siuba.data.__all__:
        getattr(siuba.data, dataset_name)