Skip to content

Commit

Permalink
New In-Db2 ML Demo Content (IBM#35)
Browse files Browse the repository at this point in the history
* ML Pipeline Deployment Demo
  • Loading branch information
adrianmahjour authored Mar 9, 2021
1 parent d9ee031 commit b208337
Show file tree
Hide file tree
Showing 8 changed files with 13,580 additions and 1 deletion.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Binary file not shown.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "97aed183-c9b5-488c-958e-0a38f1a0e88e"
},
"source": [
"# Inserting Training and Test Data into a Db2 Table"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "856ae9ae-ff30-4599-aa8a-2734cfca6d1b"
},
"outputs": [],
"source": [
"#Establishing Connection to Database\n",
"\n",
"import pandas as pd\n",
"import ibm_db_dbi\n",
"import ibm_db\n",
"from project_lib import Project \n",
"\n",
"# Define connection string and connect to database\n",
"project = Project.access()\n",
"LocalDB2_credentials = project.get_connection(name = \"CSSDB3\")\n",
"\n",
"bluedb_connection = ibm_db.connect(\"DATABASE={};HOSTNAME={};PORT={};PROTOCOL=TCPIP;UID={};PWD={}\".format(LocalDB2_credentials['database'],\n",
" LocalDB2_credentials['host'],\n",
" LocalDB2_credentials['port'],\n",
" LocalDB2_credentials['username'],\n",
" LocalDB2_credentials['password']),\"\",\"\")\n",
"dbi_bluedb_connection = ibm_db_dbi.Connection(bluedb_connection)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Insert Training Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "47cc7326-94cb-4b7e-98e0-efbf37c4973f"
},
"outputs": [],
"source": [
"#Reading in CSV file with raw data and preparing to write to DB2 \n",
"\n",
"train_data = pd.read_csv('/project_data/data_asset/customer_full_summary_latest.csv')\n",
"\n",
"train_data = test_data.where(pd.notnull(train_data),None)\n",
"\n",
"tuple_of_tuples = tuple([tuple(x) for x in train_data.values])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "19ae6527-d3f8-4622-b8e8-3acdcb617f3e"
},
"outputs": [],
"source": [
"#Creating string concatenations for sql query to write data \n",
"\n",
"type_mapping = {'int64': 'DOUBLE',\n",
" 'float64': 'DOUBLE',\n",
" 'object': 'VARCHAR(90)',\n",
" 'datetime64[ns]': 'VARCHAR(90)'\n",
" }\n",
"\n",
"value_string = ', '.join(['?' for i in range(len(train_data.columns))])\n",
"dtype_mapping_4table = [type_mapping[str(x)] for x in list(train_data.dtypes)]\n",
"table_creation_column_string = ', '.join('{0} {1} '.format(str(x),str(y)) for x,y in zip(train_data.columns,dtype_mapping_4table))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "48fde0cb-e315-4727-9f8a-11656d7deef2"
},
"outputs": [],
"source": [
"sql = 'CREATE TABLE DSE.CUST_SEG_DATA_TRAIN({});'.format(table_creation_column_string)\n",
"\n",
"stmt = ibm_db.prepare(bluedb_connection, sql)\n",
"\n",
"ibm_db.execute(stmt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "e6a427d2-dfd3-47ab-ac33-339b5025bc53"
},
"outputs": [],
"source": [
"value_string = ', '.join(['?' for i in range(len(train_data.columns))])\n",
"\n",
"sql = \"INSERT INTO DSE.CUST_SEG_DATA_TRAIN VALUES({})\".format(value_string)\n",
"\n",
"stmt = ibm_db.prepare(bluedb_connection, sql)\n",
"\n",
"ibm_db.execute_many(stmt, tuple_of_tuples)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "eb225a5b-30c8-4e30-abc0-ae2dd7e8e4fa"
},
"outputs": [],
"source": [
"# Validate successful data import\n",
"input_df = pd.read_sql('select * from DSE.CUST_SEG_DATA_TRAIN',con = dbi_bluedb_connection)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "3f87eb38-020b-4d04-9d7a-77040c9e6e71",
"scrolled": true
},
"outputs": [],
"source": [
"input_df.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "cceef222-6910-4a37-8452-84acf385295b",
"scrolled": true
},
"outputs": [],
"source": [
"input_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Insert Test Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "47cc7326-94cb-4b7e-98e0-efbf37c4973f"
},
"outputs": [],
"source": [
"#Reading in CSV file with raw data and preparing to write to DB2 \n",
"\n",
"test_data = pd.read_csv('/project_data/data_asset/test_data_10K.csv')\n",
"\n",
"test_data = test_data.where(pd.notnull(test_data),None)\n",
"\n",
"tuple_of_tuples = tuple([tuple(x) for x in test_data.values])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "19ae6527-d3f8-4622-b8e8-3acdcb617f3e"
},
"outputs": [],
"source": [
"#Creating string concatenations for sql query to write data \n",
"\n",
"type_mapping = {'int64': 'DOUBLE',\n",
" 'float64': 'DOUBLE',\n",
" 'object': 'VARCHAR(90)',\n",
" 'datetime64[ns]': 'VARCHAR(90)'\n",
" }\n",
"\n",
"value_string = ', '.join(['?' for i in range(len(test_data.columns))])\n",
"dtype_mapping_4table = [type_mapping[str(x)] for x in list(test_data.dtypes)]\n",
"table_creation_column_string = ', '.join('{0} {1} '.format(str(x),str(y)) for x,y in zip(test_data.columns,dtype_mapping_4table))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "48fde0cb-e315-4727-9f8a-11656d7deef2"
},
"outputs": [],
"source": [
"sql = 'CREATE TABLE DSE.CUST_SEG_DATA_TEST({});'.format(table_creation_column_string)\n",
"\n",
"stmt = ibm_db.prepare(bluedb_connection, sql)\n",
"\n",
"ibm_db.execute(stmt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "e6a427d2-dfd3-47ab-ac33-339b5025bc53"
},
"outputs": [],
"source": [
"value_string = ', '.join(['?' for i in range(len(test_data.columns))])\n",
"\n",
"sql = \"INSERT INTO DSE.CUST_SEG_DATA_TEST VALUES({})\".format(value_string)\n",
"\n",
"stmt = ibm_db.prepare(bluedb_connection, sql)\n",
"\n",
"ibm_db.execute_many(stmt, tuple_of_tuples)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "eb225a5b-30c8-4e30-abc0-ae2dd7e8e4fa"
},
"outputs": [],
"source": [
"# Validate successful data import\n",
"input_df = pd.read_sql('select * from DSE.CUST_SEG_DATA_TEST',con = dbi_bluedb_connection)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "3f87eb38-020b-4d04-9d7a-77040c9e6e71"
},
"outputs": [],
"source": [
"input_df.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "cceef222-6910-4a37-8452-84acf385295b"
},
"outputs": [],
"source": [
"input_df.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Loading

0 comments on commit b208337

Please sign in to comment.