diff --git a/exchange_pairs.ipynb b/exchange_pairs.ipynb
index f866f56..d708f42 100644
--- a/exchange_pairs.ipynb
+++ b/exchange_pairs.ipynb
@@ -455,13 +455,6 @@
"df_keep"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -1837,44 +1830,90 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# 12/28"
+ "# 12/29"
]
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "df_matched = pd.read_stata('sample_matched_demean.dta')\n",
- "df_unmatched = pd.read_stata('sample_unmatched_demean.dta')"
+ "# num is the number of random subsamples (i.e. the number of simulations)\n",
+ "# ratio is the fraction of unique loan_ids drawn per subsample (0.1 = 10%)\n",
+ "def Simulation(num, ratio, df_matched, df_unmatched):\n",
+ "    \"\"\"Estimate beta on `num` random loan_id subsamples; print the 5%/95% quantiles and return one row per draw.\"\"\"\n",
+ "    loan_id = df_unmatched[\"loan_id\"].unique()  # loop-invariant, so computed once\n",
+ "    n_draw = round(loan_id.shape[0] * ratio)\n",
+ "    bounds = [(1, 1.000000001), (-100, 100), (-100, 100), (-100, 100), (-100, 100)] # fix beta_1 = 1\n",
+ "    estimates = []\n",
+ "    for i in range(0, num):\n",
+ "        sample_loan_id = np.random.choice(loan_id, n_draw, replace = False)\n",
+ "        df_unmatched_sample = df_unmatched.loc[df_unmatched[\"loan_id\"].isin(sample_loan_id)]\n",
+ "        df_matched_sample = df_matched.loc[df_matched[\"loan_id\"].isin(sample_loan_id)]\n",
+ "        \n",
+ "        df_keep = Exchange_pairs(df_matched_sample, df_unmatched_sample)\n",
+ "        # Hand this draw's pairs to the objective; NOTE(review): assumes the notebook-level objectfunc is (beta, df), as in Fox_func - confirm.\n",
+ "        result = differential_evolution(objectfunc, bounds, args = (df_keep,))\n",
+ "        estimates.append(result.x)\n",
+ "    df_result = pd.DataFrame(estimates)  # one row per draw, built once rather than appended per iteration\n",
+ "    print(\"The 5% quantile of parameters are\")\n",
+ "    print(df_result.quantile(0.05))\n",
+ "    print(\"The 95% quantile of parameters are\")\n",
+ "    print(df_result.quantile(0.95))\n",
+ "    return df_result\n",
+ "\n",
+ "np.random.seed(0)  # seed NumPy's global RNG so the subsample draws are repeatable across runs\n",
+ "t1 = time.time()\n",
+ "df_result = Simulation(10, 0.1, df_matched, df_unmatched)\n",
+ "t2 = time.time()\n",
+ "print(\"Simulation time: \", t2-t1)\n",
+ "df_result"
]
},
{
- "cell_type": "code",
- "execution_count": 31,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "df_matched = df_matched[df_matched[\"USPS_ZIP_PREF_STATE\"] == \"NY\"]\n",
- "df_matched = df_matched[df_matched[\"yearapproved\"] == 2021].iloc[:,: 7]\n",
- "\n",
- "df_unmatched = df_unmatched[df_unmatched[\"USPS_ZIP_PREF_STATE\"] == \"NY\"]\n",
- "df_unmatched = df_unmatched[df_unmatched[\"yearapproved\"] == 2021].iloc[:,: 7] "
+ "# 1/3"
]
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 93,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py:4312: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " errors=errors,\n"
+ ]
+ }
+ ],
"source": [
- "df_keep = Exchange_pairs(df_matched, df_unmatched)"
+ "df_sample = pd.read_stata('sample.dta')\n",
+ "df_matched = df_sample[df_sample[\"match\"] == 1]\n",
+ "df_unmatched = df_sample[df_sample[\"match\"] == 0]\n",
+ "\n",
+ "# Drop by reassignment: both frames are slices of df_sample, and an in-place drop on a slice triggers SettingWithCopyWarning.\n",
+ "df_matched = df_matched.drop([\"match\"], axis = 1)\n",
+ "df_unmatched = df_unmatched.drop([\"match\"], axis = 1)\n",
+ "df_matched = df_matched[df_matched[\"USPS_ZIP_PREF_STATE\"] == \"NY\"]\n",
+ "df_matched = df_matched[df_matched[\"yearapproved\"] == 2021].iloc[:,: 7]\n",
+ "df_unmatched = df_unmatched[df_unmatched[\"USPS_ZIP_PREF_STATE\"] == \"NY\"]\n",
+ "df_unmatched = df_unmatched[df_unmatched[\"yearapproved\"] == 2021].iloc[:,: 7]\n",
+ "\n",
+ "df_exchange_pairs = Exchange_pairs(df_matched, df_unmatched)"
]
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 253,
"metadata": {},
"outputs": [
{
@@ -1898,53 +1937,89 @@
" \n",
" \n",
" | \n",
- " value1 | \n",
- " value2 | \n",
- " value3 | \n",
- " value4 | \n",
- " value5 | \n",
+ " lender_id | \n",
+ " loan_id | \n",
+ " match | \n",
+ " var1 | \n",
+ " var2 | \n",
+ " var3 | \n",
+ " var4 | \n",
+ " var5 | \n",
+ " USPS_ZIP_PREF_CITY | \n",
+ " USPS_ZIP_PREF_STATE | \n",
+ " yearapproved | \n",
"
\n",
" \n",
"
\n",
" \n",
" 0 | \n",
+ " 1339.0 | \n",
+ " 27917.0 | \n",
" 0.0 | \n",
- " -0.062622 | \n",
- " 282140.000000 | \n",
- " -7.450581e-09 | \n",
" 0.0 | \n",
+ " 17.322744 | \n",
+ " 376543.375000 | \n",
+ " -0.024180 | \n",
+ " 0.045413 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
" 1 | \n",
+ " 1339.0 | \n",
+ " 97252.0 | \n",
" 0.0 | \n",
- " -134.978088 | \n",
- " -112856.000000 | \n",
- " 0.000000e+00 | \n",
" 0.0 | \n",
+ " 17.322744 | \n",
+ " 399935.531250 | \n",
+ " 0.091452 | \n",
+ " 0.045413 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
" 2 | \n",
+ " 1339.0 | \n",
+ " 78177.0 | \n",
" 0.0 | \n",
- " -57.051239 | \n",
- " 14934.007812 | \n",
- " 7.450581e-09 | \n",
" 0.0 | \n",
+ " 17.322744 | \n",
+ " 376543.375000 | \n",
+ " 0.091452 | \n",
+ " -0.070219 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
" 3 | \n",
+ " 3402.0 | \n",
+ " 78177.0 | \n",
" 0.0 | \n",
- " -11.757538 | \n",
- " 13364.000000 | \n",
- " 7.450581e-09 | \n",
" 0.0 | \n",
+ " 10.350215 | \n",
+ " 379006.218750 | \n",
+ " 0.091452 | \n",
+ " -0.070219 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
" 4 | \n",
+ " 3402.0 | \n",
+ " 27917.0 | \n",
" 0.0 | \n",
- " 6.960754 | \n",
- " 739.999023 | \n",
- " -7.450581e-09 | \n",
" 0.0 | \n",
+ " 10.350215 | \n",
+ " 379006.218750 | \n",
+ " -0.024180 | \n",
+ " 0.045413 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
" ... | \n",
@@ -1953,125 +2028,130 @@
" ... | \n",
" ... | \n",
" ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
"
\n",
" \n",
- " 101681 | \n",
+ " 4117067 | \n",
+ " 1631.0 | \n",
+ " 52924.0 | \n",
" 0.0 | \n",
- " 6.236145 | \n",
- " 12312.000000 | \n",
- " 0.000000e+00 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
+ " 255598.812500 | \n",
+ " -0.139812 | \n",
+ " 0.045413 | \n",
+ " KETCHIKAN | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 101682 | \n",
+ " 4117068 | \n",
+ " 3923.0 | \n",
+ " 34122.0 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
- " 14364.000000 | \n",
- " 0.000000e+00 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
+ " -32325.736328 | \n",
+ " 1.069300 | \n",
+ " -0.347326 | \n",
+ " CRAIG | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 101683 | \n",
- " 0.0 | \n",
- " -162.729980 | \n",
- " 20072.031250 | \n",
- " 0.000000e+00 | \n",
+ " 4117069 | \n",
+ " 1631.0 | \n",
+ " 34122.0 | \n",
+ " 1.0 | \n",
" 0.0 | \n",
+ " 65.689995 | \n",
+ " 163466.203125 | \n",
+ " -0.139812 | \n",
+ " 0.045413 | \n",
+ " CRAIG | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 101684 | \n",
- " 0.0 | \n",
- " -1.490173 | \n",
- " -3750.031250 | \n",
- " 0.000000e+00 | \n",
+ " 4117070 | \n",
+ " 1631.0 | \n",
+ " 34127.0 | \n",
+ " 1.0 | \n",
" 0.0 | \n",
+ " 61.713989 | \n",
+ " 370764.562500 | \n",
+ " -0.139812 | \n",
+ " 0.045413 | \n",
+ " WRANGELL | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 101685 | \n",
- " 0.0 | \n",
- " -1.490173 | \n",
- " -3750.031250 | \n",
- " 0.000000e+00 | \n",
+ " 4117071 | \n",
+ " 1631.0 | \n",
+ " 34131.0 | \n",
+ " 1.0 | \n",
" 0.0 | \n",
+ " 61.713989 | \n",
+ " 439864.031250 | \n",
+ " -0.139812 | \n",
+ " -0.070219 | \n",
+ " WRANGELL | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
"\n",
- "101686 rows × 5 columns
\n",
+ "4117072 rows × 11 columns
\n",
""
],
"text/plain": [
- " value1 value2 value3 value4 value5\n",
- "0 0.0 -0.062622 282140.000000 -7.450581e-09 0.0\n",
- "1 0.0 -134.978088 -112856.000000 0.000000e+00 0.0\n",
- "2 0.0 -57.051239 14934.007812 7.450581e-09 0.0\n",
- "3 0.0 -11.757538 13364.000000 7.450581e-09 0.0\n",
- "4 0.0 6.960754 739.999023 -7.450581e-09 0.0\n",
- "... ... ... ... ... ...\n",
- "101681 0.0 6.236145 12312.000000 0.000000e+00 0.0\n",
- "101682 0.0 0.000000 14364.000000 0.000000e+00 0.0\n",
- "101683 0.0 -162.729980 20072.031250 0.000000e+00 0.0\n",
- "101684 0.0 -1.490173 -3750.031250 0.000000e+00 0.0\n",
- "101685 0.0 -1.490173 -3750.031250 0.000000e+00 0.0\n",
+ " lender_id loan_id match var1 var2 var3 var4 \\\n",
+ "0 1339.0 27917.0 0.0 0.0 17.322744 376543.375000 -0.024180 \n",
+ "1 1339.0 97252.0 0.0 0.0 17.322744 399935.531250 0.091452 \n",
+ "2 1339.0 78177.0 0.0 0.0 17.322744 376543.375000 0.091452 \n",
+ "3 3402.0 78177.0 0.0 0.0 10.350215 379006.218750 0.091452 \n",
+ "4 3402.0 27917.0 0.0 0.0 10.350215 379006.218750 -0.024180 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "4117067 1631.0 52924.0 0.0 0.0 0.000000 255598.812500 -0.139812 \n",
+ "4117068 3923.0 34122.0 0.0 0.0 0.000000 -32325.736328 1.069300 \n",
+ "4117069 1631.0 34122.0 1.0 0.0 65.689995 163466.203125 -0.139812 \n",
+ "4117070 1631.0 34127.0 1.0 0.0 61.713989 370764.562500 -0.139812 \n",
+ "4117071 1631.0 34131.0 1.0 0.0 61.713989 439864.031250 -0.139812 \n",
"\n",
- "[101686 rows x 5 columns]"
+ " var5 USPS_ZIP_PREF_CITY USPS_ZIP_PREF_STATE yearapproved \n",
+ "0 0.045413 AGAWAM MA 2020.0 \n",
+ "1 0.045413 AGAWAM MA 2020.0 \n",
+ "2 -0.070219 AGAWAM MA 2020.0 \n",
+ "3 -0.070219 AGAWAM MA 2020.0 \n",
+ "4 0.045413 AGAWAM MA 2020.0 \n",
+ "... ... ... ... ... \n",
+ "4117067 0.045413 KETCHIKAN AK 2020.0 \n",
+ "4117068 -0.347326 CRAIG AK 2020.0 \n",
+ "4117069 0.045413 CRAIG AK 2020.0 \n",
+ "4117070 0.045413 WRANGELL AK 2020.0 \n",
+ "4117071 -0.070219 WRANGELL AK 2020.0 \n",
+ "\n",
+ "[4117072 rows x 11 columns]"
]
},
- "execution_count": 33,
+ "execution_count": 253,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df_keep"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "def Exchange_pairs(df_matched, df_unmatched):\n",
- " # t1 = time.time()\n",
- " \n",
- " df_matched_column = df_matched.columns\n",
- " df_matched.columns = df_matched_column + '1m'\n",
- " df_unmatched.columns = df_unmatched.columns.str.replace('lender_id', 'lender_id1m')\n",
- " df1 = pd.merge(df_matched, df_unmatched, on = 'lender_id1m', how = 'inner')\n",
- " \n",
- " l = df1.columns[:-6].append([df1.columns[-6:] + '1um'])\n",
- " df1.columns = l\n",
- " df_matched.columns = df_matched_column\n",
- " df2 = pd.merge(df_matched, df1, left_on = 'loan_id', right_on = 'loan_id1um', how = 'inner') \n",
- " \n",
- " ll = (df2.columns[:7]+'2m').append(df2.columns[7:])\n",
- " df2.columns = ll\n",
- " df_unmatched.columns = df_unmatched.columns.str.replace('lender_id1m', 'lender_id')\n",
- " df3 = pd.merge(df_unmatched, df2, left_on = ['lender_id','loan_id'], right_on = ['lender_id2m','loan_id1m'], how = 'inner')\n",
- " lll = (df3.columns[:7]+'2um').append(df3.columns[7:])\n",
- " df3.columns = lll\n",
- " \n",
- " df_keep = pd.DataFrame()\n",
- " for i in range(1, 6):\n",
- " name = \"value\" + str(i)\n",
- " df_keep[name] = df3[\"var\"+str(i)+\"1m\"] + df3[\"var\"+str(i)+\"2m\"] - df3[\"var\"+str(i)+\"1um\"] - df3[\"var\"+str(i)+\"2um\"]\n",
- " # t2 = time.time()\n",
- " # print(\"Running time: \", t2-t1)\n",
- " return df_keep"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [],
- "source": [
- "df_keep = Exchange_pairs(df_matched, df_unmatched)"
+ "df_sample"
]
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 94,
"metadata": {},
"outputs": [
{
@@ -2097,6 +2177,7 @@
" | \n",
" value1 | \n",
" value2 | \n",
+ " value3 | \n",
" value4 | \n",
" value5 | \n",
" \n",
@@ -2105,35 +2186,40 @@
" \n",
" 0 | \n",
" 0.0 | \n",
- " -0.062622 | \n",
+ " -0.062592 | \n",
+ " 282140.000000 | \n",
" -7.450581e-09 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0 | \n",
- " -134.978088 | \n",
+ " -134.978073 | \n",
+ " -112856.000000 | \n",
" 0.000000e+00 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
- " -57.051239 | \n",
+ " -57.051155 | \n",
+ " 14934.007812 | \n",
" 7.450581e-09 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0 | \n",
- " -11.757538 | \n",
+ " -11.757530 | \n",
+ " 13364.000000 | \n",
" 7.450581e-09 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
- " 6.960754 | \n",
+ " 6.960756 | \n",
+ " 739.999023 | \n",
" -7.450581e-09 | \n",
" 0.0 | \n",
"
\n",
@@ -2143,11 +2229,13 @@
" ... | \n",
" ... | \n",
" ... | \n",
+ " ... | \n",
" \n",
" \n",
" 101681 | \n",
" 0.0 | \n",
- " 6.236145 | \n",
+ " 6.236197 | \n",
+ " 12312.000000 | \n",
" 0.000000e+00 | \n",
" 0.0 | \n",
"
\n",
@@ -2155,89 +2243,98 @@
" 101682 | \n",
" 0.0 | \n",
" 0.000000 | \n",
+ " 14364.000000 | \n",
" 0.000000e+00 | \n",
" 0.0 | \n",
" \n",
" \n",
" 101683 | \n",
" 0.0 | \n",
- " -162.729980 | \n",
+ " -162.729996 | \n",
+ " 20072.031250 | \n",
" 0.000000e+00 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 101684 | \n",
" 0.0 | \n",
- " -1.490173 | \n",
+ " -1.490269 | \n",
+ " -3750.031250 | \n",
" 0.000000e+00 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 101685 | \n",
" 0.0 | \n",
- " -1.490173 | \n",
+ " -1.490270 | \n",
+ " -3750.031250 | \n",
" 0.000000e+00 | \n",
" 0.0 | \n",
"
\n",
" \n",
"\n",
- "101686 rows × 4 columns
\n",
+ "101686 rows × 5 columns
\n",
""
],
"text/plain": [
- " value1 value2 value4 value5\n",
- "0 0.0 -0.062622 -7.450581e-09 0.0\n",
- "1 0.0 -134.978088 0.000000e+00 0.0\n",
- "2 0.0 -57.051239 7.450581e-09 0.0\n",
- "3 0.0 -11.757538 7.450581e-09 0.0\n",
- "4 0.0 6.960754 -7.450581e-09 0.0\n",
- "... ... ... ... ...\n",
- "101681 0.0 6.236145 0.000000e+00 0.0\n",
- "101682 0.0 0.000000 0.000000e+00 0.0\n",
- "101683 0.0 -162.729980 0.000000e+00 0.0\n",
- "101684 0.0 -1.490173 0.000000e+00 0.0\n",
- "101685 0.0 -1.490173 0.000000e+00 0.0\n",
+ " value1 value2 value3 value4 value5\n",
+ "0 0.0 -0.062592 282140.000000 -7.450581e-09 0.0\n",
+ "1 0.0 -134.978073 -112856.000000 0.000000e+00 0.0\n",
+ "2 0.0 -57.051155 14934.007812 7.450581e-09 0.0\n",
+ "3 0.0 -11.757530 13364.000000 7.450581e-09 0.0\n",
+ "4 0.0 6.960756 739.999023 -7.450581e-09 0.0\n",
+ "... ... ... ... ... ...\n",
+ "101681 0.0 6.236197 12312.000000 0.000000e+00 0.0\n",
+ "101682 0.0 0.000000 14364.000000 0.000000e+00 0.0\n",
+ "101683 0.0 -162.729996 20072.031250 0.000000e+00 0.0\n",
+ "101684 0.0 -1.490269 -3750.031250 0.000000e+00 0.0\n",
+ "101685 0.0 -1.490270 -3750.031250 0.000000e+00 0.0\n",
"\n",
- "[101686 rows x 4 columns]"
+ "[101686 rows x 5 columns]"
]
},
- "execution_count": 14,
+ "execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df_keep.drop([\"value3\"], axis = 1, inplace = True) \n",
- "df_keep"
+ "df_exchange_pairs"
]
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 181,
"metadata": {},
"outputs": [],
"source": [
- "def Fox_func(path1, path2, num, ratio):\n",
- "\n",
- " df_matched = pd.read_stata(path1)\n",
- " df_unmatched = pd.read_stata(path2)\n",
+ "def Fox_func(num, ratio):\n",
" \n",
+ " df_sample = pd.read_stata('sample.dta')\n",
+ " df_matched = df_sample[df_sample[\"match\"] == 1]\n",
+ " df_unmatched = df_sample[df_sample[\"match\"] == 0]\n",
+ " df_matched.drop([\"match\"], axis = 1, inplace = True) \n",
+ " df_unmatched.drop([\"match\"], axis = 1, inplace = True) \n",
" df_matched = df_matched[df_matched[\"USPS_ZIP_PREF_STATE\"] == \"NY\"]\n",
" df_matched = df_matched[df_matched[\"yearapproved\"] == 2020].iloc[:,: 7]\n",
- "\n",
" df_unmatched = df_unmatched[df_unmatched[\"USPS_ZIP_PREF_STATE\"] == \"NY\"]\n",
" df_unmatched = df_unmatched[df_unmatched[\"yearapproved\"] == 2020].iloc[:,: 7]\n",
" \n",
" df_keep = Exchange_pairs(df_matched, df_unmatched)\n",
- " df_keep.drop([\"value2\"], axis = 1, inplace = True) \n",
+ " \n",
" df_keep.drop([\"value3\"], axis = 1, inplace = True) \n",
+ " # df_keep.drop([\"value4\"], axis = 1, inplace = True) \n",
" \n",
+ " # df_keep.drop([\"value2\"], axis = 1) \n",
+ " # df_keep.drop([\"value3\"], axis = 1) \n",
+ "\n",
" def objectfunc(beta, df = df_keep):\n",
" return -sum(df.dot(beta) >=0 )\n",
" t1 = time.time()\n",
" # bounds = [(1, 1.0000000001), (-100, 100), (-100, 100), (-100, 100)]\n",
- " # bounds = [(-1.000000001, -1), (-100, 100), (-100, 100)]\n",
- " bounds = [(-100, 100), (-100, 100), (-100, 100)]\n",
+ " # bounds = [(-1.000000001, -1), (-500, 500), (-500, 500)]\n",
+ " bounds = [(-500, 500), (-1.000000001, -1), (-500, 500), (-500, 500)]\n",
+ " # bounds = [(-500, 500), (-500, 500), (-500, 500)]\n",
" result = differential_evolution(objectfunc, bounds)\n",
" # print(result)\n",
"\n",
@@ -2253,198 +2350,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 47,
- "metadata": {},
+ "execution_count": 182,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "0\n",
- "1\n",
- "2\n",
- "3\n",
- "4\n",
- "5\n",
- "6\n",
- "7\n",
- "8\n",
- "9\n",
- "10\n",
- "11\n",
- "12\n",
- "13\n",
- "14\n",
- "15\n",
- "16\n",
- "17\n",
- "18\n",
- "19\n",
- "20\n",
- "21\n",
- "22\n",
- "23\n",
- "24\n",
- "25\n",
- "26\n",
- "27\n",
- "28\n",
- "29\n",
- "30\n",
- "31\n",
- "32\n",
- "33\n",
- "34\n",
- "35\n",
- "36\n",
- "37\n",
- "38\n",
- "39\n",
- "40\n",
- "41\n",
- "42\n",
- "43\n",
- "44\n",
- "45\n",
- "46\n",
- "47\n",
- "48\n",
- "49\n",
- "50\n",
- "51\n",
- "52\n",
- "53\n",
- "54\n",
- "55\n",
- "56\n",
- "57\n",
- "58\n",
- "59\n",
- "60\n",
- "61\n",
- "62\n",
- "63\n",
- "64\n",
- "65\n",
- "66\n",
- "67\n",
- "68\n",
- "69\n",
- "70\n",
- "71\n",
- "72\n",
- "73\n",
- "74\n",
- "75\n",
- "76\n",
- "77\n",
- "78\n",
- "79\n",
- "80\n",
- "81\n",
- "82\n",
- "83\n",
- "84\n",
- "85\n",
- "86\n",
- "87\n",
- "88\n",
- "89\n",
- "90\n",
- "91\n",
- "92\n",
- "93\n",
- "94\n",
- "95\n",
- "96\n",
- "97\n",
- "98\n",
- "99\n"
+ "0\n"
]
- }
- ],
- "source": [
- "df_result = pd.DataFrame()\n",
- "for i in range(0, 100):\n",
- " print(i)\n",
- " result = Fox_func(\"sample_matched_demean.dta\", \"sample_unmatched_demean.dta\", 100, 0.1)\n",
- " df_result = df_result.append([list(result)], ignore_index = True)\n",
- "df_result.columns = [\"Beta_1\", \"Beta_4\", \"Beta_5\", \"Number of of inequalities satisfied\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 48,
- "metadata": {},
- "outputs": [],
- "source": [
- "df_result.to_csv(\"NY2020;ExcludeVar2Var3.csv\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 12/29"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# num is the # of random samples(or the # of simulations)\n",
- "# ratio is the random sampling ratio, which is 10% here\n",
- "def Simulation(num, ratio, df_matched, df_unmatched):\n",
- " df_result = pd.DataFrame()\n",
- " for i in range(0, num):\n",
- " # df_matched_sample = Subsample(df_matched, ratio)\n",
- " # df_unmatched_sample = Subsample(df_unmatched, ratio)\n",
- " \n",
- " loan_id = df_unmatched[\"loan_id\"].unique()\n",
- " sample_loan_id = np.random.choice(loan_id, round(loan_id.shape[0] * ratio), replace = False)\n",
- " df_unmatched_sample = df_unmatched.loc[df_unmatched[\"loan_id\"].isin(sample_loan_id)]\n",
- " df_matched_sample = df_matched.loc[df_matched[\"loan_id\"].isin(sample_loan_id)]\n",
- " \n",
- " df_keep = Exchange_pairs(df_matched_sample, df_unmatched_sample)\n",
- " bounds = [(1, 1.000000001), (-100, 100), (-100, 100), (-100, 100), (-100, 100)] # fix beta_1 = 1\n",
- " result = differential_evolution(objectfunc, bounds)\n",
- " df_result = df_result.append(pd.Series(result.x), ignore_index = True)\n",
- " # print(i)\n",
- " print(\"The 5% quantile of parameters are\")\n",
- " print(df_result.quantile(0.05))\n",
- " print(\"The 95% quantile of parameters are\")\n",
- " print(df_result.quantile(0.95))\n",
- " return df_result\n",
- "\n",
- "\n",
- "t1 = time.time()\n",
- "df_result = Simulation(10, 0.1, df_matched, df_unmatched)\n",
- "t2 = time.time()\n",
- "print(\"Simulation time: \", t2-t1)\n",
- "df_result"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 12/30"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 93,
- "metadata": {},
- "outputs": [
+ },
{
"name": "stderr",
"output_type": "stream",
@@ -2455,245 +2375,7 @@
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" errors=errors,\n"
]
- }
- ],
- "source": [
- "df_sample = pd.read_stata('sample.dta')\n",
- "df_matched = df_sample[df_sample[\"match\"] == 1]\n",
- "df_unmatched = df_sample[df_sample[\"match\"] == 0]\n",
- "\n",
- "df_matched.drop([\"match\"], axis = 1, inplace = True) \n",
- "df_unmatched.drop([\"match\"], axis = 1, inplace = True) \n",
- "df_matched = df_matched[df_matched[\"USPS_ZIP_PREF_STATE\"] == \"NY\"]\n",
- "df_matched = df_matched[df_matched[\"yearapproved\"] == 2021].iloc[:,: 7]\n",
- "\n",
- "df_unmatched = df_unmatched[df_unmatched[\"USPS_ZIP_PREF_STATE\"] == \"NY\"]\n",
- "df_unmatched = df_unmatched[df_unmatched[\"yearapproved\"] == 2021].iloc[:,: 7]\n",
- "\n",
- "df_exchange_pairs = Exchange_pairs(df_matched, df_unmatched)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 94,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " value1 | \n",
- " value2 | \n",
- " value3 | \n",
- " value4 | \n",
- " value5 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 0.0 | \n",
- " -0.062592 | \n",
- " 282140.000000 | \n",
- " -7.450581e-09 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0.0 | \n",
- " -134.978073 | \n",
- " -112856.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0.0 | \n",
- " -57.051155 | \n",
- " 14934.007812 | \n",
- " 7.450581e-09 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 0.0 | \n",
- " -11.757530 | \n",
- " 13364.000000 | \n",
- " 7.450581e-09 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 0.0 | \n",
- " 6.960756 | \n",
- " 739.999023 | \n",
- " -7.450581e-09 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 101681 | \n",
- " 0.0 | \n",
- " 6.236197 | \n",
- " 12312.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 101682 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 14364.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 101683 | \n",
- " 0.0 | \n",
- " -162.729996 | \n",
- " 20072.031250 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 101684 | \n",
- " 0.0 | \n",
- " -1.490269 | \n",
- " -3750.031250 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 101685 | \n",
- " 0.0 | \n",
- " -1.490270 | \n",
- " -3750.031250 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
101686 rows × 5 columns
\n",
- "
"
- ],
- "text/plain": [
- " value1 value2 value3 value4 value5\n",
- "0 0.0 -0.062592 282140.000000 -7.450581e-09 0.0\n",
- "1 0.0 -134.978073 -112856.000000 0.000000e+00 0.0\n",
- "2 0.0 -57.051155 14934.007812 7.450581e-09 0.0\n",
- "3 0.0 -11.757530 13364.000000 7.450581e-09 0.0\n",
- "4 0.0 6.960756 739.999023 -7.450581e-09 0.0\n",
- "... ... ... ... ... ...\n",
- "101681 0.0 6.236197 12312.000000 0.000000e+00 0.0\n",
- "101682 0.0 0.000000 14364.000000 0.000000e+00 0.0\n",
- "101683 0.0 -162.729996 20072.031250 0.000000e+00 0.0\n",
- "101684 0.0 -1.490269 -3750.031250 0.000000e+00 0.0\n",
- "101685 0.0 -1.490270 -3750.031250 0.000000e+00 0.0\n",
- "\n",
- "[101686 rows x 5 columns]"
- ]
- },
- "execution_count": 94,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df_exchange_pairs"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 111,
- "metadata": {},
- "outputs": [],
- "source": [
- "def Fox_func(num, ratio):\n",
- " \n",
- " df_sample = pd.read_stata('sample.dta')\n",
- " df_matched = df_sample[df_sample[\"match\"] == 1]\n",
- " df_unmatched = df_sample[df_sample[\"match\"] == 0]\n",
- " df_matched.drop([\"match\"], axis = 1, inplace = True) \n",
- " df_unmatched.drop([\"match\"], axis = 1, inplace = True) \n",
- " df_matched = df_matched[df_matched[\"USPS_ZIP_PREF_STATE\"] == \"NY\"]\n",
- " df_matched = df_matched[df_matched[\"yearapproved\"] == 2021].iloc[:,: 7]\n",
- " df_unmatched = df_unmatched[df_unmatched[\"USPS_ZIP_PREF_STATE\"] == \"NY\"]\n",
- " df_unmatched = df_unmatched[df_unmatched[\"yearapproved\"] == 2021].iloc[:,: 7]\n",
- " \n",
- " df_keep = Exchange_pairs(df_matched, df_unmatched)\n",
- " \n",
- " df_keep.drop([\"value1\"], axis = 1, inplace = True) \n",
- " df_keep.drop([\"value3\"], axis = 1, inplace = True) \n",
- " \n",
- " # df_keep.drop([\"value2\"], axis = 1) \n",
- " # df_keep.drop([\"value3\"], axis = 1) \n",
- "\n",
- " def objectfunc(beta, df = df_keep):\n",
- " return -sum(df.dot(beta) >=0 )\n",
- " t1 = time.time()\n",
- " # bounds = [(1, 1.0000000001), (-100, 100), (-100, 100), (-100, 100)]\n",
- " bounds = [(-1.000000001, -1), (-500, 500), (-500, 500)]\n",
- " # bounds = [(-500, 500), (-500, 500), (-500, 500)]\n",
- " result = differential_evolution(objectfunc, bounds)\n",
- " # print(result)\n",
- "\n",
- " r = np.append(result.x, round(-result.fun))\n",
- " \n",
- " # t2 = time.time()\n",
- " # print(\"Differential Evolution time: \", t2 - t1)\n",
- " # print(\"The number of inequalities satisfied is\")\n",
- " # print(round(-result.fun))\n",
- " # Simulation(num, ratio, df_matched, df_unmatched)\n",
- " return r"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 112,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py:4312: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " errors=errors,\n"
- ]
- },
+ },
{
"name": "stdout",
"output_type": "stream",
@@ -4483,60 +4165,88 @@
" print(i)\n",
" result = Fox_func(100, 0.1)\n",
" df_result = df_result.append([list(result)], ignore_index = True)\n",
- "df_result.columns = [\"Beta_2\", \"Beta_4\", \"Beta_5\", \"Number of of inequalities satisfied\"]"
+ "df_result.columns = [\"Beta_1\", \"Beta_2\", \"Beta_4\", \"Beta_5\", \"Number of of inequalities satisfied\"]"
]
},
{
"cell_type": "code",
- "execution_count": 113,
- "metadata": {},
- "outputs": [],
- "source": [
- "df_result.to_csv(\"NY2021;Beta_2=-1;ExcludeVar1Var3;1230.csv\")"
- ]
- },
- {
- "cell_type": "markdown",
+ "execution_count": 226,
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Beta_1 | \n",
+ " Beta_2 | \n",
+ " Beta_4 | \n",
+ " Beta_5 | \n",
+ " Number of of inequalities satisfied | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 82 | \n",
+ " 449.525596 | \n",
+ " -1.0 | \n",
+ " -0.406714 | \n",
+ " 265.011253 | \n",
+ " 5936247.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Beta_1 Beta_2 Beta_4 Beta_5 \\\n",
+ "82 449.525596 -1.0 -0.406714 265.011253 \n",
+ "\n",
+ " Number of of inequalities satisfied \n",
+ "82 5936247.0 "
+ ]
+ },
+ "execution_count": 226,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# 1/1"
+ "# Select with an equality mask rather than argmax/idxmax, which report only the first position when the maximum is tied.\n",
+ "num_max = df_result[\"Number of of inequalities satisfied\"].max()\n",
+ "df_result.loc[df_result[\"Number of of inequalities satisfied\"].eq(num_max)]"
]
},
{
"cell_type": "code",
- "execution_count": 116,
+ "execution_count": 227,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py:4312: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " errors=errors,\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "df_sample = pd.read_stata('sample.dta')\n",
- "df_matched = df_sample[df_sample[\"match\"] == 1]\n",
- "df_unmatched = df_sample[df_sample[\"match\"] == 0]\n",
- "\n",
- "df_matched.drop([\"match\"], axis = 1, inplace = True) \n",
- "df_unmatched.drop([\"match\"], axis = 1, inplace = True) \n",
- "df_matched = df_matched[df_matched[\"USPS_ZIP_PREF_STATE\"] == \"NY\"]\n",
- "df_matched = df_matched[df_matched[\"yearapproved\"] == 2021].iloc[:,: 7]\n",
- "\n",
- "df_unmatched = df_unmatched[df_unmatched[\"USPS_ZIP_PREF_STATE\"] == \"NY\"]\n",
- "df_unmatched = df_unmatched[df_unmatched[\"yearapproved\"] == 2021].iloc[:,: 7]\n",
- "\n"
+ "# pd.concat instead of DataFrame.append (deprecated in pandas 1.4, removed in 2.0): all runs, then the best run(s), then a \"mean\" row.\n",
+ "df_test = pd.concat([df_result, df_result[df_result[\"Number of of inequalities satisfied\"] == num_max],\n",
+ "                     df_result.mean(axis = 0).rename(\"mean\").to_frame().T])"
]
},
{
"cell_type": "code",
- "execution_count": 118,
+ "execution_count": 228,
"metadata": {},
"outputs": [
{
@@ -4560,89 +4270,53 @@
" \n",
" \n",
" | \n",
- " lender_id | \n",
- " loan_id | \n",
- " match | \n",
- " var1 | \n",
- " var2 | \n",
- " var3 | \n",
- " var4 | \n",
- " var5 | \n",
- " USPS_ZIP_PREF_CITY | \n",
- " USPS_ZIP_PREF_STATE | \n",
- " yearapproved | \n",
+ " Beta_1 | \n",
+ " Beta_2 | \n",
+ " Beta_4 | \n",
+ " Beta_5 | \n",
+ " Number of of inequalities satisfied | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
- " 1339.0 | \n",
- " 27917.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 17.322744 | \n",
- " 376543.375000 | \n",
- " -0.024180 | \n",
- " 0.045413 | \n",
- " AGAWAM | \n",
- " MA | \n",
- " 2020.0 | \n",
+ " 450.221994 | \n",
+ " -1.0 | \n",
+ " -2.630644 | \n",
+ " 121.118366 | \n",
+ " 5926412.0 | \n",
"
\n",
" \n",
" 1 | \n",
- " 1339.0 | \n",
- " 97252.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 17.322744 | \n",
- " 399935.531250 | \n",
- " 0.091452 | \n",
- " 0.045413 | \n",
- " AGAWAM | \n",
- " MA | \n",
- " 2020.0 | \n",
+ " 295.447559 | \n",
+ " -1.0 | \n",
+ " -1.718904 | \n",
+ " 143.286266 | \n",
+ " 5930628.0 | \n",
"
\n",
" \n",
" 2 | \n",
- " 1339.0 | \n",
- " 78177.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 17.322744 | \n",
- " 376543.375000 | \n",
- " 0.091452 | \n",
- " -0.070219 | \n",
- " AGAWAM | \n",
- " MA | \n",
- " 2020.0 | \n",
+ " 368.195846 | \n",
+ " -1.0 | \n",
+ " -1.472396 | \n",
+ " 262.935498 | \n",
+ " 5935122.0 | \n",
"
\n",
" \n",
" 3 | \n",
- " 3402.0 | \n",
- " 78177.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 10.350215 | \n",
- " 379006.218750 | \n",
- " 0.091452 | \n",
- " -0.070219 | \n",
- " AGAWAM | \n",
- " MA | \n",
- " 2020.0 | \n",
+ " 353.166882 | \n",
+ " -1.0 | \n",
+ " -2.383213 | \n",
+ " 333.106743 | \n",
+ " 5929836.0 | \n",
"
\n",
" \n",
" 4 | \n",
- " 3402.0 | \n",
- " 27917.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 10.350215 | \n",
- " 379006.218750 | \n",
- " -0.024180 | \n",
- " 0.045413 | \n",
- " AGAWAM | \n",
- " MA | \n",
- " 2020.0 | \n",
+ " 443.246448 | \n",
+ " -1.0 | \n",
+ " -0.538437 | \n",
+ " 461.061858 | \n",
+ " 5923845.0 | \n",
"
\n",
" \n",
" ... | \n",
@@ -4651,130 +4325,210 @@
" ... | \n",
" ... | \n",
" ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
"
\n",
" \n",
- " 4117067 | \n",
- " 1631.0 | \n",
- " 52924.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 255598.812500 | \n",
- " -0.139812 | \n",
- " 0.045413 | \n",
- " KETCHIKAN | \n",
- " AK | \n",
- " 2020.0 | \n",
+ " 97 | \n",
+ " 286.969504 | \n",
+ " -1.0 | \n",
+ " -3.166268 | \n",
+ " 277.449091 | \n",
+ " 5930284.0 | \n",
"
\n",
" \n",
- " 4117068 | \n",
- " 3923.0 | \n",
- " 34122.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " -32325.736328 | \n",
- " 1.069300 | \n",
- " -0.347326 | \n",
- " CRAIG | \n",
- " AK | \n",
- " 2020.0 | \n",
+ " 98 | \n",
+ " 285.237308 | \n",
+ " -1.0 | \n",
+ " -0.521603 | \n",
+ " 151.499033 | \n",
+ " 5931376.0 | \n",
"
\n",
" \n",
- " 4117069 | \n",
- " 1631.0 | \n",
- " 34122.0 | \n",
- " 1.0 | \n",
- " 0.0 | \n",
- " 65.689995 | \n",
- " 163466.203125 | \n",
- " -0.139812 | \n",
- " 0.045413 | \n",
- " CRAIG | \n",
- " AK | \n",
- " 2020.0 | \n",
+ " 99 | \n",
+ " 303.845445 | \n",
+ " -1.0 | \n",
+ " -2.425117 | \n",
+ " 299.887522 | \n",
+ " 5927658.0 | \n",
"
\n",
" \n",
- " 4117070 | \n",
- " 1631.0 | \n",
- " 34127.0 | \n",
- " 1.0 | \n",
- " 0.0 | \n",
- " 61.713989 | \n",
- " 370764.562500 | \n",
- " -0.139812 | \n",
- " 0.045413 | \n",
- " WRANGELL | \n",
- " AK | \n",
- " 2020.0 | \n",
+ " 82 | \n",
+ " 449.525596 | \n",
+ " -1.0 | \n",
+ " -0.406714 | \n",
+ " 265.011253 | \n",
+ " 5936247.0 | \n",
"
\n",
" \n",
- " 4117071 | \n",
- " 1631.0 | \n",
- " 34131.0 | \n",
- " 1.0 | \n",
- " 0.0 | \n",
- " 61.713989 | \n",
- " 439864.031250 | \n",
- " -0.139812 | \n",
- " -0.070219 | \n",
- " WRANGELL | \n",
- " AK | \n",
- " 2020.0 | \n",
+ " mean | \n",
+ " 381.182240 | \n",
+ " -1.0 | \n",
+ " -3.194430 | \n",
+ " 262.139217 | \n",
+ " 5928906.2 | \n",
"
\n",
" \n",
"\n",
- "4117072 rows × 11 columns
\n",
+ "102 rows × 5 columns
\n",
""
],
"text/plain": [
- " lender_id loan_id match var1 var2 var3 var4 \\\n",
- "0 1339.0 27917.0 0.0 0.0 17.322744 376543.375000 -0.024180 \n",
- "1 1339.0 97252.0 0.0 0.0 17.322744 399935.531250 0.091452 \n",
- "2 1339.0 78177.0 0.0 0.0 17.322744 376543.375000 0.091452 \n",
- "3 3402.0 78177.0 0.0 0.0 10.350215 379006.218750 0.091452 \n",
- "4 3402.0 27917.0 0.0 0.0 10.350215 379006.218750 -0.024180 \n",
- "... ... ... ... ... ... ... ... \n",
- "4117067 1631.0 52924.0 0.0 0.0 0.000000 255598.812500 -0.139812 \n",
- "4117068 3923.0 34122.0 0.0 0.0 0.000000 -32325.736328 1.069300 \n",
- "4117069 1631.0 34122.0 1.0 0.0 65.689995 163466.203125 -0.139812 \n",
- "4117070 1631.0 34127.0 1.0 0.0 61.713989 370764.562500 -0.139812 \n",
- "4117071 1631.0 34131.0 1.0 0.0 61.713989 439864.031250 -0.139812 \n",
+ " Beta_1 Beta_2 Beta_4 Beta_5 \\\n",
+ "0 450.221994 -1.0 -2.630644 121.118366 \n",
+ "1 295.447559 -1.0 -1.718904 143.286266 \n",
+ "2 368.195846 -1.0 -1.472396 262.935498 \n",
+ "3 353.166882 -1.0 -2.383213 333.106743 \n",
+ "4 443.246448 -1.0 -0.538437 461.061858 \n",
+ "... ... ... ... ... \n",
+ "97 286.969504 -1.0 -3.166268 277.449091 \n",
+ "98 285.237308 -1.0 -0.521603 151.499033 \n",
+ "99 303.845445 -1.0 -2.425117 299.887522 \n",
+ "82 449.525596 -1.0 -0.406714 265.011253 \n",
+ "mean 381.182240 -1.0 -3.194430 262.139217 \n",
"\n",
- " var5 USPS_ZIP_PREF_CITY USPS_ZIP_PREF_STATE yearapproved \n",
- "0 0.045413 AGAWAM MA 2020.0 \n",
- "1 0.045413 AGAWAM MA 2020.0 \n",
- "2 -0.070219 AGAWAM MA 2020.0 \n",
- "3 -0.070219 AGAWAM MA 2020.0 \n",
- "4 0.045413 AGAWAM MA 2020.0 \n",
- "... ... ... ... ... \n",
- "4117067 0.045413 KETCHIKAN AK 2020.0 \n",
- "4117068 -0.347326 CRAIG AK 2020.0 \n",
- "4117069 0.045413 CRAIG AK 2020.0 \n",
- "4117070 0.045413 WRANGELL AK 2020.0 \n",
- "4117071 -0.070219 WRANGELL AK 2020.0 \n",
+ " Number of of inequalities satisfied \n",
+ "0 5926412.0 \n",
+ "1 5930628.0 \n",
+ "2 5935122.0 \n",
+ "3 5929836.0 \n",
+ "4 5923845.0 \n",
+ "... ... \n",
+ "97 5930284.0 \n",
+ "98 5931376.0 \n",
+ "99 5927658.0 \n",
+ "82 5936247.0 \n",
+ "mean 5928906.2 \n",
"\n",
- "[4117072 rows x 11 columns]"
+ "[102 rows x 5 columns]"
]
},
- "execution_count": 118,
+ "execution_count": 228,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df_sample"
+ "df_test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 183,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_result.to_csv(\"NY2020;Beta_2=-1;ExcludeVar3;1230.csv\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Not defining Fox_func"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Re-run the estimation 100 times on the NY / 2021 subsample and keep one row\n",
+ "# per run. The objective is the (negated) number of exchange-pair inequalities\n",
+ "# satisfied by a candidate coefficient vector.\n",
+ "\n",
+ "# The file read and the filtering do not depend on the loop index, so do them\n",
+ "# once instead of re-reading 'sample.dta' on every iteration.\n",
+ "df_sample = pd.read_stata('sample.dta')\n",
+ "df_ny_2021 = df_sample[(df_sample[\"USPS_ZIP_PREF_STATE\"] == \"NY\") & (df_sample[\"yearapproved\"] == 2021)]\n",
+ "# Non-inplace drop: an inplace drop on a filtered slice raises SettingWithCopyWarning.\n",
+ "df_matched = df_ny_2021[df_ny_2021[\"match\"] == 1].drop(columns = [\"match\"]).iloc[:, :7]\n",
+ "df_unmatched = df_ny_2021[df_ny_2021[\"match\"] == 0].drop(columns = [\"match\"]).iloc[:, :7]\n",
+ "\n",
+ "# Collect one row per run and build the frame once at the end; growing a\n",
+ "# DataFrame with .append inside a loop is quadratic and .append no longer\n",
+ "# exists in pandas 2.x.\n",
+ "rows = []\n",
+ "for i in range(0, 100):\n",
+ "    print(i)\n",
+ "    # Pass fresh copies so every run starts from untouched inputs, exactly as\n",
+ "    # when the data was rebuilt on each iteration.\n",
+ "    # NOTE(review): if Exchange_pairs is deterministic and does not modify its\n",
+ "    # arguments, this call can be hoisted out of the loop as well - confirm.\n",
+ "    df_keep = Exchange_pairs(df_matched.copy(), df_unmatched.copy())\n",
+ "    # value1 and value3 are excluded from this specification.\n",
+ "    df_keep = df_keep.drop(columns = [\"value1\", \"value3\"])\n",
+ "\n",
+ "    def objectfunc(beta, df = df_keep):\n",
+ "        \"\"\"Return minus the number of rows of df whose dot product with beta is >= 0.\n",
+ "\n",
+ "        The sign is flipped because differential_evolution minimises.\n",
+ "        df is bound to the current df_keep when the function is defined.\n",
+ "        \"\"\"\n",
+ "        # Vectorised count; the builtin sum() would iterate row by row in Python.\n",
+ "        return -(df.dot(beta) >= 0).sum()\n",
+ "    # Timestamp taken just before the optimiser starts; not read in this cell.\n",
+ "    t1 = time.time()\n",
+ "    # The first coefficient (Beta_2) is effectively fixed at -1 by a\n",
+ "    # near-degenerate interval; the other two are searched over [-500, 500].\n",
+ "    # bounds = [(1, 1.0000000001), (-100, 100), (-100, 100), (-100, 100)]\n",
+ "    # bounds = [(-500, 500), (-500, 500), (-500, 500)]\n",
+ "    bounds = [(-1.000000001, -1), (-500, 500), (-500, 500)]\n",
+ "    result = differential_evolution(objectfunc, bounds)\n",
+ "\n",
+ "    # Row layout: the three coefficients, then the count of satisfied inequalities.\n",
+ "    r = np.append(result.x, round(-result.fun))\n",
+ "    rows.append(list(r))\n",
+ "df_result = pd.DataFrame(rows, columns = [\"Beta_2\", \"Beta_4\", \"Beta_5\", \"Number of of inequalities satisfied\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 113,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_result.to_csv(\"NY2021;Beta_2=-1;ExcludeVar1Var3;1230.csv\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Demean"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "df_w1\n",
+ "- var1 = Relationship_Dum(demean)\n",
+ "- var2 = mi_to_zcta5(demean)\n",
+ "- var3 = FinTechIndicator(demean) * rating_avg(demean)\n",
+ "- var4 = FinTechIndicator(demean) * minority_yelp(demean)\n",
+ "\n",
+ "\n",
+ "df_w2\n",
+ "- var1 = Relationship_Dum\n",
+ "- var2 = mi_to_zcta5\n",
+ "- var3 = FinTechIndicator(demean) * rating_avg(demean)\n",
+ "- var4 = FinTechIndicator(demean) * minority_yelp(demean)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 250,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_org = pd.read_stata('sample_org.dta')"
]
},
{
"cell_type": "code",
- "execution_count": 117,
+ "execution_count": 251,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Centre the regressors on their full-sample means.\n",
+ "# var1 / var2: demeaned Relationship_Dum and mi_to_zcta5.\n",
+ "# var3 / var4: demeaned FinTechIndicator times demeaned rating_avg / minority_yelp.\n",
+ "fintech_demeaned = df_org['FinTechIndicator'] - df_org['FinTechIndicator'].mean()\n",
+ "\n",
+ "df_org['var1'] = df_org['Relationship_Dum'] - df_org['Relationship_Dum'].mean()\n",
+ "df_org['var2'] = df_org['mi_to_zcta5'] - df_org['mi_to_zcta5'].mean()\n",
+ "df_org['var3'] = fintech_demeaned * (df_org['rating_avg'] - df_org['rating_avg'].mean())\n",
+ "df_org['var4'] = fintech_demeaned * (df_org['minority_yelp'] - df_org['minority_yelp'].mean())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 252,
"metadata": {},
"outputs": [
{
@@ -4800,62 +4554,110 @@
" | \n",
" lender_id | \n",
" loan_id | \n",
+ " rating_avg | \n",
+ " minority_yelp | \n",
+ " USPS_ZIP_PREF_CITY | \n",
+ " USPS_ZIP_PREF_STATE | \n",
+ " FinTechIndicator | \n",
+ " Relationship_Dum | \n",
+ " match | \n",
+ " mi_to_zcta5 | \n",
+ " yearapproved | \n",
" var1 | \n",
" var2 | \n",
" var3 | \n",
" var4 | \n",
- " var5 | \n",
" \n",
" \n",
" \n",
" \n",
- " 159043 | \n",
- " 3113.0 | \n",
- " 61055.0 | \n",
+ " 0 | \n",
+ " 1339.0 | \n",
+ " 27917.0 | \n",
+ " 4.0 | \n",
" 0.0 | \n",
- " 15.709789 | \n",
- " -9.689972e+03 | \n",
- " -0.096758 | \n",
- " -0.070219 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 17.322745 | \n",
+ " 2020.0 | \n",
+ " -0.001122 | \n",
+ " -543.263339 | \n",
+ " -0.024180 | \n",
+ " 0.045413 | \n",
"
\n",
" \n",
- " 167398 | \n",
- " 1224.0 | \n",
- " 26566.0 | \n",
+ " 1 | \n",
+ " 1339.0 | \n",
+ " 97252.0 | \n",
+ " 3.0 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
- " 9.749870e+04 | \n",
- " -1.583804 | \n",
- " 0.537042 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 17.322745 | \n",
+ " 2020.0 | \n",
+ " -0.001122 | \n",
+ " -543.263339 | \n",
+ " 0.091452 | \n",
+ " 0.045413 | \n",
"
\n",
" \n",
- " 171449 | \n",
- " 1239.0 | \n",
- " 28637.0 | \n",
+ " 2 | \n",
+ " 1339.0 | \n",
+ " 78177.0 | \n",
+ " 3.0 | \n",
+ " 1.0 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 0 | \n",
" 0.0 | \n",
- " 90.848930 | \n",
- " -1.001105e+06 | \n",
- " 0.004728 | \n",
+ " 0.0 | \n",
+ " 17.322745 | \n",
+ " 2020.0 | \n",
+ " -0.001122 | \n",
+ " -543.263339 | \n",
+ " 0.091452 | \n",
" -0.070219 | \n",
"
\n",
" \n",
- " 173833 | \n",
- " 2572.0 | \n",
- " 47469.0 | \n",
+ " 3 | \n",
+ " 3402.0 | \n",
+ " 27917.0 | \n",
+ " 4.0 | \n",
" 0.0 | \n",
- " 474.592255 | \n",
- " -3.921419e+06 | \n",
- " 0.004728 | \n",
- " -0.070219 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 10.350215 | \n",
+ " 2020.0 | \n",
+ " -0.001122 | \n",
+ " -550.235868 | \n",
+ " -0.024180 | \n",
+ " 0.045413 | \n",
"
\n",
" \n",
- " 186512 | \n",
- " 2572.0 | \n",
- " 48791.0 | \n",
+ " 4 | \n",
+ " 3402.0 | \n",
+ " 78177.0 | \n",
+ " 3.0 | \n",
+ " 1.0 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 0 | \n",
+ " 0.0 | \n",
" 0.0 | \n",
- " 474.473328 | \n",
- " -1.972524e+06 | \n",
- " -0.001054 | \n",
+ " 10.350215 | \n",
+ " 2020.0 | \n",
+ " -0.001122 | \n",
+ " -550.235868 | \n",
+ " 0.091452 | \n",
" -0.070219 | \n",
"
\n",
" \n",
@@ -4867,100 +4669,205 @@
" ... | \n",
" ... | \n",
" ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
"
\n",
" \n",
- " 599173 | \n",
- " 2113.0 | \n",
- " 40865.0 | \n",
+ " 4117067 | \n",
+ " 1631.0 | \n",
+ " 34117.0 | \n",
+ " 4.0 | \n",
+ " 0.0 | \n",
+ " KETCHIKAN | \n",
+ " AK | \n",
+ " 0 | \n",
" 0.0 | \n",
- " 22.602900 | \n",
- " 2.241549e+05 | \n",
+ " 1.0 | \n",
+ " 0.000000 | \n",
+ " 2020.0 | \n",
+ " -0.001122 | \n",
+ " -560.586083 | \n",
" -0.024180 | \n",
" 0.045413 | \n",
"
\n",
" \n",
- " 599180 | \n",
- " 4715.0 | \n",
- " 83066.0 | \n",
+ " 4117068 | \n",
+ " 3923.0 | \n",
+ " 34122.0 | \n",
+ " 5.0 | \n",
" 0.0 | \n",
- " 10.844549 | \n",
- " 4.477320e+05 | \n",
- " -0.139812 | \n",
- " 0.045413 | \n",
+ " CRAIG | \n",
+ " AK | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.000000 | \n",
+ " 2020.0 | \n",
+ " -0.001122 | \n",
+ " -560.586083 | \n",
+ " 1.069299 | \n",
+ " -0.347326 | \n",
"
\n",
" \n",
- " 599195 | \n",
- " 821.0 | \n",
- " 19509.0 | \n",
+ " 4117069 | \n",
+ " 1631.0 | \n",
+ " 34122.0 | \n",
+ " 5.0 | \n",
+ " 0.0 | \n",
+ " CRAIG | \n",
+ " AK | \n",
+ " 0 | \n",
" 0.0 | \n",
- " 29.061094 | \n",
- " -4.331456e+04 | \n",
+ " 1.0 | \n",
+ " 65.689996 | \n",
+ " 2020.0 | \n",
+ " -0.001122 | \n",
+ " -494.896087 | \n",
" -0.139812 | \n",
" 0.045413 | \n",
"
\n",
" \n",
- " 599246 | \n",
- " 821.0 | \n",
- " 19523.0 | \n",
+ " 4117070 | \n",
+ " 1631.0 | \n",
+ " 34131.0 | \n",
+ " 5.0 | \n",
+ " 1.0 | \n",
+ " WRANGELL | \n",
+ " AK | \n",
+ " 0 | \n",
" 0.0 | \n",
- " 22.441452 | \n",
- " 3.891473e+05 | \n",
+ " 1.0 | \n",
+ " 61.713988 | \n",
+ " 2020.0 | \n",
+ " -0.001122 | \n",
+ " -498.872095 | \n",
" -0.139812 | \n",
- " 0.045413 | \n",
+ " -0.070219 | \n",
"
\n",
" \n",
- " 599271 | \n",
- " 4706.0 | \n",
- " 83032.0 | \n",
+ " 4117071 | \n",
+ " 1631.0 | \n",
+ " 34127.0 | \n",
+ " 5.0 | \n",
+ " 0.0 | \n",
+ " WRANGELL | \n",
+ " AK | \n",
+ " 0 | \n",
" 0.0 | \n",
- " 13.504504 | \n",
- " 2.844143e+05 | \n",
+ " 1.0 | \n",
+ " 61.713988 | \n",
+ " 2020.0 | \n",
+ " -0.001122 | \n",
+ " -498.872095 | \n",
" -0.139812 | \n",
" 0.045413 | \n",
"
\n",
" \n",
"\n",
- "574 rows × 7 columns
\n",
+ "4117072 rows × 15 columns
\n",
""
],
"text/plain": [
- " lender_id loan_id var1 var2 var3 var4 var5\n",
- "159043 3113.0 61055.0 0.0 15.709789 -9.689972e+03 -0.096758 -0.070219\n",
- "167398 1224.0 26566.0 0.0 0.000000 9.749870e+04 -1.583804 0.537042\n",
- "171449 1239.0 28637.0 0.0 90.848930 -1.001105e+06 0.004728 -0.070219\n",
- "173833 2572.0 47469.0 0.0 474.592255 -3.921419e+06 0.004728 -0.070219\n",
- "186512 2572.0 48791.0 0.0 474.473328 -1.972524e+06 -0.001054 -0.070219\n",
- "... ... ... ... ... ... ... ...\n",
- "599173 2113.0 40865.0 0.0 22.602900 2.241549e+05 -0.024180 0.045413\n",
- "599180 4715.0 83066.0 0.0 10.844549 4.477320e+05 -0.139812 0.045413\n",
- "599195 821.0 19509.0 0.0 29.061094 -4.331456e+04 -0.139812 0.045413\n",
- "599246 821.0 19523.0 0.0 22.441452 3.891473e+05 -0.139812 0.045413\n",
- "599271 4706.0 83032.0 0.0 13.504504 2.844143e+05 -0.139812 0.045413\n",
+ " lender_id loan_id rating_avg minority_yelp USPS_ZIP_PREF_CITY \\\n",
+ "0 1339.0 27917.0 4.0 0.0 AGAWAM \n",
+ "1 1339.0 97252.0 3.0 0.0 AGAWAM \n",
+ "2 1339.0 78177.0 3.0 1.0 AGAWAM \n",
+ "3 3402.0 27917.0 4.0 0.0 AGAWAM \n",
+ "4 3402.0 78177.0 3.0 1.0 AGAWAM \n",
+ "... ... ... ... ... ... \n",
+ "4117067 1631.0 34117.0 4.0 0.0 KETCHIKAN \n",
+ "4117068 3923.0 34122.0 5.0 0.0 CRAIG \n",
+ "4117069 1631.0 34122.0 5.0 0.0 CRAIG \n",
+ "4117070 1631.0 34131.0 5.0 1.0 WRANGELL \n",
+ "4117071 1631.0 34127.0 5.0 0.0 WRANGELL \n",
+ "\n",
+ " USPS_ZIP_PREF_STATE FinTechIndicator Relationship_Dum match \\\n",
+ "0 MA 0 0.0 0.0 \n",
+ "1 MA 0 0.0 0.0 \n",
+ "2 MA 0 0.0 0.0 \n",
+ "3 MA 0 0.0 0.0 \n",
+ "4 MA 0 0.0 0.0 \n",
+ "... ... ... ... ... \n",
+ "4117067 AK 0 0.0 1.0 \n",
+ "4117068 AK 1 0.0 0.0 \n",
+ "4117069 AK 0 0.0 1.0 \n",
+ "4117070 AK 0 0.0 1.0 \n",
+ "4117071 AK 0 0.0 1.0 \n",
"\n",
- "[574 rows x 7 columns]"
+ " mi_to_zcta5 yearapproved var1 var2 var3 var4 \n",
+ "0 17.322745 2020.0 -0.001122 -543.263339 -0.024180 0.045413 \n",
+ "1 17.322745 2020.0 -0.001122 -543.263339 0.091452 0.045413 \n",
+ "2 17.322745 2020.0 -0.001122 -543.263339 0.091452 -0.070219 \n",
+ "3 10.350215 2020.0 -0.001122 -550.235868 -0.024180 0.045413 \n",
+ "4 10.350215 2020.0 -0.001122 -550.235868 0.091452 -0.070219 \n",
+ "... ... ... ... ... ... ... \n",
+ "4117067 0.000000 2020.0 -0.001122 -560.586083 -0.024180 0.045413 \n",
+ "4117068 0.000000 2020.0 -0.001122 -560.586083 1.069299 -0.347326 \n",
+ "4117069 65.689996 2020.0 -0.001122 -494.896087 -0.139812 0.045413 \n",
+ "4117070 61.713988 2020.0 -0.001122 -498.872095 -0.139812 -0.070219 \n",
+ "4117071 61.713988 2020.0 -0.001122 -498.872095 -0.139812 0.045413 \n",
+ "\n",
+ "[4117072 rows x 15 columns]"
]
},
- "execution_count": 117,
+ "execution_count": 252,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df_matched"
+ "df_org"
]
},
{
"cell_type": "code",
- "execution_count": 119,
+ "execution_count": 351,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py:1597: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " self.obj[key] = value\n",
+ "/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py:1676: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " self._setitem_single_column(ilocs[0], value, pi)\n",
+ "/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py:1738: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " self._setitem_single_column(loc, value[:, i].tolist(), pi)\n"
+ ]
+ }
+ ],
"source": [
- "df_exchange_pairs = Exchange_pairs(df_matched, df_unmatched)"
+ "# df_w1: identifiers, match flag, the four demeaned regressors and the\n",
+ "# location / year columns, all taken from df_org.\n",
+ "# Select every column in one step and take an explicit copy; writing into a\n",
+ "# bare column selection of df_org with .loc raises SettingWithCopyWarning.\n",
+ "df_w1 = df_org[[\n",
+ "    'lender_id', 'loan_id', 'match',\n",
+ "    'var1', 'var2', 'var3', 'var4',\n",
+ "    'USPS_ZIP_PREF_CITY', 'USPS_ZIP_PREF_STATE', 'yearapproved',\n",
+ "]].copy()"
]
},
{
"cell_type": "code",
- "execution_count": 120,
+ "execution_count": 352,
"metadata": {},
"outputs": [
{
@@ -4984,53 +4891,83 @@
" \n",
" \n",
" | \n",
- " value1 | \n",
- " value2 | \n",
- " value3 | \n",
- " value4 | \n",
- " value5 | \n",
+ " lender_id | \n",
+ " loan_id | \n",
+ " match | \n",
+ " var1 | \n",
+ " var2 | \n",
+ " var3 | \n",
+ " var4 | \n",
+ " USPS_ZIP_PREF_CITY | \n",
+ " USPS_ZIP_PREF_STATE | \n",
+ " yearapproved | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
+ " 1339.0 | \n",
+ " 27917.0 | \n",
" 0.0 | \n",
- " -0.062592 | \n",
- " 282140.000000 | \n",
- " -7.450581e-09 | \n",
- " 0.0 | \n",
+ " -0.001122 | \n",
+ " -543.263339 | \n",
+ " -0.024180 | \n",
+ " 0.045413 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
" 1 | \n",
+ " 1339.0 | \n",
+ " 97252.0 | \n",
" 0.0 | \n",
- " -134.978073 | \n",
- " -112856.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
+ " -0.001122 | \n",
+ " -543.263339 | \n",
+ " 0.091452 | \n",
+ " 0.045413 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
" 2 | \n",
+ " 1339.0 | \n",
+ " 78177.0 | \n",
" 0.0 | \n",
- " -57.051155 | \n",
- " 14934.007812 | \n",
- " 7.450581e-09 | \n",
- " 0.0 | \n",
+ " -0.001122 | \n",
+ " -543.263339 | \n",
+ " 0.091452 | \n",
+ " -0.070219 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
" 3 | \n",
+ " 3402.0 | \n",
+ " 27917.0 | \n",
" 0.0 | \n",
- " -11.757530 | \n",
- " 13364.000000 | \n",
- " 7.450581e-09 | \n",
- " 0.0 | \n",
+ " -0.001122 | \n",
+ " -550.235868 | \n",
+ " -0.024180 | \n",
+ " 0.045413 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
" 4 | \n",
+ " 3402.0 | \n",
+ " 78177.0 | \n",
" 0.0 | \n",
- " 6.960756 | \n",
- " 739.999023 | \n",
- " -7.450581e-09 | \n",
- " 0.0 | \n",
+ " -0.001122 | \n",
+ " -550.235868 | \n",
+ " 0.091452 | \n",
+ " -0.070219 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
" ... | \n",
@@ -5039,540 +4976,164 @@
" ... | \n",
" ... | \n",
" ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
"
\n",
" \n",
- " 101681 | \n",
- " 0.0 | \n",
- " 6.236197 | \n",
- " 12312.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 101682 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 14364.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
+ " 4117067 | \n",
+ " 1631.0 | \n",
+ " 34117.0 | \n",
+ " 1.0 | \n",
+ " -0.001122 | \n",
+ " -560.586083 | \n",
+ " -0.024180 | \n",
+ " 0.045413 | \n",
+ " KETCHIKAN | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 101683 | \n",
- " 0.0 | \n",
- " -162.729996 | \n",
- " 20072.031250 | \n",
- " 0.000000e+00 | \n",
+ " 4117068 | \n",
+ " 3923.0 | \n",
+ " 34122.0 | \n",
" 0.0 | \n",
+ " -0.001122 | \n",
+ " -560.586083 | \n",
+ " 1.069299 | \n",
+ " -0.347326 | \n",
+ " CRAIG | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 101684 | \n",
- " 0.0 | \n",
- " -1.490269 | \n",
- " -3750.031250 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 101685 | \n",
- " 0.0 | \n",
- " -1.490270 | \n",
- " -3750.031250 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- "\n",
- "101686 rows × 5 columns
\n",
- ""
- ],
- "text/plain": [
- " value1 value2 value3 value4 value5\n",
- "0 0.0 -0.062592 282140.000000 -7.450581e-09 0.0\n",
- "1 0.0 -134.978073 -112856.000000 0.000000e+00 0.0\n",
- "2 0.0 -57.051155 14934.007812 7.450581e-09 0.0\n",
- "3 0.0 -11.757530 13364.000000 7.450581e-09 0.0\n",
- "4 0.0 6.960756 739.999023 -7.450581e-09 0.0\n",
- "... ... ... ... ... ...\n",
- "101681 0.0 6.236197 12312.000000 0.000000e+00 0.0\n",
- "101682 0.0 0.000000 14364.000000 0.000000e+00 0.0\n",
- "101683 0.0 -162.729996 20072.031250 0.000000e+00 0.0\n",
- "101684 0.0 -1.490269 -3750.031250 0.000000e+00 0.0\n",
- "101685 0.0 -1.490270 -3750.031250 0.000000e+00 0.0\n",
- "\n",
- "[101686 rows x 5 columns]"
- ]
- },
- "execution_count": 120,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df_exchange_pairs"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 125,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " lender_id | \n",
- " loan_id | \n",
- " var1 | \n",
- " var2 | \n",
- " var3 | \n",
- " var4 | \n",
- " var5 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 159043 | \n",
- " 3113.0 | \n",
- " 61055.0 | \n",
- " 0.0 | \n",
- " 15.709789 | \n",
- " -9.689972e+03 | \n",
- " -0.096758 | \n",
- " -0.070219 | \n",
- "
\n",
- " \n",
- " 167398 | \n",
- " 1224.0 | \n",
- " 26566.0 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 9.749870e+04 | \n",
- " -1.583804 | \n",
- " 0.537042 | \n",
- "
\n",
- " \n",
- " 171449 | \n",
- " 1239.0 | \n",
- " 28637.0 | \n",
- " 0.0 | \n",
- " 90.848930 | \n",
- " -1.001105e+06 | \n",
- " 0.004728 | \n",
- " -0.070219 | \n",
- "
\n",
- " \n",
- " 173833 | \n",
- " 2572.0 | \n",
- " 47469.0 | \n",
- " 0.0 | \n",
- " 474.592255 | \n",
- " -3.921419e+06 | \n",
- " 0.004728 | \n",
- " -0.070219 | \n",
- "
\n",
- " \n",
- " 186512 | \n",
- " 2572.0 | \n",
- " 48791.0 | \n",
- " 0.0 | \n",
- " 474.473328 | \n",
- " -1.972524e+06 | \n",
- " -0.001054 | \n",
- " -0.070219 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 599173 | \n",
- " 2113.0 | \n",
- " 40865.0 | \n",
- " 0.0 | \n",
- " 22.602900 | \n",
- " 2.241549e+05 | \n",
- " -0.024180 | \n",
- " 0.045413 | \n",
- "
\n",
- " \n",
- " 599180 | \n",
- " 4715.0 | \n",
- " 83066.0 | \n",
- " 0.0 | \n",
- " 10.844549 | \n",
- " 4.477320e+05 | \n",
- " -0.139812 | \n",
- " 0.045413 | \n",
- "
\n",
- " \n",
- " 599195 | \n",
- " 821.0 | \n",
- " 19509.0 | \n",
- " 0.0 | \n",
- " 29.061094 | \n",
- " -4.331456e+04 | \n",
+ " 4117069 | \n",
+ " 1631.0 | \n",
+ " 34122.0 | \n",
+ " 1.0 | \n",
+ " -0.001122 | \n",
+ " -494.896087 | \n",
" -0.139812 | \n",
" 0.045413 | \n",
+ " CRAIG | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 599246 | \n",
- " 821.0 | \n",
- " 19523.0 | \n",
- " 0.0 | \n",
- " 22.441452 | \n",
- " 3.891473e+05 | \n",
+ " 4117070 | \n",
+ " 1631.0 | \n",
+ " 34131.0 | \n",
+ " 1.0 | \n",
+ " -0.001122 | \n",
+ " -498.872095 | \n",
" -0.139812 | \n",
- " 0.045413 | \n",
+ " -0.070219 | \n",
+ " WRANGELL | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 599271 | \n",
- " 4706.0 | \n",
- " 83032.0 | \n",
- " 0.0 | \n",
- " 13.504504 | \n",
- " 2.844143e+05 | \n",
+ " 4117071 | \n",
+ " 1631.0 | \n",
+ " 34127.0 | \n",
+ " 1.0 | \n",
+ " -0.001122 | \n",
+ " -498.872095 | \n",
" -0.139812 | \n",
" 0.045413 | \n",
- "
\n",
- " \n",
- "
\n",
- "
574 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " lender_id loan_id var1 var2 var3 var4 var5\n",
- "159043 3113.0 61055.0 0.0 15.709789 -9.689972e+03 -0.096758 -0.070219\n",
- "167398 1224.0 26566.0 0.0 0.000000 9.749870e+04 -1.583804 0.537042\n",
- "171449 1239.0 28637.0 0.0 90.848930 -1.001105e+06 0.004728 -0.070219\n",
- "173833 2572.0 47469.0 0.0 474.592255 -3.921419e+06 0.004728 -0.070219\n",
- "186512 2572.0 48791.0 0.0 474.473328 -1.972524e+06 -0.001054 -0.070219\n",
- "... ... ... ... ... ... ... ...\n",
- "599173 2113.0 40865.0 0.0 22.602900 2.241549e+05 -0.024180 0.045413\n",
- "599180 4715.0 83066.0 0.0 10.844549 4.477320e+05 -0.139812 0.045413\n",
- "599195 821.0 19509.0 0.0 29.061094 -4.331456e+04 -0.139812 0.045413\n",
- "599246 821.0 19523.0 0.0 22.441452 3.891473e+05 -0.139812 0.045413\n",
- "599271 4706.0 83032.0 0.0 13.504504 2.844143e+05 -0.139812 0.045413\n",
- "\n",
- "[574 rows x 7 columns]"
- ]
- },
- "execution_count": 125,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df_matched"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 121,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'\\ndf_keep = pd.DataFrame()\\nfor i in range(1, 6):\\n name = \"value\" + str(i)\\n df_keep[name] = df3[\"var\"+str(i)+\"1m\"] + df3[\"var\"+str(i)+\"2m\"] - df3[\"var\"+str(i)+\"1um\"] - df3[\"var\"+str(i)+\"2um\"]\\n # t2 = time.time()\\n # print(\"Running time: \", t2-t1)\\n'"
- ]
- },
- "execution_count": 121,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df_matched_column = df_matched.columns\n",
- "df_matched.columns = df_matched_column + '1m'\n",
- "df_unmatched.columns = df_unmatched.columns.str.replace('lender_id', 'lender_id1m')\n",
- "df1 = pd.merge(df_matched, df_unmatched, on = 'lender_id1m', how = 'inner')\n",
- " \n",
- "l = df1.columns[:-6].append([df1.columns[-6:] + '1um'])\n",
- "df1.columns = l\n",
- "df_matched.columns = df_matched_column\n",
- "df2 = pd.merge(df_matched, df1, left_on = 'loan_id', right_on = 'loan_id1um', how = 'inner') \n",
- "\n",
- "ll = (df2.columns[:7]+'2m').append(df2.columns[7:])\n",
- "df2.columns = ll\n",
- "df_unmatched.columns = df_unmatched.columns.str.replace('lender_id1m', 'lender_id')\n",
- "df3 = pd.merge(df_unmatched, df2, left_on = ['lender_id','loan_id'], right_on = ['lender_id2m','loan_id1m'], how = 'inner')\n",
- "lll = (df3.columns[:7]+'2um').append(df3.columns[7:])\n",
- "df3.columns = lll\n",
- "\n",
- "'''\n",
- "df_keep = pd.DataFrame()\n",
- "for i in range(1, 6):\n",
- " name = \"value\" + str(i)\n",
- " df_keep[name] = df3[\"var\"+str(i)+\"1m\"] + df3[\"var\"+str(i)+\"2m\"] - df3[\"var\"+str(i)+\"1um\"] - df3[\"var\"+str(i)+\"2um\"]\n",
- " # t2 = time.time()\n",
- " # print(\"Running time: \", t2-t1)\n",
- "'''"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 124,
- "metadata": {},
- "outputs": [],
- "source": [
- "df3.loc[0:100].to_csv(\"df3.csv\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 以下的部分,把var4的计算过程中的每一项列出观察"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 135,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 -0.096758\n",
- "1 -0.096758\n",
- "2 -0.096758\n",
- "3 -0.096758\n",
- "4 -0.096758\n",
- " ... \n",
- "101681 -0.139812\n",
- "101682 -0.139812\n",
- "101683 -0.139812\n",
- "101684 -0.139812\n",
- "101685 -0.139812\n",
- "Name: var41m, Length: 101686, dtype: float32"
- ]
- },
- "execution_count": 135,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df3[\"var41m\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 132,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 -0.088420\n",
- "1 -0.105122\n",
- "2 -0.139812\n",
- "3 -0.057218\n",
- "4 -0.128249\n",
- " ... \n",
- "101681 -0.139812\n",
- "101682 -0.139812\n",
- "101683 -0.139812\n",
- "101684 -0.139812\n",
- "101685 -0.139812\n",
- "Name: var42m, Length: 101686, dtype: float32"
- ]
- },
- "execution_count": 132,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df3[\"var42m\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 133,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 -0.088420\n",
- "1 -0.105122\n",
- "2 -0.139812\n",
- "3 -0.057218\n",
- "4 -0.128249\n",
- " ... \n",
- "101681 -0.139812\n",
- "101682 -0.139812\n",
- "101683 -0.139812\n",
- "101684 -0.139812\n",
- "101685 -0.139812\n",
- "Name: var41um, Length: 101686, dtype: float32"
- ]
- },
- "execution_count": 133,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df3[\"var41um\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 134,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 -0.096758\n",
- "1 -0.096758\n",
- "2 -0.096758\n",
- "3 -0.096758\n",
- "4 -0.096758\n",
- " ... \n",
- "101681 -0.139812\n",
- "101682 -0.139812\n",
- "101683 -0.139812\n",
- "101684 -0.139812\n",
- "101685 -0.139812\n",
- "Name: var42um, Length: 101686, dtype: float32"
- ]
- },
- "execution_count": 134,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df3[\"var42um\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 140,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 True\n",
- "1 True\n",
- "2 True\n",
- "3 True\n",
- "4 True\n",
- " ... \n",
- "101681 True\n",
- "101682 True\n",
- "101683 True\n",
- "101684 True\n",
- "101685 True\n",
- "Length: 101686, dtype: bool"
- ]
- },
- "execution_count": 140,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df3[\"var41m\"] == df3[\"var42um\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 138,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(101686,)"
- ]
- },
- "execution_count": 138,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df3[\"var41m\"].shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 141,
- "metadata": {},
- "outputs": [
- {
- "data": {
+ " WRANGELL | \n",
+ " AK | \n",
+ " 2020.0 | \n",
+ " \n",
+ " \n",
+ "\n",
+ "4117072 rows × 10 columns
\n",
+ ""
+ ],
"text/plain": [
- "50528"
+ " lender_id loan_id match var1 var2 var3 var4 \\\n",
+ "0 1339.0 27917.0 0.0 -0.001122 -543.263339 -0.024180 0.045413 \n",
+ "1 1339.0 97252.0 0.0 -0.001122 -543.263339 0.091452 0.045413 \n",
+ "2 1339.0 78177.0 0.0 -0.001122 -543.263339 0.091452 -0.070219 \n",
+ "3 3402.0 27917.0 0.0 -0.001122 -550.235868 -0.024180 0.045413 \n",
+ "4 3402.0 78177.0 0.0 -0.001122 -550.235868 0.091452 -0.070219 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "4117067 1631.0 34117.0 1.0 -0.001122 -560.586083 -0.024180 0.045413 \n",
+ "4117068 3923.0 34122.0 0.0 -0.001122 -560.586083 1.069299 -0.347326 \n",
+ "4117069 1631.0 34122.0 1.0 -0.001122 -494.896087 -0.139812 0.045413 \n",
+ "4117070 1631.0 34131.0 1.0 -0.001122 -498.872095 -0.139812 -0.070219 \n",
+ "4117071 1631.0 34127.0 1.0 -0.001122 -498.872095 -0.139812 0.045413 \n",
+ "\n",
+ " USPS_ZIP_PREF_CITY USPS_ZIP_PREF_STATE yearapproved \n",
+ "0 AGAWAM MA 2020.0 \n",
+ "1 AGAWAM MA 2020.0 \n",
+ "2 AGAWAM MA 2020.0 \n",
+ "3 AGAWAM MA 2020.0 \n",
+ "4 AGAWAM MA 2020.0 \n",
+ "... ... ... ... \n",
+ "4117067 KETCHIKAN AK 2020.0 \n",
+ "4117068 CRAIG AK 2020.0 \n",
+ "4117069 CRAIG AK 2020.0 \n",
+ "4117070 WRANGELL AK 2020.0 \n",
+ "4117071 WRANGELL AK 2020.0 \n",
+ "\n",
+ "[4117072 rows x 10 columns]"
]
},
- "execution_count": 141,
+ "execution_count": 352,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "sum(df3[\"var41m\"] == df3[\"var42um\"])"
+ "df_w1"
]
},
{
"cell_type": "code",
- "execution_count": 143,
+ "execution_count": 355,
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "50528"
- ]
- },
- "execution_count": 143,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py:1597: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " self.obj[key] = value\n",
+ "/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py:1676: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " self._setitem_single_column(ilocs[0], value, pi)\n",
+ "/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py:1738: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " self._setitem_single_column(loc, value[:, i].tolist(), pi)\n"
+ ]
}
],
"source": [
- "sum(df3[\"var41um\"] == df3[\"var42m\"])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 差不多一半的var41um = var42m,var41m = var42um"
+ "# df_w2: same layout as df_w1, but var1 / var2 hold the raw (not demeaned)\n",
+ "# Relationship_Dum and mi_to_zcta5; var3 / var4 stay the demeaned interactions.\n",
+ "# Select every column in one step and rename, which returns a new frame;\n",
+ "# writing into a bare column selection of df_org with .loc raises\n",
+ "# SettingWithCopyWarning.\n",
+ "df_w2 = df_org[[\n",
+ "    'lender_id', 'loan_id', 'match',\n",
+ "    'Relationship_Dum', 'mi_to_zcta5', 'var3', 'var4',\n",
+ "    'USPS_ZIP_PREF_CITY', 'USPS_ZIP_PREF_STATE', 'yearapproved',\n",
+ "]].rename(columns = {'Relationship_Dum': 'var1', 'mi_to_zcta5': 'var2'})"
]
},
{
"cell_type": "code",
- "execution_count": 164,
+ "execution_count": 356,
"metadata": {},
"outputs": [
{
@@ -5596,225 +5157,204 @@
" \n",
" \n",
" | \n",
- " var41m | \n",
- " var42um | \n",
+ " lender_id | \n",
+ " loan_id | \n",
+ " match | \n",
+ " var1 | \n",
+ " var2 | \n",
+ " var3 | \n",
+ " var4 | \n",
+ " USPS_ZIP_PREF_CITY | \n",
+ " USPS_ZIP_PREF_STATE | \n",
+ " yearapproved | \n",
"
\n",
" \n",
" \n",
" \n",
- " 5 | \n",
- " -0.096758 | \n",
- " 0.740014 | \n",
+ " 0 | \n",
+ " 1339.0 | \n",
+ " 27917.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 17.322745 | \n",
+ " -0.024180 | \n",
+ " 0.045413 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 6 | \n",
- " -0.096758 | \n",
- " 0.740014 | \n",
+ " 1 | \n",
+ " 1339.0 | \n",
+ " 97252.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 17.322745 | \n",
+ " 0.091452 | \n",
+ " 0.045413 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 7 | \n",
- " -0.096758 | \n",
- " 0.740014 | \n",
+ " 2 | \n",
+ " 1339.0 | \n",
+ " 78177.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 17.322745 | \n",
+ " 0.091452 | \n",
+ " -0.070219 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 8 | \n",
- " -0.096758 | \n",
- " 0.740014 | \n",
+ " 3 | \n",
+ " 3402.0 | \n",
+ " 27917.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 10.350215 | \n",
+ " -0.024180 | \n",
+ " 0.045413 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 9 | \n",
- " -0.096758 | \n",
- " 0.740014 | \n",
+ " 4 | \n",
+ " 3402.0 | \n",
+ " 78177.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 10.350215 | \n",
+ " 0.091452 | \n",
+ " -0.070219 | \n",
+ " AGAWAM | \n",
+ " MA | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
"
\n",
" \n",
- " 101674 | \n",
+ " 4117067 | \n",
+ " 1631.0 | \n",
+ " 34117.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.000000 | \n",
" -0.024180 | \n",
- " 0.184932 | \n",
+ " 0.045413 | \n",
+ " KETCHIKAN | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 101675 | \n",
- " -0.110904 | \n",
- " 0.848208 | \n",
+ " 4117068 | \n",
+ " 3923.0 | \n",
+ " 34122.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.000000 | \n",
+ " 1.069299 | \n",
+ " -0.347326 | \n",
+ " CRAIG | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 101676 | \n",
- " -0.110904 | \n",
- " 0.848208 | \n",
+ " 4117069 | \n",
+ " 1631.0 | \n",
+ " 34122.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 65.689996 | \n",
+ " -0.139812 | \n",
+ " 0.045413 | \n",
+ " CRAIG | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 101677 | \n",
- " -0.110904 | \n",
- " 0.848208 | \n",
+ " 4117070 | \n",
+ " 1631.0 | \n",
+ " 34131.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 61.713988 | \n",
+ " -0.139812 | \n",
+ " -0.070219 | \n",
+ " WRANGELL | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
- " 101678 | \n",
- " -0.110904 | \n",
- " 0.848208 | \n",
+ " 4117071 | \n",
+ " 1631.0 | \n",
+ " 34127.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 61.713988 | \n",
+ " -0.139812 | \n",
+ " 0.045413 | \n",
+ " WRANGELL | \n",
+ " AK | \n",
+ " 2020.0 | \n",
"
\n",
" \n",
"\n",
- "51158 rows × 2 columns
\n",
+ "4117072 rows × 10 columns
\n",
""
],
"text/plain": [
- " var41m var42um\n",
- "5 -0.096758 0.740014\n",
- "6 -0.096758 0.740014\n",
- "7 -0.096758 0.740014\n",
- "8 -0.096758 0.740014\n",
- "9 -0.096758 0.740014\n",
- "... ... ...\n",
- "101674 -0.024180 0.184932\n",
- "101675 -0.110904 0.848208\n",
- "101676 -0.110904 0.848208\n",
- "101677 -0.110904 0.848208\n",
- "101678 -0.110904 0.848208\n",
+ " lender_id loan_id match var1 var2 var3 var4 \\\n",
+ "0 1339.0 27917.0 0.0 0.0 17.322745 -0.024180 0.045413 \n",
+ "1 1339.0 97252.0 0.0 0.0 17.322745 0.091452 0.045413 \n",
+ "2 1339.0 78177.0 0.0 0.0 17.322745 0.091452 -0.070219 \n",
+ "3 3402.0 27917.0 0.0 0.0 10.350215 -0.024180 0.045413 \n",
+ "4 3402.0 78177.0 0.0 0.0 10.350215 0.091452 -0.070219 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "4117067 1631.0 34117.0 1.0 0.0 0.000000 -0.024180 0.045413 \n",
+ "4117068 3923.0 34122.0 0.0 0.0 0.000000 1.069299 -0.347326 \n",
+ "4117069 1631.0 34122.0 1.0 0.0 65.689996 -0.139812 0.045413 \n",
+ "4117070 1631.0 34131.0 1.0 0.0 61.713988 -0.139812 -0.070219 \n",
+ "4117071 1631.0 34127.0 1.0 0.0 61.713988 -0.139812 0.045413 \n",
"\n",
- "[51158 rows x 2 columns]"
- ]
- },
- "execution_count": 164,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 这里看一下不一样的长什么样\n",
- "df3[df3[\"var41m\"] != df3[\"var42um\"]][[\"var41m\",\"var42um\"]]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 165,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([[,\n",
- " ]], dtype=object)"
- ]
- },
- "execution_count": 165,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "df3[df3[\"var41m\"] != df3[\"var42um\"]][[\"var41m\",\"var42um\"]].hist()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 163,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 163,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD4CAYAAADsKpHdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAUUElEQVR4nO3dfYxd9X3n8fenOEssXJ6WZJbFaI2EVZWHhq5HwApFGsdsmW0Q0BVIrmhwtF5ZQkRKJVYFGmm71cqSUUXpEgq7VhxhHlpj0SJbIG/rGkZRJR6CWxLzEBanWMTAYhEMxdnAyuS7f9zfbK+H8czcefC94PdLurr3fs/5nfs9Y2Y+9/zOuZdUFZIk/VK/G5AkDQYDQZIEGAiSpMZAkCQBBoIkqVnU7wZm64wzzqhly5bNauzPfvYzTjrppPltaB7YV2/sq3eD2pt99WYufe3evfudqvrCpAur6lN5W7FiRc3Wk08+OeuxC8m+emNfvRvU3uyrN3PpC3iujvJ31SkjSRLgOQRJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQI+xV9dIQ2qPW+8z9dvfbwvr71vw1f78rr6bPAIQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkZkaBkGRfkj1Jnk/yXKudnmRnklfb/Wld69+WZG+SV5Jc0VVf0bazN8ldSdLqJyZ5uNWfSbJsnvdTkjSNXo4QVlbVRVU13J7fCuyqquXArvacJOcBq4HzgVHgniQntDH3AuuA5e022uprgYNVdS5wJ3D77HdJkjQbc5kyuhrY3B5vBq7pqm+pqo+q6jVgL3BxkjOBk6vqqaoq4P4JY8a39QiwavzoQZJ0bKTzt3malZLXgINAAf+jqjYmea+qTu1a52BVnZbkbuDpqnqw1TcBO4B9wIaqurzVvwzcUlVXJnkBGK2q/W3Zj4FLquqdCX2so3OEwdDQ0IotW7bMaqcPHTrEkiVLZjV2IdlXbwa1rwPvvs/bP+/Pa1941ilTLh/Un5l99WYufa1cuXJ310zPEWb69deXVdWbSb4I7EzyoynWneydfU1Rn2rMkYWqjcBGgOHh4RoZGZmy6aMZGxtjtmMXkn31ZlD7+vZD27hjT3++WX7f9SNTLh/Un5l99Wah+prRlFFVvdnuDwCPAhcDb7dpINr9gbb6fuDsruFLgTdbfekk9SPGJFkEnAK82/vuSJJma9pASHJSkl8efwz8BvACsB1Y01ZbA2xrj7cDq9uVQ+fQOXn8bFW9BXyQ5NJ2fuCGCWPGt3Ut8ETNZC5LkjRvZnJcOwQ82s7xLgL+rKr+Z5LvA1uTrAVeB64DqKoXk2wFXgIOAzdV1cdtWzcC9wGL6ZxX2NHqm4AHkuylc2Sweh72TZLUg2kDoar+AfjSJPWfAquOMmY9sH6S+nPABZPUP6QFiiSpP/yksiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJKAHgIhyQlJ/j7JY+356Ul2Jnm13Z/Wte5tSfYmeSXJFV31FUn2tGV3JUmrn5jk4VZ/JsmyedxHSdIM9HKE8E3g5a7ntwK7qmo5sKs9J8l5wGrgfGAUuCfJCW3MvcA6YHm7jbb6WuBgVZ0L3AncPqu9kSTN2owCIclS4KvAd7rKVwOb2+PNwDVd9S1V9VFVvQbsBS5OciZwclU9VVUF3D9hzPi2HgFWjR89SJKOjZkeIfwJ8HvAL7pqQ1X1FkC7/2KrnwX8pGu9/a12Vns8sX7EmKo6DLwP/POZ7oQkae4WTbdCkiuBA1W1O8nIDLY52Tv7mqI+1ZiJvayjM+XE0NAQY2NjM2jnkw4d
OjTrsQvJvnozqH0NLYabLzzcl9ee7ucxqD8z++rNQvU1bSAAlwFXJflN4PPAyUkeBN5OcmZVvdWmgw609fcDZ3eNXwq82epLJ6l3j9mfZBFwCvDuxEaqaiOwEWB4eLhGRkZmtJMTjY2NMduxC8m+ejOofX37oW3csWcmv1rzb9/1I1MuH9SfmX31ZqH6mnbKqKpuq6qlVbWMzsniJ6rqd4DtwJq22hpgW3u8HVjdrhw6h87J42fbtNIHSS5t5wdumDBmfFvXttf4xBGCJGnhzOVtzAZga5K1wOvAdQBV9WKSrcBLwGHgpqr6uI25EbgPWAzsaDeATcADSfbSOTJYPYe+JEmz0FMgVNUYMNYe/xRYdZT11gPrJ6k/B1wwSf1DWqBIkvrDTypLkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEnNtIGQ5PNJnk3ygyQvJvnDVj89yc4kr7b707rG3JZkb5JXklzRVV+RZE9bdleStPqJSR5u9WeSLFuAfZUkTWEmRwgfAV+pqi8BFwGjSS4FbgV2VdVyYFd7TpLzgNXA+cAocE+SE9q27gXWAcvbbbTV1wIHq+pc4E7g9rnvmiSpF9MGQnUcak8/124FXA1sbvXNwDXt8dXAlqr6qKpeA/YCFyc5Ezi5qp6qqgLunzBmfFuPAKvGjx4kScdGOn+bp1mp8w5/N3Au8KdVdUuS96rq1K51DlbVaUnuBp6uqgdbfROwA9gHbKiqy1v9y8AtVXVlkheA0ara35b9GLikqt6Z0Mc6OkcYDA0NrdiyZcusdvrQoUMsWbJkVmMXkn31ZlD7OvDu+7z98/689oVnnTLl8kH9mdlXb+bS18qVK3dX1fBkyxbNZANV9TFwUZJTgUeTXDDF6pO9s68p6lONmdjHRmAjwPDwcI2MjEzRxtGNjY0x27ELyb56M6h9ffuhbdyxZ0a/WvNu3/UjUy4f1J+ZffVmofrq6SqjqnoPGKMz9/92mwai3R9oq+0Hzu4athR4s9WXTlI/YkySRcApwLu99CZJmpuZXGX0hXZkQJLFwOXAj4DtwJq22hpgW3u8HVjdrhw6h87J42er6i3ggySXtvMDN0wYM76ta4EnaiZzWZKkeTOT49ozgc3tPMIvAVur6rEkTwFbk6wFXgeuA6iqF5NsBV4CDgM3tSkngBuB+4DFdM4r7Gj1TcADSfbSOTJYPR87J0mauWkDoap+CPz6JPWfAquOMmY9sH6S+nPAJ84/VNWHtECRJPWHn1SWJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJKaaQMhydlJnkzycpIXk3yz1U9PsjPJq+3+tK4xtyXZm+SVJFd01Vck2dOW3ZUkrX5ikodb/ZkkyxZgXyVJU5jJEcJh4Oaq+lXgUuCmJOcBtwK7qmo5sKs9py1bDZwPjAL3JDmhbeteYB2wvN1GW30tcLCqzgXuBG6fh32TJPVg0XQrVNVbwFvt8QdJXgbOAq4GRtpqm4Ex4JZW31JVHwGvJdkLXJxkH3ByVT0FkOR+4BpgRxvzX9q2HgHuTpKqqjnvoY5by259vC+ve/OFfXlZac7Sy9/cNpXzPeAC4PWqOrVr2cGqOi3J3cDTVfVgq2+i80d/H7Chqi5v9S8Dt1TVlUleAEaran9b9mPgkqp6Z8Lrr6NzhMHQ0NCKLVu2zGqnDx06xJIlS2Y1diHZV2+m62vPG+8fw27+ydBiePvnfXlpLjzr
lCmXf1r/Lfvls9jXypUrd1fV8GTLpj1CGJdkCfAXwO9W1T+26f9JV52kVlPUpxpzZKFqI7ARYHh4uEZGRqbpenJjY2PMduxCsq/eTNfX1/t2hHCYO/bM+FdrXu27fmTK5Z/Wf8t+Od76mtFVRkk+RycMHqqqv2zlt5Oc2ZafCRxo9f3A2V3DlwJvtvrSSepHjEmyCDgFeLfXnZEkzd5MrjIKsAl4uar+uGvRdmBNe7wG2NZVX92uHDqHzsnjZ9u5iA+SXNq2ecOEMePbuhZ4wvMHknRszeS49jLga8CeJM+32u8DG4CtSdYCrwPXAVTVi0m2Ai/RuULppqr6uI27EbgPWEznvMKOVt8EPNBOQL9L5yolSdIxNJOrjP6Wyef4AVYdZcx6YP0k9efonJCeWP+QFiiSpP7wk8qSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAmYQCEm+m+RAkhe6aqcn2Znk1XZ/Wtey25LsTfJKkiu66iuS7GnL7kqSVj8xycOt/kySZfO8j5KkGZjJEcJ9wOiE2q3ArqpaDuxqz0lyHrAaOL+NuSfJCW3MvcA6YHm7jW9zLXCwqs4F7gRun+3OSJJmb9pAqKrvAe9OKF8NbG6PNwPXdNW3VNVHVfUasBe4OMmZwMlV9VRVFXD/hDHj23oEWDV+9CBJOnbS+fs8zUqdaZzHquqC9vy9qjq1a/nBqjotyd3A01X1YKtvAnYA+4ANVXV5q38ZuKWqrmxTUaNVtb8t+zFwSVW9M0kf6+gcZTA0NLRiy5Yts9rpQ4cOsWTJklmNXUj21Zvp+trzxvvHsJt/MrQY3v55X16aC886Zcrln9Z/y375LPa1cuXK3VU1PNmyRXPq6pMme2dfU9SnGvPJYtVGYCPA8PBwjYyMzKJFGBsbY7ZjF5J99Wa6vr5+6+PHrpkuN194mDv2zPev1szsu35kyuWf1n/Lfjne+prtVUZvt2kg2v2BVt8PnN213lLgzVZfOkn9iDFJFgGn8MkpKknSApttIGwH1rTHa4BtXfXV7cqhc+icPH62qt4CPkhyaTs/cMOEMePbuhZ4omYyjyVJmlfTHtcm+XNgBDgjyX7gD4ANwNYka4HXgesAqurFJFuBl4DDwE1V9XHb1I10rlhaTOe8wo5W3wQ8kGQvnSOD1fOyZ5KknkwbCFX120dZtOoo668H1k9Sfw64YJL6h7RAkST1j59UliQBBoIkqTEQJEnA/H8OQVIfLZvmsxc3X3h4QT6fsW/DV+d9mzr2PEKQJAEGgiSpccpIC266aYzZWqjpD+l45RGCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCfCTyseNuXxa2E8ES8cHjxAkSYCBIElqnDKSNGdz/QLDuUxL+v9imD8eIUiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2XnR5jU12e5yeCJfWTgSDpU22un4GYylRv0j6Ln39wykiSBBynRwh73njfqRlJmmBgAiHJKPDfgBOA71TVhj63JElHtZBTVdO5b/SkBdnuQEwZJTkB+FPg3wHnAb+d5Lz+diVJx5eBCATgYmBvVf1DVf1fYAtwdZ97kqTjSqqq3z2Q5FpgtKr+Y3v+NeCSqvrGhPXWAeva018BXpnlS54BvDPLsQvJvnpjX70b1N7sqzdz6etfVdUXJlswKOcQMkntE0lVVRuBjXN+seS5qhqe63bmm331xr56N6i92VdvFqqvQZky2g+c3fV8KfBmn3qRpOPSoATC94HlSc5J8s+A1cD2PvckSceVgZgyqqrDSb4B/BWdy06/W1UvLuBLznnaaYHYV2/sq3eD2pt99WZB+hqIk8qSpP4blCkjSVKf
GQiSJMBAIMl/SlJJzuh3LwBJ/muSHyZ5PslfJ/mX/e4JIMkfJflR6+3RJKf2uyeAJNcleTHJL5L0/fLAJKNJXkmyN8mt/e4HIMl3kxxI8kK/e+mW5OwkTyZ5uf0bfrPfPQEk+XySZ5P8oPX1h/3uqVuSE5L8fZLH5nvbx3UgJDkb+LfA6/3upcsfVdWvVdVFwGPAf+5zP+N2AhdU1a8B/wu4rc/9jHsB+PfA9/rdyAB/Bct9wGi/m5jEYeDmqvpV4FLgpgH5eX0EfKWqvgRcBIwmubS/LR3hm8DLC7Hh4zoQgDuB32OSD8H1S1X9Y9fTkxiQ3qrqr6vqcHv6NJ3PivRdVb1cVbP9xPp8G8ivYKmq7wHv9ruPiarqrar6u/b4Azp/5M7qb1dQHYfa08+120D8HiZZCnwV+M5CbP+4DYQkVwFvVNUP+t3LREnWJ/kJcD2Dc4TQ7T8AO/rdxAA6C/hJ1/P9DMAfuE+DJMuAXwee6XMrwP+flnkeOADsrKqB6Av4EzpvYn+xEBsfiM8hLJQkfwP8i0kWfQv4feA3jm1HHVP1VVXbqupbwLeS3AZ8A/iDQeirrfMtOof6Dx2Lnmba14CY0Vew6EhJlgB/AfzuhCPkvqmqj4GL2rmyR5NcUFV9PQeT5ErgQFXtTjKyEK/xmQ6Eqrp8snqSC4FzgB8kgc70x98lubiq/ne/+prEnwGPc4wCYbq+kqwBrgRW1TH8AEsPP69+8ytYepTkc3TC4KGq+st+9zNRVb2XZIzOOZh+n5S/DLgqyW8CnwdOTvJgVf3OfL3AcTllVFV7quqLVbWsqpbR+UX+18ciDKaTZHnX06uAH/Wrl27tf2B0C3BVVf2ffvczoPwKlh6k825sE/ByVf1xv/sZl+QL41fRJVkMXM4A/B5W1W1VtbT9zVoNPDGfYQDHaSAMuA1JXkjyQzpTWgNxKR5wN/DLwM52Sex/73dDAEl+K8l+4N8Ajyf5q3710k66j38Fy8vA1gX+CpYZSfLnwFPAryTZn2Rtv3tqLgO+Bnyl/Tf1fHv3229nAk+238Hv0zmHMO+XeA4iv7pCkgR4hCBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSp+X/fOaSwIMSgtAAAAABJRU5ErkJggg==\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "df_exchange_pairs[\"value4\"].hist()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 168,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 168,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAASX0lEQVR4nO3df4xl9X3e8fcTcAhiLH4UM9kA6hJ1WwVYhYYRdWupmilRoG6UxVKx1rLcRSbaRMJSou4fWRKpcRWt5LQlUStC2k2xshWOpysSysqYJgQxQpFKCWuBlwVTNmFFYdGuYmPwuBbt4k//mEM9We7MvXdmztzh2/dLurrnnh/3PPvl8syZc8+9k6pCktSWH5p0AEnSxrPcJalBlrskNchyl6QGWe6S1KDzJx0A4PLLL6/t27ePtc13v/tdLrroon4CbTCz9sOs/TBrP/rIevTo0b+qqo8MXFhVE7/deOONNa4nnnhi7G0mxaz9MGs/zNqPPrICz9QKveppGUlqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJatCW+PoBSe+3ff8jE9nvvp1nmZ3InrWRPHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDRpa7kl+JMnTSZ5LcjzJv+zmX5bksSQvd/eXLtvm7iQnkryU5JY+/wGSpPcb5cj9HeAfVdVPAjcAtyb5KLAfeLyqdgCPd49Jci2wG7gOuBW4L8l5PWSXJK1gaLl3f81psXv4oe5WwC7gUDf/EHBbN70LmK+qd6rqFeAEcNNGhpYkrS5Lf4ZvyEpLR95Hgb8F/E5V/UqSb1fVJcvWebOqLk1yL/BUVT3Qzb8feLSqHjznOfcCewGmp6dvnJ+fHyv44uIiU1NTY20zKWbtR+tZj73+Vk9pVjd9IVxx2cUT2fe4Wn8NDDM3N3e0qmYGLRvp6weq6l3ghiSXAA8luX6V1TPoKQY850HgIMDMzEzNzs6OEuX/WVhYYNxtJsWs/Wg96x0T/PqBTzY8rpOy2VnHulqmqr4NLLB0Lv10km0A3f2ZbrXXgKuXbXYVcGq9QSVJoxvlapmPdEfsJLkQ+GngG8ARYE+32h7g4W76CLA7yQVJrgF2AE9vcG5J0ipGOS2zDTjUnXf/IeBwVX0lyX8DDie5E3gVuB2gqo4nOQy8AJwF7upO60iSNsnQcq+qrwN/d8D8bwI3r7DNAeDAutNJktbET6hKUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaNLTck1yd5IkkLyY5nuSXuvmfT/J6kme728eXbXN3khNJXkpyS5//AEnS+50/wjpngX1V9bUkHwaOJnmsW/bbVfVvlq+c5FpgN3Ad8GPAnyb521X17kYGlyStbOiRe1W9UVVf66a/A7wIXLnKJruA+ap6p6peAU4AN21EWEnSaFJVo6+cbAeeBK4H/jlwB/A28AxLR/dvJrkXeKqqHui2uR94tKoePOe59gJ7Aaanp2+cn58fK/ji4iJTU1NjbTMpZu1H61mPvf5WT2lWN30hXHHZxRPZ97hafw0MMzc3d7SqZgYtG+W0DABJpoA/BH65qt5O8rvAbwDV3d8DfBbIgM3f9xOkqg4CBwFmZmZqdnZ21CgALCwsMO42k2LWfrSe9Y79j/QTZoh9O8/yyYbHdVI2O+tIV8sk+RBLxf6lqvojgKo6XVXvVtX3gd/jB6deXgOuXrb5VcCpjYssSRpmlKtlAtwPvFhVv7Vs/rZlq30CeL6bPgLsTnJBkmuAHcDTGxdZkjTMKKdlPgZ8BjiW5Nlu3q8Cn0pyA0unXE4CvwBQVceTHAZeYOlKm7u8UkaSNtfQcq+qP2PwefSvrrLNAeDAOnJJktbB
T6hKUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaNLTck1yd5IkkLyY5nuSXuvmXJXksycvd/aXLtrk7yYkkLyW5pc9/gCTp/UY5cj8L7KuqnwA+CtyV5FpgP/B4Ve0AHu8e0y3bDVwH3Arcl+S8PsJLkgYbWu5V9UZVfa2b/g7wInAlsAs41K12CLitm94FzFfVO1X1CnACuGmDc0uSVpGqGn3lZDvwJHA98GpVXbJs2ZtVdWmSe4GnquqBbv79wKNV9eA5z7UX2AswPT194/z8/FjBFxcXmZqaGmubSTFrP1rPeuz1t3pKs7rpC+GKyy6eyL7H1fprYJi5ubmjVTUzaNn5oz5JkingD4Ffrqq3k6y46oB57/sJUlUHgYMAMzMzNTs7O2oUABYWFhh3m0kxaz9az3rH/kf6CTPEvp1n+WTD4zopm511pKtlknyIpWL/UlX9UTf7dJJt3fJtwJlu/mvA1cs2vwo4tTFxJUmjGOVqmQD3Ay9W1W8tW3QE2NNN7wEeXjZ/d5ILklwD7ACe3rjIkqRhRjkt8zHgM8CxJM92834V+AJwOMmdwKvA7QBVdTzJYeAFlq60uauq3t3o4JKklQ0t96r6MwafRwe4eYVtDgAH1pFLkrQOfkJVkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAYNLfckX0xyJsnzy+Z9PsnrSZ7tbh9ftuzuJCeSvJTklr6CS5JWNsqR++8Dtw6Y/9tVdUN3+ypAkmuB3cB13Tb3JTlvo8JKkkYztNyr6kngWyM+3y5gvqreqapXgBPATevIJ0lag1TV8JWS7cBXqur67vHngTuAt4FngH1V9WaSe4GnquqBbr37gUer6sEBz7kX2AswPT194/z8/FjBFxcXmZqaGmubSTFrP1rPeuz1t3pKs7rpC+H09yaya3ZeefFY67f+Ghhmbm7uaFXNDFp2/hqf83eB3wCqu78H+CyQAesO/OlRVQeBgwAzMzM1Ozs7VoCFhQXG3WZSzNqP1rPesf+RfsIMsW/nWe45ttZqWJ+Tn54da/3WXwPrsaarZarqdFW9W1XfB36PH5x6eQ24etmqVwGn1hdRkjSuNZV7km3LHn4CeO9KmiPA7iQXJLkG2AE8vb6IkqRxDf3dK8mXgVng8iSvAb8OzCa5gaVTLieBXwCoquNJDgMvAGeBu6rq3V6SS5JWNLTcq+pTA2bfv8r6B4AD6wklSVofP6EqSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoMsd0lq0NByT/LFJGeSPL9s3mVJHkvycnd/6bJldyc5keSlJLf0FVyStLJRjtx/H7j1nHn7gceragfwePeYJNcCu4Hrum3uS3LehqWVJI1kaLlX1ZPAt86ZvQs41E0fAm5bNn++qt6pqleAE8BNGxNVkjSqVNXwlZLtwFeq6vru8ber6pJly9+sqkuT3As8VVUPdPPvBx6tqgcHPOdeYC/A9PT0jfPz82MFX1xcZGpqaqxtJsWs/Wg967HX3+opzeqmL4TT35vIrtl55cVjrd/6a2CYubm5o1U1M2jZ+Ru6J8iAeQN/elTVQeAgwMzMTM3Ozo61o4WFBcbdZlLM2o/Ws96x/5F+wgyxb+dZ7jm20dUwmpOfnh1r/dZfA+ux1qtlTifZBtDdn+nmvwZcvWy9q4BTa48nSVqLtZb7
EWBPN70HeHjZ/N1JLkhyDbADeHp9ESVJ4xr6u1eSLwOzwOVJXgN+HfgCcDjJncCrwO0AVXU8yWHgBeAscFdVvdtTdknSCoaWe1V9aoVFN6+w/gHgwHpCSZLWx0+oSlKDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGnT+ejZOchL4DvAucLaqZpJcBvxnYDtwEvhkVb25vpiSpHFsxJH7XFXdUFUz3eP9wONVtQN4vHssSdpEfZyW2QUc6qYPAbf1sA9J0ipSVWvfOHkFeBMo4D9U1cEk366qS5at82ZVXTpg273AXoDp6ekb5+fnx9r34uIiU1NTa86+mczaj9azHnv9rZ7SrG76Qjj9vYnsmp1XXjzW+q2/BoaZm5s7uuysyV+zrnPuwMeq6lSSK4DHknxj1A2r6iBwEGBmZqZmZ2fH2vHCwgLjbjMpZu1H61nv2P9IP2GG2LfzLPccW281rM3JT8+OtX7rr4H1WNdpmao61d2fAR4CbgJOJ9kG0N2fWW9ISdJ41lzuSS5K8uH3poGfAZ4HjgB7utX2AA+vN6QkaTzr+d1rGngoyXvP8wdV9V+T/DlwOMmdwKvA7euPKUkax5rLvar+EvjJAfO/Cdy8nlCSpPXxE6qS1CDLXZIaZLlLUoMmczGrJG0h2zfhMwX7dp4d+NmFk1/4J73szyN3SWqQ5S5JDbLcJalBlrskNcg3VCVtGeO+sbnSm5TyyF2SmmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAZ5nbs0xEZ8qZTXY2uzeeQuSQ2y3CWpQZa7JDXIcpekBvmGqsayGX+xZpC+/lqN1Komyt3Cad+g/8ZegSKtzNMyktSg3o7ck9wK/FvgPOA/VtUX+trX/2/6/E3Fo2GpDb0cuSc5D/gd4B8D1wKfSnJtH/uSJL1fX6dlbgJOVNVfVtX/BuaBXT3tS5J0jlTVxj9p8k+BW6vq57vHnwH+XlV9btk6e4G93cO/A7w05m4uB/5qA+JuBrP2w6z9MGs/+sj6N6vqI4MW9HXOPQPm/bWfIlV1EDi45h0kz1TVzFq330xm7YdZ+2HWfmx21r5Oy7wGXL3s8VXAqZ72JUk6R1/l/ufAjiTXJPlhYDdwpKd9SZLO0ctpmao6m+RzwB+zdCnkF6vq+AbvZs2ndCbArP0waz/M2o9NzdrLG6qSpMnyE6qS1CDLXZIa9IEp9yT/Osk3knw9yUNJLllhvZNJjiV5NskzmxzzvQyjZr01yUtJTiTZv8kx38twe5LjSb6fZMXLtLbIuI6adSuM62VJHkvycnd/6QrrTWxch41Tlvy7bvnXk/zUZuY7J8uwrLNJ3urG8dkk/2JCOb+Y5EyS51dYvnljWlUfiBvwM8D53fRvAr+5wnongcu3elaW3mj+C+DHgR8GngOunUDWn2DpQ2QLwMwq622FcR2adQuN678C9nfT+7fa63WUcQI+DjzK0udWPgr89wn9dx8l6yzwlUnkOyfHPwR+Cnh+heWbNqYfmCP3qvqTqjrbPXyKpWvnt6QRs26Jr2ioqheratxPB0/EiFm3xLh2+zzUTR8CbptAhtWMMk67gP9US54CLkmybbODsnX+mw5VVU8C31pllU0b0w9MuZ/jsyz99BukgD9JcrT7ioNJWynrlcD/XPb4tW7eVrXVxnUlW2Vcp6vqDYDu/ooV1pvUuI4yTltlLEfN8feTPJfk0STXbU60sW3amG6pP9aR5E+BHx2w6Neq6uFunV8DzgJfWuFpPlZVp5JcATyW5BvdT9OtlnXoVzRslFGyjmDLjOuwpxgwb9PHdYyn2ZRxHWCUcdq0sRxilBxfY+l7VhaTfBz4L8COvoOtwaaN6ZYq96r66dWWJ9kD/Cxwc3UnsAY8x6nu/kyS
h1j6lW7D/2fZgKyb9hUNw7KO+BxbYlxHsCXGNcnpJNuq6o3u1+4zKzzHpozrAKOM01b5GpGhOarq7WXTX01yX5LLq2qrfanYpo3pB+a0TJb++MevAD9XVf9rhXUuSvLh96ZZemNz4LvWfRolKx+gr2jYKuM6oq0yrkeAPd30HuB9v3VMeFxHGacjwD/rrvD4KPDWe6eaNtnQrEl+NEm66ZtY6rZvbnrS4TZvTCf97vKoN+AES+eqnu1u/76b/2PAV7vpH2fpnfTngOMs/Sq/JbPWD945/x8sXQkwqayfYOlo4h3gNPDHW3hch2bdQuP6N4DHgZe7+8u22rgOGifgF4Ff7KbD0h/d+QvgGKtcTbUFsn6uG8PnWLqI4R9MKOeXgTeA/9O9Vu+c1Jj69QOS1KAPzGkZSdLoLHdJapDlLkkNstwlqUGWuyQ1yHKXpAZZ7pLUoP8Leq8EOSgMqH0AAAAASUVORK5CYII=\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "df_matched[\"var4\"].hist()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 169,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
+ " USPS_ZIP_PREF_CITY USPS_ZIP_PREF_STATE yearapproved \n",
+ "0 AGAWAM MA 2020.0 \n",
+ "1 AGAWAM MA 2020.0 \n",
+ "2 AGAWAM MA 2020.0 \n",
+ "3 AGAWAM MA 2020.0 \n",
+ "4 AGAWAM MA 2020.0 \n",
+ "... ... ... ... \n",
+ "4117067 KETCHIKAN AK 2020.0 \n",
+ "4117068 CRAIG AK 2020.0 \n",
+ "4117069 CRAIG AK 2020.0 \n",
+ "4117070 WRANGELL AK 2020.0 \n",
+ "4117071 WRANGELL AK 2020.0 \n",
+ "\n",
+ "[4117072 rows x 10 columns]"
]
},
- "execution_count": 169,
+ "execution_count": 356,
"metadata": {},
"output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD4CAYAAADsKpHdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAARa0lEQVR4nO3df6jd9X3H8edrZhMx9VdtU2dkWhpG/cFKc0ndSscNuhpamRYUUkqN1JFVFDbwD+MK62AEdKMTpNMtQzG2XVPp5gy1WeusQQb+aCzaGK0z1mBjQoI1taZrXWPf++N8sl3jufeec3PPD2+fDzic7/l8v5/veZ+Px/O638/3e05SVUiS9BujLkCSNB4MBEkSYCBIkhoDQZIEGAiSpGbRqAuYq1NPPbXOPPPMvvr87Gc/4/jjjx9MQfPMWgfDWgfDWgdjELU+/vjjL1fVu7qurKq35W358uXVrwcffLDvPqNirYNhrYNhrYMxiFqBbTXN56pTRpIkwHMIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEvI1/ukLSW5257r6RPO+dq94ePwWhmXmEIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJ6CEQkpyR5MEkzyTZkeTPWvspSe5P8ly7P3lKnxuS7EzybJKLprQvT7K9rbslSVr7sUm+1tofTXLmAF6rJGkGvRwhHAKuq6r3A+cD1yQ5G1gHPFBVy4AH2mPautXAOcAq4NYkx7R93QasBZa126rWfhVwoKreB9wM3DQPr02S1IdZA6Gq9lbV99rya8AzwOnAJcDGttlG4NK2fAmwqaper6oXgJ3AiiSnASdU1cNVVcBdR/Q5vK+vAxccPnqQJA1HOp/NPW7cmcp5CDgXeLGqTpqy7kBVnZzki8AjVfXl1n47sAXYBdxYVRe29o8A11fVxUmeAlZV1e627nngQ1X18hHPv5bOEQZLlixZvmnTpr5e7MGDB1m8eHFffUbFWgdjode6/aVXB1TNzM468ZgFPa6jMohaV65c+XhVTXRbt6jXnSRZDPwL8OdV9dMZ/oDvtqJmaJ+pz5sbqjYAGwAmJiZqcnJylqrfbOvWrfTbZ1SsdTAWeq1XrrtvMMXM4s5Vxy/ocR2VYdfa01VGSX6TThh8par+tTXva9NAtPv9rX03cMaU7kuBPa19aZf2N/VJsgg4EXil3xcjSZq7Xq4yCnA78ExV/d2UVZuBNW15DXDvlPbV7cqhs+icPH6sqvYCryU5v+3ziiP6HN7XZcB3qp+5LEnSUetlyujDwKeB7UmeaG1/AdwI3J3kKuBF4HKAqtqR5G7gaTpXKF1TVW+0flcDdwLH0TmvsKW13w58KclOOkcGq4/uZUmS+jVrIFTVf9J9jh/ggmn6rAfWd2nfRueE9JHtv6AFiiRpNPymsiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSM2sgJLkjyf4kT01p+6skLyV5ot0+NmXdDUl2Jnk2yUVT2pcn2d7W3ZIkrf3YJF9r7Y8mOXOeX6MkqQe9HCHcCazq0n5zVX2g3b4JkORsYDVwTutza5Jj2va3AWuBZe12eJ9XAQeq6n3AzcBNc3wtkqSjMGsgVNVDwCs97u8SYFNVvV5VLwA7gRVJTgNOqKqHq6qAu4BLp/TZ2Ja/Dlxw+OhBkjQ8i46i77VJrgC2AddV1QHgdOCRKdvsbm2/
bMtHttPufwRQVYeSvAq8E3j5yCdMspbOUQZLlixh69atfRV88ODBvvuMirUOxkKv9brzDg2mmFks9HEdlWHXOtdAuA34a6Da/ReAzwDd/rKvGdqZZd2bG6s2ABsAJiYmanJysq+it27dSr99RsVaB2Oh13rluvsGU8ws7lx1/IIe11EZdq1zusqoqvZV1RtV9Svgn4AVbdVu4Iwpmy4F9rT2pV3a39QnySLgRHqfopIkzZM5BUI7J3DYJ4DDVyBtBla3K4fOonPy+LGq2gu8luT8dn7gCuDeKX3WtOXLgO+08wySpCGadcooyVeBSeDUJLuBzwOTST5AZ2pnF/CnAFW1I8ndwNPAIeCaqnqj7epqOlcsHQdsaTeA24EvJdlJ58hg9Ty8LklSn2YNhKr6ZJfm22fYfj2wvkv7NuDcLu2/AC6frQ5J0mD5TWVJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRLQw7+YJkmz2f7Sq1y57r6RPPeuGz8+kuddiDxCkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKmZNRCS3JFkf5KnprSdkuT+JM+1+5OnrLshyc4kzya5aEr78iTb27pbkqS1H5vka6390SRnzvNrlCT1oJcjhDuBVUe0rQMeqKplwAPtMUnOBlYD57Q+tyY5pvW5DVgLLGu3w/u8CjhQVe8DbgZumuuLkSTN3ayBUFUPAa8c0XwJsLEtbwQundK+qaper6oXgJ3AiiSnASdU1cNVVcBdR/Q5vK+vAxccPnqQJA1POp/Ps2zUmcb5RlWd2x7/pKpOmrL+QFWdnOSLwCNV9eXWfjuwBdgF3FhVF7b2jwDXV9XFbSpqVVXtbuueBz5UVS93qWMtnaMMlixZsnzTpk19vdiDBw+yePHivvqMirUOxkKvdftLrw6ompktOQ72/XwkT815p5/Y1/YL/T0wm5UrVz5eVRPd1i2a12eCbn/Z1wztM/V5a2PVBmADwMTERE1OTvZV3NatW+m3z6hY62As9FqvXHffYIqZxXXnHeIL2+f746Q3uz412df2C/09cDTmepXRvjYNRLvf39p3A2dM2W4psKe1L+3S/qY+SRYBJ/LWKSpJ0oDNNRA2A2va8hrg3intq9uVQ2fROXn8WFXtBV5Lcn47P3DFEX0O7+sy4DvVyzyWJGlezXqMl+SrwCRwapLdwOeBG4G7k1wFvAhcDlBVO5LcDTwNHAKuqao32q6upnPF0nF0zitsae23A19KspPOkcHqeXllkqS+zBoIVfXJaVZdMM3264H1Xdq3Aed2af8FLVAkSaPjN5UlSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkpqjCoQku5JsT/JEkm2t7ZQk9yd5rt2fPGX7G5LsTPJskoumtC9v+9mZ5JYkOZq6JEn9m48jhJVV9YGqmmiP1wEPVNUy4IH2mCRnA6uBc4BVwK1Jjml9bgPWAsvabdU81CVJ6sMgpowuATa25Y3ApVPaN1XV61X1ArATWJHkNOCEqnq4qgq4a0ofSdKQpPMZPMfOyQvAAaCAf6yqDUl+UlUnTdnmQFWdnOSLwCNV9eXWfjuwBdgF3FhVF7b2jwDXV9XFXZ5vLZ0jCZYsWbJ806ZNfdV78OBBFi9e3P8LHQFrHYyFXuv2l14dUDUzW3Ic7Pv5SJ6a804/sa/tF/p7YDYrV658fMqMzpss
Osp9f7iq9iR5N3B/kh/MsG238wI1Q/tbG6s2ABsAJiYmanJysq9it27dSr99RsVaB2Oh13rluvsGU8wsrjvvEF/YfrQfJ3Oz61OTfW2/0N8DR+Oopoyqak+73w/cA6wA9rVpINr9/rb5buCMKd2XAnta+9Iu7ZKkIZpzICQ5Psk7Di8DHwWeAjYDa9pma4B72/JmYHWSY5OcRefk8WNVtRd4Lcn57eqiK6b0kSQNydEc4y0B7mlXiC4C/rmq/j3Jd4G7k1wFvAhcDlBVO5LcDTwNHAKuqao32r6uBu4EjqNzXmHLUdQlSZqDOQdCVf0Q+L0u7T8GLpimz3pgfZf2bcC5c61FknT0/KayJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLULBp1AZL0dnTmuvsG/hzXnXeIK7s8z64bPz6Q5/MIQZIEeIQg6W2u37/Up/urWx4hSJIajxCkeTZfc8v+Jath8whBkgSMUSAkWZXk2SQ7k6wbdT2S9OtmLKaMkhwD/D3wR8Bu4LtJNlfV06OtTPNhGJfndTOoS/OkhWosAgFYAeysqh8CJNkEXAIMJBBG9QEFo/uQGuRrHte57m6veVxrlcZBqmrUNZDkMmBVVf1Je/xp4ENVde0R260F1raHvws82+dTnQq8fJTlDou1Doa1Doa1DsYgav2dqnpXtxXjcoSQLm1vSaqq2gBsmPOTJNuqamKu/YfJWgfDWgfDWgdj2LWOy0nl3cAZUx4vBfaMqBZJ+rU0LoHwXWBZkrOS/BawGtg84pok6dfKWEwZVdWhJNcC3wKOAe6oqh0DeKo5TzeNgLUOhrUOhrUOxlBrHYuTypKk0RuXKSNJ0ogZCJIkYAEHQpK/TfKDJN9Pck+Sk6bZbleS7UmeSLJtyGUerqHXWsfi5z2SXJ5kR5JfJZn2krgxGdteax352CY5Jcn9SZ5r9ydPs91IxnW2MUrHLW3995N8cFi1dalltlonk7zaxvCJJH85ijpbLXck2Z/kqWnWD29cq2pB3oCPAova8k3ATdNstws4ddxrpXOy/XngvcBvAU8CZ4+o3vfT+WLgVmBihu3GYWxnrXVcxhb4G2BdW143Tu/ZXsYI+Biwhc73is4HHh3Rf/Neap0EvjGK+rrU+4fAB4Gnplk/tHFdsEcIVfXtqjrUHj5C57sNY6nHWv/v5z2q6n+Awz/vMXRV9UxV9fst8ZHosdZxGdtLgI1teSNw6QhqmE4vY3QJcFd1PAKclOS0YRfK+Pz37ElVPQS8MsMmQxvXBRsIR/gMnYTtpoBvJ3m8/TTGqE1X6+nAj6Y83t3axtm4je10xmVsl1TVXoB2/+5pthvFuPYyRuMyjr3W8ftJnkyyJck5wyltToY2rmPxPYS5SvIfwHu6rPpcVd3btvkccAj4yjS7+XBV7UnybuD+JD9oiT1utfb08x7zpZd6ezA2YzvbLrq0DWRsZ6q1j90MZVyP0MsYDfU9OoNe6vgend/0OZjkY8C/AcsGXdgcDW1c39aBUFUXzrQ+yRrgYuCCapNxXfaxp93vT3IPncPNef+fax5qHerPe8xWb4/7GIux7cHQxnamWpPsS3JaVe1tUwL7p9nHUMb1CL2M0bj8BM2sdVTVT6csfzPJrUlOrapx/NG7oY3rgp0ySrIKuB7446r672m2OT7JOw4v0zm52/VM/yD1Uitvs5/3GJex7dG4jO1mYE1bXgO85ehmhOPayxhtBq5oV8WcD7x6eApsyGatNcl7kqQtr6DzWfjjoVfam+GN66jPsA/qBuykM+/2RLv9Q2v/beCbbfm9dK5AeBLYQWeKYSxrrf+/2uC/6FxBMZJaWx2foPNXy+vAPuBbYzy2s9Y6LmML
vBN4AHiu3Z8yTuPabYyAzwKfbcuh8w9dPQ9sZ4Yr0Mag1mvb+D1J50KOPxhhrV8F9gK/bO/Vq0Y1rv50hSQJWMBTRpKk/hgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElS878h9hB21nYZgAAAAABJRU5ErkJggg==\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
}
],
"source": [
- "df_unmatched[\"var4\"].hist()"
+ "df_w2"
]
},
{