ai_platform_regression/house_price/house_price.ipynb

1226 lines
558 KiB
Plaintext
Raw Permalink Normal View History

2022-12-07 10:43:52 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import xgboost as xgb\n",
"from scipy import stats\n",
"from scipy.stats import norm"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Read the train and test splits from the local ./data directory.\n",
"train_data, test_data = map(pd.read_csv, ('./data/train.csv', './data/test.csv'))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Drop outliers using pandas boolean-mask indexing:\n",
"# rows with GrLivArea above 4000 whose SalePrice is below 300000.\n",
"outlier_mask = (train_data[\"GrLivArea\"] > 4000) & (train_data[\"SalePrice\"] < 300000)\n",
"train_data.drop(index=train_data.loc[outlier_mask].index, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Preview the first rows instead of echoing the whole training frame.\n",
"train_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2917, 81)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Stack the train and test rows into a single frame with a fresh 0..n-1 index.\n",
"all_data = pd.concat([train_data, test_data], ignore_index=True)\n",
"all_data.shape"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>特征名称</th>\n",
" <th>缺失率</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>PoolQC</td>\n",
" <td>0.995885</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>MiscFeature</td>\n",
" <td>0.962963</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Alley</td>\n",
" <td>0.937586</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Fence</td>\n",
" <td>0.807270</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>FireplaceQu</td>\n",
" <td>0.473251</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>LotFrontage</td>\n",
" <td>0.177641</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>GarageYrBlt</td>\n",
" <td>0.055556</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>GarageCond</td>\n",
" <td>0.055556</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>GarageType</td>\n",
" <td>0.055556</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>GarageFinish</td>\n",
" <td>0.055556</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>GarageQual</td>\n",
" <td>0.055556</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>BsmtFinType2</td>\n",
" <td>0.026063</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>BsmtExposure</td>\n",
" <td>0.026063</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>BsmtQual</td>\n",
" <td>0.025377</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>BsmtCond</td>\n",
" <td>0.025377</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>BsmtFinType1</td>\n",
" <td>0.025377</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>MasVnrArea</td>\n",
" <td>0.005487</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>MasVnrType</td>\n",
" <td>0.005487</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Electrical</td>\n",
" <td>0.000686</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 特征名称 缺失率\n",
"0 PoolQC 0.995885\n",
"1 MiscFeature 0.962963\n",
"2 Alley 0.937586\n",
"3 Fence 0.807270\n",
"4 FireplaceQu 0.473251\n",
"5 LotFrontage 0.177641\n",
"6 GarageYrBlt 0.055556\n",
"7 GarageCond 0.055556\n",
"8 GarageType 0.055556\n",
"9 GarageFinish 0.055556\n",
"10 GarageQual 0.055556\n",
"11 BsmtFinType2 0.026063\n",
"12 BsmtExposure 0.026063\n",
"13 BsmtQual 0.025377\n",
"14 BsmtCond 0.025377\n",
"15 BsmtFinType1 0.025377\n",
"16 MasVnrArea 0.005487\n",
"17 MasVnrType 0.005487\n",
"18 Electrical 0.000686"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fraction of missing values per training column, largest first.\n",
"# Built as one chain so no column is assigned on a filtered copy of the frame.\n",
"miss_value = (\n",
"    train_data.isnull().sum()\n",
"    .sort_values(ascending=False)\n",
"    .loc[lambda counts: counts > 0]  # keep only columns that have missing values\n",
"    .div(train_data.shape[0])  # missing count -> fraction of training rows\n",
"    .rename_axis('特征名称')  # header of the feature-name column\n",
"    .reset_index(name='缺失率')  # header of the missing-rate column\n",
")\n",
"miss_value"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Id 0\n",
"Foundation 0\n",
"Heating 0\n",
"SaleCondition 0\n",
"CentralAir 0\n",
" ... \n",
"SalePrice 1459\n",
"Fence 2346\n",
"Alley 2719\n",
"MiscFeature 2812\n",
"PoolQC 2908\n",
"Length: 81, dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of missing values per column of the combined frame, smallest first.\n",
"miss = all_data.isna().sum().sort_values()\n",
"miss"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"删除缺失值过多的特征列：在合并后的 all_data 中缺失数量超过 1000 的列（Id 与 SalePrice 除外）"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Alley\n",
"FireplaceQu\n",
"PoolQC\n",
"Fence\n",
"MiscFeature\n"
]
},
{
"data": {
"text/plain": [
"(2917, 76)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Drop feature columns whose missing-value count in all_data exceeds the threshold.\n",
"# 'Id' and 'SalePrice' are excluded from the candidates and are never dropped here.\n",
"MISSING_COUNT_THRESHOLD = 1000\n",
"all_cols = [x for x in all_data.columns if x not in ('Id', 'SalePrice')]\n",
"cols_to_drop = [col for col in all_cols if miss[col] > MISSING_COUNT_THRESHOLD]\n",
"for col in cols_to_drop:\n",
"    print(col)  # report each dropped column, in column order\n",
"# Single non-inplace drop: re-running the cell is a no-op once the columns are gone.\n",
"all_data = all_data.drop(columns=cols_to_drop)\n",
"all_data.shape"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns\n",
"from scipy.stats import norm\n",
"from scipy import stats"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n"
]
},
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='SalePrice', ylabel='Density'>"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAERCAYAAABxZrw0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4yUlEQVR4nO3deXyU1b348c83k0z2PYGEQIAAsu9hcwEqKuKGigtK1XptaWuvrbf9edVe77W2t3axettii6C1ahVRAYW6gCIIBQVEQFYje9hC9n2fOb8/ZoIBskxCnpnJ5Pt+vYZMnu185+HJNyfnOc85YoxBKaVU4AnydQBKKaWsoQleKaUClCZ4pZQKUJrglVIqQGmCV0qpAKUJXimlApTfJXgReVFEckVkdwcdL11EPhSRfSKyV0T6dMRxlVLK3/ldggdeAq7uwOO9AjxljBkMjAdyO/DYSinlt/wuwRtj1gOFjZeJSD8RWSkiX4jIv0RkkCfHEpEhQLAx5iP3scuNMZUdH7VSSvkfv0vwzVgIPGCMGQv8P+CvHu53EVAsIstEZLuIPCUiNsuiVEopPxLs6wBaIyJRwMXAWyLSsDjUve5m4JdN7HbCGDMd1+e7DBgNZANvAN8B/mZt1Eop5Xt+n+Bx/ZVRbIwZde4KY8wyYFkL+x4HdhhjDgGIyDvARDTBK6W6AL9vojHGlAKHReRWAHEZ6eHunwNxIpLs/v5yYK8FYSqllN/xuwQvIq8DnwEDReS4iNwHzAHuE5EvgT3ATE+OZYxx4Gqz/1hEdgECPG9N5Eop5V9EhwtWSqnA5Hc1eKWUUh3Dr26yJiUlmT59+vg6DKWU6jS++OKLfGNMclPr/CrB9+nTh61bt/o6DKWU6jRE5Ghz67SJRimlApQmeKWUClCa4JVSKkBpgldKqQClCV4ppQKUJnillApQmuCVUipAaYJXSqkApQleKaUClF89yaq+sWhzdpPL75yQ7uVIlFKdldbglVIqQGmCV0qpAGVpgheR/xCRPSKyW0ReF5EwK8tTSin1DcsSvIikAT8GMo0xwwAbMNuq8pRSSp3N6iaaYCBcRIKBCOCkxeUppZRysyzBG2NOAH8AsoFTQIkx5sNztxORuSKyVUS25uXlWRWOUkp1OZZ1kxSReFyTY/cFioG3ROTbxphXG29njFkILATIzMzUCWJb0VT3Se06qZRqipVNNFcAh40xecaYOmAZcLGF5SmllGrEygSfDUwUkQgREWAasM/C8pRSSjViZRv8ZmAJsA3Y5S5roVXlKaWUOpulQxUYYx4HHreyDKWUUk3TJ1mVUipAaYJXSqkApQleKaUClCZ4pZQKUJrglVIqQGmCV0qpAKUJXimlApQmeKWUClCa4JVSKkBpgldKqQClCV4ppQKUJnillApQmuCVUipAaYJXSqkApQleKaUClKXjwauOtT27iK1HiyirrufG0T3ISIrydUhKKT9mWQ1eRAaKyI5Gr1IRedCq8gLdyeIqlnxxnPLqepzG8PKnRzicX+HrsJRSfszKKfuyjDGjjDGjgLFAJfC2VeUFMqcxrPjyJBF2Gz+Y0o/vT84gNtzOW1uPUe90+jo8pZSf8lYb/DTgoDHmqJfKCyh7TpaSXVjJ1cNSCbfbiA4L4boRqRRX1fHF0SJfh6eU8lPeSvCzgdebWiEic0Vkq4hszcvL81I4ncuu48VEhwYzOj3uzLIB3aLonRDB2q9yqa5z+C44pZTfsjzBi4gduAF4q6n1xpiFxphMY0xmcnKy1eF0OnUOJ1mnyxjcI4YgkTPLRYTLB3ejtLqe93ae8mGESil/5Y0a/AxgmzHmtBfKCjgHcsupcxiG9og5b13/5CgSIu289cUxH0SmlPJ33kjwd9BM84xq3Z6TJYSFBDXZJVJEGJMez6ZDhWQXVPogOqWUP7M0wYtIJHAlsMzKcgKVMYavcsoYlBKDLUia3GZMehwisHTbcS9Hp5Tyd5YmeGNMhT
Em0RhTYmU5gaqgopbKWgd9EyOb3SYuws4l/ZJ4Z8cJjDFejE4p5e/0SVY/dqzQ1ezSKyGixe2uHZHKo8t2sfdUKUN7xJ5Zvmhz9nnb3jkhvWODVEr5LR2Lxo9lF1YSGhxEt5jQFre7akh3ggRW7s7xUmRKqc5AE7wfO1ZUSc/48LO6RzYlMSqUCX0TeX+XdpdUSn1DE7yfqq13klNS3WrzTIMZw1M4mFfB/tNlFkemlOosNMH7qRPFVTgNpMd7luCnD00B4KN9+riBUspFE7yfOl7kusHa08MafPeYMIalxbBmX66VYSmlOhFN8H7qdGkN0aHBRIV63tHp8kHd2ZZdRFFFrYWRKaU6C03wfup0aTXdY8PatM+0Qd1wGlj3tQ7appTSBO+XHE5Dblk13aNb7h55ruFpsSRFhfLxV9pMo5TSBO+XjhVWUucwdI9pWw0+KEj41sBk1mXlUu/QiUCU6uo0wfuhr3JcXR1T2thEAzDNPYSwTgSilNIE74e+dvdl7xbd9gR/6YBkQmzCGm2mUarL0wTvh7JOl5EQacce3Pb/nqjQYCb0TdR2eKWUJnh/lJVT1ub298YuH9SNA7nlFGp3SaW6NE3wfqbO4eRIfgXd2tiDprHLB3UDICuntKPCUkp1Qprg/czxoirqnYbkqPYn+D5JkaQnRHAgt7wDI1NKdTZWz+gUJyJLROQrEdknIpOsLC8QHM53JeWkKPsFHefSAUkcyq/A4dRJQJTqqqyuwf8JWGmMGQSMBPZZXF6ndyivAoCkC6jBA0wekERNvfPMpCFKqa7HsgQvIrHAZOBvAMaYWmNMsVXlBYrD+RXERYQQ0YYxaJoyqV8SAhzI02YapboqK2vwfYE84O8isl1EXnBPwn0WEZkrIltFZGteno6hcji/gr5Jzc/B6qnY8BB6xofr+PBKdWFWzskaDIwBHjDGbBaRPwGPAP/deCNjzEJgIUBmZmaXbzA+nF/BpH6JbdqnqblXAfp3i+aTrFyqah2E220dEZ5SqhOxsgZ/HDhujNns/n4JroSvmlFZW8+pkmoyOqAGDzCgWxQGOKjNNEp1SZYleGNMDnBMRAa6F00D9lpVXiA4ku+6Ido3KapDjtcrIYLQ4CDtLqlUF2V1L5oHgNdEZCcwCnjS4vI6tcP5rh40HdEGD2ALEjKSIvVGq1JdlJVt8BhjdgCZVpYRCBra0NdmucaP2XK4sF3j0DSlf7co9uWUUVBeQ+IFdr1USnUu+iSrHymsqCU6NLjDkjvAgG7RgHaXVKor0gTvR4oqaomPvLAnWM+VGGUnJiz4TPOPUqrr0ATvRwora0no4AQvIvRJiuRIfgXGdPleqEp1KZrg/YTDaSiprCM+omMTPECfxEhKq+spqqzr8GMrpfyXJng/UVxZi4EOr8HDN71ytJlGqa5FE7yfKKx0Tc4RHxnS4cdOjg4lPMTGEU3wSnUpmuD9RFGFq/kkwYImmqCGdvgCTfBKdSWa4P1EYUUtNhFiwju+Bg/QNzGCgopaTpdWW3J8pZT/0QTvJ4oqa4mLCCFIxJLj93G3w285XGjJ8ZVS/kcTvJ8orOj4LpKNpcaGYw8O0gSvVBeiCd5PFFrwkFNjtiChd0KEJniluhBN8H6gus5BVZ3DkhusjfVNiiTrdBlFFbWWlqOU8g+a4P1AYUVDF0lrE3yfRFc7/OdHtBavVFegCd4PFLn7wFvZBg/QM97VDq8JXqmuQRO8H2iowVvdRBNsC2JYjxi2ZRdbWo5Syj9ogvcDhRW1hIUEeWXe1DHp8ew6UUJtvdPyspRSvmVpgheRIyKyS0R2iMhWK8vqzIoqay2vvTcY0zue2none06WeKU8pZTveKMG/y1jzChjjM7s1IzCijrLb7A2GNs7HkCbaZTqAiydsk+1zuk0FFfWMjg12pLjF+flsOuzNZw6eoCKkiLWpiQQXxLJ2s8quO/SvpaUqZTyD1YneAN8KCIGWGCMWXjuBiIyF5gLkJ6ebnE4/i
e3rIZ6p+nwceDLiwv56I3n2b1pLQCJ3dOIjk+isLCQmn2f8sWuVdyz+z0efvhhhgwZ0qFlK6X8g0cJXkSWAX8DPjD
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<svg xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"376.245632pt\" height=\"272.794688pt\" viewBox=\"0 0 376.245632 272.794688\" xmlns=\"http://www.w3.org/2000/svg\" version=\"1.1\">\n <metadata>\n <rdf:RDF xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n <cc:Work>\n <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n <dc:date>2022-07-27T14:10:04.575558</dc:date>\n <dc:format>image/svg+xml</dc:format>\n <dc:creator>\n <cc:Agent>\n <dc:title>Matplotlib v3.5.2, https://matplotlib.org/</dc:title>\n </cc:Agent>\n </dc:creator>\n </cc:Work>\n </rdf:RDF>\n </metadata>\n <defs>\n <style type=\"text/css\">*{stroke-linejoin: round; stroke-linecap: butt}</style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 272.794688 \nL 376.245632 272.794688 \nL 376.245632 0 \nL 0 0 \nL 0 272.794688 \nz\n\" style=\"fill: none\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 34.240625 235.238438 \nL 369.040625 235.238438 \nL 369.040625 17.798438 \nL 34.240625 17.798438 \nz\n\" style=\"fill: #ffffff\"/>\n </g>\n <g id=\"patch_3\">\n <path d=\"M 69.800807 235.238438 \nL 75.182024 235.238438 \nL 75.182024 229.388559 \nL 69.800807 229.388559 \nz\n\" clip-path=\"url(#p4442583514)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_4\">\n <path d=\"M 75.182024 235.238438 \nL 80.563241 235.238438 \nL 80.563241 222.368704 \nL 75.182024 222.368704 \nz\n\" clip-path=\"url(#p4442583514)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_5\">\n <path d=\"M 80.563241 235.238438 \nL 85.944458 235.238438 \nL 85.944458 220.028752 \nL 80.563241 220.028752 \nz\n\" clip-path=\"url(#p4442583514)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n 
</g>\n <g id=\"patch_6\">\n <path d=\"M 85.944458 235.238438 \nL 91.325675 235.238438 \nL 91.325675 156.85006 \nL 85.944458 156.85006 \nz\n\" clip-path=\"url(#p4442583514)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_7\">\n <path d=\"M 91.325675 235.238438 \nL 96.706892 235.238438 \nL 96.706892 158.020036 \nL 91.325675 158.020036 \nz\n\" clip-path=\"url(#p4442583514)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_8\">\n <path d=\"M 96.706892 235.238438 \nL 102.088109 235.238438 \nL 102.088109 88.991464 \nL 96.706892 88.991464 \nz\n\" clip-path=\"url(#p4442583514)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_9\">\n <path d=\"M 102.088109 235.238438 \nL 107.469326 235.238438 \nL 107.469326 28.152723 \nL 102.088109 28.152723 \nz\n\" clip-path=\"url(#p4442583514)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_10\">\n <path d=\"M 107.469326 235.238438 \nL 112.850543 235.238438 \nL 112.850543 35.172578 \nL 107.469326 35.172578 \nz\n\" clip-path=\"url(#p4442583514)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_11\">\n <path d=\"M 112.850543 235.238438 \nL 118.23176 235.238438 \nL 118.23176 85.481537 \nL 112.850543 85.481537 \nz\n\" clip-path=\"url(#p4442583514)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_12\">\n <path d=\"M 118.23176 235.238438 \nL 123.612977 235.238438 \nL 123.612977 65.591948 \nL 118.23176 65.591948 \nz\n\" clip-path=\"url(#p4442583514)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_13\">\n <path d=\"M 123.612977 235.238438 \nL 128.994194 235.238438 \nL 128.994194 115.900907 \nL 123.612977 115.900907 \nz\n\" clip-path=\"url(#p4442583514)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_14\">\n <path d=\"M 128.994194 235.238438 \nL 134.375412 235.238438 \nL 134.375412 153.340132 \nL 128.994194 153.340132 \nz\n\" clip-path=\"url(#p4442583514)\" style=\"fill: #1f77b4; op
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Histogram + KDE of SalePrice with a fitted normal curve overlaid.\n",
"# sns.distplot is deprecated (it raises a FutureWarning), so the view is built from histplot instead.\n",
"sale_price = train_data.SalePrice.dropna()\n",
"ax = sns.histplot(sale_price, stat='density', kde=True)\n",
"# Maximum-likelihood mean / standard deviation of a normal fitted to the data.\n",
"mu, sigma = norm.fit(sale_price)\n",
"grid = np.linspace(*ax.get_xlim(), 200)\n",
"# Trailing ';' keeps the Line2D repr out of the cell output.\n",
"ax.plot(grid, norm.pdf(grid, mu, sigma), color='black');"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEWCAYAAACqitpwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAraklEQVR4nO3de5zVVb3/8deHm3LxCtRRZGbITEXtmE6Kei4WqOgpwUrBEBVnpMSMPJ2ulJYdT2V5PWWFM4DATiArxY6GQJmWQuIlFdTkp4yApoCiXBRk+Pz+WN8tm3HPnr1nvvv+fj4e+7H3d+3v5cOczv641vp819fcHRERkTh1K3YAIiJSeZRcREQkdkouIiISOyUXERGJnZKLiIjETslFRERip+Qi0gVm5mb2wU4eu8rMRrTz3b+a2bPp9jWzb5pZU+cizim+k81sTb6vI5VJyUWqTvRD/ZaZbTazV8xshpn1K3Zcqdz9AXc/tJ3v/sfdGwHMrC5KcD06cx0zu9DMWqO/xZtm9riZfaIT55lhZv/dmRikMim5SLX6pLv3A44B6oFvtd2hsz/YZeih6G+xL9AMzDOz/YobkpQ7JRepau6+FrgHOBLeHea61MyeA56L2i42s5Vm9pqZzTezA9uc5gwze97M1pvZj8ysW3TcwWb2BzPbEH2XMLN92xz7UTNbYWavm9l0M9szOrbdISkz+46ZzY4274/eN0a9j3+P4jwqZf/3mdlWMxvYwd9iJzAN6A0cnOa6h5vZfWa20cyWm9mZUftEYBzw1SiGuzJdR6qDkotUNTMbDJwBPJbSPBo4HhhqZh8Hvg+cAxwAtABz2pzmLELv5xhgFHBR8vTRsQcChwODge+0OXYccBrhx/xDpOlBdeDfovd93b2fu/8piu+8lH3OBRa7+7pMJ4p6ao3AZqLEmvJdT+Au4F7gfcBlQMLMDnX3qUACuCaK4ZM5/hukAim5SLW6w8w2An8G/gT8T8p333f319z9LcKP/zR3f9TdtwHfAE4ws7qU/X8Y7f8icAPhxxx3X+nuC919W/TDfh3w723i+Im7r3b314Crk8d20a3AuWZm0fZ4YFaG/YdFf4t/RNc/y93faLsP0A/4gbtvd/c/AL+LKV6pQNUypizS1mh3X9TOd6tTPh8IPJrccPfNZrYBGASsSrN/S3QMZvZ+4EbgX4G9CP8x93qGa717bFe4+1Iz2wqcbGYvAx8E5mc4ZIm7/0sHpz0QWB0NnSW1EP4OIu+hnovIe6UuFf4SUJvcMLO+QH9gbco+g1M+10THQOgNOXCUu+9NGKoydtfesZ2JNdWt0fXGA7e7+9s5nretl4DByfmkSA27/g5aXl12o+QiktltwAQzO9rM9iAkjKXuvipln6+Y2X7R/M1kYG7Uvhdh/uINMxsEfCXN+S81s4PMbH9gSsqx2VoH7AQ+0KZ9NmEu6DxgZo7nTGcpsJUwad/TzE4GPsmu+adX0sQgVUzJRSSDaOjs28CvgZcJE+9j2+x2J/AI8Djwf4RyXoDvEib534jaf5PmEr8kTJI/D/w/IKd7Rdx9K2Gu5i9RFdewqH01YTjPgQdyOWc719lOSCanA+uBm4Hz3f2ZaJdmQgHERjO7o6vXk/JneliYSGUys2nAS+6eawWaSJdpQl+kAkXVbJ8CPlLkUKRKaVhMpMKY2feAp4AfufsLxY5HqpOGxUREJHbquYiISOw05xIZMGCA19XVFTsMEZGy8sgjj6x39/esW6fkEqmrq2PZsmXFDkNEpKyYWUu6dg2LiYhI7JRcREQkdkouIiISOyUXERGJnZKLiIjETslFRKQKJRJQVwfduoX3RCLe86sUWUSkyiQSMHEibN0atltawjbAuHHxXEM9FxGRKjNlyq7EkrR1a2iPi5KLiEiFyHao68UXc2vvDCUXEZEKkBzqamkB911DXekSTE1N+nO0194ZSi4iIhUgl6Guq6+GPn12b+vTJ7THRclFRKQC5D
LUNW4cTJ0KtbVgFt6nTo1vMh9ULSYiUhFqasJQWLr2dMaNizeZtKWei4hIBSjEUFculFxERCpAIYa6cqFhMRGRCpHvoa5cqOciIiKxU3IREZHYKbmIiEjslFxERCR2Si4iIhI7JRcREYmdkouIiMROyUVERGKn5CIiIrFTchERkdgpuYiISOyUXEREJHZKLiIiEjslFxERiZ2Si4iIxE7JRUREYqfkIiIisVNyERGR2Cm5iIhI7JRcREQkdkouIiISOyUXEZEykkhAXR106xbeE4liR5Rej2IHICIi2UkkYOJE2Lo1bLe0hG2AceOKF1c66rmIiJSJKVN2JZakrVtDe6lRchERKVFth8BaWtLv9+KLhYwqOxoWExEpQemGwMzA/b371tQUNrZs5LXnYmb7mtntZvaMmT1tZieY2f5mttDMnove94v2NTO7ycxWmtkTZnZMynkuiPZ/zswuSGk/1syejI65ycwsak97DRGRcpFuCMw9JJhUffrA1VcXLq5s5XtY7Ebg9+5+GPDPwNPA14HF7n4IsDjaBjgdOCR6TQR+BiFRAFcCxwPHAVemJIufARenHDcyam/vGiIiZaG9oS53qK0NSaa2FqZOLb3JfMhjcjGzfYB/A5oB3H27u28ERgG3RrvdCoyOPo8CZnqwBNjXzA4ATgMWuvtr7v46sBAYGX23t7svcXcHZrY5V7priIiUhfaGumprYdUq2LkzvJdiYoH89lyGAOuA6Wb2mJk1mVlf4P3u/nK0zz+A90efBwGrU45fE7Vlal+Tpp0M1xARKQtXXx2GvFKV6hBYOvlMLj2AY4CfuftHgC20GZ6Kehxppqfik+kaZjbRzJaZ2bJ169blMwwRkZyMGxeGvMphCCydfCaXNcAad18abd9OSDavRENaRO+vRt+vBQanHH9Q1Jap/aA07WS4xm7cfaq717t7/cCBAzv1jxQRiVNq+fGUKaGnUupDYOnkLbm4+z+A1WZ2aNQ0HFgBzAeSFV8XAHdGn+cD50dVY8OAN6KhrQXAqWa2XzSRfyqwIPruTTMbFlWJnd/mXOmuISJSspLlxy0tYeI+eQd+qS7xkol5uqLpuE5udjTQBPQCngcmEBLaPKAGaAHOcffXogTxE0LF11Zggrsvi85zEfDN6LRXu/v0qL0emAH0Bu4BLnN3N7P+6a6RKdb6+npftmxZTP9yEZHctXejZHISvxSZ2SPuXv+e9nwml3Ki5CIixdatW/qbJM3C0Fgpai+5aPkXEZES0V75cSnegd8RJRcRkRJR7uXHqZRcRESKILUqbMCA8Bo/Hnr3hv79y7P8OJUWrhQRKbC2i1Ju2LDruw0bQm9l1qzyTCpJ6rmIiBRYukUpU5XqM1pyoeQiIlJg2Tx/pRSf0ZILJRcRkQLLpvqrHCvEUim5iIgUUCIBmzdn3qdcK8RSKbmIiBRIciI/dQIfoG/fyqgQS6VqMRGRAmlvIn/AgNJd3qWz1HMRESmQ9ibpy33yPh0lFxGRAqmk5V06ouQiIlIglbS8S0eUXERECqTcny6ZC03oi4gU0LhxlZlM2lLPRUSkAFIXqqyrK8+nS+ZCPRcRkTxru1Bl8vHFULm9GPVcRETyLN39LZWwOGUmSi4iInlWTfe3JOWUXMysm5ntna9gREQqTSIR5lnSqcT7W5I6TC5m9ksz29vM+gJPASvM7Cv5D01EpLwl51paW9/7XaXe35KUTc9lqLu/CYwG7gGGAOPzGZSISCVoby2x7t0r9/6WpGySS08z60lILvPd/R3A8xqViEgFaGlJ375zZ2UnFsguufwCWAX0Be43s1rgzXwGJSJS7hKJcBd+OpU815LUYXJx95vcfZC7n+FBC/CxAsQmIlJ2EomwhP5554GnGeMxq+y5lqRsJvTfb2bNZnZPtD0UuCDvkYmIlJlEAiZMeO/DwFK5V/6QGGQ3LDYDWAAcGG3/HfhSnuIRESlbU6bAO+9k3qe2tjCxFFs2yWWAu88DdgK4+w
4gTWGdiEj1SiTan8BPqvTy41TZJJctZtafqELMzIYBb+Q1KhGRMpK8nyWTaig/TpXNwpX/CcwHDjazvwADgc/kNSo
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<svg xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"408.053125pt\" height=\"277.314375pt\" viewBox=\"0 0 408.053125 277.314375\" xmlns=\"http://www.w3.org/2000/svg\" version=\"1.1\">\n <metadata>\n <rdf:RDF xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n <cc:Work>\n <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n <dc:date>2022-07-27T14:10:13.679452</dc:date>\n <dc:format>image/svg+xml</dc:format>\n <dc:creator>\n <cc:Agent>\n <dc:title>Matplotlib v3.5.2, https://matplotlib.org/</dc:title>\n </cc:Agent>\n </dc:creator>\n </cc:Work>\n </rdf:RDF>\n </metadata>\n <defs>\n <style type=\"text/css\">*{stroke-linejoin: round; stroke-linecap: butt}</style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 277.314375 \nL 408.053125 277.314375 \nL 408.053125 0 \nL 0 0 \nL 0 277.314375 \nz\n\" style=\"fill: none\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 66.053125 239.758125 \nL 400.853125 239.758125 \nL 400.853125 22.318125 \nL 66.053125 22.318125 \nz\n\" style=\"fill: #ffffff\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path id=\"m3b543db31e\" d=\"M 0 0 \nL 0 3.5 \n\" style=\"stroke: #000000; stroke-width: 0.8\"/>\n </defs>\n <g>\n <use xlink:href=\"#m3b543db31e\" x=\"95.305093\" y=\"239.758125\" style=\"stroke: #000000; stroke-width: 0.8\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- 3 -->\n <g transform=\"translate(87.934 254.356562)scale(0.1 -0.1)\">\n <defs>\n <path id=\"DejaVuSans-2212\" d=\"M 678 2272 \nL 4684 2272 \nL 4684 1741 \nL 678 1741 \nL 678 2272 \nz\n\" transform=\"scale(0.015625)\"/>\n <path id=\"DejaVuSans-33\" d=\"M 2597 2516 \nQ 3050 2419 3304 2112 \nQ 
3559 1806 3559 1356 \nQ 3559 666 3084 287 \nQ 2609 -91 1734 -91 \nQ 1441 -91 1130 -33 \nQ 819 25 488 141 \nL 488 750 \nQ 750 597 1062 519 \nQ 1375 441 1716 441 \nQ 2309 441 2620 675 \nQ 2931 909 2931 1356 \nQ 2931 1769 2642 2001 \nQ 2353 2234 1838 2234 \nL 1294 2234 \nL 1294 2753 \nL 1863 2753 \nQ 2328 2753 2575 2939 \nQ 2822 3125 2822 3475 \nQ 2822 3834 2567 4026 \nQ 2313 4219 1838 4219 \nQ 1578 4219 1281 4162 \nQ 984 4106 628 3988 \nL 628 4550 \nQ 988 4650 1302 4700 \nQ 1616 4750 1894 4750 \nQ 2613 4750 3031 4423 \nQ 3450 4097 3450 3541 \nQ 3450 3153 3228 2886 \nQ 3006 2619 2597 2516 \nz\n\" transform=\"scale(0.015625)\"/>\n </defs>\n <use xlink:href=\"#DejaVuSans-2212\"/>\n <use xlink:href=\"#DejaVuSans-33\" x=\"83.789062\"/>\n </g>\n </g>\n </g>\n <g id=\"xtick_2\">\n <g id=\"line2d_2\">\n <g>\n <use xlink:href=\"#m3b543db31e\" x=\"141.354437\" y=\"239.758125\" style=\"stroke: #000000; stroke-width: 0.8\"/>\n </g>\n </g>\n <g id=\"text_2\">\n <!-- 2 -->\n <g transform=\"translate(133.983343 254.356562)scale(0.1 -0.1)\">\n <defs>\n <path id=\"DejaVuSans-32\" d=\"M 1228 531 \nL 3431 531 \nL 3431 0 \nL 469 0 \nL 469 531 \nQ 828 903 1448 1529 \nQ 2069 2156 2228 2338 \nQ 2531 2678 2651 2914 \nQ 2772 3150 2772 3378 \nQ 2772 3750 2511 3984 \nQ 2250 4219 1831 4219 \nQ 1534 4219 1204 4116 \nQ 875 4013 500 3803 \nL 500 4441 \nQ 881 4594 1212 4672 \nQ 1544 4750 1819 4750 \nQ 2544 4750 2975 4387 \nQ 3406 4025 3406 3419 \nQ 3406 3131 3298 2873 \nQ 3191 2616 2906 2266 \nQ 2828 2175 2409 1742 \nQ 1991 1309 1228 531 \nz\n\" transform=\"scale(0.015625)\"/>\n </defs>\n <use xlink:href=\"#DejaVuSans-2212\"/>\n <use xlink:href=\"#DejaVuSans-32\" x=\"83.789062\"/>\n </g>\n </g>\n </g>\n <g id=\"xtick_3\">\n <g id=\"line2d_3\">\n <g>\n <use xlink:href=\"#m3b543db31e\" x=\"187.403781\" y=\"239.758125\" style=\"strok
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Normal Q-Q plot of SalePrice drawn through pyplot; the returned value is kept in `rest`.\n",
"rest = stats.probplot(train_data['SalePrice'], dist='norm', plot=plt)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n"
]
},
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='SalePrice', ylabel='Density'>"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA8T0lEQVR4nO3dd3xT973/8ddHsuW9F8Y22+yNGQESSLBZSSGrzexMbtreJLdtett0Jm260/6ae9smaUmaNs3NHiRACAQTCBCW2XsvGzA24L0tfX9/SLgGbGyD5SNbn+fjoYelc46kNwLx8Xec7xFjDEoppfyXzeoASimlrKWFQCml/JwWAqWU8nNaCJRSys9pIVBKKT8XYHWAtoqPjze9evWyOoZSSnUqmzdvPmuMSWhqX6crBL169WLTpk1Wx1BKqU5FRI43t89rXUMi8pKIFIjIrmb23yciO0Rkp4isFZER3sqilFKqed4cI/gnMPMK+48CU4wxw4BfAPO8mEUppVQzvNY1ZIxZJSK9rrB/baOH64FUb2VRSinVPF+ZNfQA8JHVIZRSyh9ZPlgsIjfiLgSTr3DMQ8BDAD169OigZEop5R8sbRGIyHDgRWCuMeZcc8cZY+YZYzKMMRkJCU3OflJKKXWVLCsEItIDeA/4ojHmgFU5lFLK33mta0hEXgemAvEikgc8CQQCGGP+CjwBxAHPiQhAvTEmw1t5lFJKNc2bs4buaWH/g8CD3np/pZRSrWP5YLFSXdVrG05ccf+943Xig/INvjJ9VCmllEW0ECillJ/TQqCUUn5OxwiU39O+fOXvtEWglFJ+TguBUkr5OS0ESinl57QQKKWUn9PBYqVacKXBZB1IVl2BtgiUUsrPaSFQSik/p4VAKaX8nBYCpZTyczpYrFQz6pwuCspqECApMhi7TayOpJRXaCFQqgm55yt5c1Mu5ytqAUiMCOLmYcmkJ0VYnEyp9qddQ0pdIq+oknmrjuByGT4/JpU7RqfgdBn+ufYY6480e2ltpTotbREo1YjTZXhvy0lCg+w8cmM/QoPcX5HhqdG8vvEEC7afwukyTOoXb3FSpdqPtgiUamTNobPkl1Yzd0T3hiIAEGi3cd/4ngzpHsmHO0+zLbfIwpRKtS8tBEp51DtdrDl0lv5J4QzuHnXZfrtNuCsjjT7xYbyzOY8DZ8osSKlU+9NCoJTH7lOlVNTUM7Fv890+AXYb90/oSVJkMK9uOM7WE9oyUJ2fFgKlPNYfPUdsmIN+ieFXPC440M5XJvYiIjiQr/0zh0MF5R2UUCnv0EKgFHCmtJrj5yoZ3zsWm7R8vkBEcCBfndgLu0340t83cKRQi4HqvLQQKAXsOV0KwIi06FY/Jy48iH9+dRzV9S7u/Os6tmg3keqktBAoBew9XUpqTAiRwYFtet7QlCje/eZEwoLs3PW3dby89hjGGC+lVMo7tBAov1daVUdeURWDkyOv6vm948NY+Mhkrk9P4MkFu3n09a2U19S3c0qlvEcLgfJ7+/Ld00AHXmUhAIgOdfDilzL43owBLN55mjl/WUN+aXV7RVTKq7xWCETkJREpEJFdzewXEfmTiBwSkR0iMtpbWZS6kn35pcSEBpIUEXRNr2OzCQ/f2I//e3A8pVX1PL/ykM4oUp2CN1sE/wRmXmH/LCDdc3sIeN6LWZRqUr3TxdGzFaQnRSCtmC3UGhP7xrP4vyYTG+bglfXHdEaR8nleW2vIGLNKRHpd4ZC5wL+Me2RtvYhEi0iyMea0tzIpdak9p0upqXfROz7sml+ruLiYnJwc8vPzCQoKYnqCgyXOEF7beIJHb0onKqRtA9FKdRQrF51LAXIbPc7zbLusEIjIQ7hbDfTooRcLV+3nwmqi11IIcnNz+eMf/8jy5cupq6u7aF9IRBTO1Am8EWLnP27s36pzFJTqaJ1i9VFjzDxgHkBGRobOzVPtZsOR88SHB7V52iiAMYZXX32Vp59+moCAAO6991
5mzJhBz549qamp4fn5K9m2+mMObltK/olNLA9+jKyJGV74Uyh1bawsBCeBtEaPUz3blOoQTpdh49HzVzVbyLhcfPTKX9i84kOmTJnCz3/+c5KSki46ZuCYSQwcM4kju7fy+nO/Y/3ff05y4OMMHTu5vf4ISrULK6ePLgC+5Jk9NAEo0fEB1ZH2nCqlrKaePm3sFjLGsPhff2bzig958MEHee655y4rAo31GTKKO7/3/3BFJPH+87/m0I6ca42uVLvy5vTR14F1wAARyRORB0TkGyLyDc8hi4EjwCHgBeA/vZVFqaZcWBKiZ1xom5736fuvsGXlYibdchff/e53sdla/hoN6JVCn9u/iyuyG2//5RecOrL/qjIr5Q3enDV0Twv7DfCwt95fqZZsyy0mISKoTbN59m9dx+oPXmXE5CxuvOOrbXq/KUN6sPfUg4Sse5Z3nv0lD8zMICYmpq2xlWp3emax8lvbc4sZmRbd6vMHSs4V8MG839OtZz9mfenRNp93kBYbSkpSInLdVykvKeLxxx/XdYmUT9BCoPxSSWUdR85WMLKVq40aY1j0j//B5XJyx8M/JtDR9rOQRYSJfeM4H5TEyJu/xOrVq3nvvffa/DpKtbdOMX1UqWv12oYTFz2+cJnJ8xW1xIQ6Wnz+tlVLObJrM7O++Aixid2vOsfQlCgW7jhFRbfxjB27g9/+9rdMmjSJbt26XfVrKnWttEWg/FJuUSUCpESHtHhsVXkpy9/+Oz36D2XMjTdf0/sG2m0M7R7F7tPl/OTJn1NXV8fTTz99Ta+p1LXSQqD8Ut75KhIigggOtLd47Mr3/kV1RTkzv/gw0ooZQi0ZkRZNbb2LfeVBPPjgg3z00Ufk5OiUUmUdLQTK7xhjyCuuIjWm5dZAwcljbF7xIRk33UJSWp92ef/e8WFEhQTywdaTPPDAAyQnJ/Ob3/wGl8vVLq+vVFtpIVB+p6y6noqaerq3olto5bsv4wgO5obbvthu728TYVhKFKsPnsVpC+Sxxx5j7969fPTRR+32Hkq1hRYC5XdOFVcB0D3qyoXg5OF97N+ylgkz7yQ0/OovWtOUQcmR1DpdfLq/kNmzZ9O/f3/+9Kc/XbZonVIdQQuB8junSqoQIDkq+IrHffr+K4RGRDF++m3tnqFnXCixYQ4+3pOPzWbjW9/6FidOnGDBggXt/l5KtUQLgfI7p4qriQsPIugKA8X5Jw5zeOcmxk+/jaCQti1B0Ro2EaYNTOSTfQXUOV3ceOONDBkyhHnz5uF0Otv9/ZS6Ei0Eyu+cKq6ie/SVWwPrPnoHR3AIY266xWs5pg/pRll1PRuOnEdEeOihhzhx4gRLlizx2nsq1RQtBMqvVNTUU1xVd8XxgeKzZ9i9YSWjp8wiJCzCa1km94vHEWBj5f4CADIzM+nbty8vvPCCLj2hOpQWAuVXTpV4BoqvMGNow9L3EBHGz7jdq1lCHHbG9Yrl0wOFANhsNr72ta+xf/9+Nm7c6NX3VqoxXWJC+ZX8kmqg+YHiyvJStn76EUMn3ERkbEKLr3fp0hVtNaV/Ar9avNfTXRXC7Nmz+f3vf88rr7zC+PHjr+m1lWotbREov5JfUk1EcABhQU3/DrRp+ULqamu4bvadHZLnhv7uYrP6oLtVEBwczBe+8AVWrFjByZN6wT7VMbQQKL9yprSabpFNtwZcTidbVnxIv+FjSUzp1SF5+ieF0y0ymFUHzjZsu/vuuxERXnvttQ7JoJQWAuU3nC5DQVlNs4XgwLYNlBWfY/Q1LizXFiLCDf3jWX2wkHqne4mJ5ORkMjMzeffdd6mqquqwLMp/aSFQfuNceQ31LkO3ZsYHtqxYRGRsPOnDx3Vorhv6J1BaXc/2vJKGbffddx8lJSUsXLiwQ7Mo/6SFQPmN/FL3QHFSEy2CosJ8Du/ewsgbZmGzt7wiaXua3C8em8Aqz+whgIyMDAYMGMCrr76qU0mV12khUH4jv7Qam0BixOVXF9v66WIEYd
QNMzo8V3Sog+Gp0Q3TSMHdZXTvvfdy4MABduzY0eGZlH/RQqD8Rn5JNfHhQQTYL/5n76yvY9uqpaSPHNeqKaPt5bU
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<svg xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"385.78125pt\" height=\"262.19625pt\" viewBox=\"0 0 385.78125 262.19625\" xmlns=\"http://www.w3.org/2000/svg\" version=\"1.1\">\n <metadata>\n <rdf:RDF xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n <cc:Work>\n <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n <dc:date>2022-07-27T14:17:35.675100</dc:date>\n <dc:format>image/svg+xml</dc:format>\n <dc:creator>\n <cc:Agent>\n <dc:title>Matplotlib v3.5.2, https://matplotlib.org/</dc:title>\n </cc:Agent>\n </dc:creator>\n </cc:Work>\n </rdf:RDF>\n </metadata>\n <defs>\n <style type=\"text/css\">*{stroke-linejoin: round; stroke-linecap: butt}</style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 262.19625 \nL 385.78125 262.19625 \nL 385.78125 0 \nL 0 0 \nL 0 262.19625 \nz\n\" style=\"fill: none\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 43.78125 224.64 \nL 378.58125 224.64 \nL 378.58125 7.2 \nL 43.78125 7.2 \nz\n\" style=\"fill: #ffffff\"/>\n </g>\n <g id=\"patch_3\">\n <path d=\"M 82.40189 224.64 \nL 89.76071 224.64 \nL 89.76071 220.498286 \nL 82.40189 220.498286 \nz\n\" clip-path=\"url(#pb6b4546f6a)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_4\">\n <path d=\"M 89.76071 224.64 \nL 97.119531 224.64 \nL 97.119531 221.878857 \nL 89.76071 221.878857 \nz\n\" clip-path=\"url(#pb6b4546f6a)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_5\">\n <path d=\"M 97.119531 224.64 \nL 104.478351 224.64 \nL 104.478351 224.64 \nL 97.119531 224.64 \nz\n\" clip-path=\"url(#pb6b4546f6a)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_6\">\n <path d=\"M 104.478351 224.64 \nL 
111.837172 224.64 \nL 111.837172 224.64 \nL 104.478351 224.64 \nz\n\" clip-path=\"url(#pb6b4546f6a)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_7\">\n <path d=\"M 111.837172 224.64 \nL 119.195993 224.64 \nL 119.195993 221.878857 \nL 111.837172 221.878857 \nz\n\" clip-path=\"url(#pb6b4546f6a)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_8\">\n <path d=\"M 119.195993 224.64 \nL 126.554813 224.64 \nL 126.554813 219.117714 \nL 119.195993 219.117714 \nz\n\" clip-path=\"url(#pb6b4546f6a)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_9\">\n <path d=\"M 126.554813 224.64 \nL 133.913634 224.64 \nL 133.913634 216.356571 \nL 126.554813 216.356571 \nz\n\" clip-path=\"url(#pb6b4546f6a)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_10\">\n <path d=\"M 133.913634 224.64 \nL 141.272454 224.64 \nL 141.272454 217.737143 \nL 133.913634 217.737143 \nz\n\" clip-path=\"url(#pb6b4546f6a)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_11\">\n <path d=\"M 141.272454 224.64 \nL 148.631275 224.64 \nL 148.631275 216.356571 \nL 141.272454 216.356571 \nz\n\" clip-path=\"url(#pb6b4546f6a)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_12\">\n <path d=\"M 148.631275 224.64 \nL 155.990096 224.64 \nL 155.990096 190.125714 \nL 148.631275 190.125714 \nz\n\" clip-path=\"url(#pb6b4546f6a)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_13\">\n <path d=\"M 155.990096 224.64 \nL 163.348916 224.64 \nL 163.348916 174.939429 \nL 155.990096 174.939429 \nz\n\" clip-path=\"url(#pb6b4546f6a)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_14\">\n <path d=\"M 163.348916 224.64 \nL 170.707737 224.64 \nL 170.707737 177.700571 \nL 163.348916 177.700571 \nz\n\" clip-path=\"url(#pb6b4546f6a)\" style=\"fill: #1f77b4; opacity: 0.4\"/>\n </g>\n <g id=\"patch_15\">\n <path d=\"M 170.707737 224.64 \nL 178.066557 224.64 \nL 178.066557 162
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Histogram + KDE of log1p(SalePrice) with a fitted normal curve overlaid.\n",
"# sns.distplot is deprecated (it raises a FutureWarning), so the view is built from histplot instead.\n",
"log_sale_price = np.log1p(train_data.SalePrice.dropna())\n",
"ax = sns.histplot(log_sale_price, stat='density', kde=True)\n",
"# Maximum-likelihood mean / standard deviation of a normal fitted to the transformed data.\n",
"mu, sigma = norm.fit(log_sale_price)\n",
"grid = np.linspace(*ax.get_xlim(), 200)\n",
"# Trailing ';' keeps the Line2D repr out of the cell output.\n",
"ax.plot(grid, norm.pdf(grid, mu, sigma), color='black');"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAyjElEQVR4nO3debyWc/7H8dentJcpFUNaTCXKmmMdS5R9yTqkbCHKmBgiZaQSEWMwSGiiblnGMsnWQsLg50RSSYWiMkSShNT5/P64rrvuTvd2Tufeznk/H4/zOPd13dfy6VT351zfz3cxd0dERKS0arkOQERE8pMShIiIxKUEISIicSlBiIhIXEoQIiISlxKEiIjEpQQhVZ6ZuZm1Kee5i8ysS4L3DjGzT+Ida2YDzOyh8kVcpvg6mdmSTN9HKiclCClI4Yftz2a22sy+NrMxZlY/13HFcvc33L1dgvdudveLAMysVZiktirPfczsfDNbH/4sVpnZTDM7oRzXGWNmN5UnBqmclCCkkJ3o7vWBjkARcH3pA8r7oVuA3g5/Fg2Bh4EnzaxRbkOSQqcEIQXP3ZcCLwG7wYYmo8vMbAGwINx3sZktNLMVZjbBzHYodZnjzOwzM/vWzEaYWbXwvNZm9qqZfRe+FzGzhqXO3dfM5prZ92b2LzOrHZ6bsHnHzG40s3Hh5vTw+8rwKeCwMM7dY47f1szWmFnTFD+LEmA0UAdoHee+u5rZNDNbaWZzzOykcH8voDtwTRjD88nuI1WDEoQUPDNrDhwHfBCz+2Rgf6C9mR0B3AL8CdgeWAw8XuoypxA8hXQEugI9o5cPz90B2BVoDtxY6tzuwNEEH8g7E+dJJoVDw+8N3b2+u78extcj5phuwFR3X57sQuET00XAasLkGPNeDeB5YBKwLXA5EDGzdu4+CogAt4UxnFjGP4NUQkoQUsieM7OVwJvA68DNMe/d4u4r3P1ngg/w0e7+vrv/ClwHHGhmrWKOvzU8/gvgHwQfyLj7Qnef7O6/hh/OfwcOKxXHP939S3dfAQyLnruFHgG6mZmF2+cAY5Mcf0D4s/hfeP9T3P2H0scA9YHh7r7W3V8FJlZQvFIJVZX2WamcTnb3KQne+zLm9Q7A+9ENd19tZt8BzYBFcY5fHJ6DmW0H3AUcAjQg+KXq+yT32nDulnD3d81sDdDJzL4C2gATkpzyjrsfnOKyOwBfhs1QUYsJfg4im9EThFRWsdMULwNaRjfMrB7QGFgac0zzmNctwnMgeCpxYHd335qg2cfYVKJzyxNrrEfC+50D/NvdfynjdUtbBjSP1ldCLdj4c9DUzrIJJQipCsYDF5jZXmZWi+BD/113XxRzTD8zaxTWM/oCT4T7GxC05/9gZs2AfnGuf5mZ7Whm2wADY85N13KgBPhDqf3jCGojPYBHy3jNeN4F1hAUomuYWSfgRDbWY76OE4NUYUoQUumFzVB/A54GviIoJp9V6rD/ADOAmcALBF1FAQYTFK5/CPc/E+cWjxEUfj8DPgXKNJbA3dcQ1C7eCnsXHRDu/5KgacyBN8pyzQT3WUuQEI4FvgXuA85193nhIQ8TFPVXmtlzW3o/KXymBYNE8peZjQaWuXtZe0aJbDEVqUXyVNjL6lRg7xyHIlWUmphE8pCZDQVmAyPc/fNcxyNVk5qYREQkLj1BiIhIXJWqBtGkSRNv1apVrsMQESkYM2bM+Nbd487xVakSRKtWrSguLs51GCIiBcPMFid6T01MIiISlxKEiIjEpQQhIiJxKUGIiEhcShAiIhKXEoSISIGKRKBVK6hWLfgeiVTs9ZUgRETyRFk+8CMR6NULFi8G9+B7r14VmyQyliDMbLSZfWNms2P2DTWzWWY208wmxVk4Pnrc+vCYmWaWbBUtEZFKoawf+AMHwpo1m+5bsybYX1EyNheTmR1KsNDKo+6+W7hva3dfFb7+C9De3S+Nc+5qd69f1nsWFRW5BsqJSCFq1SpICq
W1bAmLFm2+v1q1IJGUZgYlJZvvT8TMZrh7Ubz3MvYE4e7TgRWl9q2K2ayHljgUEQHgiy/Ktr9Fi7LtL4+s1yDMbJiZfQl0B25IcFhtMys2s3fM7OQU1+sVHlu8fPnyig5XRCQryvqBP2wY1K276b66dYP9FSXrCcLdB7p7cyAC/DnBYS3DR56zgX+YWesk1xvl7kXuXtS0adz5pkRE8l5ZP/C7d4dRo4ImKLPg+6hRwf6KksteTBHgtHhvuPvS8PtnwDS0opaIVHLl+cDv3j2oT5SUBN8rMjlAlmdzNbO27r4g3OwKzItzTCNgjbv/amZNgD8Ct2UxTBGRnOjeveI/5LdExhKEmY0HOgFNzGwJMAg4zszaASXAYuDS8Ngi4FJ3vwjYFXjAzEoInnCGu/vcTMUpIiLxVaolR9XNVUSkbHLSzVVERAqbEoSISJZleg6lilKplhwVEcl30Sk1otNkRKfUgPwqUIOeIEREMqr000LfvhU8h1JJCXz22RZGGZ8ShIhIhsSbgO+77+Ifm2hKjaRefRX22Qc6dYKff96SUONSghARyZB4M64mUqY5lD7+GE44ATp3hu+/h1tvhVq1yhVjMkoQIiIVKLZJKd7srPGkPYfS119D796w++7wxhtBYpg3D7p1C25YwVSkFhGpIKUL0Ik0bgz16wfNSi1aBMkhaYH655/hzjth+PDgde/eMGgQNGlSofGXpgQhIlJB0mlSqlsX7rorzR5LJSVB1hkwAJYsga5dg6eGdu0qJN5U1MQkIlJOsc1JTZokb1Iq84yr06bBvvvCuefCdtsF2889l7XkAHqCEBEpl9LNSYl6J0HiVeHimjcPrrkGnn8emjeHceMyVmNIRU8QIiJlFInAeeel10Mp7QL08uVw2WWw227B08Itt8AnnwSPGzlIDqAnCBGRMok+Oaxfn97xKZuUfv45KErcfHOQcS65JChAb7tthcS7JZQgRETKoCxjG1q2TJIcSkpg/PigAP3FF3DiiXDbbbDLLhUW65ZSE5OISBmkO+I5adPS9Omw//7Qo0dQ3X71VZgwIa+SAyhBiIiUSaIRz2bB+IakvZXmz4dTToHDDoP//Q8efRTeew8OPzzjcZeHmphERNIUicDq1Zvvr1s3Ra3h229h8GAYORJq1w4eLa64IjgxjylBiIikIdEo6caNkwx8++UXuPvuICGsXh1c4MYbg3ENBUAJQkQkhWi31ng9l+rXj5Mc3OHxx+G664LRc8cfHxSg27fPSrwVRTUIEZEEIpGghtyjR+JurZsVrd98Ew44AM4+Gxo1gilTYOLEgksOoAQhIrKJ6PQZZkFiSDZCGmKK1gsWwGmnwSGHwNKlMGYMzJgRTMldoNTEJCISSnc21qi6dWFE/+/giqFw773BmgxDh8Jf/5r3Beh0KEGIiITiLQeaSJ1qv/JG13vo2P8m+PFHuOiioKfS73+f2SCzKKNNTGY22sy+MbPZMfuGmtksM5tpZpPMbIcE555nZgvCr/MyGaeISJ8+qZuTAs45NZ/gm8a70HF8PzjoIJg1Cx54oFIlB8h8DWIMcEypfSPcfQ933wuYCNxQ+iQz2wYYBOwP7AcMMrNGmQ1VRKqaaBHaDO6/P/XxB/EW7211II+uPYv6228NkybBiy9Chw6ZDzYHMpog3H06sKLUvlUxm/UAj3Pq0cBkd1/h7t8Dk9k80YiIlFufPukVoQH+wKc8xem8xcEUNf0CRo+G99+HI4/MfKA5lJMahJkNA84FfgDijTFvBnwZs70k3CciUm6RSFBnSK8pCRqxgr8xlMu4F6tVEwYMhquugnr1MhtonshJN1d3H+juzYEI8OctuZaZ9TKzYjMrXr58ecUEKCKVTiQCF1yQXnKoya9cyd/5lNb0tbupedF51Ph8AdxwQ5VJDpD7cRAR4LQ4+5cCzWO2dwz3bcbdR7l7kbsXNW3aNAMhikihi0SClTt/+y3Vkc7pPMVc2vN3rmJZ8/2p9uFMePBB2H
77LESaX7KeIMysbcxmV2BenMNeAY4ys0ZhcfqocJ+ISJlEp8koKUl+3AG8zVv8kaf4Ez9Rj3uOf5kOX7wMu++enUD
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<svg xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"392.14375pt\" height=\"277.314375pt\" viewBox=\"0 0 392.14375 277.314375\" xmlns=\"http://www.w3.org/2000/svg\" version=\"1.1\">\n <metadata>\n <rdf:RDF xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n <cc:Work>\n <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n <dc:date>2022-07-27T14:17:39.713631</dc:date>\n <dc:format>image/svg+xml</dc:format>\n <dc:creator>\n <cc:Agent>\n <dc:title>Matplotlib v3.5.2, https://matplotlib.org/</dc:title>\n </cc:Agent>\n </dc:creator>\n </cc:Work>\n </rdf:RDF>\n </metadata>\n <defs>\n <style type=\"text/css\">*{stroke-linejoin: round; stroke-linecap: butt}</style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 277.314375 \nL 392.14375 277.314375 \nL 392.14375 0 \nL 0 0 \nL 0 277.314375 \nz\n\" style=\"fill: none\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 50.14375 239.758125 \nL 384.94375 239.758125 \nL 384.94375 22.318125 \nL 50.14375 22.318125 \nz\n\" style=\"fill: #ffffff\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path id=\"m8a0c02aead\" d=\"M 0 0 \nL 0 3.5 \n\" style=\"stroke: #000000; stroke-width: 0.8\"/>\n </defs>\n <g>\n <use xlink:href=\"#m8a0c02aead\" x=\"79.395718\" y=\"239.758125\" style=\"stroke: #000000; stroke-width: 0.8\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- 3 -->\n <g transform=\"translate(72.024625 254.356562)scale(0.1 -0.1)\">\n <defs>\n <path id=\"DejaVuSans-2212\" d=\"M 678 2272 \nL 4684 2272 \nL 4684 1741 \nL 678 1741 \nL 678 2272 \nz\n\" transform=\"scale(0.015625)\"/>\n <path id=\"DejaVuSans-33\" d=\"M 2597 2516 \nQ 3050 2419 3304 2112 \nQ 3559 
1806 3559 1356 \nQ 3559 666 3084 287 \nQ 2609 -91 1734 -91 \nQ 1441 -91 1130 -33 \nQ 819 25 488 141 \nL 488 750 \nQ 750 597 1062 519 \nQ 1375 441 1716 441 \nQ 2309 441 2620 675 \nQ 2931 909 2931 1356 \nQ 2931 1769 2642 2001 \nQ 2353 2234 1838 2234 \nL 1294 2234 \nL 1294 2753 \nL 1863 2753 \nQ 2328 2753 2575 2939 \nQ 2822 3125 2822 3475 \nQ 2822 3834 2567 4026 \nQ 2313 4219 1838 4219 \nQ 1578 4219 1281 4162 \nQ 984 4106 628 3988 \nL 628 4550 \nQ 988 4650 1302 4700 \nQ 1616 4750 1894 4750 \nQ 2613 4750 3031 4423 \nQ 3450 4097 3450 3541 \nQ 3450 3153 3228 2886 \nQ 3006 2619 2597 2516 \nz\n\" transform=\"scale(0.015625)\"/>\n </defs>\n <use xlink:href=\"#DejaVuSans-2212\"/>\n <use xlink:href=\"#DejaVuSans-33\" x=\"83.789062\"/>\n </g>\n </g>\n </g>\n <g id=\"xtick_2\">\n <g id=\"line2d_2\">\n <g>\n <use xlink:href=\"#m8a0c02aead\" x=\"125.445062\" y=\"239.758125\" style=\"stroke: #000000; stroke-width: 0.8\"/>\n </g>\n </g>\n <g id=\"text_2\">\n <!-- 2 -->\n <g transform=\"translate(118.073968 254.356562)scale(0.1 -0.1)\">\n <defs>\n <path id=\"DejaVuSans-32\" d=\"M 1228 531 \nL 3431 531 \nL 3431 0 \nL 469 0 \nL 469 531 \nQ 828 903 1448 1529 \nQ 2069 2156 2228 2338 \nQ 2531 2678 2651 2914 \nQ 2772 3150 2772 3378 \nQ 2772 3750 2511 3984 \nQ 2250 4219 1831 4219 \nQ 1534 4219 1204 4116 \nQ 875 4013 500 3803 \nL 500 4441 \nQ 881 4594 1212 4672 \nQ 1544 4750 1819 4750 \nQ 2544 4750 2975 4387 \nQ 3406 4025 3406 3419 \nQ 3406 3131 3298 2873 \nQ 3191 2616 2906 2266 \nQ 2828 2175 2409 1742 \nQ 1991 1309 1228 531 \nz\n\" transform=\"scale(0.015625)\"/>\n </defs>\n <use xlink:href=\"#DejaVuSans-2212\"/>\n <use xlink:href=\"#DejaVuSans-32\" x=\"83.789062\"/>\n </g>\n </g>\n </g>\n <g id=\"xtick_3\">\n <g id=\"line2d_3\">\n <g>\n <use xlink:href=\"#m8a0c02aead\" x=\"171.494406\" y=\"239.758125\" style=\"stroke: #0
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"rest = stats.probplot(np.log1p(train_data.SalePrice), plot=plt)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0, 76)"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Shape of the slice of rows whose GarageYrBlt is missing (row count comes first).\n",
"garage_year_missing = all_data['GarageYrBlt'].isna()\n",
"all_data.loc[garage_year_missing].shape"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Garage build years later than 2022 are blanked out so they are handled as missing values.\n",
"future_year = all_data['GarageYrBlt'] > 2022\n",
"na_index = all_data.loc[future_year].index\n",
"all_data.loc[na_index, 'GarageYrBlt'] = None"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(160, 76)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Re-count the rows with a missing GarageYrBlt after the out-of-range years were blanked.\n",
"garage_year_missing = all_data['GarageYrBlt'].isna()\n",
"all_data.loc[garage_year_missing].shape"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2917, 76)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows without a garage year fall back to the year the house was built.\n",
"# The filled column is assigned back explicitly: fillna(inplace=True) on the attribute accessor is a\n",
"# chained in-place call, which pandas deprecates and which has no effect under copy-on-write.\n",
"all_data['GarageYrBlt'] = all_data['GarageYrBlt'].fillna(all_data['YearBuilt'])\n",
"\n",
"# Turn calendar years into ages relative to 2022.\n",
"# NOTE: not idempotent - running this cell a second time computes 2022 - age and restores the years.\n",
"year_cols = ['YearBuilt', 'YearRemodAdd', 'GarageYrBlt']\n",
"for col in year_cols:\n",
"    all_data[col] = 2022 - all_data[col]\n",
"all_data.shape"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2917, 76)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Categorical garage / basement / veneer columns: a missing value is replaced by the literal string \"None\".\n",
"cols1 = [\"GarageQual\", \"GarageCond\", \"GarageFinish\", \"GarageType\", \"BsmtExposure\", \"BsmtCond\", \"BsmtQual\", \"BsmtFinType2\", \"BsmtFinType1\", \"MasVnrType\"]\n",
"# Assign the filled columns back instead of calling all_data[col].fillna(..., inplace=True):\n",
"# that form mutates a temporary selection, is deprecated, and does nothing under copy-on-write.\n",
"all_data[cols1] = all_data[cols1].fillna(\"None\")\n",
"all_data.shape"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2917, 76)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Numeric area / count columns: a missing value is replaced by 0.\n",
"cols2 = [\"MasVnrArea\", \"BsmtUnfSF\", \"TotalBsmtSF\", \"GarageCars\", \"BsmtFinSF2\", \"BsmtFinSF1\", \"GarageArea\"]\n",
"# Assign the filled columns back instead of calling all_data[col].fillna(..., inplace=True):\n",
"# that form mutates a temporary selection, is deprecated, and does nothing under copy-on-write.\n",
"all_data[cols2] = all_data[cols2].fillna(0)\n",
"all_data.shape"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2917, 76)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Missing lot frontage is replaced by the column mean (computed over train and test rows together).\n",
"# Results are assigned back rather than using fillna(inplace=True) on a column selection, which is\n",
"# deprecated in pandas and has no effect under copy-on-write.\n",
"all_data[\"LotFrontage\"] = all_data[\"LotFrontage\"].fillna(all_data[\"LotFrontage\"].mean())\n",
"\n",
"# Remaining sparse gaps are filled with each column's most frequent value.\n",
"cols3 = [\"MSZoning\", \"BsmtFullBath\", \"BsmtHalfBath\", \"Utilities\", \"Functional\", \"Electrical\", \"KitchenQual\", \"SaleType\",\"Exterior1st\", \"Exterior2nd\"]\n",
"for col in cols3:\n",
"    all_data[col] = all_data[col].fillna(all_data[col].mode()[0])\n",
"all_data.shape"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Partition the columns by dtype; the identifier and the target are never treated as numeric features.\n",
"numeric_cols = [\n",
"    name\n",
"    for name in all_data.select_dtypes(exclude=['object']).columns\n",
"    if name not in ('Id', 'SalePrice')\n",
"]\n",
"object_cols = list(all_data.select_dtypes(include=['object']).columns)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# log1p-transform every numeric feature, then min-max scale it using the\n",
"# minimum and maximum taken over all rows of all_data.\n",
"for col in numeric_cols:\n",
"    logged = np.log1p(all_data[col])\n",
"    low, high = logged.min(), logged.max()\n",
"    all_data[col] = (logged - low) / (high - low)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# One-hot encode the object-dtype columns; every other column is carried over as is.\n",
"dataset = pd.get_dummies(data=all_data, columns=object_cols)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# The model is trained on log1p(SalePrice); predictions are mapped back with expm1 further down.\n",
"dataset['SalePrice'] = np.log1p(dataset['SalePrice'])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1458, 280)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows that carry a SalePrice form the labelled pool used for fitting.\n",
"has_price = dataset['SalePrice'].notna()\n",
"train = dataset.loc[has_price].copy()\n",
"train.shape"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1459, 280)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows without a SalePrice are the ones to predict.\n",
"price_missing = dataset['SalePrice'].isna()\n",
"test = dataset.loc[price_missing].copy()\n",
"test.shape"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Every encoded column except the row identifier and the target is a model input.\n",
"feature_cols = [name for name in dataset.columns if name not in ('Id', 'SalePrice')]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Hold out 12% of the labelled rows for validation / early stopping.\n",
"# The split is taken from `dataset` rather than from `train` itself, so re-running this cell does not\n",
"# carve a second 12% out of an already reduced training set. On a first run the result is the same:\n",
"# the selected rows and their order match the frame `train` is built from.\n",
"labelled = dataset[dataset['SalePrice'].notna()]\n",
"train, valid = train_test_split(labelled, test_size=0.12, shuffle=True, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Feature matrix and (log-scale) target for each of the three splits.\n",
"X_train, X_valid, X_test = (frame[feature_cols] for frame in (train, valid, test))\n",
"Y_train, Y_valid, Y_test = (frame['SalePrice'] for frame in (train, valid, test))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# Wrap the train / validation splits for the native XGBoost API.\n",
"dtrain = xgb.DMatrix(data=X_train, label=Y_train)\n",
"dvalid = xgb.DMatrix(data=X_valid, label=Y_valid)\n",
"# Both sets are evaluated every round and logged under the names given here.\n",
"watchlist = [(dtrain, 'train'), (dvalid, 'eval')]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Booster configuration for the log-price regression.\n",
"params = {\n",
"    'objective': 'reg:squarederror',\n",
"    'booster': 'gbtree',\n",
"    'eta': 0.05,\n",
"    'max_depth': 15,\n",
"    'subsample': 0.7,\n",
"    'colsample_bytree': 0.7,\n",
"    'eval_metric': ['rmse'],\n",
"    # 'silent' is not an XGBoost parameter any more: it only produced the 'might not be used' warning\n",
"    # at training time. 'verbosity': 0 is the supported way to keep the library quiet.\n",
"    'verbosity': 0,\n",
"    'seed': 10,\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[10:33:47] WARNING: ../src/learner.cc:627: \n",
"Parameters: { \"silent\" } might not be used.\n",
"\n",
" This could be a false alarm, with some parameters getting used by language bindings but\n",
" then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
" but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
"\n",
"\n",
"[0]\ttrain-rmse:10.95491\teval-rmse:10.96235\n",
"[1]\ttrain-rmse:10.40916\teval-rmse:10.41661\n",
"[2]\ttrain-rmse:9.89034\teval-rmse:9.89780\n",
"[3]\ttrain-rmse:9.39722\teval-rmse:9.40469\n",
"[4]\ttrain-rmse:8.92885\teval-rmse:8.93633\n",
"[5]\ttrain-rmse:8.48375\teval-rmse:8.49124\n",
"[6]\ttrain-rmse:8.06123\teval-rmse:8.06873\n",
"[7]\ttrain-rmse:7.66021\teval-rmse:7.66773\n",
"[8]\ttrain-rmse:7.27851\teval-rmse:7.28504\n",
"[9]\ttrain-rmse:6.91608\teval-rmse:6.92262\n",
"[10]\ttrain-rmse:6.57212\teval-rmse:6.57776\n",
"[11]\ttrain-rmse:6.24453\teval-rmse:6.24978\n",
"[12]\ttrain-rmse:5.93355\teval-rmse:5.93791\n",
"[13]\ttrain-rmse:5.63820\teval-rmse:5.64171\n",
"[14]\ttrain-rmse:5.35791\teval-rmse:5.36060\n",
"[15]\ttrain-rmse:5.09149\teval-rmse:5.09384\n",
"[16]\ttrain-rmse:4.83796\teval-rmse:4.84034\n",
"[17]\ttrain-rmse:4.59742\teval-rmse:4.59968\n",
"[18]\ttrain-rmse:4.36846\teval-rmse:4.37007\n",
"[19]\ttrain-rmse:4.15155\teval-rmse:4.15304\n",
"[20]\ttrain-rmse:3.94554\teval-rmse:3.94632\n",
"[21]\ttrain-rmse:3.74977\teval-rmse:3.74999\n",
"[22]\ttrain-rmse:3.56360\teval-rmse:3.56321\n",
"[23]\ttrain-rmse:3.38712\teval-rmse:3.38680\n",
"[24]\ttrain-rmse:3.21874\teval-rmse:3.21847\n",
"[25]\ttrain-rmse:3.05978\teval-rmse:3.05902\n",
"[26]\ttrain-rmse:2.90867\teval-rmse:2.90743\n",
"[27]\ttrain-rmse:2.76500\teval-rmse:2.76388\n",
"[28]\ttrain-rmse:2.62812\teval-rmse:2.62685\n",
"[29]\ttrain-rmse:2.49820\teval-rmse:2.49628\n",
"[30]\ttrain-rmse:2.37453\teval-rmse:2.37196\n",
"[31]\ttrain-rmse:2.25692\teval-rmse:2.25370\n",
"[32]\ttrain-rmse:2.14536\teval-rmse:2.14147\n",
"[33]\ttrain-rmse:2.03937\teval-rmse:2.03521\n",
"[34]\ttrain-rmse:1.93883\teval-rmse:1.93448\n",
"[35]\ttrain-rmse:1.84381\teval-rmse:1.83979\n",
"[36]\ttrain-rmse:1.75285\teval-rmse:1.74887\n",
"[37]\ttrain-rmse:1.66676\teval-rmse:1.66205\n",
"[38]\ttrain-rmse:1.58492\teval-rmse:1.57965\n",
"[39]\ttrain-rmse:1.50715\teval-rmse:1.50159\n",
"[40]\ttrain-rmse:1.43321\teval-rmse:1.42713\n",
"[41]\ttrain-rmse:1.36283\teval-rmse:1.35596\n",
"[42]\ttrain-rmse:1.29620\teval-rmse:1.28879\n",
"[43]\ttrain-rmse:1.23316\teval-rmse:1.22663\n",
"[44]\ttrain-rmse:1.17272\teval-rmse:1.16596\n",
"[45]\ttrain-rmse:1.11549\teval-rmse:1.10860\n",
"[46]\ttrain-rmse:1.06120\teval-rmse:1.05444\n",
"[47]\ttrain-rmse:1.00958\teval-rmse:1.00254\n",
"[48]\ttrain-rmse:0.96067\teval-rmse:0.95273\n",
"[49]\ttrain-rmse:0.91434\teval-rmse:0.90591\n",
"[50]\ttrain-rmse:0.87015\teval-rmse:0.86133\n",
"[51]\ttrain-rmse:0.82834\teval-rmse:0.81927\n",
"[52]\ttrain-rmse:0.78870\teval-rmse:0.77968\n",
"[53]\ttrain-rmse:0.75082\teval-rmse:0.74161\n",
"[54]\ttrain-rmse:0.71492\teval-rmse:0.70547\n",
"[55]\ttrain-rmse:0.68106\teval-rmse:0.67230\n",
"[56]\ttrain-rmse:0.64849\teval-rmse:0.63960\n",
"[57]\ttrain-rmse:0.61769\teval-rmse:0.60831\n",
"[58]\ttrain-rmse:0.58868\teval-rmse:0.57939\n",
"[59]\ttrain-rmse:0.56057\teval-rmse:0.55152\n",
"[60]\ttrain-rmse:0.53451\teval-rmse:0.52523\n",
"[61]\ttrain-rmse:0.50950\teval-rmse:0.50035\n",
"[62]\ttrain-rmse:0.48564\teval-rmse:0.47651\n",
"[63]\ttrain-rmse:0.46293\teval-rmse:0.45377\n",
"[64]\ttrain-rmse:0.44159\teval-rmse:0.43343\n",
"[65]\ttrain-rmse:0.42131\teval-rmse:0.41326\n",
"[66]\ttrain-rmse:0.40179\teval-rmse:0.39410\n",
"[67]\ttrain-rmse:0.38364\teval-rmse:0.37700\n",
"[68]\ttrain-rmse:0.36614\teval-rmse:0.36009\n",
"[69]\ttrain-rmse:0.34965\teval-rmse:0.34418\n",
"[70]\ttrain-rmse:0.33389\teval-rmse:0.32955\n",
"[71]\ttrain-rmse:0.31898\teval-rmse:0.31511\n",
"[72]\ttrain-rmse:0.30487\teval-rmse:0.30192\n",
"[73]\ttrain-rmse:0.29146\teval-rmse:0.28948\n",
"[74]\ttrain-rmse:0.27854\teval-rmse:0.27745\n",
"[75]\ttrain-rmse:0.26624\teval-rmse:0.26603\n",
"[76]\ttrain-rmse:0.25467\teval-rmse:0.25535\n",
"[77]\ttrain-rmse:0.24384\teval-rmse:0.24510\n",
"[78]\ttrain-rmse:0.23341\teval-rmse:0.23538\n",
"[79]\ttrain-rmse:0.22357\teval-rmse:0.22674\n",
"[80]\ttrain-rmse:0.21429\teval-rmse:0.21868\n",
"[81]\ttrain-rmse:0.20526\teval-rmse:0.21073\n",
"[82]\ttrain-rmse:0.19662\teval-rmse:0.20326\n",
"[83]\ttrain-rmse:0.18837\teval-rmse:0.19614\n",
"[84]\ttrain-rmse:0.18054\teval-rmse:0.18948\n",
"[85]\ttrain-rmse:0.17345\teval-rmse:0.18387\n",
"[86]\ttrain-rmse:0.16646\teval-rmse:0.17787\n",
"[87]\ttrain-rmse:0.15977\teval-rmse:0.17240\n",
"[88]\ttrain-rmse:0.15350\teval-rmse:0.16762\n",
"[89]\ttrain-rmse:0.14754\teval-rmse:0.16333\n",
"[90]\ttrain-rmse:0.14182\teval-rmse:0.15882\n",
"[91]\ttrain-rmse:0.13632\teval-rmse:0.15475\n",
"[92]\ttrain-rmse:0.13127\teval-rmse:0.15126\n",
"[93]\ttrain-rmse:0.12620\teval-rmse:0.14789\n",
"[94]\ttrain-rmse:0.12159\teval-rmse:0.14519\n",
"[95]\ttrain-rmse:0.11702\teval-rmse:0.14218\n",
"[96]\ttrain-rmse:0.11266\teval-rmse:0.13953\n",
"[97]\ttrain-rmse:0.10853\teval-rmse:0.13714\n",
"[98]\ttrain-rmse:0.10450\teval-rmse:0.13514\n",
"[99]\ttrain-rmse:0.10078\teval-rmse:0.13347\n",
"[100]\ttrain-rmse:0.09716\teval-rmse:0.13144\n",
"[101]\ttrain-rmse:0.09377\teval-rmse:0.12970\n",
"[102]\ttrain-rmse:0.09061\teval-rmse:0.12809\n",
"[103]\ttrain-rmse:0.08744\teval-rmse:0.12667\n",
"[104]\ttrain-rmse:0.08450\teval-rmse:0.12523\n",
"[105]\ttrain-rmse:0.08152\teval-rmse:0.12383\n",
"[106]\ttrain-rmse:0.07869\teval-rmse:0.12271\n",
"[107]\ttrain-rmse:0.07611\teval-rmse:0.12161\n",
"[108]\ttrain-rmse:0.07358\teval-rmse:0.12084\n",
"[109]\ttrain-rmse:0.07116\teval-rmse:0.11998\n",
"[110]\ttrain-rmse:0.06895\teval-rmse:0.11904\n",
"[111]\ttrain-rmse:0.06676\teval-rmse:0.11830\n",
"[112]\ttrain-rmse:0.06457\teval-rmse:0.11761\n",
"[113]\ttrain-rmse:0.06251\teval-rmse:0.11679\n",
"[114]\ttrain-rmse:0.06071\teval-rmse:0.11642\n",
"[115]\ttrain-rmse:0.05873\teval-rmse:0.11584\n",
"[116]\ttrain-rmse:0.05691\teval-rmse:0.11509\n",
"[117]\ttrain-rmse:0.05539\teval-rmse:0.11460\n",
"[118]\ttrain-rmse:0.05374\teval-rmse:0.11408\n",
"[119]\ttrain-rmse:0.05229\teval-rmse:0.11369\n",
"[120]\ttrain-rmse:0.05087\teval-rmse:0.11348\n",
"[121]\ttrain-rmse:0.04938\teval-rmse:0.11326\n",
"[122]\ttrain-rmse:0.04790\teval-rmse:0.11283\n",
"[123]\ttrain-rmse:0.04652\teval-rmse:0.11271\n",
"[124]\ttrain-rmse:0.04506\teval-rmse:0.11234\n",
"[125]\ttrain-rmse:0.04385\teval-rmse:0.11213\n",
"[126]\ttrain-rmse:0.04264\teval-rmse:0.11208\n",
"[127]\ttrain-rmse:0.04140\teval-rmse:0.11193\n",
"[128]\ttrain-rmse:0.04036\teval-rmse:0.11187\n",
"[129]\ttrain-rmse:0.03931\teval-rmse:0.11160\n",
"[130]\ttrain-rmse:0.03824\teval-rmse:0.11150\n",
"[131]\ttrain-rmse:0.03722\teval-rmse:0.11131\n",
"[132]\ttrain-rmse:0.03628\teval-rmse:0.11130\n",
"[133]\ttrain-rmse:0.03530\teval-rmse:0.11123\n",
"[134]\ttrain-rmse:0.03441\teval-rmse:0.11112\n",
"[135]\ttrain-rmse:0.03345\teval-rmse:0.11104\n",
"[136]\ttrain-rmse:0.03262\teval-rmse:0.11096\n",
"[137]\ttrain-rmse:0.03188\teval-rmse:0.11098\n",
"[138]\ttrain-rmse:0.03105\teval-rmse:0.11097\n",
"[139]\ttrain-rmse:0.03025\teval-rmse:0.11102\n",
"[140]\ttrain-rmse:0.02952\teval-rmse:0.11110\n",
"[141]\ttrain-rmse:0.02890\teval-rmse:0.11103\n",
"[142]\ttrain-rmse:0.02824\teval-rmse:0.11104\n",
"[143]\ttrain-rmse:0.02761\teval-rmse:0.11102\n",
"[144]\ttrain-rmse:0.02702\teval-rmse:0.11100\n",
"[145]\ttrain-rmse:0.02634\teval-rmse:0.11108\n",
"[146]\ttrain-rmse:0.02584\teval-rmse:0.11106\n",
"[147]\ttrain-rmse:0.02540\teval-rmse:0.11111\n",
"[148]\ttrain-rmse:0.02489\teval-rmse:0.11130\n",
"[149]\ttrain-rmse:0.02439\teval-rmse:0.11131\n",
"[150]\ttrain-rmse:0.02382\teval-rmse:0.11130\n",
"[151]\ttrain-rmse:0.02333\teval-rmse:0.11134\n",
"[152]\ttrain-rmse:0.02277\teval-rmse:0.11133\n",
"[153]\ttrain-rmse:0.02238\teval-rmse:0.11135\n",
"[154]\ttrain-rmse:0.02189\teval-rmse:0.11143\n",
"[155]\ttrain-rmse:0.02146\teval-rmse:0.11156\n",
"[156]\ttrain-rmse:0.02101\teval-rmse:0.11152\n",
"[157]\ttrain-rmse:0.02058\teval-rmse:0.11150\n",
"[158]\ttrain-rmse:0.02017\teval-rmse:0.11143\n",
"[159]\ttrain-rmse:0.01975\teval-rmse:0.11141\n",
"[160]\ttrain-rmse:0.01932\teval-rmse:0.11136\n",
"[161]\ttrain-rmse:0.01901\teval-rmse:0.11136\n",
"[162]\ttrain-rmse:0.01860\teval-rmse:0.11142\n",
"[163]\ttrain-rmse:0.01820\teval-rmse:0.11150\n",
"[164]\ttrain-rmse:0.01792\teval-rmse:0.11156\n",
"[165]\ttrain-rmse:0.01758\teval-rmse:0.11161\n",
"[166]\ttrain-rmse:0.01725\teval-rmse:0.11173\n",
"[167]\ttrain-rmse:0.01694\teval-rmse:0.11173\n",
"[168]\ttrain-rmse:0.01661\teval-rmse:0.11172\n",
"[169]\ttrain-rmse:0.01629\teval-rmse:0.11181\n",
"[170]\ttrain-rmse:0.01602\teval-rmse:0.11185\n",
"[171]\ttrain-rmse:0.01574\teval-rmse:0.11181\n",
"[172]\ttrain-rmse:0.01544\teval-rmse:0.11183\n",
"[173]\ttrain-rmse:0.01520\teval-rmse:0.11179\n",
"[174]\ttrain-rmse:0.01489\teval-rmse:0.11181\n",
"[175]\ttrain-rmse:0.01463\teval-rmse:0.11181\n",
"[176]\ttrain-rmse:0.01435\teval-rmse:0.11179\n",
"[177]\ttrain-rmse:0.01409\teval-rmse:0.11177\n",
"[178]\ttrain-rmse:0.01373\teval-rmse:0.11180\n",
"[179]\ttrain-rmse:0.01350\teval-rmse:0.11181\n",
"[180]\ttrain-rmse:0.01327\teval-rmse:0.11180\n",
"[181]\ttrain-rmse:0.01304\teval-rmse:0.11185\n",
"[182]\ttrain-rmse:0.01279\teval-rmse:0.11187\n",
"[183]\ttrain-rmse:0.01256\teval-rmse:0.11186\n",
"[184]\ttrain-rmse:0.01232\teval-rmse:0.11188\n",
"[185]\ttrain-rmse:0.01211\teval-rmse:0.11191\n",
"[186]\ttrain-rmse:0.01186\teval-rmse:0.11187\n",
"[187]\ttrain-rmse:0.01172\teval-rmse:0.11188\n",
"[188]\ttrain-rmse:0.01150\teval-rmse:0.11201\n",
"[189]\ttrain-rmse:0.01133\teval-rmse:0.11203\n",
"[190]\ttrain-rmse:0.01110\teval-rmse:0.11207\n",
"[191]\ttrain-rmse:0.01092\teval-rmse:0.11210\n",
"[192]\ttrain-rmse:0.01075\teval-rmse:0.11209\n",
"[193]\ttrain-rmse:0.01057\teval-rmse:0.11205\n",
"[194]\ttrain-rmse:0.01042\teval-rmse:0.11211\n",
"[195]\ttrain-rmse:0.01025\teval-rmse:0.11215\n",
"[196]\ttrain-rmse:0.01008\teval-rmse:0.11213\n",
"[197]\ttrain-rmse:0.00993\teval-rmse:0.11216\n",
"[198]\ttrain-rmse:0.00973\teval-rmse:0.11215\n",
"[199]\ttrain-rmse:0.00959\teval-rmse:0.11218\n",
"[200]\ttrain-rmse:0.00946\teval-rmse:0.11218\n",
"[201]\ttrain-rmse:0.00929\teval-rmse:0.11218\n",
"[202]\ttrain-rmse:0.00911\teval-rmse:0.11218\n",
"[203]\ttrain-rmse:0.00896\teval-rmse:0.11220\n",
"[204]\ttrain-rmse:0.00884\teval-rmse:0.11217\n",
"[205]\ttrain-rmse:0.00872\teval-rmse:0.11216\n",
"[206]\ttrain-rmse:0.00861\teval-rmse:0.11219\n",
"[207]\ttrain-rmse:0.00844\teval-rmse:0.11218\n",
"[208]\ttrain-rmse:0.00830\teval-rmse:0.11227\n",
"[209]\ttrain-rmse:0.00819\teval-rmse:0.11229\n",
"[210]\ttrain-rmse:0.00809\teval-rmse:0.11230\n",
"[211]\ttrain-rmse:0.00800\teval-rmse:0.11231\n",
"[212]\ttrain-rmse:0.00783\teval-rmse:0.11234\n",
"[213]\ttrain-rmse:0.00772\teval-rmse:0.11234\n",
"[214]\ttrain-rmse:0.00762\teval-rmse:0.11232\n",
"[215]\ttrain-rmse:0.00747\teval-rmse:0.11235\n",
"[216]\ttrain-rmse:0.00734\teval-rmse:0.11236\n",
"[217]\ttrain-rmse:0.00723\teval-rmse:0.11240\n",
"[218]\ttrain-rmse:0.00709\teval-rmse:0.11241\n",
"[219]\ttrain-rmse:0.00697\teval-rmse:0.11240\n",
"[220]\ttrain-rmse:0.00687\teval-rmse:0.11242\n",
"[221]\ttrain-rmse:0.00680\teval-rmse:0.11245\n",
"[222]\ttrain-rmse:0.00667\teval-rmse:0.11250\n",
"[223]\ttrain-rmse:0.00658\teval-rmse:0.11254\n",
"[224]\ttrain-rmse:0.00647\teval-rmse:0.11255\n",
"[225]\ttrain-rmse:0.00639\teval-rmse:0.11258\n",
"[226]\ttrain-rmse:0.00627\teval-rmse:0.11257\n",
"[227]\ttrain-rmse:0.00616\teval-rmse:0.11256\n",
"[228]\ttrain-rmse:0.00605\teval-rmse:0.11257\n",
"[229]\ttrain-rmse:0.00595\teval-rmse:0.11261\n",
"[230]\ttrain-rmse:0.00583\teval-rmse:0.11262\n",
"[231]\ttrain-rmse:0.00577\teval-rmse:0.11264\n",
"[232]\ttrain-rmse:0.00566\teval-rmse:0.11263\n",
"[233]\ttrain-rmse:0.00558\teval-rmse:0.11263\n",
"[234]\ttrain-rmse:0.00552\teval-rmse:0.11264\n",
"[235]\ttrain-rmse:0.00543\teval-rmse:0.11264\n",
"[236]\ttrain-rmse:0.00536\teval-rmse:0.11265\n",
"[237]\ttrain-rmse:0.00530\teval-rmse:0.11266\n",
"[238]\ttrain-rmse:0.00524\teval-rmse:0.11267\n",
"[239]\ttrain-rmse:0.00513\teval-rmse:0.11265\n",
"[240]\ttrain-rmse:0.00505\teval-rmse:0.11265\n",
"[241]\ttrain-rmse:0.00497\teval-rmse:0.11265\n",
"[242]\ttrain-rmse:0.00488\teval-rmse:0.11264\n",
"[243]\ttrain-rmse:0.00481\teval-rmse:0.11265\n",
"[244]\ttrain-rmse:0.00472\teval-rmse:0.11266\n",
"[245]\ttrain-rmse:0.00465\teval-rmse:0.11267\n",
"[246]\ttrain-rmse:0.00461\teval-rmse:0.11266\n",
"[247]\ttrain-rmse:0.00453\teval-rmse:0.11265\n",
"[248]\ttrain-rmse:0.00445\teval-rmse:0.11265\n",
"[249]\ttrain-rmse:0.00439\teval-rmse:0.11266\n",
"[250]\ttrain-rmse:0.00431\teval-rmse:0.11266\n",
"[251]\ttrain-rmse:0.00425\teval-rmse:0.11267\n",
"[252]\ttrain-rmse:0.00417\teval-rmse:0.11268\n",
"[253]\ttrain-rmse:0.00411\teval-rmse:0.11269\n",
"[254]\ttrain-rmse:0.00404\teval-rmse:0.11268\n",
"[255]\ttrain-rmse:0.00399\teval-rmse:0.11269\n",
"[256]\ttrain-rmse:0.00391\teval-rmse:0.11270\n",
"[257]\ttrain-rmse:0.00385\teval-rmse:0.11270\n",
"[258]\ttrain-rmse:0.00379\teval-rmse:0.11272\n",
"[259]\ttrain-rmse:0.00372\teval-rmse:0.11272\n",
"[260]\ttrain-rmse:0.00367\teval-rmse:0.11271\n",
"[261]\ttrain-rmse:0.00360\teval-rmse:0.11271\n",
"[262]\ttrain-rmse:0.00355\teval-rmse:0.11272\n",
"[263]\ttrain-rmse:0.00349\teval-rmse:0.11272\n",
"[264]\ttrain-rmse:0.00342\teval-rmse:0.11273\n",
"[265]\ttrain-rmse:0.00337\teval-rmse:0.11272\n",
"[266]\ttrain-rmse:0.00333\teval-rmse:0.11272\n",
"[267]\ttrain-rmse:0.00328\teval-rmse:0.11273\n",
"[268]\ttrain-rmse:0.00324\teval-rmse:0.11274\n",
"[269]\ttrain-rmse:0.00319\teval-rmse:0.11272\n",
"[270]\ttrain-rmse:0.00313\teval-rmse:0.11272\n",
"[271]\ttrain-rmse:0.00308\teval-rmse:0.11272\n",
"[272]\ttrain-rmse:0.00303\teval-rmse:0.11273\n",
"[273]\ttrain-rmse:0.00300\teval-rmse:0.11273\n",
"[274]\ttrain-rmse:0.00297\teval-rmse:0.11273\n",
"[275]\ttrain-rmse:0.00293\teval-rmse:0.11273\n",
"[276]\ttrain-rmse:0.00288\teval-rmse:0.11273\n",
"[277]\ttrain-rmse:0.00283\teval-rmse:0.11274\n",
"[278]\ttrain-rmse:0.00278\teval-rmse:0.11273\n",
"[279]\ttrain-rmse:0.00273\teval-rmse:0.11274\n",
"[280]\ttrain-rmse:0.00268\teval-rmse:0.11273\n",
"[281]\ttrain-rmse:0.00264\teval-rmse:0.11274\n",
"[282]\ttrain-rmse:0.00259\teval-rmse:0.11273\n",
"[283]\ttrain-rmse:0.00255\teval-rmse:0.11273\n",
"[284]\ttrain-rmse:0.00251\teval-rmse:0.11273\n",
"[285]\ttrain-rmse:0.00248\teval-rmse:0.11272\n",
"[286]\ttrain-rmse:0.00243\teval-rmse:0.11272\n",
"[287]\ttrain-rmse:0.00240\teval-rmse:0.11272\n",
"[288]\ttrain-rmse:0.00236\teval-rmse:0.11272\n",
"[289]\ttrain-rmse:0.00233\teval-rmse:0.11272\n",
"[290]\ttrain-rmse:0.00230\teval-rmse:0.11272\n",
"[291]\ttrain-rmse:0.00228\teval-rmse:0.11272\n",
"[292]\ttrain-rmse:0.00224\teval-rmse:0.11271\n",
"[293]\ttrain-rmse:0.00220\teval-rmse:0.11271\n",
"[294]\ttrain-rmse:0.00217\teval-rmse:0.11271\n",
"[295]\ttrain-rmse:0.00214\teval-rmse:0.11271\n",
"[296]\ttrain-rmse:0.00211\teval-rmse:0.11271\n",
"[297]\ttrain-rmse:0.00208\teval-rmse:0.11271\n",
"[298]\ttrain-rmse:0.00205\teval-rmse:0.11270\n",
"[299]\ttrain-rmse:0.00202\teval-rmse:0.11270\n",
"[300]\ttrain-rmse:0.00199\teval-rmse:0.11270\n",
"[301]\ttrain-rmse:0.00196\teval-rmse:0.11271\n",
"[302]\ttrain-rmse:0.00192\teval-rmse:0.11271\n",
"[303]\ttrain-rmse:0.00190\teval-rmse:0.11271\n",
"[304]\ttrain-rmse:0.00189\teval-rmse:0.11271\n",
"[305]\ttrain-rmse:0.00185\teval-rmse:0.11272\n",
"[306]\ttrain-rmse:0.00182\teval-rmse:0.11272\n",
"[307]\ttrain-rmse:0.00179\teval-rmse:0.11273\n",
"[308]\ttrain-rmse:0.00176\teval-rmse:0.11273\n",
"[309]\ttrain-rmse:0.00175\teval-rmse:0.11273\n",
"[310]\ttrain-rmse:0.00173\teval-rmse:0.11273\n",
"[311]\ttrain-rmse:0.00170\teval-rmse:0.11274\n",
"[312]\ttrain-rmse:0.00168\teval-rmse:0.11274\n",
"[313]\ttrain-rmse:0.00165\teval-rmse:0.11274\n",
"[314]\ttrain-rmse:0.00163\teval-rmse:0.11274\n",
"[315]\ttrain-rmse:0.00160\teval-rmse:0.11275\n",
"[316]\ttrain-rmse:0.00158\teval-rmse:0.11275\n",
"[317]\ttrain-rmse:0.00155\teval-rmse:0.11275\n",
"[318]\ttrain-rmse:0.00154\teval-rmse:0.11275\n",
"[319]\ttrain-rmse:0.00152\teval-rmse:0.11275\n",
"[320]\ttrain-rmse:0.00150\teval-rmse:0.11275\n",
"[321]\ttrain-rmse:0.00148\teval-rmse:0.11275\n",
"[322]\ttrain-rmse:0.00145\teval-rmse:0.11276\n",
"[323]\ttrain-rmse:0.00143\teval-rmse:0.11275\n",
"[324]\ttrain-rmse:0.00141\teval-rmse:0.11275\n",
"[325]\ttrain-rmse:0.00138\teval-rmse:0.11275\n",
"[326]\ttrain-rmse:0.00136\teval-rmse:0.11276\n",
"[327]\ttrain-rmse:0.00134\teval-rmse:0.11275\n",
"[328]\ttrain-rmse:0.00132\teval-rmse:0.11276\n",
"[329]\ttrain-rmse:0.00130\teval-rmse:0.11276\n",
"[330]\ttrain-rmse:0.00128\teval-rmse:0.11276\n",
"[331]\ttrain-rmse:0.00127\teval-rmse:0.11275\n",
"[332]\ttrain-rmse:0.00125\teval-rmse:0.11275\n",
"[333]\ttrain-rmse:0.00123\teval-rmse:0.11275\n",
"[334]\ttrain-rmse:0.00121\teval-rmse:0.11275\n",
"[335]\ttrain-rmse:0.00119\teval-rmse:0.11276\n"
]
}
],
"source": [
"gbm = xgb.train(params, dtrain, evals=watchlist, num_boost_round=5000,\n",
" early_stopping_rounds=200, verbose_eval=True)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"# With early stopping, xgb.train hands back the booster from the last round, not the best one, so\n",
"# the trees added after the validation error stopped improving are excluded from prediction.\n",
"# The result is still on the log1p scale of the training target.\n",
"x_pred = gbm.predict(xgb.DMatrix(X_test), iteration_range=(0, gbm.best_iteration + 1))"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# Undo the log1p applied to the target so the stored predictions are prices again.\n",
"test = test.assign(SalePrice=np.expm1(x_pred))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# Write the two-column prediction file (identifier and predicted price).\n",
"submission = test[['Id', 'SalePrice']]\n",
"submission.to_csv('house_pred2.csv', index=False, encoding='utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Make sure the target folder exists before saving; save_model is not expected to create\n",
"# missing parent directories and would otherwise fail on a fresh checkout.\n",
"os.makedirs('./pretrain_models', exist_ok=True)\n",
"gbm.save_model('./pretrain_models/house_price_eta0.05_round280.json')"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# Empty scikit-learn style regressor; the saved booster is loaded into it in the next cell.\n",
"gg = xgb.XGBRegressor()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/xgboost/sklearn.py:742: UserWarning: Loading a native XGBoost model with Scikit-Learn interface.\n",
" 'Loading a native XGBoost model with Scikit-Learn interface.'\n"
]
}
],
"source": [
"# Reload the booster written by save_model above (same path).\n",
"model_path = './pretrain_models/house_price_eta0.05_round280.json'\n",
"gg.load_model(model_path)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"# Predict with the reloaded model and map the log1p-scale output back to prices.\n",
"reloaded_pred = gg.predict(X_test)\n",
"test = test.assign(SalePrice=np.expm1(reloaded_pred))"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([11.706002, 12.04607 , 12.116972, ..., 11.978775, 11.649101,\n",
" 12.330935], dtype=float32)"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x_pred"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7.13 ('py37')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "993bd31d5df1020fab369d79a34ff0a2a159e1798f3e25d3ad4b7751d38184c9"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}