ai_platform_regression/house_price/house_price.ipynb

In [2]:
import xgboost as xgb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [3]:
train_data = pd.read_csv('./data/train.csv')
test_data = pd.read_csv('./data/test.csv')
In [3]:
# Conditional indexing in pandas: drop the outliers with very large living area but low sale price
train_data.drop(train_data[(train_data["GrLivArea"] > 4000) & (train_data["SalePrice"] < 300000)].index, inplace=True)
In [ ]:
train_data
In [4]:
all_data = pd.concat([train_data, test_data]).reset_index(drop=True)
all_data.shape
Out[4]:
(2917, 81)
In [50]:
miss_value = train_data.isnull().sum().sort_values(ascending=False).to_frame().reset_index()
miss_value.columns = ['feature', 'missing_rate']
miss_value = miss_value[miss_value.missing_rate > 0]
miss_value.missing_rate = miss_value.missing_rate / train_data.shape[0]
miss_value
Out[50]:
feature missing_rate
0 PoolQC 0.995885
1 MiscFeature 0.962963
2 Alley 0.937586
3 Fence 0.807270
4 FireplaceQu 0.473251
5 LotFrontage 0.177641
6 GarageYrBlt 0.055556
7 GarageCond 0.055556
8 GarageType 0.055556
9 GarageFinish 0.055556
10 GarageQual 0.055556
11 BsmtFinType2 0.026063
12 BsmtExposure 0.026063
13 BsmtQual 0.025377
14 BsmtCond 0.025377
15 BsmtFinType1 0.025377
16 MasVnrArea 0.005487
17 MasVnrType 0.005487
18 Electrical 0.000686
In [5]:
miss = all_data.isnull().sum().sort_values(ascending=True)
miss
Out[5]:
Id                  0
Foundation          0
Heating             0
SaleCondition       0
CentralAir          0
                 ... 
SalePrice        1459
Fence            2346
Alley            2719
MiscFeature      2812
PoolQC           2908
Length: 81, dtype: int64

Drop the columns with an excessively high missing ratio

In [6]:
# Drop any feature missing in more than 1000 of the 2917 rows
all_cols = [x for x in all_data.columns if x not in ('Id', 'SalePrice')]
for col in all_cols:
    if miss[col] > 1000:
        print(col)
        all_data.drop(columns=[col], inplace=True)
all_data.shape
Alley
FireplaceQu
PoolQC
Fence
MiscFeature
Out[6]:
(2917, 76)
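
The same filter can be expressed as a missing-ratio threshold rather than an absolute count. A sketch (not run above; the 0.4 cutoff is an illustrative assumption):

In [ ]:
# Sketch: drop columns whose missing ratio exceeds a threshold
ratio = all_data.isnull().mean()
high_miss = [c for c in ratio[ratio > 0.4].index if c not in ('Id', 'SalePrice')]
# all_data = all_data.drop(columns=high_miss)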
In [58]:
import seaborn as sns
from scipy.stats import norm
from scipy import stats
In [59]:
sns.distplot(train_data.SalePrice, fit=norm)
/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
Out[59]:
<AxesSubplot:xlabel='SalePrice', ylabel='Density'>
[figure: histogram of SalePrice with a fitted normal curve]
In [61]:
rest = stats.probplot(train_data.SalePrice, plot=plt)
[figure: normal Q-Q plot of SalePrice]
In [63]:
sns.distplot(np.log1p(train_data.SalePrice), fit=norm)
/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
Out[63]:
<AxesSubplot:xlabel='SalePrice', ylabel='Density'>
[figure: histogram of log1p(SalePrice) with a fitted normal curve]
In [64]:
rest = stats.probplot(np.log1p(train_data.SalePrice), plot=plt)
[figure: normal Q-Q plot of log1p(SalePrice)]
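
The two pairs of plots suggest that log1p pulls SalePrice much closer to a normal distribution. A quick skewness check makes this quantitative (a sketch: the raw skew should be roughly 1.9 and drop to near 0 after the transform):

In [ ]:
from scipy.stats import skew

print(skew(train_data.SalePrice))            # strongly right-skewed
print(skew(np.log1p(train_data.SalePrice)))  # close to symmetric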
In [56]:
all_data[all_data['GarageYrBlt'].isna()].shape
Out[56]:
(0, 76)
In [8]:
# The test set contains an impossible garage year (a 2207 typo); treat any year after 2022 as missing
na_index = all_data[all_data['GarageYrBlt'] > 2022].index
all_data.loc[na_index, 'GarageYrBlt'] = None
In [9]:
all_data[all_data['GarageYrBlt'].isna()].shape
Out[9]:
(160, 76)
In [10]:
# Missing garage year: assume the garage was built with the house
all_data.GarageYrBlt.fillna(all_data.YearBuilt, inplace=True)
# Convert year features to ages (years elapsed as of 2022)
year_cols = ['YearBuilt', 'YearRemodAdd', 'GarageYrBlt']
for col in year_cols:
    all_data[col] = 2022 - all_data[col]
all_data.shape
Out[10]:
(2917, 76)
In [11]:
cols1 = ["GarageQual", "GarageCond", "GarageFinish", "GarageType", "BsmtExposure", "BsmtCond", "BsmtQual", "BsmtFinType2", "BsmtFinType1", "MasVnrType"]
for col in cols1:
    all_data[col].fillna("None",inplace=True)
all_data.shape
Out[11]:
(2917, 76)
In [12]:
cols2=["MasVnrArea", "BsmtUnfSF", "TotalBsmtSF", "GarageCars", "BsmtFinSF2", "BsmtFinSF1", "GarageArea"]
for col in cols2:
    all_data[col].fillna(0, inplace=True)
all_data.shape
Out[12]:
(2917, 76)
In [13]:
all_data["LotFrontage"].fillna(np.mean(all_data["LotFrontage"]),inplace=True)
cols3 = ["MSZoning", "BsmtFullBath", "BsmtHalfBath", "Utilities", "Functional", "Electrical", "KitchenQual", "SaleType","Exterior1st", "Exterior2nd"]
for col in cols3:
    all_data[col].fillna(all_data[col].mode()[0], inplace=True)
all_data.shape
Out[13]:
(2917, 76)
In [14]:
numeric_cols = [x for x in all_data.select_dtypes(exclude=['object']).columns if x not in ('Id', 'SalePrice')]
object_cols = all_data.select_dtypes(include=['object']).columns.tolist()
In [15]:
# log1p then min-max scale every numeric feature
for col in numeric_cols:
    all_data[col] = np.log1p(all_data[col])
    all_data[col] = (all_data[col] - all_data[col].min()) / (all_data[col].max() - all_data[col].min())
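
Applying log1p to every numeric column is a blunt instrument; a common refinement transforms only the noticeably skewed features. A sketch, where the 0.75 cutoff is an assumed convention rather than anything used above:

In [ ]:
from scipy.stats import skew

# Sketch: log1p only the columns whose absolute skew exceeds 0.75
skewness = all_data[numeric_cols].apply(lambda s: skew(s.dropna()))
skewed_cols = skewness[skewness.abs() > 0.75].index
# all_data[skewed_cols] = np.log1p(all_data[skewed_cols])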
In [16]:
dataset = pd.get_dummies(all_data, columns=object_cols)  # one-hot encode the categoricals; train and test stay aligned because they were concatenated
In [17]:
dataset.SalePrice = np.log1p(dataset.SalePrice)  # train on log1p(SalePrice)
In [18]:
train = dataset[~dataset.SalePrice.isna()].copy()
train.shape
Out[18]:
(1458, 280)
In [19]:
test = dataset[dataset.SalePrice.isna()].copy()
test.shape
Out[19]:
(1459, 280)
In [20]:
feature_cols = [x for x in dataset.columns if x != 'Id' and x != 'SalePrice']
In [21]:
from sklearn.model_selection import train_test_split
In [22]:
train, valid = train_test_split(train, test_size=0.12, shuffle=True, random_state=42)
In [23]:
X_train, Y_train = train[feature_cols], train['SalePrice']
X_valid, Y_valid = valid[feature_cols], valid['SalePrice']
X_test, Y_test = test[feature_cols], test['SalePrice']
In [24]:
dtrain = xgb.DMatrix(X_train, Y_train)
dvalid = xgb.DMatrix(X_valid, Y_valid)
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
In [25]:
params = {'objective': 'reg:squarederror',
          'booster': 'gbtree',
          'eta': 0.05,
          'max_depth': 15,
          'subsample': 0.7,
          'colsample_bytree': 0.7,
          'eval_metric': ['rmse'],
          'verbosity': 0,  # 'silent' is deprecated in recent xgboost
          'seed': 10}
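
A single 88/12 split makes the early-stopping round somewhat noisy. For a cross-validated estimate of the round count instead, xgb.cv over the same params works as a drop-in; a sketch (not run here):

In [ ]:
# Sketch: 5-fold CV with the same params to estimate a good num_boost_round
dall = xgb.DMatrix(pd.concat([X_train, X_valid]), pd.concat([Y_train, Y_valid]))
cv_res = xgb.cv(params, dall, num_boost_round=5000, nfold=5,
                early_stopping_rounds=200, seed=10)
print(len(cv_res), cv_res['test-rmse-mean'].min())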
In [26]:
gbm = xgb.train(params, dtrain, evals=watchlist, num_boost_round=5000,
                early_stopping_rounds=200,  verbose_eval=True)
[0]	train-rmse:10.95491	eval-rmse:10.96235
[1]	train-rmse:10.40916	eval-rmse:10.41661
[2]	train-rmse:9.89034	eval-rmse:9.89780
[3]	train-rmse:9.39722	eval-rmse:9.40469
[4]	train-rmse:8.92885	eval-rmse:8.93633
[5]	train-rmse:8.48375	eval-rmse:8.49124
...
[131]	train-rmse:0.03722	eval-rmse:0.11131
[132]	train-rmse:0.03628	eval-rmse:0.11130
[133]	train-rmse:0.03530	eval-rmse:0.11123
[134]	train-rmse:0.03441	eval-rmse:0.11112
[135]	train-rmse:0.03345	eval-rmse:0.11104
[136]	train-rmse:0.03262	eval-rmse:0.11096
[137]	train-rmse:0.03188	eval-rmse:0.11098
...
[335]	train-rmse:0.00119	eval-rmse:0.11276
In [36]:
x_pred = gbm.predict(xgb.DMatrix(X_test))  # predictions are in log1p(SalePrice) space
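
With early stopping the booster still keeps every tree it built, so a plain predict uses all of them. Limiting prediction to the best round usually scores slightly better; a sketch, noting that `iteration_range` needs xgboost >= 1.4 (older versions use `ntree_limit=gbm.best_ntree_limit` instead):

In [ ]:
# Sketch: predict with only the trees up to the best early-stopping round
x_pred_best = gbm.predict(xgb.DMatrix(X_test),
                          iteration_range=(0, gbm.best_iteration + 1))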
In [38]:
test['SalePrice'] = np.expm1(x_pred)  # invert the log1p transform back to dollars
In [41]:
test[['Id', 'SalePrice']].to_csv('house_pred2.csv', index=False, encoding='utf-8')
In [30]:
gbm.save_model('./pretrain_models/house_price_eta0.05_round280.json')
In [31]:
gg = xgb.XGBRegressor()
In [32]:
gg.load_model('./pretrain_models/house_price_eta0.05_round280.json')
/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/xgboost/sklearn.py:742: UserWarning: Loading a native XGBoost model with Scikit-Learn interface.
  'Loading a native XGBoost model with Scikit-Learn interface.'
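
The warning comes from loading a native Booster through the scikit-learn wrapper. Loading it back into a plain xgb.Booster avoids it; a sketch:

In [ ]:
booster = xgb.Booster()
booster.load_model('./pretrain_models/house_price_eta0.05_round280.json')
preds = np.expm1(booster.predict(xgb.DMatrix(X_test)))  # invert log1p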
In [40]:
test['SalePrice'] = np.expm1(gg.predict(X_test))
In [37]:
x_pred
Out[37]:
array([11.706002, 12.04607 , 12.116972, ..., 11.978775, 11.649101,
       12.330935], dtype=float32)