{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import xgboost as xgb\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the train and test CSV files from the local data directory.\n",
    "train_data = pd.read_csv('./data/train.csv')\n",
    "test_data = pd.read_csv('./data/test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove training rows with GrLivArea > 4000 and SalePrice < 300000,\n",
    "# selected with a boolean mask (conditional indexing in pandas).\n",
    "# Reassigning instead of drop(inplace=True) avoids mutating the loaded frame\n",
    "# in place; re-running the cell is a no-op because the mask then matches nothing.\n",
    "outlier_mask = (train_data[\"GrLivArea\"] > 4000) & (train_data[\"SalePrice\"] < 300000)\n",
    "train_data = train_data.drop(index=train_data.loc[outlier_mask].index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview only the first rows instead of rendering the whole frame.\n",
    "train_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2917, 81)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stack the train and test rows into one frame with a fresh 0..n-1 index.\n",
    "all_data = pd.concat([train_data, test_data], ignore_index=True)\n",
    "all_data.shape"
   ]
  },
  { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 特征名称 | \n", "缺失率 | \n", "
---|---|---|
0 | \n", "PoolQC | \n", "0.995885 | \n", "
1 | \n", "MiscFeature | \n", "0.962963 | \n", "
2 | \n", "Alley | \n", "0.937586 | \n", "
3 | \n", "Fence | \n", "0.807270 | \n", "
4 | \n", "FireplaceQu | \n", "0.473251 | \n", "
5 | \n", "LotFrontage | \n", "0.177641 | \n", "
6 | \n", "GarageYrBlt | \n", "0.055556 | \n", "
7 | \n", "GarageCond | \n", "0.055556 | \n", "
8 | \n", "GarageType | \n", "0.055556 | \n", "
9 | \n", "GarageFinish | \n", "0.055556 | \n", "
10 | \n", "GarageQual | \n", "0.055556 | \n", "
11 | \n", "BsmtFinType2 | \n", "0.026063 | \n", "
12 | \n", "BsmtExposure | \n", "0.026063 | \n", "
13 | \n", "BsmtQual | \n", "0.025377 | \n", "
14 | \n", "BsmtCond | \n", "0.025377 | \n", "
15 | \n", "BsmtFinType1 | \n", "0.025377 | \n", "
16 | \n", "MasVnrArea | \n", "0.005487 | \n", "
17 | \n", "MasVnrType | \n", "0.005487 | \n", "
18 | \n", "Electrical | \n", "0.000686 | \n", "