From 2e64b83e0d2d140f3c08a44dbb763b47737571dd Mon Sep 17 00:00:00 2001
From: zhaojinghao
Date: Thu, 8 Dec 2022 16:22:29 +0800
Subject: [PATCH] Modify response
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 house_price/house_price_predcition.py | 35 ++++++----
 ocean_wave/wave_height_mlp.py         | 26 +++++--
 prophet_predict/prophet_predict.py    | 21 +++---
 run.py                                | 99 ++++++++++++++-------------
 4 files changed, 103 insertions(+), 78 deletions(-)

diff --git a/house_price/house_price_predcition.py b/house_price/house_price_predcition.py
index d29ba01..dc2d498 100644
--- a/house_price/house_price_predcition.py
+++ b/house_price/house_price_predcition.py
@@ -5,15 +5,19 @@ import numpy as np
 from sklearn.model_selection import train_test_split
 from logzero import logger
 import os
+
 current_path = os.path.dirname(__file__)
 
+
 def load_data():
     logger.info(f"loading local data")
     logger.info(current_path)
     train_data = pd.read_csv(f'{current_path}/data/train.csv')
-    train_data.drop(train_data[(train_data["GrLivArea"]>4000)&(train_data["SalePrice"]<300000)].index,inplace=True)#pandas 里面的条件索引
+    train_data.drop(train_data[(train_data["GrLivArea"] > 4000) & (train_data["SalePrice"] < 300000)].index,
+                    inplace=True)  # conditional indexing in pandas
     return train_data
 
+
 def load_model():
     logger.info(f"loading local model")
     model = xgb.XGBModel()
@@ -42,22 +46,25 @@ def preprocessing(local_train: pd.DataFrame, new_data: pd.DataFrame):
     year_cols = ['YearBuilt', 'YearRemodAdd', 'GarageYrBlt']
     all_data.loc[na_index, 'GarageYrBlt'] = None
     all_data.GarageYrBlt.fillna(all_data.YearBuilt, inplace=True)
-
-    cols1 = ["GarageQual", "GarageCond", "GarageFinish", "GarageType", "BsmtExposure", "BsmtCond", "BsmtQual", "BsmtFinType2", "BsmtFinType1", "MasVnrType"]
-    for col in cols1:
-        all_data[col].fillna("None",inplace=True)
-    cols2=["MasVnrArea", "BsmtUnfSF", "TotalBsmtSF", "GarageCars", "BsmtFinSF2", "BsmtFinSF1", "GarageArea"]
+    cols1 = ["GarageQual", "GarageCond", "GarageFinish", "GarageType", "BsmtExposure", "BsmtCond", "BsmtQual",
+             "BsmtFinType2", "BsmtFinType1", "MasVnrType"]
+    for col in cols1:
+        all_data[col].fillna("None", inplace=True)
+
+    cols2 = ["MasVnrArea", "BsmtUnfSF", "TotalBsmtSF", "GarageCars", "BsmtFinSF2", "BsmtFinSF1", "GarageArea"]
     for col in cols2:
         all_data[col] = all_data[col].astype(float)
         all_data[col].fillna(0, inplace=True)
-    all_data["LotFrontage"].fillna(np.mean(all_data["LotFrontage"]),inplace=True)
+    all_data["LotFrontage"].fillna(np.mean(all_data["LotFrontage"]), inplace=True)
 
-    cols3 = ["MSZoning", "BsmtFullBath", "BsmtHalfBath", "Utilities", "Functional", "Electrical", "KitchenQual", "SaleType","Exterior1st", "Exterior2nd"]
+    cols3 = ["MSZoning", "BsmtFullBath", "BsmtHalfBath", "Utilities", "Functional", "Electrical", "KitchenQual",
+             "SaleType", "Exterior1st", "Exterior2nd"]
     for col in cols3:
         all_data[col].fillna(all_data[col].mode()[0], inplace=True)
 
-    numeric_cols = [x for x in all_data.select_dtypes(exclude=['object']).columns.tolist() if x != 'Id' and x != 'SalePrice']
+    numeric_cols = [x for x in all_data.select_dtypes(exclude=['object']).columns.tolist() if
+                    x != 'Id' and x != 'SalePrice']
     object_cols = [x for x in all_data.select_dtypes(include=['object']).columns.tolist()]
 
     for col in numeric_cols:
@@ -67,6 +74,7 @@ def preprocessing(local_train: pd.DataFrame, new_data: pd.DataFrame):
     dataset = pd.get_dummies(all_data, columns=object_cols)
     return dataset
 
+
 def build_dataset(dataset):
     dataset.SalePrice = np.log1p(dataset.SalePrice)
     train = dataset[~dataset.SalePrice.isna()].copy()
@@ -84,11 +92,12 @@ def build_dataset(dataset):
 
 def build_model(dtrain, dvalid, watchlist, num_iter=5000, early_stop=200, **params):
     logger.info('start building local model')
-    model = xgb.train(params, dtrain, evals=watchlist, num_boost_round=num_iter, early_stopping_rounds=early_stop, verbose_eval=True)
+    model = xgb.train(params, dtrain, evals=watchlist, num_boost_round=num_iter, early_stopping_rounds=early_stop,
+                      verbose_eval=True)
     return model
 
 
-def predict(data:pd.DataFrame, model, feature_cols):
+def predict(data: pd.DataFrame, model, feature_cols):
     dtest = xgb.DMatrix(data[feature_cols])
     result = np.expm1(model.predict(dtest))
     data['SalePrice'] = result
@@ -118,7 +127,7 @@ def run_boston_price(test_data, extra_train_data=None, num_iter=5000, early_stop
         raise Exception("test data is None", extra_train_data)
     train_data = load_data()
     datasets = preprocessing(train_data, test_data)
-    _, _, _,feature_cols = build_dataset(datasets)
+    _, _, _, feature_cols = build_dataset(datasets)
     model = load_model()
     dtest = datasets[datasets.SalePrice.isna()].copy()
     result = np.expm1(model.predict(dtest[feature_cols]))
@@ -127,4 +136,4 @@ def run_boston_price(test_data, extra_train_data=None, num_iter=5000, early_stop
 
 
 if __name__ == '__main__':
-    pass
\ No newline at end of file
+    pass
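
Note on the SalePrice transform the hunks above rely on: build_dataset trains on
np.log1p(SalePrice) and predict/run_boston_price invert it with np.expm1, so the
two calls must stay paired. A minimal standalone check of that round trip (not
part of the patch):

    import numpy as np

    prices = np.array([150000.0, 300000.0])
    logged = np.log1p(prices)      # scale the model is trained on
    restored = np.expm1(logged)    # scale predict()/run_boston_price return
    assert np.allclose(restored, prices)
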
diff --git a/ocean_wave/wave_height_mlp.py b/ocean_wave/wave_height_mlp.py
index 47b7580..89b7d1d 100644
--- a/ocean_wave/wave_height_mlp.py
+++ b/ocean_wave/wave_height_mlp.py
@@ -1,4 +1,5 @@
 import os
+
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 import tensorflow as tf
 import numpy as np
@@ -12,6 +13,7 @@ loss = []
 jiance = []
 current_path = os.path.dirname(__file__)
 
+
 def readData(filePath):
     data = pd.read_csv(filePath, header=0)
     x = data.iloc[:len(data), 5:12].values
@@ -19,11 +21,13 @@ def readData(filePath):
 
     return x, y
 
+
 # z-score normalization
 def Z_ScoreNormalization(x, mean, sigma):
-    x = (x-mean)/sigma
+    x = (x - mean) / sigma
     return x
 
+
 # feature standardization
 def featureScore(x):
     for i in range(7):
@@ -34,6 +38,7 @@ def featureScore(x):
 
     return x
 
+
 # split the dataset
 def dataDivision(x, y, train_scale, val_scale):
     train_volumn = int(len(x) * train_scale)
@@ -41,17 +46,18 @@ def dataDivision(x, y, train_scale, val_scale):
     test_volumn = len(x) - train_volumn - val_volumn
 
     x_train = x[:train_volumn, :]
-    x_val = x[train_volumn:train_volumn+val_volumn, :]
+    x_val = x[train_volumn:train_volumn + val_volumn, :]
     x_test = x[-test_volumn:, :]
 
     y_train = y[:train_volumn]
-    y_val = y[train_volumn:train_volumn+val_volumn]
+    y_val = y[train_volumn:train_volumn + val_volumn]
     y_test = y[-test_volumn:]
 
     print(len(x_train), len(y_train), len(x_val), len(y_val))
 
     return x_train, x_val, x_test, y_train, y_val, y_test
 
+
 # build the model
 def createModel(neure, activation, learning_rate, loss):
     model = tf.keras.models.Sequential([
@@ -63,21 +69,25 @@ def createModel(neure, activation, learning_rate, loss):
 
     return model
 
+
 # train the model
 def trainModel(model, x_train, y_train, x_val, y_val, epochs):
     history = LossHistory()
-    model.fit(x_train, y_train, batch_size=32, epochs=epochs, validation_data=(x_val, y_val), validation_freq=1, callbacks=[history])
+    model.fit(x_train, y_train, batch_size=32, epochs=epochs, validation_data=(x_val, y_val), validation_freq=1,
+              callbacks=[history])
 
     model.summary()
 
     return model
 
+
 # model prediction
 def predictModel(x_test, model):
     predicted_data = model.predict(x_test)
 
     return predicted_data
 
+
 # training entry point
 def train(csv_file, neure, activation, learning_rate, loss, epochs):
     x, y = readData(csv_file)
@@ -98,21 +108,25 @@ def train(csv_file, neure, activation, learning_rate, loss, epochs):
 #     model.save(os.path.abspath("./appweb/self_model/ocean_wave_mlp.h5"))
     return model, mse, mae, r
 
+
 def drawLoss(logs):
     loss.append(logs['loss'])
     print(loss[-1])
 
+
 def load_model():
     logger.info(f"{current_path}/pretrain_models/ocean_wave_mlp.h5")
     return keras_load_model(f"{current_path}/pretrain_models/ocean_wave_mlp.h5")
 
+
 class LossHistory(tf.keras.callbacks.Callback):
     def on_epoch_end(self, epoch, logs={}):
         # print(logs['loss'])
         drawLoss(logs)
         # return logs['loss'], logs['mae']
 
-def predict_wave_height(csv_file, num_units:int, activation:str, lr:float, loss:str, epochs:int, x_test:list):
+
+def predict_wave_height(csv_file, num_units: int, activation: str, lr: float, loss: str, epochs: int, x_test: np.ndarray):
     """_summary_
 
     Args:
@@ -130,4 +144,4 @@ def predict_wave_height(csv_file, num_units:int, activation:str, lr:float, loss:
-    model = train(csv_file, num_units, activation, lr, loss, epochs)
+    # train() returns (model, mse, mae, r); unpack instead of treating the tuple as a model
+    model, mse, mae, r = train(csv_file, num_units, activation, lr, loss, epochs)
     result = predictModel(x_test, model)
     logger.info(result)
-    return result[0][0]
\ No newline at end of file
+    return result[0][0]
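
Note: featureScore z-scores each of the seven feature columns with
Z_ScoreNormalization before training, so any x_test handed to predict_wave_height
should be on the same scale as the training features. A standalone sketch of the
transform (column values are made-up):

    import numpy as np

    col = np.array([1.0, 2.0, 3.0, 4.0])
    scaled = (col - col.mean()) / col.std()  # same formula as Z_ScoreNormalization
    print(round(scaled.mean(), 6), round(scaled.std(), 6))  # ~0.0 and 1.0
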
diff --git a/prophet_predict/prophet_predict.py b/prophet_predict/prophet_predict.py
index 53ebdb0..cfe62d6 100644
--- a/prophet_predict/prophet_predict.py
+++ b/prophet_predict/prophet_predict.py
@@ -4,7 +4,8 @@ import datetime as dt
 from get_holiday_cn.client import getHoliday
 from logzero import logger
 
-def run_prophet(data: pd.DataFrame, period:int=1, freq:str='D'):
+
+def run_prophet(data: pd.DataFrame, period: int = 1, freq: str = 'D'):
     """_summary_
 
     Args:
@@ -18,13 +19,13 @@ def run_prophet(data: pd.DataFrame, period:int=1, freq:str='D'):
     """
     assert period > 0
     assert 'ds' in data.columns and 'y' in data.columns
-    try: 
+    try:
         data.ds = pd.to_datetime(data.ds)
     except Exception as e:
         return e
     holiday_data = build_holiday(data.ds.min(), data.ds.max())
     train_data = data.copy()
-    model=Prophet(
+    model = Prophet(
         growth="linear",
         yearly_seasonality=True,
         weekly_seasonality=True,
@@ -32,15 +33,15 @@ def run_prophet(data: pd.DataFrame, period:int=1, freq:str='D'):
         seasonality_mode="multiplicative",
         seasonality_prior_scale=12,
         holidays=holiday_data,
-        n_changepoints= 100, # change points num, default=25
-        )
+        n_changepoints=100,  # number of change points, default=25
+    )
     model.fit(train_data)
     future = model.make_future_dataframe(periods=period, freq=freq, include_history=True)
-    forecast=model.predict(future)
+    forecast = model.predict(future)
     return forecast
 
 
-def get_date_type(date:str, holiday_client:getHoliday):
+def get_date_type(date: str, holiday_client: getHoliday):
     """Determine which kind of day a given date is (workday, weekend or holiday).
 
     Args:
@@ -58,7 +59,7 @@ def get_date_type(date:str, holiday_client:getHoliday):
     return rst.get('holiday').get('name')
 
 
-def build_holiday(start_date:str="2015-01-01", end_date:str="2021-12-31"):
+def build_holiday(start_date: str = "2015-01-01", end_date: str = "2021-12-31"):
     """Collect all Chinese holidays between the start and end dates, including make-up working days that were originally scheduled as holidays.
 
     Args:
@@ -72,10 +73,10 @@ def build_holiday(start_date:str="2015-01-01", end_date:str="2021-12-31"):
     ds_list.date = ds_list.date.apply(lambda x: dt.datetime.strftime(x, format='%Y-%m-%d'))
     client = getHoliday()
     ds_list['day_type'] = ds_list.date.apply(lambda x: get_date_type(x, client))
-    special_date = ds_list[ds_list.day_type != 'simple'].copy() 
+    special_date = ds_list[ds_list.day_type != 'simple'].copy()
     special_date.columns = ['ds', 'holiday']
     return special_date
 
 
 if __name__ == '__main__':
-    pass
\ No newline at end of file
+    pass
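
Note: run_prophet expects a frame with 'ds' and 'y' columns (it asserts on both)
and returns Prophet's forecast frame, whose 'ds'/'yhat' columns run.py serializes.
A minimal driver, assuming prophet and get_holiday_cn are installed (the holiday
lookup needs network access):

    import pandas as pd
    from prophet_predict.prophet_predict import run_prophet

    df = pd.DataFrame({
        "ds": pd.date_range("2021-01-01", periods=60, freq="D"),
        "y": [float(i % 7) for i in range(60)],
    })
    forecast = run_prophet(df, period=7, freq="D")
    print(forecast[["ds", "yhat"]].tail(7))
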
diff --git a/run.py b/run.py
index 24d196e..54a39af 100644
--- a/run.py
+++ b/run.py
@@ -9,11 +9,13 @@ from house_price.house_price_predcition import run_boston_price
 from ocean_wave.wave_height_mlp import predict_wave_height
 from prophet_predict.prophet_predict import run_prophet
 
+TEXT = "text"
 app = Flask(__name__)
 
 @app.route('/house_price', methods=["POST"])
 def predict_price():
-    if request.method=='POST':
+    resp_info = dict()
+    if request.method == 'POST':
         eta = request.form.get('eta', 0.05)
         max_depth = request.form.get('max_depth', 10)
         subsample = request.form.get('subsample', 0.7)
@@ -22,7 +24,6 @@ def predict_price():
         early_stopping_rounds = int(request.form.get('early_stopping_rounds', 200))
         train_data = request.files.get('train_data', None)
         test_data = request.files.get('test_data', None)
-        resp = make_response()
         logger.info(train_data)
         params = {
             "eta": float(eta),
@@ -31,34 +32,36 @@ def predict_price():
             "colsample_bytree": float(cosample_bytree)
         }
         if not train_data:
-            train_data = None
+            train_data = None
         else:
             train_data = pd.read_csv(train_data)
-        if test_data is None:
-            resp.status_code = '406'
-            resp.response = json.dumps({"text": "test data is None"})
-        test_data = pd.read_csv(test_data)
-        try:
-            if train_data is None:
-                rst = run_boston_price(test_data, None, num_boost_round, early_stopping_rounds, **params)
-            else:
-                rst = run_boston_price(test_data, train_data, num_boost_round, early_stopping_rounds, **params)
-        except Exception as e:
-            logger.error(f"Error: {e}")
-            resp.status_code = '406'
-            resp.response = json.dumps({'text': str(e)})
-            return resp
-        resp.status_code=200
-        resp.response = json.dumps({"Id": rst["Id"].values.tolist(), "price": rst["SalePrice"].values.tolist()})
-        return resp
-    else:
-        resp.status_code=405
-        return resp
+        # read the upload once; a second pd.read_csv on the same file handle
+        # would find it already exhausted
+        if test_data is not None:
+            test_data = pd.read_csv(test_data)
+        if test_data is None or test_data.shape[0] == 0:
+            resp_info["msg"] = "test data is empty"
+            resp_info["code"] = 406
+        else:
+            try:
+                if train_data is None:
+                    rst = run_boston_price(test_data, None, num_boost_round, early_stopping_rounds, **params)
+                else:
+                    rst = run_boston_price(test_data, train_data, num_boost_round, early_stopping_rounds, **params)
+            except Exception as e:
+                logger.error(f"Error: {e}")
+                resp_info["msg"] = str(e)
+                resp_info["code"] = 406
+            else:
+                resp_info["code"] = 200
+                resp_info["data"] = rst.to_csv()
+                resp_info["dtype"] = "csv"
+    resp = make_response(json.dumps(resp_info))
+    resp.status_code = 200
+    return resp
 
 
 @app.route('/ocean_wave_height', methods=["POST"])
 def predict_height():
-    if request.method=='POST':
+    resp_info = dict()
+    if request.method == 'POST':
         num_units = int(request.form.get('num_units', 8))
         activation = request.form.get('activation', 'relu')
         lr = float(request.form.get('learning_rate', 0.01))
@@ -75,35 +78,34 @@ def predict_height():
         x_test = [WVHT_1, WDIR_1, WSPD_1, WDIR_2, WSPD_2, WDIR, WSPD]
         x_test = np.array([x_test])
         logger.info(f"test data: {x_test}")
-        resp = make_response()
         if not train_data:
-            train_data = None
+            train_data = None
        else:
             try:
                 train_data = pd.read_csv(train_data)
             except Exception as e:
                 logger.error(f"Error: {e}")
-                resp.status_code = '406'
-                resp.response = json.dumps({'text': str(e)})
-                return resp
+                resp_info["msg"] = str(e)
+                resp_info["code"] = 406
+                train_data = None
         try:
             rst = predict_wave_height(train_data, num_units, activation, lr, loss, epochs, x_test)
         except Exception as e:
             logger.error(f"Error: {e}")
-            resp.status_code = '406'
-            resp.response = json.dumps({'text': "uploaded data does not match the required sample file for wave height prediction, please check"})
-            return resp
-        resp.status_code=200
-        resp.response = json.dumps({"result": str(rst)})
-        return resp
-    else:
-        resp.status_code=405
-        return resp
+            resp_info["msg"] = "uploaded data does not match the required sample file for wave height prediction, please check"
+            resp_info["code"] = 406
+        else:
+            resp_info["code"] = 200
+            # the model returns a numpy scalar, which json.dumps cannot serialize directly
+            resp_info["data"] = str(rst)
+            resp_info["dtype"] = TEXT
+    resp = make_response(json.dumps(resp_info))
+    resp.status_code = 200
+    return resp
 
 
 @app.route("/prophet/", methods=["POST"])
 def run_ts_predict():
-    resp = make_response()
+    resp_info = dict()
     if request.method == "POST":
         data_file = request.files.get("data")
         freq = request.form.get('freq')
@@ -117,20 +119,19 @@ def run_ts_predict():
             logger.info(rest.columns)
             rest['ds'] = rest['ds'].apply(str)
             rest['yhat'] = rest['yhat'].apply(str)
-            resp.data = json.dumps({'ds':rest.ds.values.tolist(), 'yhat':rest.yhat.values.tolist()})
-            resp.status_code = 200
-            return resp
-
         except Exception as e:
             logger.error(f"Error: {e}")
-            resp.status_code = '406'
-            resp.response = json.dumps({'text': str(e)})
-            return resp
-    else:
-        resp.status_code=405
-        return resp
+            resp_info["msg"] = str(e)
+            resp_info["code"] = 406
+        else:
+            resp_info["code"] = 200
+            resp_info["data"] = rest.to_csv()
+            resp_info["dtype"] = "csv"
+    resp = make_response(json.dumps(resp_info))
+    resp.status_code = 200
+    return resp
 
 
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=8901, debug=True)
-
-
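
With this patch every route answers HTTP 200 and moves the real status into the
JSON body ("code", plus "msg" on failure or "data"/"dtype" on success), so clients
should branch on the body rather than on resp.status_code. A hedged client sketch
against the /prophet/ route (the 'period' form field is assumed from the elided
part of run_ts_predict; host/port come from app.run above):

    import io
    import requests

    csv_bytes = b"ds,y\n" + b"".join(
        f"2021-01-{d:02d},{d}\n".encode() for d in range(1, 31)
    )
    resp = requests.post(
        "http://localhost:8901/prophet/",
        files={"data": ("train.csv", io.BytesIO(csv_bytes))},
        data={"freq": "D", "period": 7},
    )
    body = resp.json()
    if body["code"] == 200:
        print(body["dtype"])      # "csv"
        print(body["data"][:200])
    else:
        print("server-side error:", body["msg"])

If the uploaded series is too short for the model, the reply still comes back as
HTTP 200 with "code": 406 and the error text in "msg", which exercises the same
envelope.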