From 2e64b83e0d2d140f3c08a44dbb763b47737571dd Mon Sep 17 00:00:00 2001
From: zhaojinghao
Date: Thu, 8 Dec 2022 16:22:29 +0800
Subject: [PATCH] Modify response
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 house_price/house_price_predcition.py | 35 ++++++----
 ocean_wave/wave_height_mlp.py         | 26 +++++--
 prophet_predict/prophet_predict.py    | 21 +++---
 run.py                                | 99 ++++++++++++++-------------
 4 files changed, 103 insertions(+), 78 deletions(-)

diff --git a/house_price/house_price_predcition.py b/house_price/house_price_predcition.py
index d29ba01..dc2d498 100644
--- a/house_price/house_price_predcition.py
+++ b/house_price/house_price_predcition.py
@@ -5,15 +5,19 @@ import numpy as np
 from sklearn.model_selection import train_test_split
 from logzero import logger
 import os
+
 current_path = os.path.dirname(__file__)
 
+
 def load_data():
     logger.info(f"loading local data")
     logger.info(current_path)
     train_data = pd.read_csv(f'{current_path}/data/train.csv')
-    train_data.drop(train_data[(train_data["GrLivArea"]>4000)&(train_data["SalePrice"]<300000)].index,inplace=True)#pandas 里面的条件索引
+    train_data.drop(train_data[(train_data["GrLivArea"] > 4000) & (train_data["SalePrice"] < 300000)].index,
+                    inplace=True)  # conditional indexing in pandas
     return train_data
 
+
 def load_model():
     logger.info(f"loading local model")
     model = xgb.XGBModel()
@@ -42,22 +46,25 @@ def preprocessing(local_train: pd.DataFrame, new_data: pd.DataFrame):
     year_cols = ['YearBuilt', 'YearRemodAdd', 'GarageYrBlt']
     all_data.loc[na_index, 'GarageYrBlt'] = None
     all_data.GarageYrBlt.fillna(all_data.YearBuilt, inplace=True)
-
-    cols1 = ["GarageQual", "GarageCond", "GarageFinish", "GarageType", "BsmtExposure", "BsmtCond", "BsmtQual", "BsmtFinType2", "BsmtFinType1", "MasVnrType"]
-    for col in cols1:
-        all_data[col].fillna("None",inplace=True)
-    cols2=["MasVnrArea", "BsmtUnfSF", "TotalBsmtSF", "GarageCars", "BsmtFinSF2", "BsmtFinSF1", "GarageArea"]
+    cols1 = ["GarageQual", "GarageCond", "GarageFinish", "GarageType", "BsmtExposure", "BsmtCond", "BsmtQual",
+             "BsmtFinType2", "BsmtFinType1", "MasVnrType"]
+    for col in cols1:
+        all_data[col].fillna("None", inplace=True)
+
+    cols2 = ["MasVnrArea", "BsmtUnfSF", "TotalBsmtSF", "GarageCars", "BsmtFinSF2", "BsmtFinSF1", "GarageArea"]
     for col in cols2:
         all_data[col] = all_data[col].astype(float)
         all_data[col].fillna(0, inplace=True)
-    all_data["LotFrontage"].fillna(np.mean(all_data["LotFrontage"]),inplace=True)
+    all_data["LotFrontage"].fillna(np.mean(all_data["LotFrontage"]), inplace=True)
 
-    cols3 = ["MSZoning", "BsmtFullBath", "BsmtHalfBath", "Utilities", "Functional", "Electrical", "KitchenQual", "SaleType","Exterior1st", "Exterior2nd"]
+    cols3 = ["MSZoning", "BsmtFullBath", "BsmtHalfBath", "Utilities", "Functional", "Electrical", "KitchenQual",
+             "SaleType", "Exterior1st", "Exterior2nd"]
     for col in cols3:
         all_data[col].fillna(all_data[col].mode()[0], inplace=True)
 
-    numeric_cols = [x for x in all_data.select_dtypes(exclude=['object']).columns.tolist() if x != 'Id' and x != 'SalePrice']
+    numeric_cols = [x for x in all_data.select_dtypes(exclude=['object']).columns.tolist() if
+                    x != 'Id' and x != 'SalePrice']
     object_cols = [x for x in all_data.select_dtypes(include=['object']).columns.tolist()]
 
     for col in numeric_cols:
@@ -67,6 +74,7 @@ def preprocessing(local_train: pd.DataFrame, new_data: pd.DataFrame):
     dataset = pd.get_dummies(all_data, columns=object_cols)
     return dataset
 
+
 def build_dataset(dataset):
     dataset.SalePrice = np.log1p(dataset.SalePrice)
     train = dataset[~dataset.SalePrice.isna()].copy()
@@ -84,11 +92,12 @@ def build_dataset(dataset):
 
 def build_model(dtrain, dvalid, watchlist, num_iter=5000, early_stop=200, **params):
     logger.info('start building local model')
-    model = xgb.train(params, dtrain, evals=watchlist, num_boost_round=num_iter, early_stopping_rounds=early_stop, verbose_eval=True)
+    model = xgb.train(params, dtrain, evals=watchlist, num_boost_round=num_iter, early_stopping_rounds=early_stop,
+                      verbose_eval=True)
     return model
 
 
-def predict(data:pd.DataFrame, model, feature_cols):
+def predict(data: pd.DataFrame, model, feature_cols):
     dtest = xgb.DMatrix(data[feature_cols])
     result = np.expm1(model.predict(dtest))
     data['SalePrice'] = result
@@ -118,7 +127,7 @@ def run_boston_price(test_data, extra_train_data=None, num_iter=5000, early_stop
         raise Exception("test data is None", extra_train_data)
     train_data = load_data()
     datasets = preprocessing(train_data, test_data)
-    _, _, _,feature_cols = build_dataset(datasets)
+    _, _, _, feature_cols = build_dataset(datasets)
     model = load_model()
     dtest = datasets[datasets.SalePrice.isna()].copy()
     result = np.expm1(model.predict(dtest[feature_cols]))
@@ -127,4 +136,4 @@ def run_boston_price(test_data, extra_train_data=None, num_iter=5000, early_stop
 
 
 if __name__ == '__main__':
-    pass
\ No newline at end of file
+    pass
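
Note on the SalePrice transform the hunks above rely on: build_dataset trains on
np.log1p(SalePrice) and predict/run_boston_price invert it with np.expm1, so the
two calls must stay paired. A minimal standalone check of that round trip (not
part of the patch):

    import numpy as np

    prices = np.array([150000.0, 300000.0])
    logged = np.log1p(prices)      # scale the model is trained on
    restored = np.expm1(logged)    # scale predict()/run_boston_price return
    assert np.allclose(restored, prices)
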
diff --git a/ocean_wave/wave_height_mlp.py b/ocean_wave/wave_height_mlp.py
index 47b7580..89b7d1d 100644
--- a/ocean_wave/wave_height_mlp.py
+++ b/ocean_wave/wave_height_mlp.py
@@ -1,4 +1,5 @@
 import os
+
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 import tensorflow as tf
 import numpy as np
@@ -12,6 +13,7 @@ loss = []
 jiance = []
 current_path = os.path.dirname(__file__)
 
+
 def readData(filePath):
     data = pd.read_csv(filePath, header=0)
     x = data.iloc[:len(data), 5:12].values
@@ -19,11 +21,13 @@ def readData(filePath):
 
     return x, y
 
+
 # z-score normalization
 def Z_ScoreNormalization(x, mean, sigma):
-    x = (x-mean)/sigma
+    x = (x - mean) / sigma
     return x
 
+
 # feature standardization
 def featureScore(x):
     for i in range(7):
@@ -34,6 +38,7 @@ def featureScore(x):
 
     return x
 
+
 # split the dataset
 def dataDivision(x, y, train_scale, val_scale):
     train_volumn = int(len(x) * train_scale)
@@ -41,17 +46,18 @@ def dataDivision(x, y, train_scale, val_scale):
     test_volumn = len(x) - train_volumn - val_volumn
 
     x_train = x[:train_volumn, :]
-    x_val = x[train_volumn:train_volumn+val_volumn, :]
+    x_val = x[train_volumn:train_volumn + val_volumn, :]
     x_test = x[-test_volumn:, :]
 
     y_train = y[:train_volumn]
-    y_val = y[train_volumn:train_volumn+val_volumn]
+    y_val = y[train_volumn:train_volumn + val_volumn]
     y_test = y[-test_volumn:]
 
     print(len(x_train), len(y_train), len(x_val), len(y_val))
 
     return x_train, x_val, x_test, y_train, y_val, y_test
 
+
 # build the model
 def createModel(neure, activation, learning_rate, loss):
     model = tf.keras.models.Sequential([
@@ -63,21 +69,25 @@ def createModel(neure, activation, learning_rate, loss):
 
     return model
 
+
 # train the model
 def trainModel(model, x_train, y_train, x_val, y_val, epochs):
     history = LossHistory()
-    model.fit(x_train, y_train, batch_size=32, epochs=epochs, validation_data=(x_val, y_val), validation_freq=1, callbacks=[history])
+    model.fit(x_train, y_train, batch_size=32, epochs=epochs, validation_data=(x_val, y_val), validation_freq=1,
+              callbacks=[history])
 
     model.summary()
 
     return model
 
+
 # model prediction
 def predictModel(x_test, model):
     predicted_data = model.predict(x_test)
 
     return predicted_data
 
+
 # training entry point
 def train(csv_file, neure, activation, learning_rate, loss, epochs):
     x, y = readData(csv_file)
@@ -98,21 +108,25 @@ def train(csv_file, neure, activation, learning_rate, loss, epochs):
 #     model.save(os.path.abspath("./appweb/self_model/ocean_wave_mlp.h5"))
     return model, mse, mae, r
 
+
 def drawLoss(logs):
     loss.append(logs['loss'])
     print(loss[-1])
 
+
 def load_model():
     logger.info(f"{current_path}/pretrain_models/ocean_wave_mlp.h5")
     return keras_load_model(f"{current_path}/pretrain_models/ocean_wave_mlp.h5")
 
+
 class LossHistory(tf.keras.callbacks.Callback):
     def on_epoch_end(self, epoch, logs={}):
         # print(logs['loss'])
         drawLoss(logs)
         # return logs['loss'], logs['mae']
 
-def predict_wave_height(csv_file, num_units:int, activation:str, lr:float, loss:str, epochs:int, x_test:list):
+
+def predict_wave_height(csv_file, num_units: int, activation: str, lr: float, loss: str, epochs: int, x_test: np.ndarray):
     """_summary_
 
     Args:
@@ -130,4 +144,4 @@ def predict_wave_height(csv_file, num_units:int, activation:str, lr:float, loss:
-    model = train(csv_file, num_units, activation, lr, loss, epochs)
+    # train() returns (model, mse, mae, r); unpack instead of treating the tuple as a model
+    model, mse, mae, r = train(csv_file, num_units, activation, lr, loss, epochs)
     result = predictModel(x_test, model)
     logger.info(result)
-    return result[0][0]
\ No newline at end of file
+    return result[0][0]
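
Note: featureScore z-scores each of the seven feature columns with
Z_ScoreNormalization before training, so any x_test handed to predict_wave_height
should be on the same scale as the training features. A standalone sketch of the
transform (column values are made-up):

    import numpy as np

    col = np.array([1.0, 2.0, 3.0, 4.0])
    scaled = (col - col.mean()) / col.std()  # same formula as Z_ScoreNormalization
    print(round(scaled.mean(), 6), round(scaled.std(), 6))  # ~0.0 and 1.0
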
diff --git a/prophet_predict/prophet_predict.py b/prophet_predict/prophet_predict.py
index 53ebdb0..cfe62d6 100644
--- a/prophet_predict/prophet_predict.py
+++ b/prophet_predict/prophet_predict.py
@@ -4,7 +4,8 @@ import datetime as dt
 from get_holiday_cn.client import getHoliday
 from logzero import logger
 
-def run_prophet(data: pd.DataFrame, period:int=1, freq:str='D'):
+
+def run_prophet(data: pd.DataFrame, period: int = 1, freq: str = 'D'):
     """_summary_
 
     Args:
@@ -18,13 +19,13 @@ def run_prophet(data: pd.DataFrame, period:int=1, freq:str='D'):
     """
     assert period > 0
     assert 'ds' in data.columns and 'y' in data.columns
-    try: 
+    try:
         data.ds = pd.to_datetime(data.ds)
     except Exception as e:
         return e
     holiday_data = build_holiday(data.ds.min(), data.ds.max())
     train_data = data.copy()
-    model=Prophet(
+    model = Prophet(
         growth="linear",
         yearly_seasonality=True,
         weekly_seasonality=True,
@@ -32,15 +33,15 @@ def run_prophet(data: pd.DataFrame, period:int=1, freq:str='D'):
         seasonality_mode="multiplicative",
         seasonality_prior_scale=12,
         holidays=holiday_data,
-        n_changepoints= 100, # change points num, default=25
-        )
+        n_changepoints=100,  # number of change points, default=25
+    )
     model.fit(train_data)
     future = model.make_future_dataframe(periods=period, freq=freq, include_history=True)
-    forecast=model.predict(future)
+    forecast = model.predict(future)
     return forecast
 
 
-def get_date_type(date:str, holiday_client:getHoliday):
+def get_date_type(date: str, holiday_client: getHoliday):
     """Determine which kind of day a given date is (workday, weekend or holiday).
 
     Args:
@@ -58,7 +59,7 @@ def get_date_type(date:str, holiday_client:getHoliday):
     return rst.get('holiday').get('name')
 
 
-def build_holiday(start_date:str="2015-01-01", end_date:str="2021-12-31"):
+def build_holiday(start_date: str = "2015-01-01", end_date: str = "2021-12-31"):
     """Collect all Chinese holidays between the start and end dates, including make-up working days that were originally scheduled as holidays.
 
     Args:
@@ -72,10 +73,10 @@ def build_holiday(start_date:str="2015-01-01", end_date:str="2021-12-31"):
     ds_list.date = ds_list.date.apply(lambda x: dt.datetime.strftime(x, format='%Y-%m-%d'))
     client = getHoliday()
     ds_list['day_type'] = ds_list.date.apply(lambda x: get_date_type(x, client))
-    special_date = ds_list[ds_list.day_type != 'simple'].copy() 
+    special_date = ds_list[ds_list.day_type != 'simple'].copy()
     special_date.columns = ['ds', 'holiday']
     return special_date
 
 
 if __name__ == '__main__':
-    pass
\ No newline at end of file
+    pass
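
Note: run_prophet expects a frame with 'ds' and 'y' columns (it asserts on both)
and returns Prophet's forecast frame, whose 'ds'/'yhat' columns run.py serializes.
A minimal driver, assuming prophet and get_holiday_cn are installed (the holiday
lookup needs network access):

    import pandas as pd
    from prophet_predict.prophet_predict import run_prophet

    df = pd.DataFrame({
        "ds": pd.date_range("2021-01-01", periods=60, freq="D"),
        "y": [float(i % 7) for i in range(60)],
    })
    forecast = run_prophet(df, period=7, freq="D")
    print(forecast[["ds", "yhat"]].tail(7))
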
diff --git a/run.py b/run.py
index 24d196e..54a39af 100644
--- a/run.py
+++ b/run.py
@@ -9,11 +9,13 @@ from house_price.house_price_predcition import run_boston_price
 from ocean_wave.wave_height_mlp import predict_wave_height
 from prophet_predict.prophet_predict import run_prophet
 
+TEXT = "text"
 app = Flask(__name__)
 
 @app.route('/house_price', methods=["POST"])
 def predict_price():
-    if request.method=='POST':
+    resp_info = dict()
+    if request.method == 'POST':
         eta = request.form.get('eta', 0.05)
         max_depth = request.form.get('max_depth', 10)
         subsample = request.form.get('subsample', 0.7)
@@ -22,7 +24,6 @@ def predict_price():
         early_stopping_rounds = int(request.form.get('early_stopping_rounds', 200))
         train_data = request.files.get('train_data', None)
         test_data = request.files.get('test_data', None)
-        resp = make_response()
         logger.info(train_data)
         params = {
             "eta": float(eta),
@@ -31,34 +32,36 @@ def predict_price():
             "colsample_bytree": float(cosample_bytree)
         }
         if not train_data:
-            train_data = None
+            train_data = None
         else:
             train_data = pd.read_csv(train_data)
-        if test_data is None:
-            resp.status_code = '406'
-            resp.response = json.dumps({"text": "test data is None"})
-        test_data = pd.read_csv(test_data)
-        try:
-            if train_data is None:
-                rst = run_boston_price(test_data, None, num_boost_round, early_stopping_rounds, **params)
-            else:
-                rst = run_boston_price(test_data, train_data, num_boost_round, early_stopping_rounds, **params)
-        except Exception as e:
-            logger.error(f"Error: {e}")
-            resp.status_code = '406'
-            resp.response = json.dumps({'text': str(e)})
-            return resp
-        resp.status_code=200
-        resp.response = json.dumps({"Id": rst["Id"].values.tolist(), "price": rst["SalePrice"].values.tolist()})
-        return resp
-    else:
-        resp.status_code=405
-        return resp
+        # read the upload once; a second pd.read_csv on the same file handle
+        # would find it already exhausted
+        if test_data is not None:
+            test_data = pd.read_csv(test_data)
+        if test_data is None or test_data.shape[0] == 0:
+            resp_info["msg"] = "test data is empty"
+            resp_info["code"] = 406
+        else:
+            try:
+                if train_data is None:
+                    rst = run_boston_price(test_data, None, num_boost_round, early_stopping_rounds, **params)
+                else:
+                    rst = run_boston_price(test_data, train_data, num_boost_round, early_stopping_rounds, **params)
+            except Exception as e:
+                logger.error(f"Error: {e}")
+                resp_info["msg"] = str(e)
+                resp_info["code"] = 406
+            else:
+                resp_info["code"] = 200
+                resp_info["data"] = rst.to_csv()
+                resp_info["dtype"] = "csv"
+    resp = make_response(json.dumps(resp_info))
+    resp.status_code = 200
+    return resp
 
 
 @app.route('/ocean_wave_height', methods=["POST"])
 def predict_height():
-    if request.method=='POST':
+    resp_info = dict()
+    if request.method == 'POST':
         num_units = int(request.form.get('num_units', 8))
         activation = request.form.get('activation', 'relu')
         lr = float(request.form.get('learning_rate', 0.01))
@@ -75,35 +78,34 @@ def predict_height():
         x_test = [WVHT_1, WDIR_1, WSPD_1, WDIR_2, WSPD_2, WDIR, WSPD]
         x_test = np.array([x_test])
         logger.info(f"test data: {x_test}")
-        resp = make_response()
         if not train_data:
-            train_data = None
+            train_data = None
        else:
             try:
                 train_data = pd.read_csv(train_data)
             except Exception as e:
                 logger.error(f"Error: {e}")
-                resp.status_code = '406'
-                resp.response = json.dumps({'text': str(e)})
-                return resp
+                resp_info["msg"] = str(e)
+                resp_info["code"] = 406
+                train_data = None
         try:
             rst = predict_wave_height(train_data, num_units, activation, lr, loss, epochs, x_test)
         except Exception as e:
             logger.error(f"Error: {e}")
-            resp.status_code = '406'
-            resp.response = json.dumps({'text': "uploaded data does not match the required sample file for wave height prediction, please check"})
-            return resp
-        resp.status_code=200
-        resp.response = json.dumps({"result": str(rst)})
-        return resp
-    else:
-        resp.status_code=405
-        return resp
+            resp_info["msg"] = "uploaded data does not match the required sample file for wave height prediction, please check"
+            resp_info["code"] = 406
+        else:
+            resp_info["code"] = 200
+            # the model returns a numpy scalar, which json.dumps cannot serialize directly
+            resp_info["data"] = str(rst)
+            resp_info["dtype"] = TEXT
+    resp = make_response(json.dumps(resp_info))
+    resp.status_code = 200
+    return resp
 
 
 @app.route("/prophet/", methods=["POST"])
 def run_ts_predict():
-    resp = make_response()
+    resp_info = dict()
     if request.method == "POST":
         data_file = request.files.get("data")
         freq = request.form.get('freq')
@@ -117,20 +119,19 @@ def run_ts_predict():
             logger.info(rest.columns)
             rest['ds'] = rest['ds'].apply(str)
             rest['yhat'] = rest['yhat'].apply(str)
-            resp.data = json.dumps({'ds':rest.ds.values.tolist(), 'yhat':rest.yhat.values.tolist()})
-            resp.status_code = 200
-            return resp
-
         except Exception as e:
             logger.error(f"Error: {e}")
-            resp.status_code = '406'
-            resp.response = json.dumps({'text': str(e)})
-            return resp
-    else:
-        resp.status_code=405
-        return resp
+            resp_info["msg"] = str(e)
+            resp_info["code"] = 406
+        else:
+            resp_info["code"] = 200
+            resp_info["data"] = rest.to_csv()
+            resp_info["dtype"] = "csv"
+    resp = make_response(json.dumps(resp_info))
+    resp.status_code = 200
+    return resp
 
 
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=8901, debug=True)
-
-
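
With this patch every route answers HTTP 200 and moves the real status into the
JSON body ("code", plus "msg" on failure or "data"/"dtype" on success), so clients
should branch on the body rather than on resp.status_code. A hedged client sketch
against the /prophet/ route (the 'period' form field is assumed from the elided
part of run_ts_predict; host/port come from app.run above):

    import io
    import requests

    csv_bytes = b"ds,y\n" + b"".join(
        f"2021-01-{d:02d},{d}\n".encode() for d in range(1, 31)
    )
    resp = requests.post(
        "http://localhost:8901/prophet/",
        files={"data": ("train.csv", io.BytesIO(csv_bytes))},
        data={"freq": "D", "period": 7},
    )
    body = resp.json()
    if body["code"] == 200:
        print(body["dtype"])      # "csv"
        print(body["data"][:200])
    else:
        print("server-side error:", body["msg"])

If the uploaded series is too short for the model, the reply still comes back as
HTTP 200 with "code": 406 and the error text in "msg", which exercises the same
envelope.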