修改response

This commit is contained in:
zhaojinghao 2022-12-08 16:22:29 +08:00
parent 365c194e82
commit 2e64b83e0d
4 changed files with 103 additions and 78 deletions

View File

@ -5,15 +5,19 @@ import numpy as np
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from logzero import logger from logzero import logger
import os import os
current_path = os.path.dirname(__file__) current_path = os.path.dirname(__file__)
def load_data(): def load_data():
logger.info(f"读取本地数据") logger.info(f"读取本地数据")
logger.info(current_path) logger.info(current_path)
train_data = pd.read_csv(f'{current_path}/data/train.csv') train_data = pd.read_csv(f'{current_path}/data/train.csv')
train_data.drop(train_data[(train_data["GrLivArea"]>4000)&(train_data["SalePrice"]<300000)].index,inplace=True)#pandas 里面的条件索引 train_data.drop(train_data[(train_data["GrLivArea"] > 4000) & (train_data["SalePrice"] < 300000)].index,
inplace=True) # pandas 里面的条件索引
return train_data return train_data
def load_model(): def load_model():
logger.info(f"读取本地模型") logger.info(f"读取本地模型")
model = xgb.XGBModel() model = xgb.XGBModel()
@ -42,22 +46,25 @@ def preprocessing(local_train: pd.DataFrame, new_data: pd.DataFrame):
year_cols = ['YearBuilt', 'YearRemodAdd', 'GarageYrBlt'] year_cols = ['YearBuilt', 'YearRemodAdd', 'GarageYrBlt']
all_data.loc[na_index, 'GarageYrBlt'] = None all_data.loc[na_index, 'GarageYrBlt'] = None
all_data.GarageYrBlt.fillna(all_data.YearBuilt, inplace=True) all_data.GarageYrBlt.fillna(all_data.YearBuilt, inplace=True)
cols1 = ["GarageQual", "GarageCond", "GarageFinish", "GarageType", "BsmtExposure", "BsmtCond", "BsmtQual", "BsmtFinType2", "BsmtFinType1", "MasVnrType"]
for col in cols1:
all_data[col].fillna("None",inplace=True)
cols2=["MasVnrArea", "BsmtUnfSF", "TotalBsmtSF", "GarageCars", "BsmtFinSF2", "BsmtFinSF1", "GarageArea"] cols1 = ["GarageQual", "GarageCond", "GarageFinish", "GarageType", "BsmtExposure", "BsmtCond", "BsmtQual",
"BsmtFinType2", "BsmtFinType1", "MasVnrType"]
for col in cols1:
all_data[col].fillna("None", inplace=True)
cols2 = ["MasVnrArea", "BsmtUnfSF", "TotalBsmtSF", "GarageCars", "BsmtFinSF2", "BsmtFinSF1", "GarageArea"]
for col in cols2: for col in cols2:
all_data[col] = all_data[col].astype(float) all_data[col] = all_data[col].astype(float)
all_data[col].fillna(0, inplace=True) all_data[col].fillna(0, inplace=True)
all_data["LotFrontage"].fillna(np.mean(all_data["LotFrontage"]),inplace=True) all_data["LotFrontage"].fillna(np.mean(all_data["LotFrontage"]), inplace=True)
cols3 = ["MSZoning", "BsmtFullBath", "BsmtHalfBath", "Utilities", "Functional", "Electrical", "KitchenQual", "SaleType","Exterior1st", "Exterior2nd"] cols3 = ["MSZoning", "BsmtFullBath", "BsmtHalfBath", "Utilities", "Functional", "Electrical", "KitchenQual",
"SaleType", "Exterior1st", "Exterior2nd"]
for col in cols3: for col in cols3:
all_data[col].fillna(all_data[col].mode()[0], inplace=True) all_data[col].fillna(all_data[col].mode()[0], inplace=True)
numeric_cols = [x for x in all_data.select_dtypes(exclude=['object']).columns.tolist() if x != 'Id' and x != 'SalePrice'] numeric_cols = [x for x in all_data.select_dtypes(exclude=['object']).columns.tolist() if
x != 'Id' and x != 'SalePrice']
object_cols = [x for x in all_data.select_dtypes(include=['object']).columns.tolist()] object_cols = [x for x in all_data.select_dtypes(include=['object']).columns.tolist()]
for col in numeric_cols: for col in numeric_cols:
@ -67,6 +74,7 @@ def preprocessing(local_train: pd.DataFrame, new_data: pd.DataFrame):
dataset = pd.get_dummies(all_data, columns=object_cols) dataset = pd.get_dummies(all_data, columns=object_cols)
return dataset return dataset
def build_dataset(dataset): def build_dataset(dataset):
dataset.SalePrice = np.log1p(dataset.SalePrice) dataset.SalePrice = np.log1p(dataset.SalePrice)
train = dataset[~dataset.SalePrice.isna()].copy() train = dataset[~dataset.SalePrice.isna()].copy()
@ -84,11 +92,12 @@ def build_dataset(dataset):
def build_model(dtrain, dvalid, watchlist, num_iter=5000, early_stop=200, **params): def build_model(dtrain, dvalid, watchlist, num_iter=5000, early_stop=200, **params):
logger.info('开始本地建模') logger.info('开始本地建模')
model = xgb.train(params, dtrain, evals=watchlist, num_boost_round=num_iter, early_stopping_rounds=early_stop, verbose_eval=True) model = xgb.train(params, dtrain, evals=watchlist, num_boost_round=num_iter, early_stopping_rounds=early_stop,
verbose_eval=True)
return model return model
def predict(data:pd.DataFrame, model, feature_cols): def predict(data: pd.DataFrame, model, feature_cols):
dtest = xgb.DMatrix(data[feature_cols]) dtest = xgb.DMatrix(data[feature_cols])
result = np.expm1(model.predict(dtest)) result = np.expm1(model.predict(dtest))
data['SalePrice'] = result data['SalePrice'] = result
@ -118,7 +127,7 @@ def run_boston_price(test_data, extra_train_data=None, num_iter=5000, early_stop
raise Exception("test data is None", extra_train_data) raise Exception("test data is None", extra_train_data)
train_data = load_data() train_data = load_data()
datasets = preprocessing(train_data, test_data) datasets = preprocessing(train_data, test_data)
_, _, _,feature_cols = build_dataset(datasets) _, _, _, feature_cols = build_dataset(datasets)
model = load_model() model = load_model()
dtest = datasets[datasets.SalePrice.isna()].copy() dtest = datasets[datasets.SalePrice.isna()].copy()
result = np.expm1(model.predict(dtest[feature_cols])) result = np.expm1(model.predict(dtest[feature_cols]))
@ -127,4 +136,4 @@ def run_boston_price(test_data, extra_train_data=None, num_iter=5000, early_stop
if __name__ == '__main__': if __name__ == '__main__':
pass pass

View File

@ -1,4 +1,5 @@
import os import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
@ -12,6 +13,7 @@ loss = []
jiance = [] jiance = []
current_path = os.path.dirname(__file__) current_path = os.path.dirname(__file__)
def readData(filePath): def readData(filePath):
data = pd.read_csv(filePath, header=0) data = pd.read_csv(filePath, header=0)
x = data.iloc[:len(data), 5:12].values x = data.iloc[:len(data), 5:12].values
@ -19,11 +21,13 @@ def readData(filePath):
return x, y return x, y
# 标准化函数 # 标准化函数
def Z_ScoreNormalization(x, mean, sigma): def Z_ScoreNormalization(x, mean, sigma):
x = (x-mean)/sigma x = (x - mean) / sigma
return x return x
# 特征标准化 # 特征标准化
def featureScore(x): def featureScore(x):
for i in range(7): for i in range(7):
@ -34,6 +38,7 @@ def featureScore(x):
return x return x
# 划分数据集 # 划分数据集
def dataDivision(x, y, train_scale, val_scale): def dataDivision(x, y, train_scale, val_scale):
train_volumn = int(len(x) * train_scale) train_volumn = int(len(x) * train_scale)
@ -41,17 +46,18 @@ def dataDivision(x, y, train_scale, val_scale):
test_volumn = len(x) - train_volumn - val_volumn test_volumn = len(x) - train_volumn - val_volumn
x_train = x[:train_volumn, :] x_train = x[:train_volumn, :]
x_val = x[train_volumn:train_volumn+val_volumn, :] x_val = x[train_volumn:train_volumn + val_volumn, :]
x_test = x[-test_volumn:, :] x_test = x[-test_volumn:, :]
y_train = y[:train_volumn] y_train = y[:train_volumn]
y_val = y[train_volumn:train_volumn+val_volumn] y_val = y[train_volumn:train_volumn + val_volumn]
y_test = y[-test_volumn:] y_test = y[-test_volumn:]
print(len(x_train), len(y_train), len(x_val), len(y_val)) print(len(x_train), len(y_train), len(x_val), len(y_val))
return x_train, x_val, x_test, y_train, y_val, y_test return x_train, x_val, x_test, y_train, y_val, y_test
# 创建模型 # 创建模型
def createModel(neure, activation, learning_rate, loss): def createModel(neure, activation, learning_rate, loss):
model = tf.keras.models.Sequential([ model = tf.keras.models.Sequential([
@ -63,21 +69,25 @@ def createModel(neure, activation, learning_rate, loss):
return model return model
# 训练模型 # 训练模型
def trainModel(model, x_train, y_train, x_val, y_val, epochs): def trainModel(model, x_train, y_train, x_val, y_val, epochs):
history = LossHistory() history = LossHistory()
model.fit(x_train, y_train, batch_size=32, epochs=epochs, validation_data=(x_val, y_val), validation_freq=1, callbacks=[history]) model.fit(x_train, y_train, batch_size=32, epochs=epochs, validation_data=(x_val, y_val), validation_freq=1,
callbacks=[history])
model.summary() model.summary()
return model return model
# 模型预测 # 模型预测
def predictModel(x_test, model): def predictModel(x_test, model):
predicted_data = model.predict(x_test) predicted_data = model.predict(x_test)
return predicted_data return predicted_data
# 训练 # 训练
def train(csv_file, neure, activation, learning_rate, loss, epochs): def train(csv_file, neure, activation, learning_rate, loss, epochs):
x, y = readData(csv_file) x, y = readData(csv_file)
@ -98,21 +108,25 @@ def train(csv_file, neure, activation, learning_rate, loss, epochs):
# model.save(os.path.abspath("./appweb/self_model/ocean_wave_mlp.h5")) # model.save(os.path.abspath("./appweb/self_model/ocean_wave_mlp.h5"))
return model, mse, mae, r return model, mse, mae, r
def drawLoss(logs): def drawLoss(logs):
loss.append(logs['loss']) loss.append(logs['loss'])
print(loss[-1]) print(loss[-1])
def load_model(): def load_model():
logger.info(f"{current_path}/pretrain_models/ocean_wave_mlp.h5") logger.info(f"{current_path}/pretrain_models/ocean_wave_mlp.h5")
return keras_load_model(f"{current_path}/pretrain_models/ocean_wave_mlp.h5") return keras_load_model(f"{current_path}/pretrain_models/ocean_wave_mlp.h5")
class LossHistory(tf.keras.callbacks.Callback): class LossHistory(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs={}): def on_epoch_end(self, epoch, logs={}):
# print(logs['loss']) # print(logs['loss'])
drawLoss(logs) drawLoss(logs)
# return logs['loss'], logs['mae'] # return logs['loss'], logs['mae']
def predict_wave_height(csv_file, num_units:int, activation:str, lr:float, loss:str, epochs:int, x_test:list):
def predict_wave_height(csv_file, num_units: int, activation: str, lr: float, loss: str, epochs: int, x_test: [np.ndarray, list]):
"""_summary_ """_summary_
Args: Args:
@ -130,4 +144,4 @@ def predict_wave_height(csv_file, num_units:int, activation:str, lr:float, loss:
model = train(csv_file, num_units, activation, lr, loss, epochs) model = train(csv_file, num_units, activation, lr, loss, epochs)
result = predictModel(x_test, model) result = predictModel(x_test, model)
logger.info(result) logger.info(result)
return result[0][0] return result[0][0]

View File

@ -4,7 +4,8 @@ import datetime as dt
from get_holiday_cn.client import getHoliday from get_holiday_cn.client import getHoliday
from logzero import logger from logzero import logger
def run_prophet(data: pd.DataFrame, period:int=1, freq:str='D'):
def run_prophet(data: pd.DataFrame, period: int = 1, freq: str = 'D'):
"""_summary_ """_summary_
Args: Args:
@ -18,13 +19,13 @@ def run_prophet(data: pd.DataFrame, period:int=1, freq:str='D'):
""" """
assert period > 0 assert period > 0
assert 'ds' in data.columns and 'y' in data.columns assert 'ds' in data.columns and 'y' in data.columns
try: try:
data.ds = pd.to_datetime(data.ds) data.ds = pd.to_datetime(data.ds)
except Exception as e: except Exception as e:
return e return e
holiday_data = build_holiday(data.ds.min(), data.ds.max()) holiday_data = build_holiday(data.ds.min(), data.ds.max())
train_data = data.copy() train_data = data.copy()
model=Prophet( model = Prophet(
growth="linear", growth="linear",
yearly_seasonality=True, yearly_seasonality=True,
weekly_seasonality=True, weekly_seasonality=True,
@ -32,15 +33,15 @@ def run_prophet(data: pd.DataFrame, period:int=1, freq:str='D'):
seasonality_mode="multiplicative", seasonality_mode="multiplicative",
seasonality_prior_scale=12, seasonality_prior_scale=12,
holidays=holiday_data, holidays=holiday_data,
n_changepoints= 100, # change points num, default=25 n_changepoints=100, # change points num, default=25
) )
model.fit(train_data) model.fit(train_data)
future = model.make_future_dataframe(periods=period, freq=freq, include_history=True) future = model.make_future_dataframe(periods=period, freq=freq, include_history=True)
forecast=model.predict(future) forecast = model.predict(future)
return forecast return forecast
def get_date_type(date:str, holiday_client:getHoliday): def get_date_type(date: str, holiday_client: getHoliday):
"""一个判断某个日期是哪种假期的类 """一个判断某个日期是哪种假期的类
Args: Args:
@ -58,7 +59,7 @@ def get_date_type(date:str, holiday_client:getHoliday):
return rst.get('holiday').get('name') return rst.get('holiday').get('name')
def build_holiday(start_date:str="2015-01-01", end_date:str="2021-12-31"): def build_holiday(start_date: str = "2015-01-01", end_date: str = "2021-12-31"):
"""基于起止日期,将该时间段内的国内假期都找出来,包括本应该放假但是最后调休上班的 """基于起止日期,将该时间段内的国内假期都找出来,包括本应该放假但是最后调休上班的
Args: Args:
@ -72,10 +73,10 @@ def build_holiday(start_date:str="2015-01-01", end_date:str="2021-12-31"):
ds_list.date = ds_list.date.apply(lambda x: dt.datetime.strftime(x, format='%Y-%m-%d')) ds_list.date = ds_list.date.apply(lambda x: dt.datetime.strftime(x, format='%Y-%m-%d'))
client = getHoliday() client = getHoliday()
ds_list['day_type'] = ds_list.date.apply(lambda x: get_date_type(x, client)) ds_list['day_type'] = ds_list.date.apply(lambda x: get_date_type(x, client))
special_date = ds_list[ds_list.day_type != 'simple'].copy() special_date = ds_list[ds_list.day_type != 'simple'].copy()
special_date.columns = ['ds', 'holiday'] special_date.columns = ['ds', 'holiday']
return special_date return special_date
if __name__ == '__main__': if __name__ == '__main__':
pass pass

99
run.py
View File

@ -9,11 +9,13 @@ from house_price.house_price_predcition import run_boston_price
from ocean_wave.wave_height_mlp import predict_wave_height from ocean_wave.wave_height_mlp import predict_wave_height
from prophet_predict.prophet_predict import run_prophet from prophet_predict.prophet_predict import run_prophet
TEXT = "text"
app = Flask(__name__) app = Flask(__name__)
@app.route('/house_price', methods=["POST"]) @app.route('/house_price', methods=["POST"])
def predict_price(): def predict_price():
if request.method=='POST': resp_info = dict()
if request.method == 'POST':
eta = request.form.get('eta', 0.05) eta = request.form.get('eta', 0.05)
max_depth = request.form.get('max_depth', 10) max_depth = request.form.get('max_depth', 10)
subsample = request.form.get('subsample', 0.7) subsample = request.form.get('subsample', 0.7)
@ -22,7 +24,6 @@ def predict_price():
early_stopping_rounds = int(request.form.get('early_stopping_rounds', 200)) early_stopping_rounds = int(request.form.get('early_stopping_rounds', 200))
train_data = request.files.get('train_data', None) train_data = request.files.get('train_data', None)
test_data = request.files.get('test_data', None) test_data = request.files.get('test_data', None)
resp = make_response()
logger.info(train_data) logger.info(train_data)
params = { params = {
"eta": float(eta), "eta": float(eta),
@ -31,34 +32,36 @@ def predict_price():
"cosample_bytree": float(cosample_bytree) "cosample_bytree": float(cosample_bytree)
} }
if not train_data: if not train_data:
train_data = None train_data = None
else: else:
train_data = pd.read_csv(train_data) train_data = pd.read_csv(train_data)
if test_data is None: if test_data is None or pd.read_csv(test_data).shape[0] == 0:
resp.status_code = '406' resp_info["msg"] = "测试数据为空"
resp.response = json.dumps({"text": "test data is None"}) resp_info["code"] = 406
test_data = pd.read_csv(test_data) else:
try: test_data = pd.read_csv(test_data)
if train_data is None: try:
rst = run_boston_price(test_data, None, num_boost_round, early_stopping_rounds, **params) if train_data is None:
rst = run_boston_price(test_data, None, num_boost_round, early_stopping_rounds, **params)
else:
rst = run_boston_price(test_data, train_data, num_boost_round, early_stopping_rounds, **params)
except Exception as e:
logger.error(f"Error: {e}")
resp_info["msg"] = str(e)
resp_info["code"] = 406
else: else:
rst = run_boston_price(test_data, train_data, num_boost_round, early_stopping_rounds, **params) resp_info["code"] = 200
except Exception as e: resp_info["data"] = rst.to_csv()
logger.error(f"Error: {e}") resp_info["dtype"] = "csv"
resp.status_code = '406' resp = make_response(json.dumps(resp_info))
resp.response = json.dumps({'text': str(e)}) resp.status_code = 200
return resp return resp
resp.status_code=200
resp.response = json.dumps({"Id": rst["Id"].values.tolist(), "price": rst["SalePrice"].values.tolist()})
return resp
else:
resp.status_code=405
return resp
@app.route('/ocean_wave_height', methods=["POST"]) @app.route('/ocean_wave_height', methods=["POST"])
def predict_height(): def predict_height():
if request.method=='POST': resp_info = dict()
if request.method == 'POST':
num_units = int(request.form.get('num_units', 8)) num_units = int(request.form.get('num_units', 8))
activation = request.form.get('activation', 'relu') activation = request.form.get('activation', 'relu')
lr = float(request.form.get('learning_rate', 0.01)) lr = float(request.form.get('learning_rate', 0.01))
@ -75,35 +78,34 @@ def predict_height():
x_test = [WVHT_1, WDIR_1, WSPD_1, WDIR_2, WSPD_2, WDIR, WSPD] x_test = [WVHT_1, WDIR_1, WSPD_1, WDIR_2, WSPD_2, WDIR, WSPD]
x_test = np.array([x_test]) x_test = np.array([x_test])
logger.info(f"test data: {x_test}") logger.info(f"test data: {x_test}")
resp = make_response()
if not train_data: if not train_data:
train_data = None train_data = None
else: else:
try: try:
train_data = pd.read_csv(train_data) train_data = pd.read_csv(train_data)
except Exception as e: except Exception as e:
logger.error(f"Error: {e}") logger.error(f"Error: {e}")
resp.status_code = '406' resp_info["msg"] = str(e)
resp.response = json.dumps({'text': str(e)}) resp_info["code"] = 406
return resp train_data = None
try: try:
rst = predict_wave_height(train_data, num_units, activation, lr, loss, epochs, x_test) rst = predict_wave_height(train_data, num_units, activation, lr, loss, epochs, x_test)
except Exception as e: except Exception as e:
logger.error(f"Error: {e}") logger.error(f"Error: {e}")
resp.status_code = '406' resp_info["msg"] = "上传数据不符合海浪高度预测的规定文件示例,请检查"
resp.response = json.dumps({'text': "上传数据不符合海浪高度预测的规定文件示例,请检查"}) resp_info["code"] = 406
return resp else:
resp.status_code=200 resp_info["code"] = 200
resp.response = json.dumps({"result": str(rst)}) resp_info["data"] = rst
return resp resp_info["dtype"] = TEXT
else: resp = make_response(json.dumps(resp_info))
resp.status_code=405 resp.status_code = 200
return resp return resp
@app.route("/prophet/", methods=["POST"]) @app.route("/prophet/", methods=["POST"])
def run_ts_predict(): def run_ts_predict():
resp = make_response() resp_info = dict()
if request.method == "POST": if request.method == "POST":
data_file = request.files.get("data") data_file = request.files.get("data")
freq = request.form.get('freq') freq = request.form.get('freq')
@ -117,20 +119,19 @@ def run_ts_predict():
logger.info(rest.columns) logger.info(rest.columns)
rest['ds'] = rest['ds'].apply(str) rest['ds'] = rest['ds'].apply(str)
rest['yhat'] = rest['yhat'].apply(str) rest['yhat'] = rest['yhat'].apply(str)
resp.data = json.dumps({'ds':rest.ds.values.tolist(), 'yhat':rest.yhat.values.tolist()})
resp.status_code = 200
return resp
except Exception as e: except Exception as e:
logger.error(f"Error: {e}") logger.error(f"Error: {e}")
resp.status_code = '406' resp_info["msg"] = str(e)
resp.response = json.dumps({'text': str(e)}) resp_info["code"] = 406
return resp else:
else: resp_info["code"] = 200
resp.status_code=405 resp_info["data"] = rest.to_csv()
return resp resp_info["dtype"] = "csv"
resp = make_response(json.dumps(resp_info))
resp.status_code = 200
return resp
if __name__ == '__main__': if __name__ == '__main__':
app.run(host='0.0.0.0', port=8901, debug=True) app.run(host='0.0.0.0', port=8901, debug=True)