修改response

This commit is contained in:
zhaojinghao 2022-12-08 16:22:29 +08:00
parent 365c194e82
commit 2e64b83e0d
4 changed files with 103 additions and 78 deletions

View File

@ -5,15 +5,19 @@ import numpy as np
from sklearn.model_selection import train_test_split
from logzero import logger
import os
current_path = os.path.dirname(__file__)
def load_data():
    """Load the bundled training CSV and remove its outlier rows.

    Reads ``data/train.csv`` from the directory stored in the module-level
    ``current_path`` and drops, in place, every row whose ``GrLivArea`` is
    greater than 4000 while its ``SalePrice`` is less than 300000.

    Returns:
        pd.DataFrame: the training frame without those rows. The remaining
        rows keep their original index labels.
    """
    logger.info("读取本地数据")
    logger.info(current_path)
    # os.path.join copes with an empty ``current_path`` (os.path.dirname
    # returns '' for a bare file name); formatting the path by hand would
    # then yield the absolute path "/data/train.csv".
    train_data = pd.read_csv(os.path.join(current_path, 'data', 'train.csv'))
    # Boolean-mask (conditional) indexing selects the outlier rows.
    outliers = (train_data["GrLivArea"] > 4000) & (train_data["SalePrice"] < 300000)
    # The drop is performed once; a second identical drop is a no-op because
    # no row can match the mask after the first pass.
    train_data.drop(train_data.index[outliers], inplace=True)
    return train_data
def load_model():
logger.info(f"读取本地模型")
model = xgb.XGBModel()
@ -42,22 +46,25 @@ def preprocessing(local_train: pd.DataFrame, new_data: pd.DataFrame):
year_cols = ['YearBuilt', 'YearRemodAdd', 'GarageYrBlt']
all_data.loc[na_index, 'GarageYrBlt'] = None
all_data.GarageYrBlt.fillna(all_data.YearBuilt, inplace=True)
cols1 = ["GarageQual", "GarageCond", "GarageFinish", "GarageType", "BsmtExposure", "BsmtCond", "BsmtQual", "BsmtFinType2", "BsmtFinType1", "MasVnrType"]
for col in cols1:
all_data[col].fillna("None",inplace=True)
cols2=["MasVnrArea", "BsmtUnfSF", "TotalBsmtSF", "GarageCars", "BsmtFinSF2", "BsmtFinSF1", "GarageArea"]
cols1 = ["GarageQual", "GarageCond", "GarageFinish", "GarageType", "BsmtExposure", "BsmtCond", "BsmtQual",
"BsmtFinType2", "BsmtFinType1", "MasVnrType"]
for col in cols1:
all_data[col].fillna("None", inplace=True)
cols2 = ["MasVnrArea", "BsmtUnfSF", "TotalBsmtSF", "GarageCars", "BsmtFinSF2", "BsmtFinSF1", "GarageArea"]
for col in cols2:
all_data[col] = all_data[col].astype(float)
all_data[col].fillna(0, inplace=True)
all_data["LotFrontage"].fillna(np.mean(all_data["LotFrontage"]),inplace=True)
all_data["LotFrontage"].fillna(np.mean(all_data["LotFrontage"]), inplace=True)
cols3 = ["MSZoning", "BsmtFullBath", "BsmtHalfBath", "Utilities", "Functional", "Electrical", "KitchenQual", "SaleType","Exterior1st", "Exterior2nd"]
cols3 = ["MSZoning", "BsmtFullBath", "BsmtHalfBath", "Utilities", "Functional", "Electrical", "KitchenQual",
"SaleType", "Exterior1st", "Exterior2nd"]
for col in cols3:
all_data[col].fillna(all_data[col].mode()[0], inplace=True)
numeric_cols = [x for x in all_data.select_dtypes(exclude=['object']).columns.tolist() if x != 'Id' and x != 'SalePrice']
numeric_cols = [x for x in all_data.select_dtypes(exclude=['object']).columns.tolist() if
x != 'Id' and x != 'SalePrice']
object_cols = [x for x in all_data.select_dtypes(include=['object']).columns.tolist()]
for col in numeric_cols:
@ -67,6 +74,7 @@ def preprocessing(local_train: pd.DataFrame, new_data: pd.DataFrame):
dataset = pd.get_dummies(all_data, columns=object_cols)
return dataset
def build_dataset(dataset):
dataset.SalePrice = np.log1p(dataset.SalePrice)
train = dataset[~dataset.SalePrice.isna()].copy()
@ -84,11 +92,12 @@ def build_dataset(dataset):
def build_model(dtrain, dvalid, watchlist, num_iter=5000, early_stop=200, **params):
    """Train a model with ``xgb.train`` using early stopping.

    Args:
        dtrain: training data handed to ``xgb.train``.
        dvalid: kept for interface compatibility; not used in this function.
        watchlist: forwarded as the ``evals`` argument.
        num_iter: forwarded as ``num_boost_round``.
        early_stop: forwarded as ``early_stopping_rounds``.
        **params: collected into the parameter dict given to ``xgb.train``.

    Returns:
        The object returned by ``xgb.train``.
    """
    logger.info('开始本地建模')
    # Train exactly once: a second identical call would only repeat the work
    # and overwrite the first result.
    model = xgb.train(
        params,
        dtrain,
        evals=watchlist,
        num_boost_round=num_iter,
        early_stopping_rounds=early_stop,
        verbose_eval=True,
    )
    return model
def predict(data:pd.DataFrame, model, feature_cols):
def predict(data: pd.DataFrame, model, feature_cols):
dtest = xgb.DMatrix(data[feature_cols])
result = np.expm1(model.predict(dtest))
data['SalePrice'] = result
@ -118,7 +127,7 @@ def run_boston_price(test_data, extra_train_data=None, num_iter=5000, early_stop
raise Exception("test data is None", extra_train_data)
train_data = load_data()
datasets = preprocessing(train_data, test_data)
_, _, _,feature_cols = build_dataset(datasets)
_, _, _, feature_cols = build_dataset(datasets)
model = load_model()
dtest = datasets[datasets.SalePrice.isna()].copy()
result = np.expm1(model.predict(dtest[feature_cols]))
@ -127,4 +136,4 @@ def run_boston_price(test_data, extra_train_data=None, num_iter=5000, early_stop
if __name__ == '__main__':
pass
pass

View File

@ -1,4 +1,5 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf
import numpy as np
@ -12,6 +13,7 @@ loss = []
jiance = []
current_path = os.path.dirname(__file__)
def readData(filePath):
data = pd.read_csv(filePath, header=0)
x = data.iloc[:len(data), 5:12].values
@ -19,11 +21,13 @@ def readData(filePath):
return x, y
# 标准化函数
def Z_ScoreNormalization(x, mean, sigma):
    """Standardize *x* as ``(x - mean) / sigma``.

    Args:
        x: value(s) to standardize.
        mean: value subtracted from *x*.
        sigma: value the centered result is divided by.

    Returns:
        The standardized value, computed by a single application of the
        formula.
    """
    # Apply the transform exactly once; repeating the assignment would
    # standardize an already standardized value and return a wrong result.
    return (x - mean) / sigma
# 特征标准化
def featureScore(x):
for i in range(7):
@ -34,6 +38,7 @@ def featureScore(x):
return x
# 划分数据集
def dataDivision(x, y, train_scale, val_scale):
train_volumn = int(len(x) * train_scale)
@ -41,17 +46,18 @@ def dataDivision(x, y, train_scale, val_scale):
test_volumn = len(x) - train_volumn - val_volumn
x_train = x[:train_volumn, :]
x_val = x[train_volumn:train_volumn+val_volumn, :]
x_val = x[train_volumn:train_volumn + val_volumn, :]
x_test = x[-test_volumn:, :]
y_train = y[:train_volumn]
y_val = y[train_volumn:train_volumn+val_volumn]
y_val = y[train_volumn:train_volumn + val_volumn]
y_test = y[-test_volumn:]
print(len(x_train), len(y_train), len(x_val), len(y_val))
return x_train, x_val, x_test, y_train, y_val, y_test
# 创建模型
def createModel(neure, activation, learning_rate, loss):
model = tf.keras.models.Sequential([
@ -63,21 +69,25 @@ def createModel(neure, activation, learning_rate, loss):
return model
# 训练模型
def trainModel(model, x_train, y_train, x_val, y_val, epochs):
    """Fit *model* on the training split and print its summary.

    Args:
        model: object exposing ``fit`` and ``summary``.
        x_train: training inputs passed to ``model.fit``.
        y_train: training targets passed to ``model.fit``.
        x_val: validation inputs; passed with *y_val* as ``validation_data``.
        y_val: validation targets.
        epochs: forwarded to ``model.fit`` as ``epochs``.

    Returns:
        The same *model* object after fitting.
    """
    history = LossHistory()
    # Fit exactly once: a second identical call would continue training and
    # effectively double the requested number of epochs.
    model.fit(
        x_train,
        y_train,
        batch_size=32,
        epochs=epochs,
        validation_data=(x_val, y_val),
        validation_freq=1,
        callbacks=[history],
    )
    model.summary()
    return model
# 模型预测
def predictModel(x_test, model):
    """Return ``model.predict(x_test)`` unchanged.

    Args:
        x_test: input handed to the model's ``predict`` method.
        model: object exposing a ``predict`` method.

    Returns:
        Whatever ``model.predict`` returns for *x_test*.
    """
    return model.predict(x_test)
# 训练
def train(csv_file, neure, activation, learning_rate, loss, epochs):
x, y = readData(csv_file)
@ -98,21 +108,25 @@ def train(csv_file, neure, activation, learning_rate, loss, epochs):
# model.save(os.path.abspath("./appweb/self_model/ocean_wave_mlp.h5"))
return model, mse, mae, r
def drawLoss(logs):
    """Append ``logs['loss']`` to the module-level ``loss`` list and print it.

    Args:
        logs: mapping that must contain a ``'loss'`` key.
    """
    latest = logs['loss']
    loss.append(latest)
    print(latest)
def load_model():
    """Load the model file ``pretrain_models/ocean_wave_mlp.h5``.

    The file is looked up under the directory stored in the module-level
    ``current_path``; the resolved path is logged before loading.

    Returns:
        The object returned by ``keras_load_model`` for that file.
    """
    # Build the path once with os.path.join so that an empty ``current_path``
    # gives a relative path, not one rooted at "/".
    model_path = os.path.join(current_path, "pretrain_models", "ocean_wave_mlp.h5")
    logger.info(model_path)
    return keras_load_model(model_path)
class LossHistory(tf.keras.callbacks.Callback):
    """Callback that hands each epoch's logs to ``drawLoss``."""

    def on_epoch_end(self, epoch, logs=None):
        """Forward the logs of the finished epoch to ``drawLoss``.

        Args:
            epoch: index of the epoch that just ended (unused here).
            logs: mapping of values for the epoch; ``None`` is treated as an
                empty mapping.
        """
        # ``None`` replaces the shared mutable ``{}`` default; an empty dict
        # is substituted per call so no state leaks between invocations.
        drawLoss({} if logs is None else logs)
def predict_wave_height(csv_file, num_units:int, activation:str, lr:float, loss:str, epochs:int, x_test:list):
def predict_wave_height(csv_file, num_units: int, activation: str, lr: float, loss: str, epochs: int, x_test: [np.ndarray, list]):
"""_summary_
Args:
@ -130,4 +144,4 @@ def predict_wave_height(csv_file, num_units:int, activation:str, lr:float, loss:
model = train(csv_file, num_units, activation, lr, loss, epochs)
result = predictModel(x_test, model)
logger.info(result)
return result[0][0]
return result[0][0]

View File

@ -4,7 +4,8 @@ import datetime as dt
from get_holiday_cn.client import getHoliday
from logzero import logger
def run_prophet(data: pd.DataFrame, period:int=1, freq:str='D'):
def run_prophet(data: pd.DataFrame, period: int = 1, freq: str = 'D'):
"""_summary_
Args:
@ -18,13 +19,13 @@ def run_prophet(data: pd.DataFrame, period:int=1, freq:str='D'):
"""
assert period > 0
assert 'ds' in data.columns and 'y' in data.columns
try:
try:
data.ds = pd.to_datetime(data.ds)
except Exception as e:
return e
holiday_data = build_holiday(data.ds.min(), data.ds.max())
train_data = data.copy()
model=Prophet(
model = Prophet(
growth="linear",
yearly_seasonality=True,
weekly_seasonality=True,
@ -32,15 +33,15 @@ def run_prophet(data: pd.DataFrame, period:int=1, freq:str='D'):
seasonality_mode="multiplicative",
seasonality_prior_scale=12,
holidays=holiday_data,
n_changepoints= 100, # change points num, default=25
)
n_changepoints=100, # change points num, default=25
)
model.fit(train_data)
future = model.make_future_dataframe(periods=period, freq=freq, include_history=True)
forecast=model.predict(future)
forecast = model.predict(future)
return forecast
def get_date_type(date:str, holiday_client:getHoliday):
def get_date_type(date: str, holiday_client: getHoliday):
"""一个判断某个日期是哪种假期的类
Args:
@ -58,7 +59,7 @@ def get_date_type(date:str, holiday_client:getHoliday):
return rst.get('holiday').get('name')
def build_holiday(start_date:str="2015-01-01", end_date:str="2021-12-31"):
def build_holiday(start_date: str = "2015-01-01", end_date: str = "2021-12-31"):
"""基于起止日期,将该时间段内的国内假期都找出来,包括本应该放假但是最后调休上班的
Args:
@ -72,10 +73,10 @@ def build_holiday(start_date:str="2015-01-01", end_date:str="2021-12-31"):
ds_list.date = ds_list.date.apply(lambda x: dt.datetime.strftime(x, format='%Y-%m-%d'))
client = getHoliday()
ds_list['day_type'] = ds_list.date.apply(lambda x: get_date_type(x, client))
special_date = ds_list[ds_list.day_type != 'simple'].copy()
special_date = ds_list[ds_list.day_type != 'simple'].copy()
special_date.columns = ['ds', 'holiday']
return special_date
if __name__ == '__main__':
pass
pass

99
run.py
View File

@ -9,11 +9,13 @@ from house_price.house_price_predcition import run_boston_price
from ocean_wave.wave_height_mlp import predict_wave_height
from prophet_predict.prophet_predict import run_prophet
TEXT = "text"
app = Flask(__name__)
@app.route('/house_price', methods=["POST"])
def predict_price():
if request.method=='POST':
resp_info = dict()
if request.method == 'POST':
eta = request.form.get('eta', 0.05)
max_depth = request.form.get('max_depth', 10)
subsample = request.form.get('subsample', 0.7)
@ -22,7 +24,6 @@ def predict_price():
early_stopping_rounds = int(request.form.get('early_stopping_rounds', 200))
train_data = request.files.get('train_data', None)
test_data = request.files.get('test_data', None)
resp = make_response()
logger.info(train_data)
params = {
"eta": float(eta),
@ -31,34 +32,36 @@ def predict_price():
"cosample_bytree": float(cosample_bytree)
}
if not train_data:
train_data = None
train_data = None
else:
train_data = pd.read_csv(train_data)
if test_data is None:
resp.status_code = '406'
resp.response = json.dumps({"text": "test data is None"})
test_data = pd.read_csv(test_data)
try:
if train_data is None:
rst = run_boston_price(test_data, None, num_boost_round, early_stopping_rounds, **params)
if test_data is None or pd.read_csv(test_data).shape[0] == 0:
resp_info["msg"] = "测试数据为空"
resp_info["code"] = 406
else:
test_data = pd.read_csv(test_data)
try:
if train_data is None:
rst = run_boston_price(test_data, None, num_boost_round, early_stopping_rounds, **params)
else:
rst = run_boston_price(test_data, train_data, num_boost_round, early_stopping_rounds, **params)
except Exception as e:
logger.error(f"Error: {e}")
resp_info["msg"] = str(e)
resp_info["code"] = 406
else:
rst = run_boston_price(test_data, train_data, num_boost_round, early_stopping_rounds, **params)
except Exception as e:
logger.error(f"Error: {e}")
resp.status_code = '406'
resp.response = json.dumps({'text': str(e)})
return resp
resp.status_code=200
resp.response = json.dumps({"Id": rst["Id"].values.tolist(), "price": rst["SalePrice"].values.tolist()})
return resp
else:
resp.status_code=405
return resp
resp_info["code"] = 200
resp_info["data"] = rst.to_csv()
resp_info["dtype"] = "csv"
resp = make_response(json.dumps(resp_info))
resp.status_code = 200
return resp
@app.route('/ocean_wave_height', methods=["POST"])
def predict_height():
if request.method=='POST':
resp_info = dict()
if request.method == 'POST':
num_units = int(request.form.get('num_units', 8))
activation = request.form.get('activation', 'relu')
lr = float(request.form.get('learning_rate', 0.01))
@ -75,35 +78,34 @@ def predict_height():
x_test = [WVHT_1, WDIR_1, WSPD_1, WDIR_2, WSPD_2, WDIR, WSPD]
x_test = np.array([x_test])
logger.info(f"test data: {x_test}")
resp = make_response()
if not train_data:
train_data = None
train_data = None
else:
try:
train_data = pd.read_csv(train_data)
except Exception as e:
logger.error(f"Error: {e}")
resp.status_code = '406'
resp.response = json.dumps({'text': str(e)})
return resp
resp_info["msg"] = str(e)
resp_info["code"] = 406
train_data = None
try:
rst = predict_wave_height(train_data, num_units, activation, lr, loss, epochs, x_test)
except Exception as e:
logger.error(f"Error: {e}")
resp.status_code = '406'
resp.response = json.dumps({'text': "上传数据不符合海浪高度预测的规定文件示例,请检查"})
return resp
resp.status_code=200
resp.response = json.dumps({"result": str(rst)})
return resp
else:
resp.status_code=405
return resp
resp_info["msg"] = "上传数据不符合海浪高度预测的规定文件示例,请检查"
resp_info["code"] = 406
else:
resp_info["code"] = 200
resp_info["data"] = rst
resp_info["dtype"] = TEXT
resp = make_response(json.dumps(resp_info))
resp.status_code = 200
return resp
@app.route("/prophet/", methods=["POST"])
def run_ts_predict():
resp = make_response()
resp_info = dict()
if request.method == "POST":
data_file = request.files.get("data")
freq = request.form.get('freq')
@ -117,20 +119,19 @@ def run_ts_predict():
logger.info(rest.columns)
rest['ds'] = rest['ds'].apply(str)
rest['yhat'] = rest['yhat'].apply(str)
resp.data = json.dumps({'ds':rest.ds.values.tolist(), 'yhat':rest.yhat.values.tolist()})
resp.status_code = 200
return resp
except Exception as e:
logger.error(f"Error: {e}")
resp.status_code = '406'
resp.response = json.dumps({'text': str(e)})
return resp
else:
resp.status_code=405
return resp
resp_info["msg"] = str(e)
resp_info["code"] = 406
else:
resp_info["code"] = 200
resp_info["data"] = rest.to_csv()
resp_info["dtype"] = "csv"
resp = make_response(json.dumps(resp_info))
resp.status_code = 200
return resp
if __name__ == '__main__':
    import os

    # The server listens on every interface, so the interactive debugger must
    # not be on unconditionally: it lets anyone who can reach the port run
    # code. Debug mode is opt-in through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(host='0.0.0.0', port=8901, debug=debug_enabled)