148 lines
4.0 KiB
Python
148 lines
4.0 KiB
Python
import os
|
|
|
|
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
|
|
import tensorflow as tf
|
|
import numpy as np
|
|
import pandas as pd
|
|
from sklearn.metrics import mean_squared_error, mean_absolute_error
|
|
from scipy.stats import pearsonr
|
|
from tensorflow.keras.models import load_model as keras_load_model
|
|
from logzero import logger
|
|
|
|
# Per-epoch training losses; drawLoss() appends to this list.
loss: list = []
# NOTE(review): not referenced anywhere in the visible code — confirm before removing.
jiance: list = []
# Directory containing this module; load_model() builds the pretrained model path from it.
current_path: str = os.path.dirname(__file__)
|
|
|
|
|
|
def readData(filePath):
    """Load a CSV file and split it into feature and target arrays.

    The first row of the file is treated as the header.

    Args:
        filePath: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        tuple: ``(x, y)`` where ``x`` holds columns 5 through 11 (seven
        columns) of every row and ``y`` holds column 12 of every row.
    """
    frame = pd.read_csv(filePath, header=0)

    # Positional selection: columns 5..11 are the inputs, column 12 the target.
    features = frame.iloc[:, 5:12].values
    target = frame.iloc[:, 12].values

    return features, target
|
|
|
|
|
|
# Standardization helper
def Z_ScoreNormalization(x, mean, sigma):
    """Return the z-score of ``x``: ``(x - mean) / sigma``.

    Args:
        x: Value (or array) to standardize.
        mean: Mean to subtract.
        sigma: Standard deviation to divide by.

    Returns:
        The standardized value.
    """
    return (x - mean) / sigma
|
|
|
|
|
|
# Feature standardization
def featureScore(x):
    """Standardize every feature column of ``x`` in place (z-score).

    Each column is shifted by its mean and divided by its population
    standard deviation (``np.std`` default). The row and column counts are
    taken from ``x`` itself, so the function works for any 2-D array instead
    of only a 76967 x 7 dataset.

    Args:
        x (np.ndarray): 2-D array of shape ``(n_samples, n_features)``.
            It is modified in place; results are written back using the
            array's own dtype.

    Returns:
        np.ndarray: The same array object ``x``, now standardized.
    """
    if x.size == 0:
        # Nothing to normalize; avoids mean/std warnings on empty input.
        return x

    mean = np.average(x, axis=0)
    sigma = np.std(x, axis=0)
    # A constant column has sigma == 0; dividing by 1 instead maps it to
    # zeros rather than producing NaN/inf.
    sigma = np.where(sigma == 0, 1.0, sigma)

    # Slice assignment keeps the in-place contract of the original loop.
    x[:] = (x - mean) / sigma

    return x
|
|
|
|
|
|
# Split the dataset
def dataDivision(x, y, train_scale, val_scale):
    """Split ``x`` and ``y`` sequentially into train, validation and test parts.

    The first ``int(len(x) * train_scale)`` samples form the training set,
    the next ``int(len(x) * val_scale)`` samples the validation set, and
    whatever remains is the test set. No shuffling is performed.

    Args:
        x: 2-D array of features, indexed as ``x[rows, :]``.
        y: 1-D array of targets aligned with ``x``.
        train_scale (float): Fraction of samples used for training.
        val_scale (float): Fraction of samples used for validation.

    Returns:
        tuple: ``(x_train, x_val, x_test, y_train, y_val, y_test)``.
    """
    total = len(x)
    train_volumn = int(total * train_scale)
    val_volumn = int(total * val_scale)
    # Slice the test part from an explicit start offset. The previous
    # ``x[-test_volumn:]`` form returned the *entire* array when no samples
    # were left for testing, because ``x[-0:]`` is ``x[0:]``.
    test_start = train_volumn + val_volumn

    x_train = x[:train_volumn, :]
    x_val = x[train_volumn:test_start, :]
    x_test = x[test_start:, :]

    y_train = y[:train_volumn]
    y_val = y[train_volumn:test_start]
    y_test = y[test_start:]

    print(len(x_train), len(y_train), len(x_val), len(y_val))

    return x_train, x_val, x_test, y_train, y_val, y_test
|
|
|
|
|
|
# Build the model
def createModel(neure, activation, learning_rate, loss):
    """Create and compile a two-layer dense Keras model.

    The network has one hidden ``Dense`` layer fed by 7 input features and a
    single-unit ``Dense`` output layer.

    Args:
        neure: Number of units in the hidden layer.
        activation: Activation passed to the hidden layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        loss: Loss passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model, tracking ``'mae'`` as a metric.
    """
    hidden_layer = tf.keras.layers.Dense(neure, activation=activation, input_shape=(7,))
    output_layer = tf.keras.layers.Dense(1)
    network = tf.keras.models.Sequential([hidden_layer, output_layer])

    optimizer = tf.keras.optimizers.Adam(learning_rate)
    network.compile(optimizer=optimizer, loss=loss, metrics=['mae'])

    return network
|
|
|
|
|
|
# Train the model
def trainModel(model, x_train, y_train, x_val, y_val, epochs):
    """Fit ``model`` on the training data and print its summary.

    Args:
        model: Compiled Keras model to train.
        x_train: Training features.
        y_train: Training targets.
        x_val: Validation features.
        y_val: Validation targets.
        epochs: Number of epochs to train for.

    Returns:
        The same ``model`` object after fitting.
    """
    fit_options = dict(
        batch_size=32,
        epochs=epochs,
        validation_data=(x_val, y_val),
        validation_freq=1,
        # LossHistory forwards each epoch's logs to drawLoss().
        callbacks=[LossHistory()],
    )
    model.fit(x_train, y_train, **fit_options)

    model.summary()

    return model
|
|
|
|
|
|
# Run model inference
def predictModel(x_test, model):
    """Return ``model.predict(x_test)``.

    Args:
        x_test: Input samples to predict on.
        model: Object exposing a ``predict`` method.

    Returns:
        Whatever ``model.predict`` returns for ``x_test``.
    """
    return model.predict(x_test)
|
|
|
|
|
|
# Training pipeline
def train(csv_file, neure, activation, learning_rate, loss, epochs):
    """Train a model on ``csv_file`` and evaluate it on the held-out test split.

    The data is split sequentially: 64% training, 16% validation, and the
    remainder for testing.

    Args:
        csv_file: CSV source passed to ``readData``.
        neure: Number of hidden units.
        activation: Hidden-layer activation.
        learning_rate: Optimizer learning rate.
        loss: Loss used for training.
        epochs: Number of training epochs.

    Returns:
        tuple: ``(model, mse, mae, r)`` — the trained model, the mean squared
        error, the mean absolute error and the Pearson correlation
        coefficient between test targets and predictions.
    """
    features, targets = readData(csv_file)
    x_train, x_val, x_test, y_train, y_val, y_test = dataDivision(features, targets, 0.64, 0.16)

    network = trainModel(
        createModel(neure, activation, learning_rate, loss),
        x_train, y_train, x_val, y_val, epochs,
    )

    # Flatten the per-sample prediction rows into one flat list of values.
    predicted_wave_height = [value for row in predictModel(x_test, network) for value in row]

    # Evaluate the model
    mse = mean_squared_error(predicted_wave_height, y_test)
    mae = mean_absolute_error(predicted_wave_height, y_test)
    correlation, _p_value = pearsonr(y_test, predicted_wave_height)

    # Saving the trained model is currently disabled:
    # model.save(os.path.abspath("./appweb/self_model/ocean_wave_mlp.h5"))
    return network, mse, mae, correlation
|
|
|
|
|
|
def drawLoss(logs):
    """Record the loss from ``logs`` in the module-level ``loss`` list and print it.

    Args:
        logs: Mapping that contains a ``'loss'`` entry.
    """
    latest = logs['loss']
    loss.append(latest)
    print(latest)
|
|
|
|
|
|
def load_model():
    """Load the pretrained model stored next to this module.

    The model path is logged before loading.

    Returns:
        The model returned by Keras' ``load_model`` for
        ``pretrain_models/ocean_wave_mlp.h5`` under ``current_path``.
    """
    model_path = f"{current_path}/pretrain_models/ocean_wave_mlp.h5"
    logger.info(model_path)
    return keras_load_model(model_path)
|
|
|
|
|
|
class LossHistory(tf.keras.callbacks.Callback):
    """Keras callback that hands each epoch's logs to ``drawLoss``."""

    def on_epoch_end(self, epoch, logs=None):
        """Forward the epoch logs to ``drawLoss``.

        Args:
            epoch: Index of the epoch that just finished (unused here).
            logs: Metrics mapping for the epoch, or ``None``.
        """
        # ``logs`` defaults to None rather than a shared mutable ``{}``;
        # skip recording when no logs were supplied.
        if not logs:
            return
        drawLoss(logs)
|
|
|
|
|
|
def predict_wave_height(
    csv_file,
    num_units: int,
    activation: str,
    lr: float,
    loss: str,
    epochs: int,
    x_test: "np.ndarray | list",
):
    """Predict one value with the pretrained model or a freshly trained one.

    When ``csv_file`` is ``None`` the pretrained model is loaded via
    ``load_model()``; otherwise a new model is trained on ``csv_file`` with
    the given hyperparameters. The prediction is logged before returning.

    Args:
        csv_file: Training CSV source passed on to ``train``, or ``None`` to
            use the pretrained model.
        num_units (int): Number of hidden units.
        activation (str): Activation function, e.g. ``relu`` or ``tanh``.
        lr (float): Learning rate.
        loss (str): Loss function, e.g. ``mae`` or ``mse``.
        epochs (int): Number of training epochs.
        x_test (np.ndarray | list): Input samples to predict on.

    Returns:
        The first element of the first prediction row (``result[0][0]``).
    """
    if csv_file is None:
        model = load_model()
    else:
        # train() returns (model, mse, mae, r). Only the fitted model is
        # needed here; assigning the whole tuple to ``model`` would break
        # the predict call below.
        model, _mse, _mae, _r = train(csv_file, num_units, activation, lr, loss, epochs)

    result = predictModel(x_test, model)
    logger.info(result)

    return result[0][0]
|