1 Markov model
1.1 Preparation and definition
Import modules
1
2
3
4
5
6
7
8
9
10
11
12import numpy as np
import pandas as pd
from numpy import linalg as la
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.metrics import mean_squared_error
# Plot module
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
# Microsoft YaHei, Times New Roman
plt.rcParams['axes.unicode_minus'] = False
Plot results
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30def plot_result(test_Y, pred_Y, Title):
ylim_min = np.round(test_Y.min()) - 1
ylim_max = np.round(test_Y.max()) + 1
# plot comparison results
fig = plt.figure(figsize = (8, 6))
ax0 = fig.add_subplot(111)
ax0.scatter(test_Y, pred_Y)
ax0.plot([ylim_min, ylim_max], [ylim_min, ylim_max], '--k')
ax0.set_xlabel('True Target', fontsize = 15)
ax0.set_ylabel('Target predicted', fontsize = 15)
ax0.set_title(Title , fontsize = 16)
ax0.text(ylim_min + 2, ylim_max - 4, r'$R^2$ = %.2f, $MAE$ = %.2f' %(r2_score(test_Y, pred_Y),
mean_absolute_error(test_Y, pred_Y)), fontsize = 14)
ax0.text(ylim_min + 2, ylim_max - 5, r'$MSE$ = %.2f, $RMSE$ = %.2f' %(mean_squared_error(test_Y, pred_Y),
mean_squared_error(test_Y, pred_Y, squared=False)), fontsize = 14)
ax0.set_xlim([ylim_min, ylim_max])
ax0.set_ylim([ylim_min, ylim_max])
plt.show()
# Plot prediction and real values
plt.figure(figsize = (8, 6)) # length x width
plt.plot(pred_Y, 'r', label='prediction', lw = 0.8)
plt.plot(test_Y, 'b', label='real', lw = 0.8)
plt.xticks(range(19), fontsize = 15)
plt.yticks(fontsize = 15)
plt.title(Title, fontsize = 16)
plt.legend(loc='best', fontsize = 15)
plt.show()绘制结果图(可选,可通过调整
plot_b
的值来选择是否绘图),包含两个:Fig. 1-1 Trend figure of Coal
Fig. 1-2 Comparison figure of Coal
Calculate loss
1
2
3
4
5
6
7
8
9def calculate_error(test_data, np_Pred_results, title):
# Storing error results
R2 = r2_score(test_data, np_Pred_results)
MAE = mean_absolute_error(test_data, np_Pred_results)
MSE = mean_squared_error(test_data, np_Pred_results)
RMSE = mean_squared_error(test_data, np_Pred_results, squared=False)
error = [title, R2, MAE, MSE, RMSE]
return errorCalculate the loss: R2, MAE, MSE, RMSE
Definition of Markov model
The first step: calculate and return transfer matrix (type: np.array)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31def Markov_trans(data_stru, year_start, year_end):
P = np.identity(4)
K = np.array([1.0, 1., 1., 1.])
year_start_data = data_stru.loc[year_start, :]
year_end_data = data_stru.loc[year_end,:]
bool_value = np.array(year_end_data < year_start_data)
i = 0 # index
for b in bool_value:
if b == False: # Decrease
K[i] = year_end_data[i] - year_start_data[i]
elif b == True: # Increase
P[i, i] = year_end_data[i]/year_start_data[i]
K[i] = 0
i += 1
KK = sum(K)
for j in range(4):
for n in range(4):
if (j != n) & (K[j] == 0):
P[j,n] = (1-P[j,j])*K[n]/KK
# v 为特征值 Q 为特征向量
P = np.round(P, 3)
v, Q = la.eig(P)
# diag_P = np.round(np.dot(np.dot(la.inv(Q), P), Q), 3)
V = np.diag(v)**((1)/(year_end-year_start))
Predict_P = np.round(np.dot(np.dot(Q, V), la.inv(Q)), 3)
return Predict_PThe second step: calculate and return forecasting result (type: np.array)
1
2
3
4
5
6
7
8
9
10def Markov_predict(data_stru, P, year_end, year_pred):
year_end_data = data_stru.loc[year_end,:]
P = np.round(P, 3)
v, Q = la.eig(P)
# diag_P = np.round(np.dot(np.dot(la.inv(Q), P), Q), 3)
V = np.diag(v)**(year_pred - year_end)
Predict_P = np.round(np.dot(np.dot(Q, V), la.inv(Q)), 3)
np_year_end_data =np.array(year_end_data)
Predict_energy_stru = np.around(np.dot(np_year_end_data, Predict_P), 3)
return Predict_energy_stru
1.2 Preparation
Read data file
Data.xlsx
文件需要根程序文件在同一目录1
2
3
4
5
6
7
8
9
10
11
12
13# Read datafile
data_stru = pd.read_excel('./Data.xlsx', header = 0,
sheet_name = 0, index_col= 0)
num_year = len(data_stru) # the length of the dataset
train_percent = 0.7 # Set the first 70% data as train set
Error_results = [] # Store all error results
plot_b = False # Choose whether plot result
# Dataset start with 1953, end with 2017
year_train_start, year_end = 1953, 2018
year_train_end = year_train_start + round(len(data_stru) * 0.7) # = 1999
test_data = np.array(data_stru.loc[year_train_end:, :]) # from 1999 to 2017
1.3 Model application
Avarage trans_P
Model test based on avarage trans_P
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34trans_P = [] # store all transfer matric
for startY in range(year_train_start, year_train_end - 1):
for endY in range(startY, year_train_end):
trans_temp = Markov_trans(year_start = year_train_start,
year_end = year_train_end, data_stru = data_stru)
temp = trans_temp.tolist()
trans_P.append(temp)
np_trans_P = np.array(trans_P) # Transfer list into numpy
av_trans_P = np.sum(np_trans_P, 0)/np_trans_P.shape[0] # Calculate avarage
Pred_results = [] # Using average transfer matrix to predict energy structure
for predY in range(year_train_end, year_end):
# Forecasting
Pred_temp = Markov_predict(data_stru = data_stru, P = av_trans_P,
year_end = year_train_end - 1, year_pred = predY)
temp = Pred_temp.tolist() # Turn numpy.array into list
Pred_results.append(temp) # It's convenient to append value using list
np_Pred_results = np.array(Pred_results) # Turn list into np.array
Columns = data_stru.columns
# Plot forecasting result if needed
if plot_b == True:
for i in range(4):
test_Y, pred_Y = test_data[:, i], np_Pred_results[:, i]
Title = '{} based on average transfer matrix to predict:{}'.format(regression,
Columns[i])
plot_result(test_Y, pred_Y, Title)
# Storing error results
title = 'average transfer matrix'
# Calculate 4 error
error = calculate_error(test_data, np_Pred_results, title)
Error_results.append(error)One step and 46 step
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24for year_train_start in [1953, 1998]: # from '1953 to 1999' or '1998 to 1999'
year_train_end, year_end = 1999, 2018
trans_temp = Markov_trans(year_start = year_train_start,
year_end = year_train_end, data_stru = data_stru)
Pred_results = []
for predY in range(year_train_end, year_end):
Pred_temp = Markov_predict(data_stru = data_stru, P = trans_temp,
year_end = year_train_end - 1, year_pred = predY)
temp = Pred_temp.tolist()
Pred_results.append(temp)
np_Pred_results = np.array(Pred_results)
Columns = data_stru.columns
if plot_b == True:
for i in range(4):
test_Y, pred_Y = test_data[:, i], np_Pred_results[:, i]
Title = '{} based on {} and {} to predict:{}'.format(regression,
year_train_start, year_train_end, Columns[i])
plot_result(test_Y, pred_Y, Title)
# Storing error results
title = '{} step'.format(year_train_end - year_train_start)
error = calculate_error(test_data, np_Pred_results, title)
Error_results.append(error)
1.4 Export result to excel
1 | np_error = np.array(Error_results) |
1.5 All codes
1 | # Markov model for energy structure prediction# |
2 Traditional models
- Ridge
- Lasso
- PLS
- MLP
- Linear SVM
- DecisionTree
不同之处在于104 行开始的模型定义不同
1 | # ========================= Import modules ==================================== |