1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
| from scipy.interpolate import interp1d from sklearn.metrics import mean_squared_error
# Interpolate with the 'cubicspline' method.
# NOTE(review): `df` is not defined anywhere in the visible code and the
# result of this call is not assigned, so as written the statement has no
# lasting effect -- presumably an illustrative snippet; confirm or remove.
df.interpolate(method='cubicspline')
def knn_mean(ts, n):
    """Fill each NaN with the mean of the observations around it.

    For a missing value at position ``i`` the window spans
    ``ceil(n / 2)`` positions on *each* side of ``i`` (clipped to the
    bounds of the series); NaNs inside the window are ignored.

    Args:
        ts: 1-D array-like of numbers, NaN marking missing values.
        n: Nominal window width (number of neighbours to consider).

    Returns:
        A new float ``numpy.ndarray``; positions whose whole window is
        missing stay NaN.
    """
    values = np.array(ts, dtype=float)
    filled = values.copy()
    half = int(np.ceil(n / 2))  # loop-invariant, so computed once
    for i in np.flatnonzero(np.isnan(values)):
        lower = max(0, i - half)
        # "+ 1" makes the window symmetric: without it the right-hand
        # side is one neighbour shorter than the left-hand side.
        upper = min(len(values), i + half + 1)
        window = values[lower:upper]
        valid = window[~np.isnan(window)]
        if valid.size:  # avoids numpy's "mean of empty slice" warning
            filled[i] = valid.mean()
    return filled


def seasonal_mean(ts, n, lr=0.7):
    """Fill each NaN with the scaled mean of same-season observations.

    For a missing value at position ``i`` the earlier same-phase
    observations (``i - n``, ``i - 2n``, ...) are averaged.  If none of
    them is available, the later ones (``i + n``, ``i + 2n``, ...) are
    used as well.  The mean is multiplied by ``lr``.

    Args:
        ts: 1-D array-like (list, ndarray or pandas Series) of numbers,
            NaN marking missing values.
        n: Season length in samples; must be >= 1.
        lr: Multiplier applied to the seasonal mean.

    Returns:
        A new float ``numpy.ndarray``; positions with no usable
        same-season observation stay NaN.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive season length")
    values = np.array(ts, dtype=float)
    filled = values.copy()
    for i in np.flatnonzero(np.isnan(values)):
        # Indices i % n, i % n + n, ... < i are exactly i - n, i - 2n, ...
        # The slice is empty for i < n, so nothing wraps around to the
        # end of the series the way a negative start index would.
        season = values[i % n:i:n]
        if np.isnan(season).all():  # also true when the slice is empty
            season = np.concatenate([season, values[i + n::n]])
        valid = season[~np.isnan(season)]
        if valid.size:
            filled[i] = valid.mean() * lr
    return filled


def fill_matrix(df, target_col, origin_df=None):
    """Plot seven imputation strategies for ``df[target_col]`` side by side.

    The panels are: actual data, forward fill, backward fill, linear
    interpolation, cubic interpolation, neighbourhood mean
    (``knn_mean``, window 8) and seasonal mean (``seasonal_mean``,
    ``n=12``, ``lr=1.25``).

    Side effects: the columns ``rownum``, ``linear_fill``,
    ``cubic_fill``, ``knn_mean`` and ``seasonal_mean`` are added to
    ``df`` in place, ``xtick.bottom`` is switched off in matplotlib's
    global rcParams, and the figure is displayed with ``plt.show()``.

    Args:
        df: DataFrame containing ``target_col`` with NaN for missing
            values.
        target_col: Name of the column to impute.
        origin_df: Optional DataFrame with the complete (gap-free)
            ``target_col``.  When given it is drawn behind the actual
            data in the first panel so the gaps are visible.

    Returns:
        None.
    """
    _, axes = plt.subplots(7, 1, sharex=True, figsize=(10, 12))
    plt.rcParams.update({'xtick.bottom': False})

    # 1. Actual data.  The legend gets one label per line actually drawn,
    # so the available data is no longer mislabelled when no reference
    # frame is supplied.
    legend_labels = []
    if origin_df is not None:
        origin_df[target_col].plot(title='Actual', ax=axes[0], label='Actual',
                                   color='red', style=".-")
        legend_labels.append("Missing Data")
    df[target_col].plot(title='Actual', ax=axes[0], label='Actual',
                        color='green', style=".-")
    legend_labels.append("Available Data")
    axes[0].legend(legend_labels)

    # 2. Forward fill.
    df[target_col].ffill().plot(title='Forward Fill', ax=axes[1],
                                label='Forward Fill', style=".-")

    # 3. Backward fill.
    df[target_col].bfill().plot(title="Backward Fill", ax=axes[2],
                                label='Back Fill', color='firebrick',
                                style=".-")

    # 4./5. Interpolation over the row number of the observed points.
    # bounds_error=False leaves leading/trailing gaps as NaN instead of
    # raising ValueError, since they lie outside the interpolation range.
    df['rownum'] = np.arange(df.shape[0])
    observed = df.dropna(subset=[target_col])

    linear = interp1d(observed['rownum'], observed[target_col],
                      bounds_error=False)
    df['linear_fill'] = linear(df['rownum'])
    df['linear_fill'].plot(title="Linear Fill", ax=axes[3],
                           label='Linear Fill', color='brown', style=".-")

    cubic = interp1d(observed['rownum'], observed[target_col], kind='cubic',
                     bounds_error=False)
    df['cubic_fill'] = cubic(df['rownum'])
    df['cubic_fill'].plot(title="Cubic Fill", ax=axes[4], label='Cubic Fill',
                          color='red', style=".-")

    # 6. Mean of the neighbouring observations.
    df['knn_mean'] = knn_mean(df[target_col].values, 8)
    df['knn_mean'].plot(title="KNN Mean", ax=axes[5], label='KNN Mean',
                        color='tomato', alpha=0.5, style=".-")

    # 7. Seasonal mean.
    df['seasonal_mean'] = seasonal_mean(df[target_col], n=12, lr=1.25)
    df['seasonal_mean'].plot(title="Seasonal Mean", ax=axes[6],
                             label='Seasonal Mean', color='blue', alpha=0.5,
                             style=".-")
    plt.show()
def GM11(x0):
    """Fit a GM(1,1) grey forecasting model to the sequence ``x0``.

    The development coefficient ``a`` and grey input ``b`` are obtained
    by least squares from the accumulated series, and the fit is scored
    with the posterior-variance ratio ``C`` and small-error probability
    ``P``.

    Args:
        x0: 1-D array-like of observations (list, ndarray or pandas
            Series); it is converted to a float ndarray.

    Returns:
        Tuple ``(f, a, b, x0[0], C, P)`` where ``f(k)`` gives the model
        value for the 1-based step ``k`` (scalars or arrays).

    Raises:
        numpy.linalg.LinAlgError: If fewer than 3 observations are given,
            in which case the least-squares system is singular.
    """
    import numpy as np

    # Work on a plain float array so positional slicing is used; a pandas
    # Series would align ``x1[:-1] + x1[1:]`` on its index instead.
    x0 = np.asarray(x0, dtype=float).ravel()
    if x0.size < 3:
        raise np.linalg.LinAlgError(
            "GM11 needs at least 3 observations to estimate a and b")

    x1 = x0.cumsum()
    z1 = (x1[:-1] + x1[1:]) / 2.0  # mean of consecutive accumulated values
    B = np.column_stack((-z1, np.ones_like(z1)))
    Yn = x0[1:]
    # Least-squares solve of B @ [a, b] = Yn; avoids forming and
    # inverting B.T @ B explicitly.
    (a, b), *_ = np.linalg.lstsq(B, Yn, rcond=None)

    def f(k):
        # NOTE(review): for k == 1 this difference formula does not return
        # x0[0]; kept as in the original so C and P stay comparable --
        # confirm whether the first fitted value should be special-cased.
        scale = x0[0] - b / a
        return scale * np.exp(-a * (k - 1)) - scale * np.exp(-a * (k - 2))

    delta = np.abs(x0 - f(np.arange(1, len(x0) + 1)))
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P


# Demo: a business-day counter re-sampled to calendar days, so weekends
# become NaN gaps, then plotted through every fill strategy.
index = pd.date_range('2020-03-09', '2021-02-01', freq='B')
t_df = pd.DataFrame(range(len(index)), index=index, columns=['target'])
t_df = t_df.resample('D').asfreq()
fill_matrix(t_df, 'target')
|