# Load required libraries.
# NOTE(review): setwd() with a hard-coded absolute path is discouraged in
# scripts -- it breaks on any other machine; prefer project-relative paths.
setwd("C:/Users/asus/desktop")
library(glmnet)   # cv.glmnet()/glmnet() -- lasso regression used in Task D
## Loading required package: Matrix
## Loaded glmnet 4.0-2
library(Metrics)  # mape() -- error metric used throughout
library(dplyr)    # filter() -- per-hour subsetting in Task C
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Task A

In this part, our aim is to create lag168 and lag48 features in order to predict electricity consumption. After predicting electricity consumption, we assess the accuracy of these predictions. Therefore, the mean absolute percentage error (MAPE) is calculated for both lag168 and lag48.

# Read data
# NOTE(review): setwd() is repeated here (already set at the top of the
# script); hard-coded absolute paths are fragile.
setwd("C:/Users/asus/desktop")
# The CSV has no usable header row for read.table: names are assigned
# manually and the original header row is dropped below.
all_data = read.table("Tuketim.csv", sep = ",")
names(all_data)[1] = "Date"
names(all_data)[2] = "Hour"
names(all_data)[3] = "Cons"
all_data <- all_data[-1,]
# Consumption arrives as text with "," thousands separators: strip them,
# convert to numeric, and rescale by 1000.
all_data$Cons = as.numeric(gsub(",","",all_data$Cons))*1000
# Create Lag 48 and Lag 168
# Row i of `data` corresponds to row i+168 of `all_data`, so:
#   LAG168 = consumption 168 hours (7 days) earlier  (offset 1   = 169-168),
#   LAG48  = consumption  48 hours (2 days) earlier  (offset 121 = 169-48).
data <- all_data[169:nrow(all_data),]
data$LAG168 <- all_data$Cons[1:(nrow(all_data)-168)]
data$LAG48  <- all_data$Cons[121:(nrow(all_data)-48)]

# NOTE(review): 42386 is a hard-coded row index -- presumably the first hour
# of 1 Nov 2020 in `all_data` (so 42386-169 indexes into `data`); verify
# against the raw file before reuse.
test_nov_20 <- data[(42386-169):nrow(data),]
# Calculate mape for Lag 168 and Lag 48
# mape(actual, predicted): each lag series is used directly as a naive
# forecast of consumption.
mape_lag168 = mape(test_nov_20$Cons,test_nov_20$LAG168)*100
mape_lag48= mape(test_nov_20$Cons,test_nov_20$LAG48)*100

print(paste("MAPE for Lag168 :",mape_lag168))
## [1] "MAPE for Lag168 : 3.44918848261228"
print(paste("MAPE for Lag 48 :",mape_lag48))
## [1] "MAPE for Lag 48 : 8.06031450907751"

The MAPE value for lag168 is about 3.4% and the MAPE value for lag48 is about 8%. This means that prediction accuracy is higher when lag168 is used for predicting electricity consumption.

Task B

Our aim is to fit a model on the data up to 1 Nov 2020 and then use the remaining data to evaluate the model's accuracy. For modelling purposes, linear regression is used. After fitting on the training data, we predict the electricity consumption for the days between 1 Nov 2020 and 1 Dec 2020.

# Split at row 42216: everything before 1 Nov 2020 trains the model; the
# remainder is held out as the test set.
split_row <- 42216
train_data <- data[1:split_row, ]
features_train <- train_data[, 3:5]   # Cons, LAG168, LAG48

test_data <- data[(split_row + 1):nrow(data), ]
features_test <- test_data[, 3:5]

# Linear regression of consumption on the two lag features.
fit <- lm(Cons ~ ., data = features_train)
summary(fit)
## 
## Call:
## lm(formula = Cons ~ ., data = features_train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25242.2   -984.2     -0.5   1016.9  16102.6 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.574e+03  8.353e+01   18.84   <2e-16 ***
## LAG168      6.435e-01  3.094e-03  208.03   <2e-16 ***
## LAG48       3.084e-01  3.096e-03   99.60   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2392 on 42213 degrees of freedom
## Multiple R-squared:  0.7753, Adjusted R-squared:  0.7753 
## F-statistic: 7.281e+04 on 2 and 42213 DF,  p-value: < 2.2e-16

A residual is the actual value minus the predicted value. Looking at the summary statistics of the model, we can see that some electricity consumption values are predicted lower than the actual value by up to around 16 MW, and some are predicted higher than the actual value by up to around 25 MW. All three coefficients, including the intercept, are found to be significant according to their p-values.

# Predict on the held-out test features and compare against actual values.
prediction = predict(fit,features_test)

# Actual vs. predicted scatter; the red y = x line marks perfect prediction.
# FIX: corrected the typo "consuption" in the y-axis label.
plot(test_data$Cons, prediction,
     xlab = "Actual value of consumption",
     ylab = "Predicted value of consumption")
abline(a = 0, b = 1, col = 2)

# mape(actual, predicted), expressed as a percentage.
mape_prediction = mape(test_data$Cons,prediction)* 100
print(mape_prediction)
## [1] 4.229428

The MAPE value for the test data is around 4%, which means prediction accuracy is high. In addition, it is possible to evaluate individual consumption points by looking at the actual-versus-predicted plot. In general, the data fits the model well, but some consumption values higher than 40 MW were predicted lower than their actual value by the model.

Task C

# Hour-of-day labels ("00:00" .. "23:00") matching the Hour column values.
# Generated programmatically instead of a hand-typed 24-element vector,
# which is shorter and eliminates any risk of a typo in the list.
hours = sprintf("%02d:00", 0:23)

# Per-hour linear models: fit one lm() per hour of day on the training
# data, then evaluate each on the matching test-set hours.
mape_1 = rep(0,24)
summary_1 = matrix(rep(0,72),24,3)   # 24 rows x (Intercept, LAG168, LAG48)

for(i in 1:24){

  # Fit Cons ~ LAG168 + LAG48 on this hour's training rows.
  train_data_i = train_data %>% filter(Hour == hours[i])
  features_train_i = train_data_i[,3:5]
  fit_i = lm(Cons~.,features_train_i)
  summary_1[i,] = fit_i$coefficients

  # Evaluate on this hour's test rows.
  test_data_i = test_data %>% filter(Hour == hours[i])
  predicted = predict(fit_i,test_data_i)

  # FIX: Metrics::mape() expects (actual, predicted); the original call
  # passed them swapped, so the denominator was the prediction rather than
  # the actual value. MAPE is not symmetric, so the printed values below
  # will differ slightly from the previously echoed output. Also removed
  # `act_val`, which duplicated the test_data_i filter, and the unused
  # `features_test_i`.
  mape_1[i] = mape(test_data_i$Cons, predicted)*100
  print(paste("Mape Value (%)  for ",hours[i],":",mape_1[i] ))

}
## [1] "Mape Value (%)  for  00:00 : 3.34344593288689"
## [1] "Mape Value (%)  for  01:00 : 3.36226353331919"
## [1] "Mape Value (%)  for  02:00 : 3.47750975545259"
## [1] "Mape Value (%)  for  03:00 : 3.27693407487705"
## [1] "Mape Value (%)  for  04:00 : 3.26256912799877"
## [1] "Mape Value (%)  for  05:00 : 3.27744479918112"
## [1] "Mape Value (%)  for  06:00 : 3.26230144562775"
## [1] "Mape Value (%)  for  07:00 : 3.92884852896431"
## [1] "Mape Value (%)  for  08:00 : 4.83478904145347"
## [1] "Mape Value (%)  for  09:00 : 5.70603598296241"
## [1] "Mape Value (%)  for  10:00 : 6.31593628336217"
## [1] "Mape Value (%)  for  11:00 : 6.50313866009252"
## [1] "Mape Value (%)  for  12:00 : 6.80320692651713"
## [1] "Mape Value (%)  for  13:00 : 6.94292478057779"
## [1] "Mape Value (%)  for  14:00 : 6.71127371138644"
## [1] "Mape Value (%)  for  15:00 : 6.33192688239147"
## [1] "Mape Value (%)  for  16:00 : 5.39358013662158"
## [1] "Mape Value (%)  for  17:00 : 4.86030200181668"
## [1] "Mape Value (%)  for  18:00 : 3.895634381416"
## [1] "Mape Value (%)  for  19:00 : 3.55631055978235"
## [1] "Mape Value (%)  for  20:00 : 3.26609721328125"
## [1] "Mape Value (%)  for  21:00 : 3.26164841764531"
## [1] "Mape Value (%)  for  22:00 : 3.2612766569859"
## [1] "Mape Value (%)  for  23:00 : 3.48747937831156"
# Collect the per-hour coefficients into a labelled data frame.
# FIX: the first column name previously had a stray leading space
# (" Intercept"); all three names are now assigned in one call.
summary_1 = as.data.frame(summary_1)
names(summary_1) = c("Intercept", "LAG168", "LAG48")
print(summary_1)
##     Intercept    LAG168     LAG48
## 1    2191.321 0.4428359 0.4868831
## 2    2120.452 0.4364232 0.4919552
## 3    2831.137 0.4252267 0.4753444
## 4    2228.551 0.4269591 0.4927976
## 5    2205.266 0.4449390 0.4746852
## 6    2225.031 0.4784599 0.4398487
## 7    2126.946 0.5462000 0.3759635
## 8    2310.651 0.6528305 0.2663126
## 9    3232.765 0.7255429 0.1737235
## 10   3594.994 0.7206583 0.1743794
## 11   3740.841 0.6868558 0.2062089
## 12   3958.706 0.6635927 0.2249924
## 13   3662.112 0.6131376 0.2802066
## 14   3806.670 0.5867997 0.3035491
## 15   4066.615 0.6255587 0.2597276
## 16   3950.800 0.6319420 0.2563915
## 17   3815.717 0.6456301 0.2471083
## 18   3389.937 0.6430749 0.2620916
## 19   3018.022 0.5819842 0.3337088
## 20   3163.115 0.5250949 0.3868581
## 21   2782.314 0.5143698 0.4080330
## 22   2658.049 0.4954232 0.4290922
## 23   2825.549 0.4753820 0.4427003
## 24   2541.261 0.4720343 0.4510817
# MAPE-by-hour profile for the per-hour linear models.
plot(1:24,mape_1,lwd = 2, type = "l",xlab = "Hour", ylab = "MAPE",main = "Mape Value vs Hour" )

Each hour was modelled separately using the whole training data set. In order to model the training data, the intercept, lag48 and lag168 were used as features. In many cases the intercept is useful because it absorbs effects that the other features cannot explain. After the modelling step, the test data was used to determine the prediction accuracy of each model. The MAPE values for the first and last hours of the day are lower than for the other hours, meaning prediction accuracy for those hours is higher than for the middle hours of the day.

Task D

# Reshape the hourly (long) data to daily (wide) format: one row per Date.
# Features: the 24 hourly LAG168 and LAG48 values (Cons is dropped), i.e.
# 48 feature columns after the Date key column.
wide_format_features_train <- reshape(train_data, timevar = "Hour", idvar = "Date", drop = c("Cons"),direction = "wide")
# Targets: the 24 hourly Cons values for the same dates.
wide_format_target_train <- reshape(train_data[,1:3], timevar = "Hour", idvar ="Date", direction = "wide")
# Resulting layout relied on by the Task D loop below:
# columns 1..49 = Date + 48 features, columns 50..73 = the 24 hourly targets.
wide_train = cbind(wide_format_features_train,wide_format_target_train[,-1] )
# Drop days with any missing hourly value so glmnet gets a complete matrix.
wide_train = na.omit(wide_train)

# Same wide layout for the test period (features and targets kept separate).
wide_feat_test <- reshape(test_data, timevar = "Hour", idvar = "Date", drop = "Cons", direction = "wide")
wide_targ_test <- reshape(test_data[,1:3], timevar = "Hour", idvar = "Date", direction = "wide" )


# Per-hour lasso models: for each hour, pick lambda by 10-fold CV, refit at
# lambda.min, and score on the test period.
mape_2 = rep(0, times=24)
set.seed(1010)   # make the cv.glmnet fold assignment reproducible

# NOTE(review): summary_2 is allocated but never filled inside the loop
# (and 3 columns could not hold the 49 lasso coefficients anyway); kept
# as-is because nothing downstream reads it.
summary_2 = matrix(rep(0,72), 24, 3)
for(i in 1:24){

  # 10-fold CV on all 48 wide features against hour i's target (col i+49).
  glmmod = cv.glmnet(as.matrix(wide_train[,2:49]),wide_train[,(i+49)],nfolds= 10,family = "gaussian")

  # Refit the lasso at the CV-optimal lambda.
  lass = glmnet(as.matrix(wide_train[,2:49]),wide_train[,i+49],family = "gaussian",lambda = glmmod$lambda.min)

  pred_i = predict(lass,as.matrix(wide_feat_test[,2:ncol(wide_feat_test)]))

  # FIX: Metrics::mape() expects (actual, predicted); the original call had
  # the arguments swapped, so the denominator was the prediction rather
  # than the actual value. Printed values therefore differ slightly from
  # the previously echoed output.
  mape_cons <- mape(wide_targ_test[,(i+1)], pred_i)*100

  print(paste("Mape for",hours[i],mape_cons))

  mape_2[i] <- mape_cons
}
## [1] "Mape for 00:00 1.40473109535683"
## [1] "Mape for 01:00 1.56570130861615"
## [1] "Mape for 02:00 1.46968415610627"
## [1] "Mape for 03:00 1.40273484118917"
## [1] "Mape for 04:00 1.44847402199552"
## [1] "Mape for 05:00 1.38237771699215"
## [1] "Mape for 06:00 1.64492864845687"
## [1] "Mape for 07:00 1.82615218500099"
## [1] "Mape for 08:00 2.58186243356565"
## [1] "Mape for 09:00 3.65693126139875"
## [1] "Mape for 10:00 4.23211163389052"
## [1] "Mape for 11:00 4.52160308213581"
## [1] "Mape for 12:00 4.71771799796364"
## [1] "Mape for 13:00 4.59033394732374"
## [1] "Mape for 14:00 4.42838597344516"
## [1] "Mape for 15:00 3.9380904013438"
## [1] "Mape for 16:00 3.05134760057007"
## [1] "Mape for 17:00 2.1337634368256"
## [1] "Mape for 18:00 1.6216457190452"
## [1] "Mape for 19:00 1.54432451330507"
## [1] "Mape for 20:00 1.67261574333859"
## [1] "Mape for 21:00 1.71575116421152"
## [1] "Mape for 22:00 1.53595045958882"
## [1] "Mape for 23:00 1.85706103641134"
# MAPE-by-hour profile for the lasso models.
plot(
  x = 1:24, y = mape_2,
  type = "l", lwd = 2, col = "blue",
  xlab = "Hour", ylab = "MAPE",
  main = "Mape Value vs Hour"
)

In this part, we use lasso regression in order to minimize the objective function. Lasso regression imposes a budget on the coefficients; in other words, we tune a lambda value that minimizes the objective function. First, cross-validation was used to select lambda, and lambda.min — the value giving the minimum cross-validated error — was then used to fit the lasso regression. Compared to part C, prediction accuracy is improved for all hours, but the MAPE-versus-hour graph still shows the same pattern as the linear regression one.

Task F

Boxplots were drawn in order to compare the MAPE values. As seen in the figure, the model using penalized (lasso) linear regression has lower MAPE values than the one using plain linear regression.

# Side-by-side distributions of per-hour MAPE for the two model families.
boxplot(
  mape_1, mape_2,
  names = c("Linear Regression", "Lasso Regression"),
  col = c("green", "blue")
)