The log return — or, equivalently, the continuously compounded growth rate — from \(P_1\) to \(P_2\) is
\[ \log(P_2) - \log(P_1) =
\log\left(\frac{P_2}{P_1}\right). \] For example, consider
\(\log(96/95) = \log(1.01053) \approx 0.0105\), which corresponds to a growth rate of about 1.05%. The
percentage change would be \[\frac{P_2 - P_1}{P_1} = \frac{96 - 95}{95} \approx 0.0105,\]
so we can see that this approximation works for small changes.
# Install quantmod if it is missing: require() returns FALSE (rather than
# erroring) when the package is unavailable, which triggers the install
if (!require("quantmod")) install.packages("quantmod")
## Loading required package: quantmod
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'quantmod'
## Installing package into '/Users/runner/work/_temp/Library'
## (as 'lib' is unspecified)
## also installing the dependencies 'xts', 'zoo', 'TTR'
##
## The downloaded binary packages are in
## /var/folders/tb/y368xp_x10s3ty1b_mtl5mxr0000gn/T//RtmpV9w6QE/downloaded_packages
# Load required libraries quietly (xts supplies the time-series class quantmod returns)
suppressMessages({ library(quantmod); library(xts)
})
# Download S&P 500 index data (2020-2023, i.e. 4 years)
# auto.assign=TRUE creates an xts object named GSPC in the global environment
getSymbols("^GSPC", from="2020-01-01", to="2023-12-31", auto.assign=TRUE, warnings=FALSE)
## [1] "GSPC"
prices <- Cl(GSPC) # closing prices (roughly 252 trading days * 4 years = 1008 expected)
print(length(prices))
## [1] 1006
# Actual count is 1006, slightly below 1008, since market holidays vary by year
# log(P_t) - log(P_{t-1}) = approximation of a percentage change (for small changes)
# continuously compounded rate of return (i.e. the log return)
# The trailing [-1] drops the leading NA that diff() produces for the first observation
returns <- diff(log(prices))[-1]
print(returns)
## GSPC.Close
## 2020-01-03 -0.0070849094
## 2020-01-06 0.0035271452
## 2020-01-07 -0.0028071751
## 2020-01-08 0.0048904735
## 2020-01-09 0.0066332141
## 2020-01-10 -0.0028592625
## 2020-01-13 0.0069519940
## 2020-01-14 -0.0015156808
## 2020-01-15 0.0018684502
## 2020-01-16 0.0083317474
## ...
## 2023-12-15 -0.0000762524
## 2023-12-18 0.0045181223
## 2023-12-19 0.0058492675
## 2023-12-20 -0.0147931480
## 2023-12-21 0.0102487693
## 2023-12-22 0.0016586822
## 2023-12-26 0.0042227610
## 2023-12-27 0.0014294356
## 2023-12-28 0.0003701061
## 2023-12-29 -0.0028304770
print(prices)
## GSPC.Close
## 2020-01-02 3257.85
## 2020-01-03 3234.85
## 2020-01-06 3246.28
## 2020-01-07 3237.18
## 2020-01-08 3253.05
## 2020-01-09 3274.70
## 2020-01-10 3265.35
## 2020-01-13 3288.13
## 2020-01-14 3283.15
## 2020-01-15 3289.29
## ...
## 2023-12-15 4719.19
## 2023-12-18 4740.56
## 2023-12-19 4768.37
## 2023-12-20 4698.35
## 2023-12-21 4746.75
## 2023-12-22 4754.63
## 2023-12-26 4774.75
## 2023-12-27 4781.58
## 2023-12-28 4783.35
## 2023-12-29 4769.83
# Calculate realized variance (proxy using squared returns)
# Annualization: multiply squared daily returns by 252 trading days per year
rv <- as.numeric(returns^2 * 252) # Annualized realized variance
rv <- rv[!is.na(rv)] # Remove NAs (defensive; the leading NA was already dropped above)
head(rv)
## [1] 0.012649377 0.003135070 0.001985818 0.006027016 0.011087881 0.002060196
print( length(rv) )
## [1] 1005
# Create HAR regressors: for each day t, regress today's RV on yesterday's RV
# and on its weekly (5-day) and monthly (22-day) trailing averages
n <- length(rv)
# The monthly regressor needs the 22 preceding observations, so the dependent
# variable can only start at index 23
start_idx <- 23
# Pre-allocate matrices (growing objects inside a loop copies on every append)
n_obs <- n - start_idx + 1
y <- numeric(n_obs); X <- matrix(NA, n_obs, 4)
length(y)
## [1] 983
colnames(X) <- c("Intercept", "Daily", "Weekly", "Monthly")
# Fill data matrices
for (i in seq_len(n_obs)) {
  t <- start_idx + i - 1
  # Dependent variable: today's realized variance
  y[i] <- rv[t]
  # Regressors (regressor matrix) — the HAR volatility cascade
  X[i, 1] <- 1                           # intercept
  X[i, 2] <- rv[t - 1]                   # daily rv (lag 1)
  X[i, 3] <- mean(rv[(t - 5):(t - 1)])   # weekly realized variance (5 trading days)
  X[i, 4] <- mean(rv[(t - 22):(t - 1)])  # monthly rv (22 trading days)
}
T_obs <- length(y)
dim(X) # shape of the regressor matrix
## [1] 983 4
length(y) # length of the dep var vector
## [1] 983
head(y) # dependent variable
## [1] 0.0315419306 0.0027779043 0.0073905408 0.0134279399 0.0007172256
## [6] 0.0104573628
head(X) # regressor matrix
## Intercept Daily Weekly Monthly
## [1,] 1 0.0557163321 0.03039275 0.01523969
## [2,] 1 0.0315419306 0.03666322 0.01609844
## [3,] 1 0.0027779043 0.03672521 0.01608221
## [4,] 1 0.0073905408 0.02211875 0.01632788
## [5,] 1 0.0134279399 0.02217093 0.01666428
## [6,] 1 0.0007172256 0.01117111 0.01619289
The Gibbs sampler generates a Markov chain with stationary distribution \(f\) from an at least two-dimensional probability density function (or probability mass function) \(f\) on a sample space \(\Omega \subseteq \mathbb{R}^{d}, d \geq 2\), by sequentially updating the coordinates of \(\boldsymbol{x} \in \Omega\). We don’t need proposal functions because the Gibbs sampler draws from the conditional distribution of \(X_k \mid X_{-k} = x_{-k}\) to update entry \(k\) of \(\boldsymbol{x}\). In this case, we use a systematic-scan Gibbs sampler because we cycle through the parameters of interest, \(\beta\) and \(\sigma^{2}\), in a fixed order.
# Gibbs sampler for the Bayesian HAR model
#
# Systematic-scan Gibbs sampler for the linear model y = X beta + eps,
# eps ~ N(0, sigma^2 I), with a conjugate Normal prior on beta and an
# Inverse-Gamma prior on sigma^2. The two full conditionals are drawn in a
# fixed order each iteration (hence "systematic scan").
#
# Args:
#   y:      numeric vector of T observations (annualized realized variance)
#   X:      T x 4 regressor matrix (intercept, daily, weekly, monthly)
#   n_iter: total number of MCMC iterations
#   burnin: number of initial draws to discard
#
# Returns:
#   list(beta   = (n_iter - burnin) x 4 matrix of posterior draws,
#        sigma2 = numeric vector of (n_iter - burnin) posterior draws)
gibbs_har <- function(y,
                      X,
                      n_iter = 10000,
                      burnin = 2000) {
  # Prior parameters
  beta_0 <- c(0, 0.4, 0.3, 0.2)          # prior means
  V_0 <- diag(c(100, 0.04, 0.04, 0.04))  # prior covariance
  a_0 <- 2; b_0 <- 1                     # Inverse-Gamma parameters for sigma^2
  # Initialize the chain at the prior mean
  k <- ncol(X); T_obs <- length(y); beta <- beta_0; sigma2 <- 1
  # Store only post-burn-in samples
  n_keep <- n_iter - burnin
  beta_draws <- matrix(NA, n_keep, k); sigma2_draws <- numeric(n_keep)
  # Precompute quantities that are constant across iterations
  V_0_inv <- solve(V_0); XtX <- crossprod(X); Xty <- crossprod(X, y)
  # The Inverse-Gamma shape a_n does not depend on the current draw, so it is
  # hoisted out of the loop; only the rate b_n changes with the residuals
  a_n <- a_0 + T_obs / 2
  # V_n is the posterior variance-covariance matrix of the coefficients
  # (diagonal: variances, off-diagonal: covariances); beta_n and V_n
  # parameterize the multivariate normal full conditional of beta, while
  # a_n and b_n parameterize the Inverse-Gamma full conditional of sigma^2
  # Main loop (systematic-scan Gibbs sampler)
  for (iter in seq_len(n_iter)) {
    # Update beta | sigma2, y  ~  N(beta_n, V_n)
    V_n_inv <- V_0_inv + XtX / sigma2
    V_n <- solve(V_n_inv)
    beta_n <- V_n %*% (V_0_inv %*% beta_0 + Xty / sigma2)
    # Draw via beta_n + L z with L = t(chol(V_n)), so Cov(L z) = L L' = V_n
    beta <- drop(beta_n + t(chol(V_n)) %*% rnorm(k))
    # Update sigma2 | beta, y  ~  InvGamma(a_n, b_n)
    residuals <- y - X %*% beta
    b_n <- b_0 + sum(residuals^2) / 2
    sigma2 <- 1 / rgamma(1, shape = a_n, rate = b_n)
    # Store post-burn-in draws only
    if (iter > burnin) {
      beta_draws[iter - burnin, ] <- beta
      sigma2_draws[iter - burnin] <- sigma2
    }
  }
  list(beta = beta_draws,
       sigma2 = sigma2_draws)
}
# results$beta and results$sigma2 contain the empirical (posterior) distributions of the two estimated parameters of interest
# Default settings apply here: n_iter = 10000 with burnin = 2000
results <- gibbs_har(y, X)
colMeans(results$beta) # posterior means of the volatility persistence params: intercept, daily rv, weekly rv, monthly rv
## [1] 0.01323416 0.23584119 0.43132166 0.08930988
mean(results$sigma2) # posterior mean for the error variance
## [1] 0.03167416
Recap of the Markov Chain Monte Carlo algorithm:
Two main problems arise: (P1) the chain only converges to its stationary distribution asymptotically, so early draws still depend on the arbitrary starting value, and (P2) successive draws are autocorrelated, so the chain carries less information than an i.i.d. sample of the same size.
(P1) can be addressed by discarding a large chunk of initial observations (burn-in sample) or by increasing the no. of iterations (MCMC sample size)
Markov Chain Monte Carlo (MCMC) diagnostics are tools that can be used to check whether the quality of a sample generated with an MCMC algorithm is sufficient to provide an accurate approximation of the target distribution.
In particular, MCMC diagnostics are used to check:
Diagnostic tools:
# Trace plot for the daily realized-volatility coefficient: the path the
# sampler travelled while mapping out the posterior distribution
plot(results$beta[, 2],
     type = "l",
     col  = "steelblue",
     xlab = "MCMC Iteration (t)",
     ylab = "Value",
     main = "Trace Plot for beta_d (daily rv)")
Trace plot of all MCMC samples, including the burn-in samples:
# Re-run the sampler keeping every draw so the burn-in phase is visible
# (burnin = 0 means no samples are discarded)
results <- gibbs_har(y, X, n_iter = 10000, burnin = 0)
# results$beta now holds all 10,000 iterations, starting from iteration 1
plot(results$beta[, 2],
     type = "l",
     col = "steelblue",
     main = "Trace Plot for Beta_d (daily rv) with burn-in")
# Red dashed vertical line marking where the burn-in period would end
abline(v = 2000, col = "red", lty = 2, lwd = 2)
legend("topright", legend = "End of Burn-in", col = "red", lty = 2)
# Posterior histogram for the daily realized-volatility coefficient
hist(results$beta[, 2],
     probability = FALSE, # FALSE: y-axis shows counts, TRUE would show density
     breaks = 30,
     col = "steelblue",
     main = "Histogram of the posterior for Beta_d (daily rv)",
     ylab = "Frequency (counts)",
     xlab = expression(paste("Daily Persistence ", beta[d]))) # plotmath subscript
# Blue dashed vertical line marking the posterior mean
abline(v = mean(results$beta[, 2]), col = "blue", lty = 2, lwd = 2)
legend("topright", legend = "posterior mean", col = "blue", lty = 2)
library(parallel)
# Run 2 chains (i.e. running the MCMC algorithm twice); comparing independent
# chains is a standard convergence diagnostic
n_chains <- 2
# Parallel variant: mclapply() forks on Mac/Linux; Windows requires parLapply()
# with an explicit cluster instead
chains_list <- mclapply(seq_len(n_chains), function(i) {
  gibbs_har(y, X, n_iter = 10000, burnin = 2000)
}, mc.cores = n_chains)
# Sequential variant: lapply() applies a function over a list or vector.
# NOTE(review): gibbs_har() always initializes beta at the prior mean, so the
# chains differ only through their random draws; to start each chain from a
# different random point, gibbs_har() would need an init argument.
chains_out <- lapply(seq_len(n_chains), function(i) {
  gibbs_har(y, X, n_iter = 10000, burnin = 2000)
})
str( chains_out ) # displaying the structure of the list object
## List of 2
## $ :List of 2
## ..$ beta : num [1:8000, 1:4] 0.00722 0.00849 0.00811 0.01331 0.00822 ...
## ..$ sigma2: num [1:8000] 0.0347 0.0293 0.0322 0.0319 0.0298 ...
## $ :List of 2
## ..$ beta : num [1:8000, 1:4] 0.0173 0.0104 0.0128 0.0186 0.0102 ...
## ..$ sigma2: num [1:8000] 0.0337 0.0333 0.03 0.0319 0.0305 ...
# comparing the posterior colMeans of both chains; near-identical means across
# chains are consistent with convergence to the same posterior
colMeans(chains_out[[1]]$beta)
## [1] 0.01305197 0.23642811 0.43025828 0.09084691
colMeans(chains_out[[2]]$beta)
## [1] 0.01314847 0.23619552 0.43098901 0.08942888