RPubs

by RStudio

raj678

RAJ KHOSLA

Recently Published

gold price forecasting sample

# ------------------ DATA ------------------ > # Yearly average gold prices (Rs per 10g, 24K) from 1964 to 2020 > # Source: https://www.bankbazaar.com/gold-rate/gold-rate-trend-in-india.html > # (I compiled this from the site; 2006 value filled from historical records ~ Rs. 9,000) > > gold_prices_1964_2020 <- c( + 63.25, 71.75, 83.75, 102.50, 162.00, 176.00, 184.00, 193.00, 202.00, 278.50, # 1964-1973 + 506.00, 540.00, 432.00, 486.00, 685.00, 937.00, 1330.00, 1670.00, 1645.00, 1800.00, # 1974-1983 + 1970.00, 2130.00, 2140.00, 2570.00, 3130.00, 3140.00, 3200.00, 3466.00, 4334.00, 4140.00, # 1984-1993 + 4598.00, 4680.00, 5160.00, 4725.00, 4045.00, 4234.00, 4400.00, 4300.00, 4990.00, 5600.00, # 1994-2003 + 5850.00, 7000.00, 9000.00, 10800.00, 12500.00, 14500.00, 18500.00, 26400.00, 31050.00, 29600.00, # 2004-2013 + 28006.50, 26343.50, 28623.50, 29667.50, 31438.00, 35220.00, 48651.00 # 2014-2020 + ) > > # For 1964-2019 only (as in one of the models) > gold_prices_1964_2019 <- gold_prices_1964_2020[1:56] > > # Create time series objects > ts_1964_2019 <- ts(gold_prices_1964_2019, start = 1964, frequency = 1) > ts_1964_2020 <- ts(gold_prices_1964_2020, start = 1964, frequency = 1) > > # ------------------ EXPLORATORY ANALYSIS ------------------ > cat("Summary Statistics (1964-2020):\n") Summary Statistics (1964-2020): > print(summary(ts_1964_2020)) Min. 1st Qu. Median Mean 3rd Qu. Max. 63.25 685.00 4045.00 8450.00 9000.00 48651.00 > > # Plot original series > par(mfrow = c(2,1)) > plot(ts_1964_2020, main = "Gold Prices in India (1964-2020)", + ylab = "Price (Rs per 10g)", xlab = "Year", col = "blue", lwd = 2) > plot(diff(ts_1964_2020, differences = 2), main = "Second Differenced Series", + ylab = "Diff(Price)", xlab = "Year", col = "red") > > # Stationarity test (Augmented Dickey-Fuller) > cat("\nADF Test on Original Series (1964-2020):\n") ADF Test on Original Series (1964-2020): > print(adf.test(ts_1964_2020)) Augmented Dickey-Fuller Test data: ts_1964_2020 Dickey-Fuller = 1.6521, Lag order = 3, p-value = 0.99 alternative hypothesis: stationary Warning message: In adf.test(ts_1964_2020) : p-value greater than printed p-value > > cat("\nADF Test after 2nd Differencing:\n") ADF Test after 2nd Differencing: > print(adf.test(diff(ts_1964_2020, differences = 2))) Augmented Dickey-Fuller Test data: diff(ts_1964_2020, differences = 2) Dickey-Fuller = -1.5994, Lag order = 3, p-value = 0.736 alternative hypothesis: stationary > > # ACF & PACF of differenced series > par(mfrow = c(1,2)) > acf(diff(ts_1964_2020, differences = 2), main = "ACF (2nd Diff)", lag.max = 20) > pacf(diff(ts_1964_2020, differences = 2), main = "PACF (2nd Diff)", lag.max = 20) > > # ------------------ MODEL FITTING ------------------ > # ARIMA(0,2,3) as selected in the paper > model_2019 <- Arima(ts_1964_2019, order = c(0, 2, 3)) > model_2020 <- Arima(ts_1964_2020, order = c(0, 2, 3)) > > cat("\n=== ARIMA(0,2,3) Model (1964-2019) ===\n") === ARIMA(0,2,3) Model (1964-2019) === > print(summary(model_2019)) Series: ts_1964_2019 ARIMA(0,2,3) Coefficients: ma1 ma2 ma3 -0.1249 -0.4303 -0.2670 s.e. 0.1282 0.1719 0.1661 sigma^2 = 1401692: log likelihood = -457.77 AIC=923.54 AICc=924.35 BIC=931.49 Training set error measures: ME RMSE MAE MPE MAPE MASE ACF1 Training set 163.1887 1129.841 626.6489 2.645264 10.18231 0.7232524 -0.000845543 > > cat("\n=== ARIMA(0,2,3) Model (1964-2020) ===\n") === ARIMA(0,2,3) Model (1964-2020) === > print(summary(model_2020)) Series: ts_1964_2020 ARIMA(0,2,3) Coefficients: ma1 ma2 ma3 0.3198 -0.3481 -0.6217 s.e. 0.1649 0.1217 0.1872 sigma^2 = 2864090: log likelihood = -486.18 AIC=980.36 AICc=981.16 BIC=988.39 Training set error measures: ME RMSE MAE MPE MAPE MASE ACF1 Training set 247.6729 1616.432 877.2209 2.323572 11.83656 0.8042002 -0.08991406 > > # Residual diagnostics > checkresiduals(model_2020) Ljung-Box test data: Residuals from ARIMA(0,2,3) Q* = 11.245, df = 7, p-value = 0.1283 Model df: 3. Total lags used: 10 > > # ------------------ FORECASTING ------------------ > # Forecast 10 years ahead (2021-2030) using 1964-2020 model > forecast_10yr <- forecast(model_2020, h = 10, level = 95) > > cat("\nForecast for 2021-2030:\n") Forecast for 2021-2030: > print(forecast_10yr) Point Forecast Lo 95 Hi 95 2021 62646.40 59329.43 65963.37 2022 72057.85 63678.74 80436.96 2023 77018.86 63256.32 90781.40 2024 81979.88 63668.64 100291.12 2025 86940.89 64344.73 109537.06 2026 91901.91 65107.17 118696.65 2027 96862.93 65877.26 127848.59 2028 101823.94 66613.98 137033.90 2029 106784.96 67293.90 146276.01 2030 111745.97 67902.92 155589.02 > > # Plot forecast > plot(forecast_10yr, main = "Gold Price Forecast (ARIMA(0,2,3))", + ylab = "Price (Rs per 10g)", xlab = "Year", + xlim = c(2015, 2030), lwd = 2) > > # Compare with paper's forecasts (approximate match) > # The sudden 2020 spike (COVID) affects long-term projections, as noted in the paper. >

about 5 hours ago

Plot