Benchmarking forecast models#

# Required imports for workbook
import warnings

warnings.simplefilter("ignore", category=FutureWarning)

import numpy as np
import pandas as pd
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
)

import plotly.express as px
import plotly.graph_objects as go

from statsmodels.graphics.tsaplots import plot_acf

random_seed = 42

Once again, we will read in monthly A&E attendances from NHS England (following the same steps as in the first worksheet).

# Import raw data
raw_data = pd.read_excel(
    "https://www.england.nhs.uk/statistics/wp-content/uploads/sites/2/2024/02/Monthly-AE-Time-Series-January-2024.xls",
    skiprows=13,
    sheet_name="Activity",
)
# Make calendar adjustments
raw_data["Period"] = pd.to_datetime(raw_data["Period"], format="%Y-%m-%d")
raw_data = raw_data[["Period", "Total Attendances"]].set_index("Period").asfreq("MS")
# Add column showing the number of days in each month
raw_data["days_per_month"] = raw_data.index.daysinmonth
# Add column for adjusted data
raw_data["total_attendances_adjusted"] = (
    raw_data["Total Attendances"] / raw_data["days_per_month"]
)
# We will again initially consider data pre-COVID.
ts_data = raw_data.loc[:"2020-02-01"]  # limit data to Feb 2020
# Plot time series of data using Plotly express line
ts_fig = px.line(
    ts_data,
    x=ts_data.index,
    y="total_attendances_adjusted",
    title="Monthly A&E attendances from NHS England",
)
ts_fig.show()

Simple forecasting methods (benchmarking)#

To organise our forecasts, we'll create a new dataframe to store the output of each method. We'll treat pre-COVID observations as our training data, and post-COVID observations as the testing data.

forecast_horizon = 47
forecast_data = pd.DataFrame()
forecast_data.index = pd.date_range(
    start="2020-03-01", periods=forecast_horizon, freq="MS"
)
forecast_data.head()

Let's populate this pandas dataframe with some simple forecasts.

Naive forecast#

The simplest forecast is to take the most recent observation and project it forward unchanged. This means all but one data point are ignored.

# Calculating Naive forecast

latest_value = ts_data.iloc[-1]["total_attendances_adjusted"]  # Get latest data point
forecast_data["naive"] = [latest_value] * forecast_horizon  # Add to forecast_data
forecast_data.head()
simple_fcsts_fig = go.Figure()
simple_fcsts_fig.add_trace(
    go.Scatter(
        x=ts_data.index, y=ts_data["total_attendances_adjusted"], name="Historical data"
    )
)
simple_fcsts_fig.add_trace(
    go.Scatter(x=forecast_data.index, y=forecast_data["naive"], name="Naive forecast")
)
simple_fcsts_fig.update_layout(title="NHS England average daily attendances")

Seasonal naive forecast#

If we are working with data that has a clear seasonal pattern, we may want to use the most recent seasonal period. Seasonal naive uses the latest value from the same season (i.e., the most recent December, or the most recent Monday, etc.). In this case, we are working with monthly data, so the forecast for each month is the corresponding month from the most recent 12 months of the observation period.

def s_naive(ts, period, forecast_horizon):
    """
    Inputs:
        ts (pandas.Series): Historical time series observations (in order)
        period (int): Seasonal period (i.e., 12 for monthly data)
        forecast_horizon (int): Number of timesteps forecasted into the future
    Outputs:
        list: Forecasted time series
    """

    most_recent_seasonal = ts.iloc[-period:].to_list()

    # We now need to make the forecast
    # Number of times to multiply the list to ensure we meet forecast horizon
    mult_list = int(np.ceil(forecast_horizon / period))

    return (most_recent_seasonal * mult_list)[:forecast_horizon]
# Use custom function to calculate seasonal naive forecast

forecast_data["snaive"] = s_naive(
    ts_data["total_attendances_adjusted"], period=12, forecast_horizon=forecast_horizon
)
forecast_data.head()
simple_fcsts_fig = go.Figure()
simple_fcsts_fig.add_trace(
    go.Scatter(
        x=ts_data.index, y=ts_data["total_attendances_adjusted"], name="Historical data"
    )
)
simple_fcsts_fig.add_trace(
    go.Scatter(
        x=forecast_data.index, y=forecast_data["snaive"], name="Seasonal naive forecast"
    )
)
simple_fcsts_fig.update_layout(title="NHS England average daily attendances")
simple_fcsts_fig.add_vrect(
    x0=ts_data.index[-12],
    x1=ts_data.index[-1],
    annotation_text="Latest seasonal period",
    annotation_position="top left",
    fillcolor="green",
    opacity=0.25,
    line_width=0,
)

Mean forecast#

We simply take the mean of historical data as our forecast.

forecast_data["mean"] = ts_data["total_attendances_adjusted"].mean()
simple_fcsts_fig = go.Figure()
simple_fcsts_fig.add_trace(
    go.Scatter(
        x=ts_data.index, y=ts_data["total_attendances_adjusted"], name="Historical data"
    )
)
simple_fcsts_fig.add_trace(
    go.Scatter(x=forecast_data.index, y=forecast_data["mean"], name="Mean forecast")
)
simple_fcsts_fig.update_layout(title="NHS England average daily attendances")

There is a large difference between the naive and mean forecasts. This is because the naive forecast uses only the latest data point, while the mean considers every data point equally.
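A toy illustration (with made-up numbers) makes the contrast concrete:

# Hypothetical mini-series: naive keeps only the last value, mean weights all values equally
toy_series = pd.Series([10, 12, 14, 30])
print(f"Naive forecast: {toy_series.iloc[-1]}")  # 30
print(f"Mean forecast: {toy_series.mean()}")  # 16.5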

Drift forecast#

The drift method is an extension of the naive method that allows the forecast to increase or decrease over time. The amount of change per timestep (the drift) is the average change seen in the historical data: the difference between the last and first observations, divided by the number of intervals between them.

def drift_method(ts, forecast_horizon):
    """
    Inputs:
        ts (pandas.Series): Historical time series observations (in order)
        forecast_horizon (int): Number of timesteps forecasted into the future
    Outputs:
        list: Forecasted time series
    """

    latest_obs = ts.iloc[-1]
    first_obs = ts.iloc[0]

    slope = (latest_obs - first_obs) / (len(ts) - 1)

    forecast_list = [latest_obs + slope * h for h in range(1, forecast_horizon + 1)]

    return forecast_list
forecast_data["drift"] = drift_method(
    ts_data["total_attendances_adjusted"], forecast_horizon
)
forecast_data.head()
simple_fcsts_fig = go.Figure()
simple_fcsts_fig.add_trace(
    go.Scatter(
        x=ts_data.index, y=ts_data["total_attendances_adjusted"], name="Historical data"
    )
)
simple_fcsts_fig.add_trace(
    go.Scatter(x=forecast_data.index, y=forecast_data["drift"], name="Drift forecast")
)
simple_fcsts_fig.update_layout(title="NHS England average daily attendances")

Finally, let's plot all benchmark forecasts along with the test data.

simple_fcsts_fig = go.Figure()
simple_fcsts_fig.add_trace(
    go.Scatter(
        x=ts_data.index, y=ts_data["total_attendances_adjusted"], name="Historical data"
    )
)
simple_fcsts_fig.add_trace(
    go.Scatter(x=forecast_data.index, y=forecast_data["naive"], name="Naive forecast")
)
simple_fcsts_fig.add_trace(
    go.Scatter(
        x=forecast_data.index, y=forecast_data["snaive"], name="Seasonal naive forecast"
    )
)
simple_fcsts_fig.add_trace(
    go.Scatter(x=forecast_data.index, y=forecast_data["mean"], name="Mean forecast")
)
simple_fcsts_fig.add_trace(
    go.Scatter(x=forecast_data.index, y=forecast_data["drift"], name="Drift forecast")
)
simple_fcsts_fig.add_trace(
    go.Scatter(
        x=raw_data.loc["2020-03-01":].index,
        y=raw_data.loc["2020-03-01":]["total_attendances_adjusted"],
        name="Post-COVID observations",
    )
)
simple_fcsts_fig.update_layout(title="NHS England average daily attendances")

Evaluating forecast accuracy#

We can quantify how well each model performs using simple accuracy metrics, such as the mean absolute percentage error (MAPE): the mean of the absolute errors divided by the observed values, expressed as a percentage.

MAPE has the advantage of being unit free (given as a percentage) and can therefore be compared across datasets.

Let's calculate MAPE manually to compare each forecast, using data from 2022 onwards (after the drop in attendances during COVID).

# Creating a function to calculate MAPE
def mape(observed, forecasted):
    """
    Inputs:
        observed (pandas.Series): Series of observed values
        forecasted (pandas.Series): Series of forecasted values
    Outputs:
        float: MAPE (as a percentage)
    """

    absolute_error = abs(observed - forecasted)
    mape_t = np.mean((absolute_error / observed) * 100)

    return mape_t
# Loop through each forecast and print the MAPE score
for fcst in forecast_data.columns:

    mape_score = mape(
        raw_data.loc["2022-01-01":]["total_attendances_adjusted"],  # observations
        forecast_data.loc["2022-01-01":][fcst],  # forecasts
    )
    print(f"{fcst} had a MAPE score of {round(mape_score, 2)}%")

We could equally calculate this (and other accuracy metrics) by using sklearn (imported in the first cell).

for fcst in forecast_data.columns:

    mape_score = mean_absolute_percentage_error(
        raw_data.loc["2022-01-01":]["total_attendances_adjusted"],
        forecast_data.loc["2022-01-01":][fcst],
    )
    print(f"{fcst} had a MAPE score of {round(mape_score * 100, 2)}%")
Benchmarking models key points
  • Each forecast is simple to interpret and implement:

    • The mean forecast is simply the mean of historical observations.

    • The naive forecast is the most recent value.

    • The seasonal naive forecast is set to be the most recent value from the same season.

    • The drift method accounts for growth (in terms of the difference between the first and last observed values).

  • These simple forecasts can be used to benchmark against more advanced time series models (see the sketch after this list). However, there could be cases where this is as good as it gets!
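To make benchmarking repeatable, the four methods can be collected into a single helper. This is only a minimal sketch (the benchmark_forecasts name is ours, not part of the worksheet), reusing the s_naive and drift_method functions defined above:

def benchmark_forecasts(ts, period, forecast_horizon, index):
    """
    Inputs:
        ts (pandas.Series): Historical time series observations (in order)
        period (int): Seasonal period (i.e., 12 for monthly data)
        forecast_horizon (int): Number of timesteps forecasted into the future
        index (pandas.DatetimeIndex): Dates covering the forecast horizon
    Outputs:
        pandas.DataFrame: One column per benchmark forecast
    """
    return pd.DataFrame(
        {
            "naive": [ts.iloc[-1]] * forecast_horizon,
            "snaive": s_naive(ts, period, forecast_horizon),
            "mean": [ts.mean()] * forecast_horizon,
            "drift": drift_method(ts, forecast_horizon),
        },
        index=index,
    )

# This should reproduce the forecast_data columns built step by step above
benchmark_forecasts(
    ts_data["total_attendances_adjusted"], 12, forecast_horizon, forecast_data.index
).head()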

Task (20-30 minutes)

Let's again consider the daily A&E admissions dataset.

# Creating some fake time series data
np.random.seed(random_seed)

time_index = pd.date_range(start="2022-01-01", end="2024-05-01", freq="D")

seasonal_component = (10 * np.sin((2 * np.pi / 7) * np.arange(len(time_index)))).astype(
    int
)
remainder_component = np.random.normal(loc=0, scale=1.5, size=len(time_index)).astype(
    int
)
trend_component = np.round(np.arange(len(time_index)) / 35).astype(int)

time_series = 50 + seasonal_component + trend_component + remainder_component

# Format data in a pandas dataframe
task_data = pd.DataFrame({"date": time_index, "admissions_daily": time_series})
task_data.head()
training_data = task_data[: (-4 * 7)]
testing_data = task_data[(-4 * 7) :]
  1. Using any two of the naive, mean, seasonal naive, or drift forecasts on the training data, make predictions for the next 4 weeks (the length of the test data).

# Answer here
  2. Plot the forecasts against the observed data during the testing period.

# Answer here
  3. Using mean_absolute_percentage_error (or any of the accuracy metrics from sklearn.metrics), evaluate the accuracy of the forecasts.

# Answer here
  4. Plot the ACF of the residuals for the best performing model. Is there any ‘left over’ information in the error that has been missed by the model (i.e., seasonal variation)?

# Answer here