Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

The HP Filter

Learning objectives

  • Understand trend–cycle decomposition

  • Interpret the smoothing parameter $\lambda$

Smoothing time series data is useful in many economic situations. The main idea is to try to separate a “trend” from a “cycle.” The smoothed series should not differ too much from the actual data. This is done through a choice of parameters in the filter.

Setup

Suppose we have $T$ observations on the log of some time series $y_{t}$ for $t = 1, 2, \ldots, T$, where $y_{t}$ consists of a trend term, $\tau_{t}$, and a cyclical component, $c_{t}$, so that $y_{t} = \tau_{t} + c_{t}$.

The HP filter finds $\{\tau_{t}\}$ that solves

$$\min_{\tau} \Big(\sum_{t=1}^{T} c_{t}^2 + \lambda \sum_{t=3}^{T} [(\tau_{t}-\tau_{t-1})-(\tau_{t-1}-\tau_{t-2})]^2 \Big),$$

or, rewriting with $c_{t} = y_{t} - \tau_{t}$,

$$\min_{\tau} \Big(\sum_{t=1}^{T} (y_{t}-\tau_{t})^2 + \lambda \sum_{t=3}^{T} [(\tau_{t}-\tau_{t-1})-(\tau_{t-1}-\tau_{t-2})]^2 \Big),$$

where $\lambda$ is the “smoothness penalty.” Note that as $\lambda \rightarrow 0$ the trend is just the series $y_{t}$ itself, and as $\lambda \rightarrow \infty$ the trend is just a regression on a linear trend (its second difference is 0). Here is a Python implementation:

import numpy as np
import pandas as pd
import pandas_datareader as pdr
import datetime
from statsmodels.tsa.filters.hp_filter import hpfilter
import matplotlib.pyplot as plt
import matplotlib as mpl
from cycler import cycler
from tabulate import tabulate
from matplotlib.dates import DateFormatter
from pandas.plotting import register_matplotlib_converters
# The converters are only registered when the function is actually called;
# a bare reference to its name is an expression statement that does nothing.
register_matplotlib_converters()
import matplotlib.ticker as ticker
###from statsmodels.tsa.x13 import x13_arima_analysis
import sys
### Some libraries for web scraping (BLS revisions)
import requests
from bs4 import BeautifulSoup
import time

# Record the wall-clock time at which this script started running.
start_time = time.time()

# Do not truncate the row display of pandas objects.
pd.set_option('display.max_rows', None)

# Colours used, in order, for successive lines on a plot.
my_colors = [
    "#00688b",
    "#cd3333",
    "#6c8b3d",
    "cornflowerblue",
    "#cd6600",
    "#8b2323",
    "#5d478b",
    "red",
]

# Global matplotlib defaults, applied in a single pass.
plt.rcParams.update({
    # Axes: colour cycle, no top/right spines, no horizontal padding.
    'axes.prop_cycle': cycler(color=my_colors),
    'axes.spines.right': False,
    'axes.spines.top': False,
    'axes.xmargin': 0,
    # Figure size and fonts.
    'figure.figsize': (10.5, 6.5),
    'font.family': 'sans-serif',
    'font.size': 14,
    # Legend appearance.
    'legend.fontsize': 'medium',
    'legend.frameon': False,
    'legend.framealpha': None,
    'legend.loc': 'best',
    # Titles and lines.
    'axes.titlesize': 'xx-large',
    'axes.titlecolor': '#3333B3FF',
    'lines.linewidth': 3,
    # Subplot bounds set to the full figure (0 to 1 in both directions).
    'figure.subplot.left': 0,
    'figure.subplot.bottom': 0,
    'figure.subplot.right': 1,
    'figure.subplot.top': 1,
})

def crosscorr(datax, datay, lag=0):
    """Return the correlation of ``datax`` with ``datay`` shifted by ``lag``.

    ``datay`` is first moved ``lag`` positions along its index with
    ``shift``; ``datax.corr`` is then evaluated against the shifted series.
    With the default ``lag=0`` no shifting takes place.
    """
    shifted = datay.shift(lag)
    return datax.corr(shifted)

def NBER(start_date, ax=None):
    """Shade recession periods on a twin of an existing axes.

    Inputs:
    -------
    start_date : string or timestamp
         Only rows of the module-level dataframe ``monthly`` whose index is
         on or after this date are used for the shading.
    ax : matplotlib axes, optional
         The axes to add the shading to. When omitted, a module-level
         variable named ``ax`` is used if one exists (this is what the
         function relied on before the argument was added); failing that,
         the current matplotlib axes (``plt.gca()``) is used.

    Returns:
    --------
    The twin axes created with ``ax.twinx()`` that carries the shading.
    Its y axis is hidden.

    Requires ``monthly`` to have a ``'recessiondates'`` column.
    """
    if ax is None:
        # Backward compatibility: earlier versions read a global ``ax``.
        ax = globals().get('ax')
    if ax is None:
        ax = plt.gca()

    ax2 = ax.twinx()
    Recessions = monthly['recessiondates'][monthly.index >= start_date]
    ax2.fill_between(Recessions.index,
                     Recessions,
                     step="pre",
                     color="gray",
                     alpha=0.2)
    ax2.get_yaxis().set_visible(False)
    return ax2

def simpleaxis(ax):
    """Hide the top and right spines of ``ax`` and keep ticks bottom/left.

    The x axis is told to tick at the bottom and the y axis at the left.
    Nothing is returned; ``ax`` is modified in place.
    """
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    x_axis = ax.get_xaxis()
    x_axis.tick_bottom()
    y_axis = ax.get_yaxis()
    y_axis.tick_left()

def day_format(label):
    """
    Build a tick label from a date: the month number, a space, then the
    day of the month (both taken from ``label.month`` and ``label.day``).
    """
    return '{} {}'.format(label.month, label.day)

def month_format(label):
    """
    Build a tick label from a date: the first three letters of the month
    name. For January the year is added on a second line.
    """
    abbrev = label.month_name()[:3]
    return f'{abbrev}\n{label.year}' if abbrev == 'Jan' else abbrev

def quarter_format(label):
    """
    Build a quarterly tick label from a date.

    January, April, July and October map to Q1-Q4, with the year added on
    a second line for Q1. Any other month yields the first three letters
    of its name.
    """
    abbrev = label.month_name()[:3]
    quarter_labels = {
        'Jan': f'Q1\n{label.year}',
        'Apr': 'Q2',
        'Jul': 'Q3',
        'Oct': 'Q4',
    }
    return quarter_labels.get(abbrev, abbrev)

Now we will read in some data.

### Quarterly

# FRED series code -> column name used in the rest of this script.
FREDmap = {'GDPC1': 'real_gdp'}

# Request every mapped series from FRED starting at 1800-01-01
# (presumably early enough to return the full available history),
# then switch the column names from FRED codes to the friendly names.
quarterly = pdr.DataReader(
    list(FREDmap),
    'fred',
    datetime.datetime(1800, 1, 1),
).rename(columns=FREDmap)

# Keep a copy of the download on disk.
quarterly.to_pickle('quarterly.pickle')

# Show the most recent observations.
quarterly['real_gdp'].tail()
DATE 2023-07-01 22780.933 2023-10-01 22960.600 2024-01-01 23053.545 2024-04-01 23223.906 2024-07-01 23386.733 Name: real_gdp, dtype: float64
def EconLineGraph(dataframe = None,
                  series = None,
                  labels = None,
                  title = None,
                  subtitle = None,
                  source = None,
                  ylabel = None,
                  start_date = None,
                  end_date = None,
                  graph_name = None,
                  recessions = False,
                  logscale = False,
                  ylines = None,
                  yzero = False):
    """For creating line graphs from a data frame

    Inputs:
    -------
    dataframe : dataframe
         The dataframe containing the data to be plotted
    series : vector of text
         The "names" of the variables to be plotted
    labels : vector of text
         The labels (descriptions) of each variable, used as legend
         entries. If omitted, any existing legend is hidden.
    title : text string
         Title for the figure
    subtitle : text string
         A "title" which appears at top left of the figure
    source : text string
         The source for the data; shown as 'Source: <source>' at bottom
         right of the figure
    ylabel : text string
         Text that appears beside the y axis
    start_date : string
         The first date to appear in the figure. String as 'yyyy-mm-dd'.
         If omitted, the figure starts at the first row of the selected
         series that is not entirely missing.
    end_date : string
         The last date to appear in the figure. String as 'yyyy-mm-dd'.
         If omitted, the figure runs through the last available row.
    graph_name : text string
         The basename for the graph files. Both pdf and png files are
         produced, as pyfigs/pdf/<graph_name>.pdf and
         pyfigs/png/<graph_name>.png; the directories are created when
         missing. If omitted, nothing is written to disk.
    recessions : logical
         If True, then include NBER business cycle recession shading from
         the dataframe "monthly"
    logscale : logical
         If True, use a log scale on the y axis, with tick labels
         formatted as whole numbers
    ylines : vector of numbers
         y values at which to draw thin gray horizontal reference lines.
         Ignored when yzero is True.
    yzero : logical
         If True, draw a thin gray horizontal line at y = 0
    """
    import os

    # Label-based slice; a None bound leaves that side of the sample open.
    df = dataframe.loc[start_date:end_date][series].dropna(how = 'all')
    if start_date is None:
        start_date = df.first_valid_index()

    fig, ax = plt.subplots()

    if logscale:
        ax.set_yscale('log')
        ax.yaxis.set_major_formatter(plt.FormatStrFormatter("%.0f"))
        ax.yaxis.set_minor_formatter(plt.FormatStrFormatter("%.0f"))

    if recessions:
        # NOTE(review): presumably matplotlib's own plot is used here so the
        # x axis shares date units with the fill_between shading below;
        # confirm before unifying with the pandas branch.
        ax.plot(df, clip_on = False)
    else:
        df.plot(ax = ax, clip_on = False)

    # Horizontal reference lines are drawn directly on the axes, so the
    # plotted data frame is never modified.
    if yzero is True:
        ax.axhline(y = 0, color = '#a0a0a0', linewidth = 0.75)
    elif ylines is not None:
        for level in ylines:
            ax.axhline(y = level, color = '#a0a0a0', linewidth = 1.0)

    if title is not None:
        ax.set_title(title)

    if subtitle is not None:
        ax.set_title(subtitle,
                     loc = 'left',
                     color = 'black',
                     size = 'medium')

    if ylabel is not None:
        ax.set_ylabel(ylabel)

    if labels is not None:
        ax.legend(labels)
    else:
        legend = ax.get_legend()
        if legend is not None:
            legend.set_visible(False)

    if source is not None:
        ax.set_xlabel('Source: ' + source,
                      horizontalalignment='right',
                      x=1.0)
    else:
        ax.set_xlabel('')

    if recessions:
        ax2 = ax.twinx()
        in_window = monthly.index >= start_date
        if end_date is not None:
            # Keep the shading inside the plotted sample.
            in_window = in_window & (monthly.index <= end_date)
        Recessions = monthly['recessiondates'][in_window]
        ax2.fill_between(Recessions.index,
                         Recessions,
                         step="pre",
                         color="gray",
                         alpha=.2)
        ax2.get_yaxis().set_visible(False)
        ax2.set_ymargin(0)

    if graph_name is not None:
        for ext in ('pdf', 'png'):
            out_dir = 'pyfigs/' + ext
            os.makedirs(out_dir, exist_ok = True)
            fig.savefig(out_dir + '/' + graph_name + '.' + ext,
                        bbox_inches='tight', pad_inches=0.25)
   

# Plot real GDP from 1947 onward on a linear (not log) y axis.
# NOTE(review): the output basename says "log" although no log is taken
# here; confirm whether logscale=True or a different name was intended.
EconLineGraph(
    dataframe=quarterly,
    series=['real_gdp'],
    title='Real GDP\n',
    subtitle='Billions of Chained 2017 Dollars',
    source='BEA',
    start_date='1947-01-01',
    graph_name='log_real_gdp',
    logscale=False,
)
<Figure size 1050x650 with 1 Axes>

Take the log of real GDP and apply the HP filter, using $\lambda = 1600$.

# The filter is applied to the log of real GDP.
quarterly['lrgdp'] = np.log(quarterly['real_gdp'])

# hpfilter returns the pair (cycle, trend), in that order; missing values
# are dropped before filtering.
hpcycle_gdp, hptrend_gdp = hpfilter(
    quarterly['lrgdp'].dropna(),
    lamb=1600,
)

# Store both components next to the data; assignment aligns on the index.
quarterly['hptrend_gdp'] = hptrend_gdp
quarterly['hpcycle_gdp'] = hpcycle_gdp

Now graph the real gdp series and the hp trend.

# Plot log real GDP together with its HP trend. The underlying real GDP
# series is in billions (not trillions) of chained 2017 dollars, so the
# subtitle describes the log of billions.
EconLineGraph(dataframe = quarterly,
          series = ['lrgdp', 'hptrend_gdp'],
          labels = ['GDP', 'HP Trend'],
          title = 'Real GDP and HP Trend\n',
          subtitle = 'Log of Billions of Chained 2017 Dollars',
          source = 'BEA',
          start_date = '1947-01-01',
          graph_name = 'hp_log_real_gdp',
          logscale= False)
<Figure size 1050x650 with 1 Axes>
# Plot the HP cyclical component with a reference line at zero. The cycle
# is log GDP minus its HP trend, so it is a log deviation from trend
# rather than a dollar amount; the subtitle says so.
EconLineGraph(dataframe = quarterly,
          series = ['hpcycle_gdp'],
          title = 'Real GDP Cyclical Component\n',
          subtitle = 'Log Deviation from HP Trend',
          source = 'BEA',
          start_date = '1947-01-01',
          graph_name = 'hp_cycle_log_real_gdp',
          yzero = True,
          logscale= False)
<Figure size 1050x650 with 1 Axes>

Homework #1:

(A) Use a very large number for $\lambda$, like 16000000000, and look at the resulting trend and cycle. Explain what is going on. Next, plot the cyclical component for the two different values of $\lambda$ and comment on the difference.

(B) Download GDPCA from FRED. This is annual real GDP back to 1929. Should you use $\lambda = 1600$? Why or why not? If not, what value should you use? Why?