# This is accompanying code for
# https://jozef.io/r001-reshape/

# Original data source:
# http://ec.europa.eu/eurostat/web/sector-accounts/data/annual-data

# Important note - the figures are presented
#  - in millions € for euro area countries and euro area / EU aggregates,
#  - millions of national currency otherwise

# Read Data -------------------------------------------------------------------
# Columns 2:23 of this file are the ones reshaped below (one per year,
# 1995-2016, according to the `times` argument used further down).
gdi <- read.csv(
  file = "https://jozef.io/post/data/ESA2010_GDI.csv",
  stringsAsFactors = FALSE
)

# Basic wide to long reshape --------------------------------------------------
# Only `varying` is given, so reshape() has to derive the name of the value
# column and the time values from the names of the stacked columns; the id and
# time columns get the default names ("id" and "time").
gdi_long <- reshape(
  data = gdi,          # data.frame in wide format to be reshaped
  direction = "long",  # we are going from wide to long
  varying = 2:23       # columns that will be stacked into 1
)

# Basic wide to long reshape, more arguments ----------------------------------
gdi_long_full <- reshape(
  data = gdi,          # data.frame in wide format to be reshaped
  direction = "long",  # still going from wide to long
  varying = 2:23,      # columns that will be stacked into 1
  idvar = "country",   # what identifies the rows?
  v.names = "GDI",     # how will the column with values be called
  timevar = "year",    # how will the time column be called
  times = 1995:2016    # what are the values for the timevar column
)

# Basic long to wide reshape --------------------------------------------------
gdi_wide <- reshape(
  gdi_long_full,       # data.frame in long format to be reshaped
  direction = "wide",  # going from long to wide this time
  idvar = "country",   # identifying the subject in rows
  timevar = "year",    # column with values that will change to columns
  v.names = "GDI"      # column with the values
)

# Read Data -------------------------------------------------------------------
more_pretty <- read.csv(
  file = "https://jozef.io/post/data/ESA2010_pretty.csv",
  stringsAsFactors = FALSE
)

# Advanced wide to long reshape -----------------------------------------------
more_pretty_long <- reshape(
  data = more_pretty,  # data.frame in wide format to be reshaped
  direction = "long",  # we are going from wide to long
  varying = 2:67,      # columns that will be stacked into 1
  idvar = "country"    # identifying the subject in rows
)

# Second pass: stack columns 3:5 of the long data into a single "Value"
# column, keeping the original column names as the "measurement" label.
more_longer <- reshape(
  data = more_pretty_long,
  direction = "long",
  varying = 3:5,                          # columns to stack into "Value"
  timevar = "measurement",                # new column naming the source column
  times = names(more_pretty_long)[3:5],   # labels = names of stacked columns
  v.names = "Value"                       # column with the stacked values
)

# Read Not so pretty Data -----------------------------------------------------
more_notpretty <- read.csv(
  file = "https://jozef.io/post/data/ESA2010_not_pretty.csv",
  stringsAsFactors = FALSE
)

# Does not work ---------------------------------------------------------------
# Kept as a demonstration of the failing call. The error is caught and
# reported as a message so that the rest of the script still runs when the
# whole file is source()d; `notpretty_long` is only created if the call
# succeeds.
tryCatch(
  notpretty_long <- reshape(
    data = more_notpretty,
    direction = "long",
    varying = 2:67
  ),
  error = function(e) {
    message("reshape() failed: ", conditionMessage(e))
  }
)

# We need to specify a lot more -----------------------------------------------
more_notpretty_long <- reshape(
  data = more_notpretty,  # data.frame in wide format to be reshaped
  direction = "long",     # we are going from wide to long
  varying = list(
    2:23,                 # ConspC columns
    24:45,                # AGDIpC columns
    46:67                 # GrossSaving columns
  ),
  timevar = "year",       # how will the time column be called
  times = 1995:2016,      # what are the values for the timevar column
  idvar = "country",      # identifying the subject in rows
  v.names = c(            # vector of names of the varying measurements
    "ConspC",
    "AGDIpC",
    "GrossSaving"
  )
)

# Answers to the Exercises ----------------------------------------------------

# |- Exercise 1. --------------------------------------------------------------
# Reshape the data such that the countries will be in columns
# and the years are in rows
gdi_wide_2 <- reshape(
  gdi_long_full,
  direction = "wide",
  idvar = "year",         # one row per year
  timevar = "country",    # one GDI column per country
  v.names = "GDI"
)

# |- Exercise 2. --------------------------------------------------------------
# reshape(reshape(gdi_long_full)) gives us a data.frame
# equivalent to gdi_long_full, even though we call the function twice
# with no extra arguments, just the data. What kind of sorcery is this?
# Why don't we need to provide at least the direction, or the varying
# arguments?

# reshape stores reshaped data.frame info for future use in its attributes:
attributes(gdi_long_full)[["reshapeLong"]]
attributes(reshape(gdi_long_full))[["reshapeWide"]]