# Data Science Data Understanding and Preparation Part 8:
# Pivoting and Transposing Data

# Connecting and reading the data
# Reads dbo.SalesGeoYear through the ODBC data source "AWDW" into SGY.
# NOTE(review): the credentials are hard-coded in the script; consider
# supplying them from outside the source file.
library(RODBC)
con <- odbcConnect("AWDW", uid = "RUser", pwd = "Pa$$w0rd")
# stringsAsFactors has to be handed to sqlQuery() itself (it is forwarded to
# sqlGetResults()). Passing it to as.data.frame() has no effect on an object
# that is already a data frame, so the text columns would stay character and
# the factor-based formulas used later in this script would not work.
SGY <- sqlQuery(con,
   "SELECT Country, State, CountryState, CYear, Sales
    FROM dbo.SalesGeoYear;",
    stringsAsFactors = TRUE)
close(con)
# On failure sqlQuery() returns the error messages instead of a data frame;
# stop here rather than continue with a malformed SGY.
if (!is.data.frame(SGY)) {
  stop("Reading dbo.SalesGeoYear failed: ",
       paste(SGY, collapse = "; "), call. = FALSE)
}
View(SGY)


# Transposing data
# Plain t() applied to the whole data frame
t(SGY)

# Labelling the rows first: each row is named "<CountryState>, <CYear>",
# so after transposing those labels become the column headers
SNA <- SGY[c("Country", "State", "Sales")]
row.names(SNA) <- paste(SGY$CountryState, SGY$CYear, sep = ", ")
t(SNA)

# Pivoting with base R
# Number of rows for every Country (rows) / CYear (columns) combination
table(SGY[["Country"]], SGY[["CYear"]])
# Sales totals, first per Country and then per CountryState, by CYear
xtabs(formula = Sales ~ Country + CYear, data = SGY)
xtabs(formula = Sales ~ CountryState + CYear, data = SGY)
# tapply() takes the aggregate function as an argument:
# length gives the counts, sum gives the totals
with(SGY, tapply(Sales, list(Country, CYear), FUN = length))
with(SGY, tapply(Sales, list(Country, CYear), FUN = sum))

# RevoScaleR
library(RevoScaleR)
# Cube of Sales by Country and CYear; CYear is wrapped in F() inside the
# formula and means = FALSE is requested
rxCube(formula = Sales ~ Country:F(CYear), data = SGY, means = FALSE)
# Alternative: build a cross-tabulation object once and print it several ways
cTabs <- rxCrossTabs(formula = Sales ~ Country:F(CYear), data = SGY)
# Same object, three different outputs
print(cTabs, output = "counts")
print(cTabs, output = "sums")
print(cTabs, output = "means")

# Histogram of Country, conditioned on F(CYear)
rxHistogram(formula = ~ Country | F(CYear), data = SGY)


# Package reshape
# install.packages("reshape")
library(reshape)
# Country in rows, CYear in columns, each cell holding the sum of Sales
cast(data = SGY, formula = Country ~ CYear, fun.aggregate = sum, value = "Sales")

# Package tidyr - transpose
# install.packages("tidyr")
library(tidyr)
# One row per CountryState, one column per CYear, cells holding Sales.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps the
# year columns in sorted order and as.data.frame() keeps the result a plain
# data frame, as spread() returned for data frame input.
# NOTE(review): row order may differ from spread(); sort explicitly if a
# particular order is required.
as.data.frame(
  pivot_wider(
    data = SGY[c("CountryState", "CYear", "Sales")],
    names_from = CYear,
    values_from = Sales,
    names_sort = TRUE
  )
)

# Package tidyr + dplyr - pivot
# install.packages("dplyr")
library(dplyr)
# Sum Sales per Country and CYear, then spread the years across columns.
# .groups = "drop" returns an ungrouped result (otherwise it stays grouped by
# Country and summarise() emits a grouping message); pivot_wider() replaces
# the superseded spread(), with names_sort = TRUE keeping the year columns in
# sorted order.
SGY[c("Country", "CYear", "Sales")] %>%
  group_by(Country, CYear) %>%
  summarise(Sales = sum(Sales), .groups = "drop") %>%
  pivot_wider(names_from = CYear, values_from = Sales, names_sort = TRUE)

