---
title: "Algorithmic Complexity"
author: "JJB + Course"
date: "11/30/2018"
output:
   html_document:
     toc: true
     toc_float:
       collapse: false
---

# Empirical Run Time

## Example: Elementary Benchmark

```{r}
out = system.time({Sys.sleep(1)})
out

out[3]
```

What does this say about elapsed vs. relative + system?

## Example: Benchmarking Data Structures

```{r, cache = TRUE}
# Set seed for reproducibility
set.seed(1337)

# Construct large matrix object
matrix.op = matrix(rnorm(10000*100), 10000, 100)

# Convert matrix object to data.frame
dataframe.op = as.data.frame(matrix.op)

library("rbenchmark")
bench_ops = benchmark(mat.op = apply(matrix.op, 2, sd), 
                      df.op  = apply(dataframe.op, 2, sd))
bench_ops
```


## Example: Benchmarking } vs. (

```{r, cache = TRUE}
# install.packages("Rcpp")

# Different R implementations
f = function(n, x = 1) for (i in 1:n) x = 1/(1+x)
g = function(n, x = 1) for (i in 1:n) x = (1/(1+x))
h = function(n, x = 1) for (i in 1:n) x = (1+x)^(-1)
j = function(n, x = 1) for (i in 1:n) x = {1/{1+x} }
k = function(n, x = 1) for (i in 1:n) x = 1/{1+x}
Rcpp::cppFunction(code = 'int d(int n, double x = 1.0) {
                      for (int i = 0; i < n; ++i) x = 1/(1+x);
                      return x;
                  }')

library("rbenchmark")
N = 1e6                                        # Number of Times to Run Loop
bench_curly =                                  # Test Approaches
    benchmark(f(N, 1), g(N, 1), h(N, 1), j(N, 1), k(N, 1), d(N, 1),
                          order = "relative",  # Fastest first (lower is better)
                          replications = 20)   # Run each function x times
bench_curly
```

## Example: Benchmark of the Preallocation of space

```{r, cache = TRUE}
append_elements = function(n) { 
  vec = numeric(0)                     # Vector of length 0
  for(i in seq_len(n)) vec = c(vec, i) # Append results
  vec
}
preallocate_elements = function(n) {
  vec = rep(NA, n)                    # Vector of length n
  for(i in seq_len(n)) vec[i] = i     # Access and update
  vec
}
vectorized_element = function(n) { seq_len(n) }

n = 10000
bench_growth = benchmark(append      = append_elements(n),
                         preallocate = preallocate_elements(n),
                         vectorized  = vectorized_element(n))
bench_growth
```

## Example: Microbenchmarking Code

```{r growth_micro, cache = TRUE}
# install.packages("microbenchmark")
library("microbenchmark")
n = 10000
microbench_growth = 
  microbenchmark(
    append       = append_elements(n),
    preallocate  = preallocate_elements(n),
    vectorized   = vectorized_element(n)
    )

microbench_growth
```

```{r cache = TRUE, dependson="growth_micro"}
library("ggplot2")
autoplot(microbench_growth)
```


# Theoretical Run Time

## Example: Big Oh Runtimes

```{r, echo = FALSE, message = FALSE}
# install.packages(c("ggplot2", "reshape2"))
library("ggplot2");library("reshape2")
N = 1:50    # Calculate some different N sample sizes
obs = length(N) # Determine length

# Create a wide data set
d = data.frame(Constant = rep(1,obs),
               SquareRoot = sqrt(N),
               Logarithmic = log(N),
               Quadlogarithmic = log(N)^2,
               LogLinear = N*log(N),
               Quadratic = N^2,
               Exponential = 2^N,
               Factorial = factorial(N),
               Size = N)

# Wide to Long
d2 = melt(d, id.vars = "Size", variable.name = "Method", value.name = "RunTime")
d2 = d2[is.finite(d2$RunTime) & d2$RunTime < 1e4,]
ggplot(d2) +
  geom_line(aes(x = Size, y = RunTime, color = Method)) +
  ggtitle("Run Time vs. Sample Size") + xlab("Sample Size") + ylab("Run Time") +
  theme(plot.title = element_text(size=17),
        legend.title=element_text(size=17),
        axis.text = element_text(size=15), # Axis labels
        axis.title = element_text(size=17), # Axis names 
        legend.text = element_text(size=15)) +
  ylim(0, 150)
```


```{r}
n = 1:1000
data.frame(n = n ,
           nlogn = n*log(n))
```