Functions

Mini-Lecture 8

Ben Baumer

Smith College

2024-09-26

Slack review

Whither map()?

If map() was created as the easier alternative to all the apply()s, why are there so many variations of it?

  • The variations control the type of the return value (e.g., map_dbl() returns a double vector, map_chr() a character vector)
  • map() always returns a list
  • Stay tuned for Ch. 9

Who runs R?

LOL

“Run the following code in your head, then confirm the result by running the code.”

–Hadley Wickham (p. 116)

Non-exported functions

library(tidyverse)
body(tibble)
{
    xs <- quos(...)
    tibble_quos(xs, .rows, .name_repair)
}
body(tibble::tibble_quos)
Error: 'tibble_quos' is not an exported object from 'namespace:tibble'
body(tibble:::tibble_quos)
{
    col_names <- given_col_names <- names2(xs)
    empty_col_names <- which(col_names == "")
    col_names[empty_col_names] <- names(quos_auto_name(xs[empty_col_names]))
    lengths <- rep_along(xs, 0L)
    output <- rep_along(xs, list(NULL))
    env <- new_environment()
    mask <- new_data_mask_with_data(env)
    first_size <- .rows
    for (j in seq_along(xs)) {
        res <- eval_tidy(xs[[j]], mask)
        if (!is.null(res)) {
            if (single_row) {
                if (vec_is(res)) {
                  if (vec_size(res) != 1) {
                    abort_tibble_row_size_one(j, given_col_names[[j]], 
                      vec_size(res))
                  }
                }
                else {
                  res <- list(res)
                }
            }
            else {
                res <- check_valid_col(res, col_names[[j]], j, 
                  call)
                lengths[[j]] <- current_size <- vec_size(res)
                if (is.null(first_size)) {
                  first_size <- current_size
                }
                else if (first_size == 1L && current_size != 
                  1L) {
                  idx_to_fix <- seq2(1L, j - 1L)
                  output[idx_to_fix] <- fixed_output <- map(output[idx_to_fix], 
                    vec_recycle, current_size)
                  map2(output[idx_to_fix], col_names[idx_to_fix], 
                    add_to_env2, env = env)
                  first_size <- current_size
                }
                else {
                  res <- vectbl_recycle_rows(res, first_size, 
                    j, given_col_names[[j]], call)
                }
            }
            output[[j]] <- res
            col_names[[j]] <- add_to_env2(res, given_col_names[[j]], 
                col_names[[j]], env)
        }
    }
    names(output) <- col_names
    is_null <- map_lgl(output, is.null)
    output <- output[!is_null]
    output <- splice_dfs(output)
    output <- set_repaired_names(output, repair_hint = TRUE, 
        .name_repair = .name_repair, call = call)
    new_tibble(output, nrow = first_size %||% 0L)
}

Name masking

  • R looks for the values of names in a sequence of environments
y <- -3:3

# Thin wrapper that calls sd() on its argument.
# `sd` is not defined inside the function, so R looks the name up in the
# enclosing environments when the function is called.
dummy_func <- function(x) {
  sd(x)
}
dummy_func(y)
[1] 2.160247
sd(y)
[1] 2.160247

A fresh start

  • R doesn’t remember what happened inside the execution of a function: names created inside a call disappear when the call returns
y <- -3:3

# Defines its own local `sd` and then calls it, so the result is 1.96 * x
# rather than a standard deviation. The local definition exists only while
# the function is executing.
dummy_func <- function(x) {
  # mask stats::sd() with a local nonsense function!
  sd <- function(x) 1.96 * x
  sd(x)
}
dummy_func(y)
[1] -5.88 -3.92 -1.96  0.00  1.96  3.92  5.88
# the real sd() still works (outside the function)
sd(y)
[1] 2.160247

Dynamic lookup

  • A function can behave differently (or stop working) depending on what is defined in its environment at the time it is called
y <- -3:3
sd <- function(x) 1.96 * x

# Same body as a plain sd() wrapper, but `sd` is resolved at call time:
# because a global `sd` has been defined before the call, that definition is
# found first and is used instead of stats::sd().
dummy_func <- function(x) {
  sd(x)
}
dummy_func(y)
[1] -5.88 -3.92 -1.96  0.00  1.96  3.92  5.88
# stats::sd() is masked by our nonsense function
sd(y)
[1] -5.88 -3.92 -1.96  0.00  1.96  3.92  5.88

Always safer to be explicit

y <- -3:3
sd <- function(x) 1.96 * x

# Defines a local `sd` that squares its input and calls it. Inside the
# function this local definition takes precedence over both the global `sd`
# and stats::sd(); only a namespaced call such as stats::sd() is unambiguous.
dummy_func <- function(x) {
  # local definition: found before any `sd` outside the function
  sd <- function(x) x^2
  sd(x)
}
dummy_func(y)
[1] 9 4 1 0 1 4 9
sd(y)
[1] -5.88 -3.92 -1.96  0.00  1.96  3.92  5.88
stats::sd(y)
[1] 2.160247

Now

Work on