tidyverse · romainfrancois · Nov 4, 2020 · Nov 5, 2020 · Nov 5, 2020 · Nov 5, 2020
diff --git a/R/rows.R b/R/rows.R
@@ -11,8 +11,7 @@
 #'
 #' * `rows_insert()` adds new rows (like `INSERT`); key values in `y` must
 #'    not occur in `x`.
-#' * `rows_update()` modifies existing rows (like `UPDATE`); key values in
-#'   `y` must occur in `x`.
+#' * `rows_update()` modifies existing rows (like `UPDATE`)
 #' * `rows_patch()` works like `rows_update()` but only overwrites `NA` values.
 #' * `rows_upsert()` inserts or updates depending on whether or not the
 #'   key value in `y` already exists in `x`.
@@ -82,15 +81,16 @@ rows_insert <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) {
 #' @export
 rows_insert.data.frame <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) {
   key <- rows_check_key(by, x, y)
+  rows_check_subset(x, y)
   y <- auto_copy(x, y, copy = copy)
   rows_df_in_place(in_place)
 
-  rows_check_key_df(x, key, df_name = "x")
-  rows_check_key_df(y, key, df_name = "y")
+  rows_check_key_names_df(x, key, df_name = "x")
+  rows_check_key_names_df(y, key, df_name = "y")
 
-  idx <- vctrs::vec_match(y[key], x[key])
-  bad <- which(!is.na(idx))
-  if (has_length(bad)) {
+  rows_check_key_unique_df(y, key, df_name = "y")
+
+  if (any(vctrs::vec_in(y[key], x[key]))) {
     abort("Attempting to insert duplicate rows.")
   }
 
@@ -109,19 +109,20 @@ rows_update <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) {
 #' @export
 rows_update.data.frame <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) {
   key <- rows_check_key(by, x, y)
+  rows_check_subset(x, y)
   y <- auto_copy(x, y, copy = copy)
   rows_df_in_place(in_place)
 
-  rows_check_key_df(x, key, df_name = "x")
-  rows_check_key_df(y, key, df_name = "y")
-  idx <- vctrs::vec_match(y[key], x[key])
+  rows_check_key_names_df(x, key, df_name = "x")
+  rows_check_key_names_df(y, key, df_name = "y")
 
-  bad <- which(is.na(idx))
-  if (has_length(bad)) {
-    abort("Attempting to update missing rows.")
-  }
+  rows_check_key_unique_df(y, key, df_name = "y")
 
-  x[idx, names(y)] <- y
+  idx <- vctrs::vec_match(x[key], y[key])
+  pos <- which(!is.na(idx))
+  idx <- idx[pos]
+
+  x[pos, names(y)] <- y[idx, ]
   x
 }
 
@@ -137,21 +138,20 @@ rows_patch <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) {
 #' @export
 rows_patch.data.frame <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) {
   key <- rows_check_key(by, x, y)
+  rows_check_subset(x, y)
   y <- auto_copy(x, y, copy = copy)
   rows_df_in_place(in_place)
 
-  rows_check_key_df(x, key, df_name = "x")
-  rows_check_key_df(y, key, df_name = "y")
-  idx <- vctrs::vec_match(y[key], x[key])
+  rows_check_key_names_df(x, key, df_name = "x")
+  rows_check_key_names_df(y, key, df_name = "y")
 
-  bad <- which(is.na(idx))
-  if (has_length(bad)) {
-    abort("Attempting to patch missing rows.")
-  }
+  rows_check_key_unique_df(y, key, df_name = "y")
 
-  new_data <- map2(x[idx, names(y)], y, coalesce)
+  idx <- vctrs::vec_match(x[key], y[key])
+  pos <- which(!is.na(idx))
+  idx <- idx[pos]
 
-  x[idx, names(y)] <- new_data
+  x[pos, names(y)] <- map2(x[pos, names(y)], y[idx, ], coalesce)
   x
 }
 
@@ -167,18 +167,30 @@ rows_upsert <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) {
 #' @export
 rows_upsert.data.frame <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) {
   key <- rows_check_key(by, x, y)
+  rows_check_subset(x, y)
   y <- auto_copy(x, y, copy = copy)
   rows_df_in_place(in_place)
 
-  rows_check_key_df(x, key, df_name = "x")
-  rows_check_key_df(y, key, df_name = "y")
-  idx <- vctrs::vec_match(y[key], x[key])
-  new <- is.na(idx)
-  idx_existing <- idx[!new]
-  idx_new <- idx[new]
+  rows_check_key_names_df(x, key, df_name = "x")
+  rows_check_key_names_df(y, key, df_name = "y")
+
+  rows_check_key_unique_df(y, key, df_name = "y")
+
+  # update
+  idx <- vctrs::vec_match(x[key], y[key])
+  pos <- which(!is.na(idx))
+  if (length(pos)) {
+    idx <- idx[pos]
+    x[pos, names(y)] <- y[idx, ]
+  }
+
+  # and insert
+  pos_insert <- which(!vctrs::vec_in(y[key], x[key]))
+  if (length(pos_insert)) {
+    x <- rows_bind(x, vec_slice(y, pos_insert))
+  }
+  x
 
-  x[idx_existing, names(y)] <- vec_slice(y, !new)
-  rows_bind(x, vec_slice(y, new))
 }
 
 #' @rdname rows
@@ -193,11 +205,16 @@ rows_delete <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) {
 #' @export
 rows_delete.data.frame <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) {
   key <- rows_check_key(by, x, y)
+  rows_check_subset(x, y)
+
   y <- auto_copy(x, y, copy = copy)
   rows_df_in_place(in_place)
 
-  rows_check_key_df(x, key, df_name = "x")
-  rows_check_key_df(y, key, df_name = "y")
+  rows_check_key_names_df(x, key, df_name = "x")
+  rows_check_key_unique_df(x, key, df_name = "x")
+
+  rows_check_key_names_df(y, key, df_name = "y")
+  rows_check_key_unique_df(y, key, df_name = "y")
 
   extra_cols <- setdiff(names(y), key)
   if (has_length(extra_cols)) {
@@ -206,14 +223,7 @@ rows_delete.data.frame <- function(x, y, by = NULL, ..., copy = FALSE, in_place
     )
   }
 
-  idx <- vctrs::vec_match(y[key], x[key])
-
-  bad <- which(is.na(idx))
-  if (has_length(bad)) {
-    abort("Attempting to delete missing rows.")
-  }
-
-  dplyr_row_slice(x, -idx)
+  x[which(!vctrs::vec_in(x[key], y[key])), ]
 }
 
 # helpers -----------------------------------------------------------------
@@ -234,19 +244,24 @@ rows_check_key <- function(by, x, y) {
     abort("`by` must be unnamed.")
   }
 
+  by
+}
+
+rows_check_subset <- function(x, y) {
   bad <- setdiff(colnames(y), colnames(x))
   if (has_length(bad)) {
     abort("All columns in `y` must exist in `x`.")
   }
-
-  by
 }
 
-rows_check_key_df <- function(df, by, df_name) {
+rows_check_key_names_df <- function(df, by, df_name) {
   y_miss <- setdiff(by, colnames(df))
   if (length(y_miss) > 0) {
     abort(glue("All `by` columns must exist in `{df_name}`."))
   }
+}
+
+rows_check_key_unique_df <- function(df, by, df_name) {
   if (vctrs::vec_duplicate_any(df[by])) {
     abort(glue("`{df_name}` key values are not unique."))
   }

diff --git a/tests/testthat/test-rows.R b/tests/testthat/test-rows.R
@@ -20,17 +20,29 @@ test_that("rows_update()", {
     tibble(a = 1:3, b = c("a", "z", "z"), c = data$c)
   )
 
-  expect_error(
-    rows_update(data, tibble(a = 2:3, b = "z"), by = c("a", "b")),
-    "update missing"
-  )
-
   expect_silent(
     expect_identical(
       rows_update(data, tibble(b = "z", a = 2:3), by = "a"),
       tibble(a = 1:3, b = c("a", "z", "z"), c = data$c)
     )
   )
+
+  expect_error(
+    rows_update(data, tibble(d = 1)),
+    "must exist"
+  )
+  expect_error(
+    rows_update(data, tibble(a = c(1, 1))),
+    "not unique"
+  )
+  expect_error(
+    rows_update(data, tibble(a = 1), by = "b"),
+    "must exist"
+  )
+  expect_error(
+    rows_update(data, tibble(a = 1), by = c(b = "a")),
+    "must be unnamed"
+  )
 })
 
 test_that("rows_patch()", {
@@ -41,17 +53,29 @@ test_that("rows_patch()", {
     tibble(a = 1:3, b = c("a", "b", "z"), c = data$c)
   )
 
-  expect_error(
-    rows_patch(data, tibble(a = 2:3, b = "z"), by = c("a", "b")),
-    "patch missing"
-  )
-
   expect_silent(
     expect_identical(
       rows_patch(data, tibble(b = "z", a = 2:3), by = "a"),
       tibble(a = 1:3, b = c("a", "b", "z"), c = data$c)
     )
   )
+
+  expect_error(
+    rows_patch(data, tibble(d = 1)),
+    "must exist"
+  )
+  expect_error(
+    rows_patch(data, tibble(a = c(1, 1))),
+    "not unique"
+  )
+  expect_error(
+    rows_patch(data, tibble(a = 1), by = "b"),
+    "must exist"
+  )
+  expect_error(
+    rows_patch(data, tibble(a = 1), by = c(b = "a")),
+    "must be unnamed"
+  )
 })
 
 test_that("rows_upsert()", {
@@ -61,6 +85,23 @@ test_that("rows_upsert()", {
     rows_upsert(data, tibble(a = 2:4, b = "z")),
     tibble(a = 1:4, b = c("a", "z", "z", "z"), c = c(data$c, NA))
   )
+
+  expect_error(
+    rows_upsert(data, tibble(d = 1)),
+    "must exist"
+  )
+  expect_error(
+    rows_upsert(data, tibble(a = c(1, 1))),
+    "not unique"
+  )
+  expect_error(
+    rows_upsert(data, tibble(a = 1), by = "b"),
+    "must exist"
+  )
+  expect_error(
+    rows_upsert(data, tibble(a = 1), by = c(b = "a")),
+    "must be unnamed"
+  )
 })
 
 test_that("rows_delete()", {
@@ -71,19 +112,14 @@ test_that("rows_delete()", {
     data[1, ]
   )
 
-  expect_error(
-    rows_delete(data, tibble(a = 2:4)),
-    "delete missing"
-  )
-
   expect_identical(
     rows_delete(data, tibble(a = 2:3, b = "b")),
     data[1, ]
   )
 
-  expect_error(
-    rows_delete(data, tibble(a = 2:3, b = "b"), by = c("a", "b")),
-    "delete missing"
+  expect_message(
+    rows_delete(data, tibble(a = 2:3, b = "b")),
+    "Ignoring extra columns"
   )
 })
 
@@ -97,12 +133,10 @@ verify_output("test-rows.txt", {
 
   "# Update"
   rows_update(data, tibble(a = 2:3, b = "z"))
-  rows_update(data, tibble(a = 2:3, b = "z"), by = c("a", "b"))
   rows_update(data, tibble(b = "z", a = 2:3), by = "a")
 
   "# Variants: patch and upsert"
   rows_patch(data, tibble(a = 2:3, b = "z"))
-  rows_patch(data, tibble(a = 2:3, b = "z"), by = c("a", "b"))
   rows_upsert(data, tibble(a = 2:4, b = "z"))
 
   "# Delete and truncate"
@@ -112,9 +146,7 @@ verify_output("test-rows.txt", {
   rows_delete(data, tibble(a = 2:3, b = "b"), by = c("a", "b"))
 
   "# Errors"
-  rows_insert(data[c(1, 1), ], tibble(a = 3))
   rows_insert(data, tibble(a = c(4, 4)))
-
   rows_insert(data, tibble(d = 4))
   rows_insert(data, tibble(a = 4, b = "z"), by = "e")
 })
diff --git a/tests/testthat/test-rows.txt b/tests/testthat/test-rows.txt
@@ -41,9 +41,6 @@ Message: Matching, by = "a"
 2     2 z       1.5
 3     3 z       2.5
 
-> rows_update(data, tibble(a = 2:3, b = "z"), by = c("a", "b"))
-Error: Attempting to update missing rows.
-
 > rows_update(data, tibble(b = "z", a = 2:3), by = "a")
 # A tibble: 3 x 3
       a b         c
@@ -66,9 +63,6 @@ Message: Matching, by = "a"
 2     2 b       1.5
 3     3 z       2.5
 
-> rows_patch(data, tibble(a = 2:3, b = "z"), by = c("a", "b"))
-Error: Attempting to patch missing rows.
-
 > rows_upsert(data, tibble(a = 2:4, b = "z"))
 Message: Matching, by = "a"
 
@@ -95,7 +89,10 @@ Message: Matching, by = "a"
 > rows_delete(data, tibble(a = 2:4))
 Message: Matching, by = "a"
 
-Error: Attempting to delete missing rows.
+# A tibble: 1 x 3
+      a b         c
+  <int> <chr> <dbl>
+1     1 a       0.5
 
 > rows_delete(data, tibble(a = 2:3, b = "b"))
 Message: Matching, by = "a"
@@ -108,17 +105,16 @@ Message: Ignoring extra columns: b
 1     1 a       0.5
 
 > rows_delete(data, tibble(a = 2:3, b = "b"), by = c("a", "b"))
-Error: Attempting to delete missing rows.
+# A tibble: 2 x 3
+      a b         c
+  <int> <chr> <dbl>
+1     1 a       0.5
+2     3 <NA>    2.5
 
 
 Errors
 ======
 
-> rows_insert(data[c(1, 1), ], tibble(a = 3))
-Message: Matching, by = "a"
-
-Error: `x` key values are not unique.
-
 > rows_insert(data, tibble(a = c(4, 4)))
 Message: Matching, by = "a"