library(tidyverse)
library(here)
# for testing
# input = tibble(x = read_lines(
# "0,9 -> 5,9
# 8,0 -> 0,8
# 9,4 -> 3,4
# 2,2 -> 2,1
# 7,0 -> 7,4
# 6,4 -> 2,0
# 0,9 -> 2,9
# 3,4 -> 1,4
# 0,0 -> 8,8
# 5,5 -> 8,2"))
path_data <- here("2021/inputs/05-input.txt")
input <- tibble(x = read_lines(path_data))
We’re drawing lines underwater to dodge some underwater dangers. Let’s go…
Before we can count the overlaps, we need to make the data all friendly. We can do a bunch of separations to split up the lines, finally getting a dataframe where each row is one line, start point and end point.
# just keep splitting, just keep splitting...
points <- input %>%
separate(x, into = c("p1", "p2"), sep = " -> ") %>%
separate(p1, into = c("x1", "y1"), sep = ",") %>%
separate(p2, into = c("x2", "y2"), sep = ",") %>%
mutate(across(where(is.character), as.numeric))
points
## # A tibble: 500 × 4
## x1 y1 x2 y2
## <dbl> <dbl> <dbl> <dbl>
## 1 409 872 409 963
## 2 149 412 281 280
## 3 435 281 435 362
## 4 52 208 969 208
## 5 427 265 884 265
## 6 779 741 779 738
## 7 949 41 13 977
## 8 145 690 145 180
## 9 513 665 513 869
## 10 405 174 405 612
## # … with 490 more rows
For the first part, we only care about horizontal and vertical lines. So we can:
# only consider horizontal and vertical lines
lines_v <- points %>%
# filter and clean
filter(x1 == x2) %>%
select(-x2) %>%
rename(x = x1) %>%
# by row, find extremes
rowwise() %>%
mutate(ymin = min(y1,y2),
ymax = max(y1,y2)) %>%
# you've done your job
select(-c(y1, y2)) %>%
# find diff for interim points
mutate(diff = ymax - ymin + 1) %>%
ungroup() %>%
# label each line
mutate(line = row_number(), .before = x)
# same for horizontal, but swap a bunch of X's and Y's
lines_h <- points %>%
filter(y1 == y2) %>%
select(-y2) %>%
rename(y = y1) %>%
rowwise() %>%
mutate(xmin = min(x1,x2),
xmax = max(x1,x2)) %>%
select(-c(x1, x2)) %>%
mutate(diff = xmax - xmin + 1) %>%
ungroup() %>%
mutate(line = row_number(), .before = y)
# example of horizontal lines
lines_h
## # A tibble: 170 × 5
## line y xmin xmax diff
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 208 52 969 918
## 2 2 265 427 884 458
## 3 3 504 93 943 851
## 4 4 336 35 911 877
## 5 5 709 371 863 493
## 6 6 659 30 432 403
## 7 7 663 213 890 678
## 8 8 864 264 606 343
## 9 9 463 171 395 225
## 10 10 906 65 258 194
## # … with 160 more rows
Now we want to expand each line, making a dataframe that adds in all of the interim points. We can use uncount
, which is awesome!
diff
# make the lines into sets of points
vert_line_points <- lines_v %>%
# uncount, super awesome!
uncount(diff) %>%
# for each line, create increment
group_by(line) %>%
mutate(increment = row_number() - 1) %>%
ungroup() %>%
# create the interim points
mutate(y = ymin + increment) %>%
# tidy up
mutate(type = "vertical") %>%
select(type, line, x, y)
# same for horizontal
horiz_line_points <- lines_h %>%
uncount(diff) %>%
group_by(line) %>%
mutate(increment = row_number() - 1) %>%
ungroup() %>%
mutate(x = xmin + increment) %>%
mutate(type = "horizontal") %>%
select(type, line, x, y)
# example of all vertical points
vert_line_points
## # A tibble: 52,184 × 4
## type line x y
## <chr> <int> <dbl> <dbl>
## 1 vertical 1 409 872
## 2 vertical 1 409 873
## 3 vertical 1 409 874
## 4 vertical 1 409 875
## 5 vertical 1 409 876
## 6 vertical 1 409 877
## 7 vertical 1 409 878
## 8 vertical 1 409 879
## 9 vertical 1 409 880
## 10 vertical 1 409 881
## # … with 52,174 more rows
Now we can find all of the overlaps! We can combine the two dataframes of points and look for x/y pairs that repeat. If any repeat two or more times, we count them.
# combine points
all_points <- bind_rows(horiz_line_points, vert_line_points)
# find large overlaps
overlaps_p1 <- all_points %>%
count(x,y, sort = TRUE, name = "overlaps") %>%
filter(overlaps >= 2)
overlaps_p1
## # A tibble: 7,436 × 3
## x y overlaps
## <dbl> <dbl> <int>
## 1 81 615 4
## 2 85 615 4
## 3 437 265 4
## 4 437 302 4
## 5 478 364 4
## 6 582 479 4
## 7 582 709 4
## 8 591 815 4
## 9 60 615 3
## 10 64 615 3
## # … with 7,426 more rows
We see we have 7436 rows. But now we need to include diagonals…
Note to myself late at night:
Add in the diagonals. Need to take the slope into account when creating interim points. Have everything setup. Too tired, will try tomorrow when I’m more well rested. Zzz…
Tried to do this late at night, but was too tired to think straight. Was much clearer in the morning and I had everything mise en place for myself!
For the diagonal lines, we need to alter our method a bit to deal with each type of diagonal. We can make the dataframe of lines like before, but we will add in a slope term to distinguish each diagonal type.
lines_d <- points %>%
# the one's that aren't horizontal
filter(!(x1 == x2 | y1 == y2)) %>%
# by row, find extremes and interim differences
rowwise() %>%
mutate(xmin = min(x1,x2),
xmax = max(x1,x2),
ymin = min(y1,y2),
ymax = max(y1,y2)) %>%
mutate(diff = xmax - xmin + 1) %>%
ungroup() %>%
# label lines
mutate(line = row_number(), .before = x1) %>%
# add in the slope for orientation
mutate(slope = (y2-y1) / (x2-x1)) %>%
# you've done your job
select(-c(x1, x2, y1, y2))
lines_d
## # A tibble: 177 × 7
## line xmin xmax ymin ymax diff slope
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 149 281 280 412 133 -1
## 2 2 13 949 41 977 937 -1
## 3 3 230 570 468 808 341 -1
## 4 4 29 981 10 962 953 1
## 5 5 301 955 309 963 655 -1
## 6 6 587 652 362 427 66 1
## 7 7 96 756 68 728 661 -1
## 8 8 315 854 403 942 540 1
## 9 9 766 912 639 785 147 1
## 10 10 815 959 588 732 145 1
## # … with 167 more rows
Now when we add in the interim points, we just need to check the slope. From our perspective, the y value is always increasing, so we can add the increment like above. For the x value, we just check the slope and either start at the min value and add or the max value and subtract. And since the diagonals are always 45 degrees, we can increment/decrement by one each time. Tada!
# same as above, but check diagonal
diag_line_points <- lines_d %>%
uncount(diff) %>%
group_by(line) %>%
mutate(increment = row_number() - 1) %>%
ungroup() %>%
mutate(y = ymin + increment) %>%
# left or right diagonal
mutate(x = ifelse(
slope > 0,
xmin + increment,
xmax - increment
)) %>%
# tidy up
mutate(type = "diagonal") %>%
select(type, line, x, y)
diag_line_points
## # A tibble: 83,108 × 4
## type line x y
## <chr> <int> <dbl> <dbl>
## 1 diagonal 1 281 280
## 2 diagonal 1 280 281
## 3 diagonal 1 279 282
## 4 diagonal 1 278 283
## 5 diagonal 1 277 284
## 6 diagonal 1 276 285
## 7 diagonal 1 275 286
## 8 diagonal 1 274 287
## 9 diagonal 1 273 288
## 10 diagonal 1 272 289
## # … with 83,098 more rows
And then tack on the diagonals and check for large overlaps:
# with diagonals
all_points2 <- bind_rows(
horiz_line_points,
vert_line_points,
diag_line_points
)
# overlaps with diagonals
overlaps_p2 <- all_points2 %>%
count(x,y, sort = TRUE, name = "overlaps") %>%
filter(overlaps >= 2)
overlaps_p2
## # A tibble: 21,104 × 3
## x y overlaps
## <dbl> <dbl> <int>
## 1 520 479 6
## 2 724 275 6
## 3 336 663 5
## 4 340 659 5
## 5 348 651 5
## 6 366 633 5
## 7 384 615 5
## 8 398 504 5
## 9 398 601 5
## 10 405 594 5
## # … with 21,094 more rows
We see we have 21104 rows.
And there we go! All overlaps accounted for. Hope you learned something!
How would you do it? What’s your shortcut? Please share!
Till next time!