Day 5: Hydrothermal Venture

Click for Problem Statement

Back to 2021


library(tidyverse)
library(here)
# for testing
# input = tibble(x = read_lines(
#   "0,9 -> 5,9
# 8,0 -> 0,8
# 9,4 -> 3,4
# 2,2 -> 2,1
# 7,0 -> 7,4
# 6,4 -> 2,0
# 0,9 -> 2,9
# 3,4 -> 1,4
# 0,0 -> 8,8
# 5,5 -> 8,2"))

path_data <- here("2021/inputs/05-input.txt")
input <- tibble(x = read_lines(path_data))

We’re drawing lines underwater to dodge some underwater dangers. Let’s go…

Setup

Before we can count the overlaps, we need to make the data all friendly. We can do a bunch of separations to split up the lines, finally getting a dataframe where each row is one line, start point and end point.

# just keep splitting, just keep splitting...
points <- input %>% 
  separate(x, into = c("p1", "p2"), sep = " -> ") %>% 
  separate(p1, into = c("x1", "y1"), sep = ",") %>% 
  separate(p2, into = c("x2", "y2"), sep = ",") %>% 
  mutate(across(where(is.character), as.numeric))

points
## # A tibble: 500 × 4
##       x1    y1    x2    y2
##    <dbl> <dbl> <dbl> <dbl>
##  1   409   872   409   963
##  2   149   412   281   280
##  3   435   281   435   362
##  4    52   208   969   208
##  5   427   265   884   265
##  6   779   741   779   738
##  7   949    41    13   977
##  8   145   690   145   180
##  9   513   665   513   869
## 10   405   174   405   612
## # … with 490 more rows

Part 1

For the first part, we only care about horizontal and vertical lines. So we can:

  • tackle each line type separately
  • filter out for specific line
  • In each case, one of the dimensions can be reduced to a single value, while the other has a min and max value
  • We can then find the difference between the two extremes
# only consider horizontal and vertical lines

lines_v <- points %>% 
  # filter and clean
  filter(x1 == x2) %>% 
  select(-x2) %>% 
  rename(x = x1) %>% 
  # by row, find extremes
  rowwise() %>% 
  mutate(ymin = min(y1,y2),
         ymax = max(y1,y2)) %>% 
  # you've done your job
  select(-c(y1, y2)) %>% 
  # find diff for interim points
  mutate(diff = ymax - ymin + 1) %>% 
  ungroup() %>% 
  # label each line
  mutate(line = row_number(), .before = x)

# same for horizontal, but swap a bunch of X's and Y's
lines_h <- points %>% 
  filter(y1 == y2) %>% 
  select(-y2) %>% 
  rename(y = y1) %>% 
  rowwise() %>% 
  mutate(xmin = min(x1,x2),
         xmax = max(x1,x2)) %>% 
  select(-c(x1, x2)) %>% 
  mutate(diff = xmax - xmin + 1) %>% 
  ungroup() %>% 
  mutate(line = row_number(), .before = y)

# example of horizontal lines
lines_h
## # A tibble: 170 × 5
##     line     y  xmin  xmax  diff
##    <int> <dbl> <dbl> <dbl> <dbl>
##  1     1   208    52   969   918
##  2     2   265   427   884   458
##  3     3   504    93   943   851
##  4     4   336    35   911   877
##  5     5   709   371   863   493
##  6     6   659    30   432   403
##  7     7   663   213   890   678
##  8     8   864   264   606   343
##  9     9   463   171   395   225
## 10    10   906    65   258   194
## # … with 160 more rows

Now we want to expand each line, making a dataframe that adds in all of the interim points. We can use uncount, which is awesome!

  • repeat the rows with uncount by the diff
  • for each line, find the increment for interim points
  • create the interim points and tidy up
# make the lines into sets of points

vert_line_points <- lines_v %>% 
  # uncount, super awesome!
  uncount(diff) %>% 
  # for each line, create increment
  group_by(line) %>% 
  mutate(increment = row_number() - 1) %>% 
  ungroup() %>% 
  # create the interim points
  mutate(y = ymin + increment) %>% 
  # tidy up
  mutate(type = "vertical") %>% 
  select(type, line, x, y)

# same for horizontal
horiz_line_points <- lines_h %>% 
  uncount(diff) %>% 
  group_by(line) %>% 
  mutate(increment = row_number() - 1) %>% 
  ungroup() %>% 
  mutate(x = xmin + increment) %>% 
  mutate(type = "horizontal") %>% 
  select(type, line, x, y)

# example of all vertical points
vert_line_points
## # A tibble: 52,184 × 4
##    type      line     x     y
##    <chr>    <int> <dbl> <dbl>
##  1 vertical     1   409   872
##  2 vertical     1   409   873
##  3 vertical     1   409   874
##  4 vertical     1   409   875
##  5 vertical     1   409   876
##  6 vertical     1   409   877
##  7 vertical     1   409   878
##  8 vertical     1   409   879
##  9 vertical     1   409   880
## 10 vertical     1   409   881
## # … with 52,174 more rows

Now we can find all of the overlaps! We can combine the two dataframes of points and look for x/y pairs that repeat. If any repeat two or more times, we count them.

# combine points
all_points <- bind_rows(horiz_line_points, vert_line_points)

# find large overlaps
overlaps_p1 <- all_points %>% 
  count(x,y, sort = TRUE, name = "overlaps") %>% 
  filter(overlaps >= 2)

overlaps_p1
## # A tibble: 7,436 × 3
##        x     y overlaps
##    <dbl> <dbl>    <int>
##  1    81   615        4
##  2    85   615        4
##  3   437   265        4
##  4   437   302        4
##  5   478   364        4
##  6   582   479        4
##  7   582   709        4
##  8   591   815        4
##  9    60   615        3
## 10    64   615        3
## # … with 7,426 more rows

We see we have 7436 rows. But now we need to include diagonals…

Part 2

Note to myself late at night:

Add in the diagonals. Need to take the slope into account when creating interim points. Have everything setup. Too tired, will try tomorrow when I’m more well rested. Zzz…

Tried to do this late at night, but was too tired to think straight. Was much clearer in the morning and I had everything mise en place for myself!

For the diagonal lines, we need to alter our method a bit to deal with each type of diagonal. We can make the dataframe of lines like before, but we will add in a slope term to distinguish each diagonal type.

lines_d <- points %>% 
  # the one's that aren't horizontal
  filter(!(x1 == x2 | y1 == y2)) %>% 
  # by row, find extremes and interim differences
  rowwise() %>% 
  mutate(xmin = min(x1,x2),
         xmax = max(x1,x2),
         ymin = min(y1,y2),
         ymax = max(y1,y2)) %>% 
  mutate(diff = xmax - xmin + 1) %>% 
  ungroup() %>% 
  # label lines
  mutate(line = row_number(), .before = x1) %>% 
  # add in the slope for orientation
  mutate(slope = (y2-y1) / (x2-x1)) %>% 
  # you've done your job
  select(-c(x1, x2, y1, y2))

lines_d
## # A tibble: 177 × 7
##     line  xmin  xmax  ymin  ymax  diff slope
##    <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1     1   149   281   280   412   133    -1
##  2     2    13   949    41   977   937    -1
##  3     3   230   570   468   808   341    -1
##  4     4    29   981    10   962   953     1
##  5     5   301   955   309   963   655    -1
##  6     6   587   652   362   427    66     1
##  7     7    96   756    68   728   661    -1
##  8     8   315   854   403   942   540     1
##  9     9   766   912   639   785   147     1
## 10    10   815   959   588   732   145     1
## # … with 167 more rows

Now when we add in the interim points, we just need to check the slope. From our perspective, the y value is always increasing, so we can add the increment like above. For the x value, we just check the slope and either start at the min value and add or the max value and subtract. And since the diagonals are always 45 degrees, we can increment/decrement by one each time. Tada!

# same as above, but check diagonal
diag_line_points <- lines_d %>% 
  uncount(diff) %>% 
  group_by(line) %>% 
  mutate(increment = row_number() - 1) %>% 
  ungroup() %>% 
  mutate(y = ymin + increment) %>% 
  # left or right diagonal
  mutate(x = ifelse(
    slope > 0,
    xmin + increment,
    xmax - increment
  )) %>% 
  # tidy up
  mutate(type = "diagonal") %>% 
  select(type, line, x, y)

diag_line_points
## # A tibble: 83,108 × 4
##    type      line     x     y
##    <chr>    <int> <dbl> <dbl>
##  1 diagonal     1   281   280
##  2 diagonal     1   280   281
##  3 diagonal     1   279   282
##  4 diagonal     1   278   283
##  5 diagonal     1   277   284
##  6 diagonal     1   276   285
##  7 diagonal     1   275   286
##  8 diagonal     1   274   287
##  9 diagonal     1   273   288
## 10 diagonal     1   272   289
## # … with 83,098 more rows

And then tack on the diagonals and check for large overlaps:

# with diagonals
all_points2 <- bind_rows(
  horiz_line_points, 
  vert_line_points,
  diag_line_points
)

# overlaps with diagonals
overlaps_p2 <- all_points2 %>% 
  count(x,y, sort = TRUE, name = "overlaps") %>% 
  filter(overlaps >= 2)

overlaps_p2
## # A tibble: 21,104 × 3
##        x     y overlaps
##    <dbl> <dbl>    <int>
##  1   520   479        6
##  2   724   275        6
##  3   336   663        5
##  4   340   659        5
##  5   348   651        5
##  6   366   633        5
##  7   384   615        5
##  8   398   504        5
##  9   398   601        5
## 10   405   594        5
## # … with 21,094 more rows

We see we have 21104 rows.

All Done!

And there we go! All overlaps accounted for. Hope you learned something!

How would you do it? What’s your shortcut? Please share!

Till next time!