library(move2)
library(dplyr)
library(units)
library(sf)
Download example data and select columns to reduce printing.
# Download Movebank study 2911040, requesting only the listed event
# attributes, then keep three track-level columns so the printed object
# stays compact.
galapagos_albatrosses <- movebank_download_study(2911040,
  attributes = c(
    "ground_speed",
    "heading",
    "height_above_ellipsoid",
    "eobs_temperature",
    "individual_local_identifier"
  )
) %>%
  select_track_data(study_site, weight, animal_life_stage)
# Print only the records that carry a location (non-empty geometry).
galapagos_albatrosses %>%
  filter(!st_is_empty(.))
#> A <move2> object containing 28 tracks consisting of:
#> Simple feature collection with 16028 features and 6 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 16,028 × 7
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 16,023 more rows
#> # ℹ 3 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>
#> First 5 track features:
#> # A tibble: 28 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 23 more rows
First location each 6 hour window
# Drop records without a location, then thin the data to one record per
# 6 hour window using the default selection criterion.
galapagos_albatrosses %>%
  filter(!st_is_empty(.)) %>%
  mt_filter_per_interval(unit = "6 hours")
#> A <move2> object containing 28 tracks consisting of:
#> Simple feature collection with 4109 features and 6 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3296 ymin: -12.79464 xmax: -77.52837 ymax: 0.1814998
#> Geodetic CRS: WGS 84
#> # A tibble: 4,109 × 7
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0.2 9.83 24.8 18
#> 3 0.32 334. 14.8 15
#> 4 0.08 330. 10.4 11
#> 5 0.1 10.5 8.6 12
#> # ℹ 4,104 more rows
#> # ℹ 3 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>
#> First 5 track features:
#> # A tibble: 28 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 23 more rows
Random location each day
# Drop records without a location, then keep one randomly chosen record
# per day.
galapagos_albatrosses %>%
  filter(!st_is_empty(.)) %>%
  mt_filter_per_interval(criterion = "random", unit = "days")
#> A <move2> object containing 28 tracks consisting of:
#> Simple feature collection with 1057 features and 6 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.0672 ymin: -12.35742 xmax: -77.56545 ymax: 0.1684183
#> Geodetic CRS: WGS 84
#> # A tibble: 1,057 × 7
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.2 9.83 24.8 18
#> 2 0.04 343. 15.3 13
#> 3 0.85 309. -2.9 11
#> 4 0.3 353. 0.5 12
#> 5 7.41 136. -4.4 12
#> # ℹ 1,052 more rows
#> # ℹ 3 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>
#> First 5 track features:
#> # A tibble: 28 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 23 more rows
When dealing with trajectories, duplicated records occur frequently. There are many reasons these can appear, ranging from the way in which data is recorded to duplicated data transmissions and uploads. These data are often stored, but for analysis they need to be removed. A simple definition of a duplicate record would be an observation of the same individual at exactly the same time. However, many tracking devices record additional information such as acceleration. These records frequently have the same time as location records, meaning that not all records with duplicated timestamps can be deleted directly.
Duplicated records can be found in the following way:
# Group records by timestamp and track; any group that does not contain
# exactly one record is a set of duplicates. Sort the result by time.
galapagos_albatrosses %>%
  group_by(mt_time(), mt_track_id()) %>%
  filter(n() != 1) %>%
  arrange(mt_time())
#> A <move2> object containing 28 tracks consisting of:
#> Simple feature collection with 8092 features and 8 fields (with 4066 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.24518 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 8,092 × 9
#> # Groups: mt_time(), mt_track_id() [4,046]
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> [m/s] [°] [m] [°C]
#> 1 0.3 14.4 7.9 27
#> 2 NA NA NA NA
#> 3 0.55 330. 1.6 24
#> 4 NA NA NA NA
#> 5 0.15 53.1 11.4 27
#> # ℹ 8,087 more rows
#> # ℹ 5 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>, `mt_time()` <dttm>, `mt_track_id()` <fct>
#> First 5 track features:
#> # A tibble: 28 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 23 more rows
If you are only interested in finding duplicated records where there is a location, this can be done as follows (in this case there are none):
# Same duplicate search as on the full data, but restricted to records
# that have a location (empty geometries are removed first).
galapagos_albatrosses %>%
  filter(!st_is_empty(.)) %>%
  group_by(mt_time(), mt_track_id()) %>%
  filter(n() != 1) %>%
  arrange(mt_time())
#> A <move2> object containing 0 tracks consisting of:
#> Simple feature collection with 0 features and 8 fields
#> Bounding box: xmin: NA ymin: NA xmax: NA ymax: NA
#> Geodetic CRS: WGS 84
#> # A tibble: 0 × 9
#> # Groups: mt_time(), mt_track_id() [0]
#> # ℹ 9 variables: ground_speed [m/s], heading [°], height_above_ellipsoid [m],
#> # eobs_temperature [°C], individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <GEOMETRY [°]>, mt_time() <dttm>, mt_track_id() <fct>
#> Track features:
#> # A tibble: 0 × 4
#> # ℹ 4 variables: study_site <chr>, weight [g], animal_life_stage <fct>,
#> # individual_local_identifier <fct>
The package also has some built-in functions for filtering unique records. Several strategies for omitting duplicated records are built in.
First it is possible to omit all records that are a subset of other records, i.e. records that got added later with more information are retained. This happens with some tracking devices if data gets directly downloaded from the tag. As no information is lost this is the default strategy.
# Simulate two tracks and repeat rows 1 to 4 twice so that every record
# has a duplicate.
simulated_data <- mt_sim_brownian_motion(1:2)[rep(1:4, 2), ]
# Attach a temperature column: some duplicates match exactly, one fills
# in an NA, and one carries a conflicting value.
simulated_data$temperature <- c(1:3, NA, 1:2, 7:8)
simulated_data
#> A <move2> object containing 2 tracks consisting of:
#> Simple feature collection with 8 features and 3 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -2.023305 ymin: -1.062563 xmax: 0.1393898 ymax: 0
#> CRS: NA
#> First 5 features:
#> time track geometry temperature
#> 1 1 1 POINT (0 0) 1
#> 2 2 1 POINT (-2.023305 -0.7934728) 2
#> 3 1 2 POINT (0 0) 3
#> 4 2 2 POINT (0.1393898 -1.062563) NA
#> 1.1 1 1 POINT (0 0) 1
#> Track features:
#> track
#> 1 1
#> 2 2
# Default strategy: drop records that are subsets of other records.
simulated_data %>% mt_filter_unique()
#> Warning: After removing all records that are subsets of other records there are
#> still remaining duplicates.
#> A <move2> object containing 2 tracks consisting of:
#> Simple feature collection with 5 features and 3 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -2.023305 ymin: -1.062563 xmax: 0.1393898 ymax: 0
#> CRS: NA
#> time track geometry temperature
#> 1 1 1 POINT (0 0) 1
#> 2 2 1 POINT (-2.023305 -0.7934728) 2
#> 3 1 2 POINT (0 0) 3
#> 3.1 1 2 POINT (0 0) 7
#> 4.1 2 2 POINT (0.1393898 -1.062563) 8
#> Track features:
#> track
#> 1 1
#> 2 2
This strategy, however, does not guarantee that no duplicates are left, as two records might not be subsets of each other.
An alternative is to take a random record from each set of duplicates. This is not advised for formal analysis but might help for a quick inspection of the data. It is also a lot quicker than inspecting subsets. However, care needs to be taken: the example below, for instance, results in empty points being retained at the cost of informative locations.
# "sample" strategy: keep one randomly chosen record per set of duplicates.
galapagos_albatrosses %>% mt_filter_unique("sample")
#> A <move2> object containing 28 tracks consisting of:
#> Simple feature collection with 110883 features and 6 fields (with 96847 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1814998
#> Geodetic CRS: WGS 84
#> # A tibble: 110,883 × 7
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 110,878 more rows
#> # ℹ 3 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>
#> First 5 track features:
#> # A tibble: 28 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 23 more rows
# n locations: retain only tracks that contain more than 500 records.
# The result stays grouped by track id.
galapagos_albatrosses %>%
  group_by(mt_track_id()) %>%
  filter(n() > 500)
#> A <move2> object containing 20 tracks consisting of:
#> Simple feature collection with 112639 features and 7 fields (with 96941 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 112,639 × 8
#> # Groups: mt_track_id() [20]
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 112,634 more rows
#> # ℹ 4 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>, `mt_track_id()` <fct>
#> First 5 track features:
#> # A tibble: 20 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 15 more rows
# Retain only tracks whose time span (last minus first timestamp) is
# longer than one week; the difftime is converted to units for comparison.
galapagos_albatrosses %>%
  group_by(mt_track_id()) %>%
  filter(as_units(diff(range(mt_time()))) > set_units(1, "week"))
#> A <move2> object containing 19 tracks consisting of:
#> Simple feature collection with 111971 features and 7 fields (with 96369 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 111,971 × 8
#> # Groups: mt_track_id() [19]
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 111,966 more rows
#> # ℹ 4 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>, `mt_track_id()` <fct>
#> First 5 track features:
#> # A tibble: 19 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 14 more rows
# Rectangular area of interest, built from a bounding box in EPSG:4326
# and converted to a geometry column.
foraging_area <- st_as_sfc(st_bbox(c(
  xmin = -82, xmax = -77,
  ymax = -0.5, ymin = -13
), crs = 4326))
library(ggplot2, quietly = TRUE)
# Map the coastline, the foraging area (red) and all non-empty locations
# coloured by individual, drawn in an azimuthal equidistant projection
# with kilometre units.
ggplot() +
  geom_sf(data = rnaturalearth::ne_coastline(returnclass = "sf", 50)) +
  theme_linedraw() +
  geom_sf(data = foraging_area, fill = "red", alpha = .3, color = "red") +
  geom_sf(
    data = galapagos_albatrosses %>% filter(!st_is_empty(.)),
    aes(color = `individual_local_identifier`)
  ) +
  coord_sf(
    crs = sf::st_crs("+proj=aeqd +lon_0=-83 +lat_0=-6 +units=km"),
    xlim = c(-1000, 600), ylim = c(-800, 700)
  )
#> The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
#> which was just loaded, will retire in October 2023.
#> Please refer to R-spatial evolution reports for details, especially
#> https://r-spatial.org/r/2023/05/15/evolution4.html.
#> It may be desirable to make the sf package available;
#> package maintainers should consider adding sf to Suggests:.
#> The sp package is now running under evolution status 2
#> (status 2 uses the sf package in place of rgdal)
# Filter to tracks making it at least once to the foraging area
# Per track, test every location against the foraging area and keep the
# whole track if at least one location intersects it.
galapagos_albatrosses %>%
  group_by(mt_track_id()) %>%
  filter(any(st_intersects(geometry, foraging_area, sparse = FALSE)))
#> A <move2> object containing 15 tracks consisting of:
#> Simple feature collection with 106151 features and 7 fields (with 91303 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 106,151 × 8
#> # Groups: mt_track_id() [15]
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 106,146 more rows
#> # ℹ 4 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>, `mt_track_id()` <fct>
#> First 5 track features:
#> # A tibble: 15 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 10 more rows
# Keep only tracks whose track-level attribute `study_site` equals
# "Punta Suarez".
galapagos_albatrosses %>%
  filter_track_data(study_site == "Punta Suarez")
#> A <move2> object containing 12 tracks consisting of:
#> Simple feature collection with 38072 features and 6 fields (with 32699 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -9.087225 xmax: -78.65155 ymax: -0.6481274
#> Geodetic CRS: WGS 84
#> # A tibble: 38,072 × 7
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 38,067 more rows
#> # ℹ 3 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>
#> First 5 track features:
#> # A tibble: 12 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Punta Suarez 22 adult 4262-84830876
#> 2 Punta Suarez 22 adult 4270-84831217
#> 3 Punta Suarez 22 adult 4261-2228
#> 4 Punta Suarez 22 adult 4264-84830852
#> 5 Punta Suarez 22 adult 4266-84831108
#> # ℹ 7 more rows
# Split tracks wherever consecutive locations are more than 4 hours apart.
galapagos_albatrosses %>%
  filter(!st_is_empty(.)) %>%
  mutate(
    # TRUE where the lag reported for this record exceeds 4 hours or is
    # NA (presumably no following record in the track; verify against
    # mt_time_lags()).
    next_new_track = mt_time_lags(.) > set_units(4, "h") |
      is.na(mt_time_lags(.)),
    # Shift the flag by one record so the running count increases on the
    # first record after each gap.
    track_index = cumsum(lag(next_new_track, default = FALSE))
  ) %>%
  mt_set_track_id("track_index")
#> A <move2> object containing 81 tracks consisting of:
#> Simple feature collection with 16028 features and 8 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 16,028 × 9
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 16,023 more rows
#> # ℹ 5 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>, next_new_track <lgl>, track_index <int>
#> First 5 track features:
#> # A tibble: 81 × 4
#> track_index study_site weight animal_life_stage
#> <int> <chr> [g] <fct>
#> 1 51 Punta Cevallos 22 adult
#> 2 3 Punta Cevallos 22 adult
#> 3 4 Punta Cevallos 22 adult
#> 4 5 Punta Cevallos 22 adult
#> 5 6 Punta Cevallos 22 adult
#> # ℹ 76 more rows
library(lubridate, quietly = TRUE)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
# Build a new track id from the current track id and the month name of
# each record's timestamp, so every individual-month becomes its own track.
galapagos_albatrosses %>%
  mt_set_track_id(paste(
    mt_track_id(.),
    month.name[month(mt_time(.))],
    sep = "_"
  ))
#> A <move2> object containing 71 tracks consisting of:
#> Simple feature collection with 114929 features and 6 fields (with 98901 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 114,929 × 7
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 114,924 more rows
#> # ℹ 3 more variables: individual_local_identifier <chr>, timestamp <dttm>,
#> # geometry <POINT [°]>
#> First 5 track features:
#> # A tibble: 71 × 4
#> individual_local_identifier study_site weight animal_life_stage
#> <chr> <chr> [g] <fct>
#> 1 1094-1094_June Punta Cevallos 22 adult
#> 2 1103-1103_June Punta Cevallos 22 adult
#> 3 1103-1103_July Punta Cevallos 22 adult
#> 4 1163-1163_June Punta Cevallos 22 adult
#> 5 1163-1163_July Punta Cevallos 22 adult
#> # ℹ 66 more rows