## ----include = FALSE----------------------------------------------------------
# Global chunk options for this vignette script. Chunks are not evaluated by
# default (eval = FALSE), which is why most code below appears commented out
# together with its recorded `#>` output; only chunks that set eval = TRUE
# contain live code.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE, # spelled out: `F` is a plain variable that can be reassigned
  warning = FALSE
)

## -----------------------------------------------------------------------------
# # Load RuHere package
# library(RuHere)

## ----eval = TRUE--------------------------------------------------------------
# Loading package occurrence data
data("occurrences", package = "RuHere")
# Number of records per species
table(occurrences$species)

## -----------------------------------------------------------------------------
# # Standardize country names
# occ_country_std <- standardize_countries(
#   occ = occurrences,
#   country_column = "country",
#   max_distance = 0.1, # Maximum error distance for fuzzy matching
#   lookup_na_country = TRUE # Try to extract country from coords if value is
#   # NA using the country_from_coords() function internally
# )

## -----------------------------------------------------------------------------
# # Printing first rows and columns
# occ_country_std$occ[1:3, 1:5]
# #>   country country_suggested country_source  record_id                species
# #> 1      AR         argentina       metadata  gbif_5516 Araucaria angustifolia
# #> 2      AR         argentina       metadata gbif_15849 Araucaria angustifolia
# #> 3      AR         argentina       metadata  gbif_4935 Araucaria angustifolia
#
# occ_country_std$report[1:5, ]
# #>     country country_suggested
# #> 1 argentina         argentina
# #> 2   bolivia           bolivia
# #> 3    brasil            brazil
# #> 4        UY           uruguay
# #> 5        PT          portugal

## -----------------------------------------------------------------------------
# # Standardize state names
# occ_state_std <- standardize_states(
#   occ = occ_country_std$occ,
#   state_column = "stateProvince",
#   country_column = "country_suggested",
#   max_distance = 0.1,
#   lookup_na_state = TRUE # Try to extract state from coords if value is NA
# )

## -----------------------------------------------------------------------------
# occ_state_std$occ[1:3, 1:6]
# #>   stateProvince state_suggested state_source country_suggested country country_source
# #> 1          acre            acre     metadata            brazil  brazil       metadata
# #> 2          acre            acre     metadata            brazil  brazil       metadata
# #> 3          acre            acre     metadata            brazil  brazil       metadata
#
# occ_state_std$report[1:3, ]
# #>   stateProvince   state_suggested country_suggested
# #> 1    sa£o paulo         sao paulo            brazil
# #> 2     tocantins         tocantins            brazil
# #> 3            RS rio grande do sul            brazil

## -----------------------------------------------------------------------------
# # Explicitly extract country from coordinates for all records
# occ_with_country_xy <- country_from_coords(
#   occ = occ_state_std$occ,
#   from = "all", # 'all' extracts for every record; 'na_only' extracts for missing ones
#   output_column = "country_xy"
# )
#
# # Compare the original country vs. the one derived from coordinates
# head(occ_with_country_xy[, c("country", "country_xy")])
# #>   country country_xy
# #> 1  brazil     brazil
# #> 2  brazil     brazil
# #> 3  brazil     brazil
# #> 4      BR     brazil
# #> 5      BR     brazil
# #> 6      BR     brazil

## -----------------------------------------------------------------------------
# # Extract state from coordinates for all records
# occ_imputed <- states_from_coords(
#   occ = occ_with_country_xy,
#   from = "all",
#   state_column = "stateProvince",
#   output_column = "state_xy"
# )
#
# head(occ_imputed[, c("stateProvince", "state_xy", "state_source")])
# #>   stateProvince state_xy state_source
# #> 1          acre     acre     metadata
# #> 2          acre     acre     metadata
# #> 3          acre     acre     metadata
# #> 4          acre amazonas     metadata
# #> 5          acre     acre     metadata
# #> 6          acre     acre     metadata

## -----------------------------------------------------------------------------
# # Check if coordinates fall within the assigned country
# occ_checked_country <- check_countries(
#   occ = occ_imputed,
#   country_column = "country_suggested",
#   distance = 5, # Allows a 5 km buffer for border points
#   try_to_fix = TRUE # Automatically attempts to fix inverted/swapped coordinates
# )
# #> Testing countries...
# #> 468 records fall in wrong countries
# #> Task 1 of 7: testing if longitude is inverted
# #> 0 coordinates with longitude inverted
# #> Task 2 of 7: testing if latitude is inverted
# #> 0 coordinates with latitude inverted
# #> Task 3 of 7: testing if longitude and latitude are inverted
# #> 2 coordinates with longitude and latitude inverted
# #> Task 4 of 7: testing if longitude and latitude are swapped
# #> 1 coordinates with longitude and latitude swapped
# #> Task 5 of 7: testing if longitude and latitude are swapped with longitude inverted
# #> 0 coordinates with longitude and latitude swapped and latitude inverted
# #> Task 6 of 7: testing if longitude and latitude are swapped - with latitude inverted
# #> 0 coordinates with longitude and latitude swapped and longitude inverted
# #> Task 7 of 7: testing if longitude and latitude are swapped - with longitude latitude inverted
# #> 0 coordinates with longitude and latitude swapped and inverted
#
# # The 'correct_country' column indicates validity
# head(occ_checked_country[, c("country_suggested", "correct_country", "country_issues")])
# #>   country_suggested correct_country country_issues
# #> 1            brazil            TRUE        correct
# #> 2            brazil            TRUE        correct
# #> 3            brazil            TRUE        correct
# #> 4            brazil            TRUE        correct
# #> 5            brazil            TRUE        correct
# #> 6            brazil            TRUE        correct

## -----------------------------------------------------------------------------
# # Check if coordinates fall within the assigned state
# occ_checked_state <- check_states(
#   occ = occ_checked_country,
#   state_column = "state_suggested",
#   distance = 5,
#   try_to_fix = FALSE # We just want to flag issues here, not auto-fix
# )
# #> Testing states...
# #> 87 records fall in wrong states
#
# head(occ_checked_state[, c("state_suggested", "correct_state")])
# #>   state_suggested correct_state
# #> 1            acre          TRUE
# #> 2            acre          TRUE
# #> 3            acre          TRUE
# #> 4            acre         FALSE
# #> 5            acre          TRUE
# #> 6            acre          TRUE

## -----------------------------------------------------------------------------
# # This step is only necessary if you did NOT set try_to_fix = TRUE above
# fixing_example <- fix_countries(
#   occ = occ_checked_country,
#   country_column = "country_suggested",
#   correct_country = "correct_country" # Column created by check_countries
# )
# #> Task 1 of 7: testing if longitude is inverted
# #> 0 coordinates with longitude inverted
# #> Task 2 of 7: testing if latitude is inverted
# #> 0 coordinates with latitude inverted
# #> Task 3 of 7: testing if longitude and latitude are inverted
# #> 0 coordinates with longitude and latitude inverted
# #> Task 4 of 7: testing if longitude and latitude are swapped
# #> 0 coordinates with longitude and latitude swapped
# #> Task 5 of 7: testing if longitude and latitude are swapped with longitude inverted
# #> 0 coordinates with longitude and latitude swapped and latitude inverted
# #> Task 6 of 7: testing if longitude and latitude are swapped - with latitude inverted
# #> 0 coordinates with longitude and latitude swapped and longitude inverted
# #> Task 7 of 7: testing if longitude and latitude are swapped - with longitude latitude inverted
# #> 0 coordinates with longitude and latitude swapped and inverted