R 内置了一个名为“state”的数据集,其中的向量 `state.name` 包含美国 50 个州的名称。该向量不包含“哥伦比亚特区”(District of Columbia),但您可以将其追加进去,然后用这个完整的列表把“states_county”列拆分为州和县,例如:
library(tidyverse)
# Example input: one column whose values are a state name followed by a
# county name, separated by a single space.
states_county_names_df <- data.frame(
  states_county = c(
    "California San Francisco",
    "New York Bronx",
    "New York Kings",
    "New York New York",
    "New York Queens",
    "New York Richmond",
    "Washington King",
    "Illinois Cook",
    "Massachusetts Suffolk",
    "District of Columbia District of Columbia"
  )
)

# Load the built-in `state` data set and show the state names it provides.
data("state")
print(state.name)
#> [1] "Alabama" "Alaska" "Arizona" "Arkansas"
#> [5] "California" "Colorado" "Connecticut" "Delaware"
#> [9] "Florida" "Georgia" "Hawaii" "Idaho"
#> [13] "Illinois" "Indiana" "Iowa" "Kansas"
#> [17] "Kentucky" "Louisiana" "Maine" "Maryland"
#> [21] "Massachusetts" "Michigan" "Minnesota" "Mississippi"
#> [25] "Missouri" "Montana" "Nebraska" "Nevada"
#> [29] "New Hampshire" "New Jersey" "New Mexico" "New York"
#> [33] "North Carolina" "North Dakota" "Ohio" "Oklahoma"
#> [37] "Oregon" "Pennsylvania" "Rhode Island" "South Carolina"
#> [41] "South Dakota" "Tennessee" "Texas" "Utah"
#> [45] "Vermont" "Virginia" "Washington" "West Virginia"
#> [49] "Wisconsin" "Wyoming"

# `state.name` has no entry for the District of Columbia, so add it at the end.
states_inc_Columbia <- append(state.name, "District of Columbia")
# Split each "State County" string into a `state` column and a `county` column.
# The state names are collapsed into one regex alternation, built once and
# shared by both steps so extraction and removal always use the same pattern.
state_pattern <- paste(states_inc_Columbia, collapse = "|")
states_county_names_df %>%
  mutate(
    # First state name found in the string (NA when none matches).
    state = str_extract(states_county, state_pattern),
    # Removing the state leaves the separating space in front of the county
    # name, so strip the surrounding whitespace.
    county = str_trim(str_remove(states_county, state_pattern))
  )
#> states_county state
#> 1 California San Francisco California
#> 2 New York Bronx New York
#> 3 New York Kings New York
#> 4 New York New York New York
#> 5 New York Queens New York
#> 6 New York Richmond New York
#> 7 Washington King Washington
#> 8 Illinois Cook Illinois
#> 9 Massachusetts Suffolk Massachusetts
#> 10 District of Columbia District of Columbia District of Columbia
#> county
#> 1 San Francisco
#> 2 Bronx
#> 3 Kings
#> 4 New York
#> 5 Queens
#> 6 Richmond
#> 7 King
#> 8 Cook
#> 9 Suffolk
#> 10 District of Columbia
Created on 2022-03-16 by the reprex package https://reprex.tidyverse.org (v2.0.1)
另外还有一种只使用基础 R(base R)的方案:
# Example input: one column whose values are a state name followed by a
# county name, separated by a single space.
states_county_names_df <- data.frame(
  states_county = c(
    "California San Francisco",
    "New York Bronx",
    "New York Kings",
    "New York New York",
    "New York Queens",
    "New York Richmond",
    "Washington King",
    "Illinois Cook",
    "Massachusetts Suffolk",
    "District of Columbia District of Columbia"
  )
)

# Load the built-in `state` data set and show the state names it provides.
data(state)
state.name
#> [1] "Alabama" "Alaska" "Arizona" "Arkansas"
#> [5] "California" "Colorado" "Connecticut" "Delaware"
#> [9] "Florida" "Georgia" "Hawaii" "Idaho"
#> [13] "Illinois" "Indiana" "Iowa" "Kansas"
#> [17] "Kentucky" "Louisiana" "Maine" "Maryland"
#> [21] "Massachusetts" "Michigan" "Minnesota" "Mississippi"
#> [25] "Missouri" "Montana" "Nebraska" "Nevada"
#> [29] "New Hampshire" "New Jersey" "New Mexico" "New York"
#> [33] "North Carolina" "North Dakota" "Ohio" "Oklahoma"
#> [37] "Oregon" "Pennsylvania" "Rhode Island" "South Carolina"
#> [41] "South Dakota" "Tennessee" "Texas" "Utah"
#> [45] "Vermont" "Virginia" "Washington" "West Virginia"
#> [49] "Wisconsin" "Wyoming"

# `state.name` has no entry for the District of Columbia, so add it at the end.
states_inc_Columbia <- c(state.name, "District of Columbia")

# One regex alternation of all state names, built once and shared by the
# extraction and the removal so both steps match exactly the same text.
state_pattern <- paste(states_inc_Columbia, collapse = "|")

# state: first state name found in each string. Rows without a match yield an
# empty match vector, which `[`(., 1) turns into NA; vapply() guarantees a
# character vector whatever the input looks like.
state_matches <- regmatches(
  x = states_county_names_df$states_county,
  m = regexec(state_pattern, states_county_names_df$states_county)
)
states_county_names_df$state <- vapply(state_matches, "[", character(1), 1)

# county: the input with the first state name removed. The match is
# case-sensitive, like the extraction above, so a row never loses its state
# prefix while `state` is NA. trimws() drops the separating space that is left
# in front of the county name.
states_county_names_df$county <- trimws(
  sub(
    pattern = state_pattern,
    replacement = "",
    x = states_county_names_df$states_county
  )
)
states_county_names_df
#> states_county state
#> 1 California San Francisco California
#> 2 New York Bronx New York
#> 3 New York Kings New York
#> 4 New York New York New York
#> 5 New York Queens New York
#> 6 New York Richmond New York
#> 7 Washington King Washington
#> 8 Illinois Cook Illinois
#> 9 Massachusetts Suffolk Massachusetts
#> 10 District of Columbia District of Columbia District of Columbia
#> county
#> 1 San Francisco
#> 2 Bronx
#> 3 Kings
#> 4 New York
#> 5 Queens
#> 6 Richmond
#> 7 King
#> 8 Cook
#> 9 Suffolk
#> 10 District of Columbia
Created on 2022-03-16 by the reprex package https://reprex.tidyverse.org (v2.0.1)