Recently Published
Industry Proximity Network | Sep 29, 2024
# Load required libraries
library(tidyverse)
library(igraph)
library(ggraph)
library(colorspace)
library(plotly)
# Import the county-level CSV extract; the path is resolved relative to the
# current working directory.
data <- read_csv("Downloads/cgt_county_data_sep_29_2024.csv")
# Keep only rows whose county name does not contain "county not reported"
# (case-insensitive). No other transformation is applied at this step.
data_filtered <- filter(
  data,
  !grepl("county not reported", county_name, ignore.case = TRUE)
)
# County x industry incidence table built from the rows flagged M == 1.
# NOTE(review): M is presumably a precomputed specialisation indicator and
# each (county, industry) pair is presumably unique -- confirm against the
# data source; repeated pairs would yield cell counts above 1.
county_industry_pairs <- data_filtered %>%
  filter(M == 1) %>%
  select(county_geoid, industry_code)
industry_matrix <- as.matrix(table(county_industry_pairs))
# Industry x industry co-occurrence: cell (i, j) sums, over counties, the
# product of the two industries' incidence values.
co_occurrence <- t(industry_matrix) %*% industry_matrix
# The diagonal holds each industry's co-occurrence with itself.
diag_values <- diag(co_occurrence)
# Proximity = co-occurrence divided by the larger of the two industries'
# diagonal values; outer() lays out max(d[i], d[j]) at position (i, j).
pairwise_max <- outer(diag_values, diag_values, FUN = pmax)
proximity <- co_occurrence / pairwise_max
# Self-proximity is not meaningful for the network, so zero it out.
diag(proximity) <- 0
# Create an edge list from the proximity matrix.
# Only pairs whose proximity exceeds the 95th percentile of the strictly
# positive proximities become edges.
proximity_cutoff <- quantile(proximity[proximity > 0], 0.95)
# proximity is symmetric (it is built from t(X) %*% X and a symmetric
# denominator), so scanning the full matrix would return every pair twice --
# once as (i, j) and once as (j, i) -- and the undirected graph would end up
# with two parallel edges per pair. Restricting to the upper triangle keeps
# each pair exactly once.
strong_pairs <- proximity > proximity_cutoff & upper.tri(proximity)
edge_list <- which(strong_pairs, arr.ind = TRUE)
edge_df <- data.frame(
  from = rownames(proximity)[edge_list[, 1]],
  to = colnames(proximity)[edge_list[, 2]],
  weight = proximity[edge_list]
)
# Create the graph object; vertices are named by the row/column labels of
# the proximity matrix, and `weight` becomes an edge attribute.
g <- graph_from_data_frame(edge_df, directed = FALSE)
# Add node attributes.
# Vertex names start out as industry codes (the labels of the proximity
# matrix). Derive the sector -- the first two characters of the code -- from
# the code itself BEFORE the names are replaced by descriptions. Looking the
# code up again via the description would assign the wrong sector (or NA)
# whenever two codes share a description or a description is missing.
industry_codes <- V(g)$name
V(g)$sector <- substr(industry_codes, 1, 2)
# Relabel vertices with the human-readable description used for plot labels
# and tooltips (first matching row in data_filtered for each code).
V(g)$name <- data_filtered$industry_desc[
  match(industry_codes, data_filtered$industry_code)
]
# Create a color palette for sectors: one HCL rainbow color per distinct
# sector, named by sector so scale_color_manual() can map by value.
n_sectors <- length(unique(V(g)$sector))
sector_colors <- setNames(
  rainbow_hcl(n_sectors),
  sort(unique(V(g)$sector))
)
# Build the static network plot. The RNG is seeded right before ggraph() so
# the "fr" layout comes out the same on every run.
set.seed(42)
p <- ggraph(g, layout = "fr") +
  # Edges: light gray, with opacity driven by the edge weight.
  geom_edge_link(
    aes(edge_alpha = weight),
    edge_colour = "lightgray",
    show.legend = FALSE
  ) +
  # Nodes: colored by sector; the extra `text` aesthetic carries the industry
  # name so it can be used as the hover tooltip after conversion.
  geom_node_point(aes(color = sector, text = name), size = 3) +
  # Static labels, repelled to limit overlap.
  geom_node_text(
    aes(label = name),
    size = 2,
    repel = TRUE,
    max.overlaps = 10
  ) +
  scale_color_manual(values = sector_colors) +
  scale_edge_alpha(range = c(0.1, 0.5)) +
  labs(title = "Industry Space: Co-location Based Proximity") +
  theme_graph() +
  theme(legend.position = "none")
# Convert to an interactive plotly widget; tooltip = "text" restricts the
# hover box to the `text` aesthetic set on the node layer.
# NOTE(review): ggplotly() may not translate ggraph edge layers or repelled
# text layers -- verify that edges and labels appear in the HTML output.
p_interactive <- ggplotly(p, tooltip = "text")
# Display the interactive plot
p_interactive
# Write the widget to an HTML file in the working directory
htmlwidgets::saveWidget(p_interactive, "industry_space_interactive.html")
Network Mapping
Including all NAICS six-digit industries.
CGT Network Mapping
September 19, 2024
Energy Transition Industry Network Mapping
By: Ben Feshbach | Date: September 19, 2024 | Data source: Clean Growth Tool county-level backend data