This document analyzes correlations between characteristics of different whiskies. It creates a correlation matrix between 12 whisky characteristics for 67 distilleries. It then transforms this into a graph with distilleries as nodes and correlations above 0.8 as edges. Various graph metrics and visualizations are calculated and displayed, including communities detected within the graph.
17. #
colnames(cor.mat) <- whiskies$Distillery
rownames(cor.mat) <- whiskies$Distillery
#
cor.mat[upper.tri(cor.mat, diag = TRUE)] <- NA
cor.mat[1:5, 1:5]
Aberfeldy Aberlour AnCnoc Ardbeg Ardmore
Aberfeldy NA NA NA NA NA
Aberlour 0.7086322 NA NA NA NA
AnCnoc 0.6973541 0.5030737 NA NA NA
Ardbeg -0.1473114 -0.2285909 -0.1404355 NA NA
Ardmore 0.7319024 0.5118338 0.5570195 0.2316174 NA
25. #
g <- g %>%
mutate(centrality = centrality_betweenness())
g
# A tbl_graph: 67 nodes and 135 edges
#
# An undirected simple graph with 1 component
#
# Node Data: 67 x 2 (active)
name centrality
<chr> <dbl>
1 Auchroisk 174.
2 Benrinnes 122.
3 Benromach 411.
26. #
g <- g %E>%
mutate(centrality = centrality_edge_betweenness())
g
# A tbl_graph: 67 nodes and 135 edges
#
# An undirected simple graph with 1 component
#
# Edge Data: 135 x 4 (active)
from to cor centrality
<int> <int> <dbl> <dbl>
1 1 54 0.824 79.3
2 2 54 0.842 42.9
3 3 54 0.855 54.2
27. #
g <- g %E>%
mutate(centrality = centrality_edge_betweenness())
g
# A tbl_graph: 67 nodes and 135 edges
#
# An undirected simple graph with 1 component
#
# Edge Data: 135 x 4 (active)
from to cor centrality
<int> <int> <dbl> <dbl>
1 1 54 0.824 79.3
2 2 54 0.842 42.9
3 3 54 0.855 54.2
28. #
g <- g %>%
mutate(community = as.factor(group_fast_greedy(weights = cor)))
g
# A tbl_graph: 67 nodes and 135 edges
#
# An undirected simple graph with 1 component
#
# Node Data: 67 x 2 (active)
name community
<chr> <fct>
1 Auchroisk 2
2 Benrinnes 3
3 Benromach 2