Modelos biométricos
library(metan)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## |=========================================================|
## | Multi-Environment Trial Analysis (metan) v1.15.0 |
## | Author: Tiago Olivoto |
## | Type 'citation('metan')' to know how to cite metan |
## | Type 'vignette('metan_start')' for a short tutorial |
## | Visit 'https://bit.ly/pkgmetan' for a complete tutorial |
## |=========================================================|
library(rio)
#' Helper used to generate the tables shown in this document.
#'
#' @param table Object to be rendered; handed to `knitr::kable()` as its first
#'   argument.
#' @param digits Number of digits forwarded to `knitr::kable()` (default 3).
#' @param ... Further arguments forwarded unchanged to `knitr::kable()`.
#' @return Whatever `knitr::kable()` returns for the given input.
print_tbl <- function(table, digits = 3, ...) {
  knitr::kable(
    table,
    digits = digits,
    booktabs = TRUE,
    ...
  )
}
# Read the df_ge data set from the given URL with import(); setclass = "tbl"
# requests a tibble as the returned class.
df_ge <- import("http://bit.ly/df_ge", setclass = "tbl")
Correlação linear
A função corr_coef()
pode ser usada para calcular o coeficiente de correlação de Pearson com valores de p. Um mapa de calor de correlação pode ser criado com a função plot()
.
# All numeric variables
# corr_coef() is called on the whole data frame and the result is drawn with
# plot() (the correlation heat map mentioned in the text).
ccoef <- corr_coef(df_ge)
plot(ccoef)
## Warning: Removed 5 rows containing missing values (geom_text).
Podemos usar uma função auxiliar de seleção (select helper) para escolher as variáveis. Aqui, selecionaremos as variáveis cujo nome contém “A” usando contains().
# Correlations restricted to the variables whose names contain "A"
# (tidyselect helper contains()).
ccoef2 <- corr_coef(df_ge, contains("A"))
# Number of decimals shown in the heat map. The argument is spelled `digits`
# (metan v1.15.0, the version loaded in this document); the former
# `dígitos = 2` was a translated name that is not a formal argument of the
# plot method, so the requested rounding was never applied.
plot(ccoef2, digits = 2)
A função corr_plot()
pode ser usada para visualizar (gráfica e numericamente) uma matriz de correlação. Gráficos de dispersão em pares são produzidos e podem ser mostrados na diagonal superior ou inferior, o que pode ser visto como uma versão mais agradável e personalizável, baseada em ggplot2, da função pairs() do R base.
# Correlation plot for five traits of df_ge; the returned object is kept
# in `a`.
a <- corr_plot(df_ge, MMG, MGE, COMPES, DIAMES, NGE)
# Same traits with lower = NULL and upper = "corr"
# (presumably an empty lower panel and correlation values in the upper
# panel -- confirm in ?corr_plot).
corr_plot(df_ge, MMG, MGE, COMPES, DIAMES, NGE,
lower = NULL,
upper = "corr")
# Customised point shape/size/transparency, diagonal drawn as boxplots
# (diag.type = "boxplot"), no spacing between panels and axis labels shown.
corr_plot(df_ge, MMG, MGE, COMPES, DIAMES, NGE,
shape.point = 19,
size.point = 2,
alpha.point = 0.5,
alpha.diag = 0,
pan.spacing = 0,
diag.type = "boxplot",
col.sign = "gray",
alpha.sign = 0.3,
axis.labels = TRUE)
# Further customisation: prob = 0.01 (presumably the significance level used
# to flag correlations -- confirm in ?corr_plot), filled points, a smoothing
# line, panel border colours and the position of the labels.
corr_plot(df_ge, MMG, MGE, COMPES, DIAMES, NGE,
prob = 0.01,
shape.point = 21,
col.point = "black",
fill.point = "orange",
size.point = 2,
alpha.point = 0.6,
maxsize = 4,
minsize = 2,
smooth = TRUE,
size.smooth = 1,
col.smooth = "black",
col.sign = "cyan",
col.up.panel = "black",
col.lw.panel = "black",
col.dia.panel = "black",
pan.spacing = 0,
lab.position = "tl")
Também é possível usar uma variável categórica dos dados para mapear o gráfico de dispersão por cores.
# Map the colour of the scatter points to the categorical variable ENV.
corr_plot(df_ge, MMG, MGE, COMPES, DIAMES, NGE, col.by = ENV)
Matrizes de correlação/covariância
A função covcor_design()
pode ser usada para calcular matrizes de correlação e de (co)variância genéticas, fenotípicas e residuais por meio da Análise de Variância (ANOVA), usando um delineamento de blocos completos casualizados (DBC) ou um delineamento inteiramente casualizado (DIC).
As correlações fenotípicas (\(r_p\)), genotípicas (\(r_g\)) e residuais (\(r_r\)) são calculadas da seguinte forma:
$$
\begin{aligned}
r^p_{xy} &= \frac{cov^p_{xy}}{\sqrt{var^p_x \, var^p_y}} \\
r^g_{xy} &= \frac{cov^g_{xy}}{\sqrt{var^g_x \, var^g_y}} \\
r^r_{xy} &= \frac{cov^r_{xy}}{\sqrt{var^r_x \, var^r_y}}
\end{aligned}
$$
Usando os quadrados médios (MS) do método ANOVA, as variâncias (var) e as covariâncias (cov) são calculadas da seguinte forma:
$$
\begin{aligned}
cov^p_{xy} &= [(MST_{x+y} - MST_x - MST_y) / 2] / r \\
var^p_x &= MST_x / r \\
var^p_y &= MST_y / r \\
cov^r_{xy} &= (MSR_{x+y} - MSR_x - MSR_y) / 2 \\
var^r_x &= MSR_x \\
var^r_y &= MSR_y \\
cov^g_{xy} &= [(cov^p_{xy} \times r) - cov^r_{xy}] / r \\
var^g_x &= (MST_x - MSR_x) / r \\
var^g_y &= (MST_y - MSR_y) / r
\end{aligned}
$$
onde \(MST\) é o quadrado médio para tratamento, \(MSR\) é o quadrado médio para resíduos e \(r\) é o número de repetições. A função covcor_design()
retorna uma lista com as matrizes. Matrizes específicas podem ser retornadas usando o argumento type
, conforme mostrado abaixo.
# Read the df_g data set and run covcor_design() on it, with GEN as the
# genotype column, BLOCO as the replicate column and five response traits.
# The resulting list is queried below through elements such as $geno_cor,
# $phen_cov, $resi_cov and $means.
df_g <- import("http://bit.ly/df_g", setclass = "tbl")
correl <- covcor_design(df_g,
gen = GEN,
rep = BLOCO,
resp = c(MMG, MGE, COMPES, DIAMES, NGE))
Correlações
# genetic correlations
print_tbl(correl$geno_cor)
MMG | MGE | COMPES | DIAMES | NGE | |
---|---|---|---|---|---|
MMG | 1.000 | 0.927 | 0.671 | 0.906 | 0.368 |
MGE | 0.927 | 1.000 | 0.918 | 0.933 | 0.692 |
COMPES | 0.671 | 0.918 | 1.000 | 0.752 | 0.992 |
DIAMES | 0.906 | 0.933 | 0.752 | 1.000 | 0.561 |
NGE | 0.368 | 0.692 | 0.992 | 0.561 | 1.000 |
# phenotypic correlations
print_tbl(correl$phen_cor)
MMG | MGE | COMPES | DIAMES | NGE | |
---|---|---|---|---|---|
MMG | 1.000 | 0.893 | 0.628 | 0.852 | 0.241 |
MGE | 0.893 | 1.000 | 0.883 | 0.897 | 0.651 |
COMPES | 0.628 | 0.883 | 1.000 | 0.661 | 0.854 |
DIAMES | 0.852 | 0.897 | 0.661 | 1.000 | 0.496 |
NGE | 0.241 | 0.651 | 0.854 | 0.496 | 1.000 |
# residual correlations
print_tbl(correl$resi_cor)
MMG | MGE | COMPES | DIAMES | NGE | |
---|---|---|---|---|---|
MMG | 1.000 | 0.431 | 0.348 | 0.223 | -0.417 |
MGE | 0.431 | 1.000 | 0.704 | 0.466 | 0.624 |
COMPES | 0.348 | 0.704 | 1.000 | 0.050 | 0.408 |
DIAMES | 0.223 | 0.466 | 0.050 | 1.000 | 0.275 |
NGE | -0.417 | 0.624 | 0.408 | 0.275 | 1.000 |
Covariâncias
# genetic covariances
print_tbl(correl$geno_cov)
MMG | MGE | COMPES | DIAMES | NGE | |
---|---|---|---|---|---|
MMG | 2941.365 | 1721.242 | 39.930 | 120.289 | 819.553 |
MGE | 1721.242 | 1172.429 | 34.496 | 78.218 | 971.370 |
COMPES | 39.930 | 34.496 | 1.205 | 2.020 | 44.672 |
DIAMES | 120.289 | 78.218 | 2.020 | 5.992 | 56.286 |
NGE | 819.553 | 971.370 | 44.672 | 56.286 | 1682.787 |
# phenotypic covariances
print_tbl(correl$phen_cov)
MMG | MGE | COMPES | DIAMES | NGE | |
---|---|---|---|---|---|
MMG | 3165.893 | 1780.539 | 42.506 | 122.810 | 657.574 |
MGE | 1780.539 | 1256.627 | 37.692 | 81.442 | 1119.668 |
COMPES | 42.506 | 37.692 | 1.449 | 2.038 | 49.899 |
DIAMES | 122.810 | 81.442 | 2.038 | 6.560 | 61.649 |
NGE | 657.574 | 1119.668 | 49.899 | 61.649 | 2354.128 |
# residual covariances
print_tbl(correl$resi_cov)
MMG | MGE | COMPES | DIAMES | NGE | |
---|---|---|---|---|---|
MMG | 673.583 | 177.892 | 7.730 | 7.564 | -485.939 |
MGE | 177.892 | 252.595 | 9.588 | 9.672 | 444.893 |
COMPES | 7.730 | 9.588 | 0.734 | 0.056 | 15.681 |
DIAMES | 7.564 | 9.672 | 0.056 | 1.703 | 16.090 |
NGE | -485.939 | 444.893 | 15.681 | 16.090 | 2014.021 |
Distância de Mahalanobis
A matriz de covariância residual e as médias podem ser usadas na função mahala()
para calcular a distância de Mahalanobis.
# Mahalanobis distances computed from the matrix of means and the residual
# covariance matrix returned by covcor_design(). inverted = FALSE presumably
# tells mahala() that `covar` has not been inverted yet -- confirm in ?mahala.
D2 <- mahala(.means = correl$means, covar = correl$resi_cov, inverted = FALSE)
# Show the distance matrix as a table.
print_tbl(D2)
H1 | H10 | H11 | H12 | H13 | H2 | H3 | H4 | H5 | H6 | H7 | H8 | H9 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
H1 | 0.000 | 22.376 | 14.474 | 23.648 | 12.483 | 3.844 | 2.334 | 7.508 | 17.464 | 14.837 | 26.980 | 59.690 | 51.562 |
H10 | 22.376 | 0.000 | 2.334 | 8.772 | 9.100 | 35.607 | 18.127 | 10.696 | 2.903 | 18.311 | 5.290 | 18.563 | 13.522 |
H11 | 14.474 | 2.334 | 0.000 | 5.474 | 10.975 | 25.937 | 9.655 | 6.592 | 4.653 | 19.124 | 6.482 | 23.686 | 19.195 |
H12 | 23.648 | 8.772 | 5.474 | 0.000 | 18.393 | 42.726 | 20.782 | 22.110 | 17.671 | 37.538 | 4.956 | 13.675 | 10.465 |
H13 | 12.483 | 9.100 | 10.975 | 18.393 | 0.000 | 23.219 | 16.462 | 11.848 | 7.893 | 9.072 | 10.616 | 30.521 | 24.450 |
H2 | 3.844 | 35.607 | 25.937 | 42.726 | 23.219 | 0.000 | 5.541 | 10.274 | 24.275 | 14.521 | 44.357 | 85.575 | 77.169 |
H3 | 2.334 | 18.127 | 9.655 | 20.782 | 16.462 | 5.541 | 0.000 | 3.420 | 13.807 | 16.940 | 26.131 | 58.552 | 50.893 |
H4 | 7.508 | 10.696 | 6.592 | 22.110 | 11.848 | 10.274 | 3.420 | 0.000 | 5.399 | 9.827 | 21.315 | 51.358 | 44.707 |
H5 | 17.464 | 2.903 | 4.653 | 17.671 | 7.893 | 24.275 | 13.807 | 5.399 | 0.000 | 7.806 | 11.084 | 31.217 | 25.982 |
H6 | 14.837 | 18.311 | 19.124 | 37.538 | 9.072 | 14.521 | 16.940 | 9.827 | 7.806 | 0.000 | 25.371 | 54.420 | 49.052 |
H7 | 26.980 | 5.290 | 6.482 | 4.956 | 10.616 | 44.357 | 26.131 | 21.315 | 11.084 | 25.371 | 0.000 | 6.956 | 6.100 |
H8 | 59.690 | 18.563 | 23.686 | 13.675 | 30.521 | 85.575 | 58.552 | 51.358 | 31.217 | 54.420 | 6.956 | 0.000 | 1.796 |
H9 | 51.562 | 13.522 | 19.195 | 10.465 | 24.450 | 77.169 | 50.893 | 44.707 | 25.982 | 49.052 | 6.100 | 1.796 | 0.000 |
# Convert the distance matrix to a "dist" object, cluster it with hclust()
# using its default settings and plot the result.
D2 %>%
as.dist() %>%
hclust() %>%
plot()
Diagnóstico de colinearidade
Os códigos a seguir calculam um diagnóstico de colinearidade completo de uma matriz de correlação de características preditoras. Vários indicadores, como fator de inflação de variância, número de condição e determinante da matriz, são considerados¹.
# Collinearity diagnostic computed on df_ge; print() shows its summary.
colin <- colindiag(df_ge)
print(colin)
## The multicollinearity in the matrix should be investigated.
## CN = 607.708
## Largest VIF = 55.4696961923099
## Matrix determinant: 1.7e-06
## Largest correlation: ALT_PLANT x ALT_ESP = 0.932
## Smallest correlation: COMPES x NFIL = -0.014
## Number of VIFs > 10: 4
## Number of correlations with r >= |0.8|: 3
## Variables with largest weight in the last eigenvalues:
## MGE > NGE > MMG > ALT_ESP > ALT_PLANT > DIAMES > DIAM_SAB > NFIL > COMPES > COMP_SAB
# Table with the eigenvalues and the eigenvector entries per variable.
print_tbl(colin$evalevet)
Eigenvalues | ALT_PLANT | ALT_ESP | COMPES | DIAMES | COMP_SAB | DIAM_SAB | MGE | NFIL | MMG | NGE |
---|---|---|---|---|---|---|---|---|---|---|
5.275 | -0.358 | -0.346 | -0.297 | -0.382 | -0.263 | -0.283 | -0.416 | -0.178 | -0.306 | -0.265 |
1.669 | 0.106 | 0.066 | -0.311 | 0.137 | -0.119 | -0.344 | 0.012 | 0.634 | -0.398 | 0.420 |
1.448 | -0.163 | -0.219 | 0.464 | -0.201 | -0.389 | 0.452 | 0.085 | -0.010 | -0.335 | 0.440 |
0.914 | 0.498 | 0.487 | -0.049 | -0.260 | -0.568 | -0.163 | 0.035 | -0.306 | 0.000 | 0.011 |
0.294 | -0.155 | -0.419 | -0.208 | 0.301 | -0.474 | -0.194 | 0.421 | -0.105 | 0.447 | 0.128 |
0.179 | -0.175 | 0.111 | -0.190 | 0.012 | 0.370 | -0.215 | 0.221 | -0.615 | -0.249 | 0.498 |
0.092 | 0.194 | -0.054 | -0.504 | 0.465 | -0.120 | 0.574 | -0.239 | -0.197 | -0.220 | -0.033 |
0.073 | -0.216 | 0.201 | 0.457 | 0.644 | -0.197 | -0.317 | -0.250 | -0.151 | -0.216 | -0.135 |
0.047 | 0.668 | -0.598 | 0.237 | 0.064 | 0.164 | -0.237 | -0.120 | -0.147 | -0.124 | 0.042 |
0.009 | 0.033 | -0.038 | -0.009 | 0.024 | 0.006 | 0.019 | 0.677 | 0.011 | -0.514 | -0.523 |
Diagnóstico para cada nível do fator ENV
# Same diagnostic computed separately for each level of ENV (by = ENV).
colin2 <- colindiag(df_ge, by = ENV)
print(colin2)
## # A tibble: 4 x 2
## ENV data
## <chr> <list>
## 1 A1 <colindig>
## 2 A2 <colindig>
## 3 A3 <colindig>
## 4 A4 <colindig>
Análise de trilha
Neste exemplo, a variável massa de grãos por espiga (MGE) será utilizada como resposta e todas as outras como explicativas.
# Path analysis with MGE as the response. No `pred` is given here; the
# printed "Predictors:" line lists the traits that were used.
pcoeff <- path_coeff(df_ge, resp = MGE)
## Weak multicollinearity.
## Condition Number = 97.581
## You will probably have path coefficients close to being unbiased.
# Full report of the fitted path analysis.
print(pcoeff)
## ----------------------------------------------------------------------------------------------
## Correlation matrix between the predictor traits
## ----------------------------------------------------------------------------------------------
## ALT_PLANT ALT_ESP COMPES DIAMES COMP_SAB DIAM_SAB NFIL MMG
## ALT_PLANT 1.0000 0.9318 0.38020 0.6613 0.32516 0.31539 0.32861 0.56854
## ALT_ESP 0.9318 1.0000 0.36265 0.6303 0.39719 0.28051 0.26481 0.56236
## COMPES 0.3802 0.3627 1.00000 0.3851 0.25541 0.91187 -0.01387 0.44210
## DIAMES 0.6613 0.6303 0.38515 1.0000 0.69746 0.38971 0.55253 0.64199
## COMP_SAB 0.3252 0.3972 0.25541 0.6975 1.00000 0.30036 0.26194 0.61870
## DIAM_SAB 0.3154 0.2805 0.91187 0.3897 0.30036 1.00000 -0.03585 0.44332
## NFIL 0.3286 0.2648 -0.01387 0.5525 0.26194 -0.03585 1.00000 -0.10876
## MMG 0.5685 0.5624 0.44210 0.6420 0.61870 0.44332 -0.10876 1.00000
## NGE 0.4584 0.3881 0.46570 0.5051 0.04894 0.41562 0.62609 -0.06516
## NGE
## ALT_PLANT 0.45838
## ALT_ESP 0.38812
## COMPES 0.46570
## DIAMES 0.50508
## COMP_SAB 0.04894
## DIAM_SAB 0.41562
## NFIL 0.62609
## MMG -0.06516
## NGE 1.00000
## ----------------------------------------------------------------------------------------------
## Vector of correlations between dependent and each predictor
## ----------------------------------------------------------------------------------------------
## ALT_PLANT ALT_ESP COMPES DIAMES COMP_SAB DIAM_SAB NFIL
## MGE 0.7534439 0.7029469 0.6685601 0.8241426 0.470931 0.6259806 0.3621447
## MMG NGE
## MGE 0.6730371 0.6810756
## ----------------------------------------------------------------------------------------------
## Multicollinearity diagnosis and goodness-of-fit
## ----------------------------------------------------------------------------------------------
## Condition number: 97.5813
## Determinant: 9.241e-05
## R-square: 0.982
## Residual: 0.1343
## Response: MGE
## Predictors: ALT_PLANT ALT_ESP COMPES DIAMES COMP_SAB DIAM_SAB NFIL MMG NGE
## ----------------------------------------------------------------------------------------------
## Variance inflation factors
## ----------------------------------------------------------------------------------------------
## # A tibble: 9 x 2
## VAR VIF
## <chr> <dbl>
## 1 ALT_PLANT 11.30
## 2 ALT_ESP 9.302
## 3 COMPES 7.331
## 4 DIAMES 8.636
## 5 COMP_SAB 3.270
## 6 DIAM_SAB 6.814
## 7 NFIL 3.676
## 8 MMG 6.965
## 9 NGE 5.396
## ----------------------------------------------------------------------------------------------
## Eigenvalues and eigenvectors
## ----------------------------------------------------------------------------------------------
## # A tibble: 9 x 10
## Eigenvalues ALT_PLANT ALT_ESP COMPES DIAMES COMP_SAB DIAM_SAB NFIL
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4.382 -0.3957 -0.3860 -0.3207 -0.4210 -0.3025 -0.3069 -0.1968
## 2 1.669 -0.1105 -0.07139 0.3116 -0.1422 0.1131 0.3450 -0.6359
## 3 1.436 -0.1416 -0.1977 0.4834 -0.1783 -0.3687 0.4719 0.01084
## 4 0.9130 0.5057 0.4952 -0.03763 -0.2558 -0.5642 -0.1520 -0.3006
## 5 0.2429 0.03453 0.3984 0.1010 -0.4234 0.5823 0.05879 -0.06332
## 6 0.1638 -0.1483 0.01160 -0.2195 0.2790 0.1830 -0.1836 -0.6531
## 7 0.08619 -0.2376 0.08451 0.6328 -0.1318 0.03175 -0.6776 0.06203
## 8 0.06259 -0.2610 0.3031 0.2035 0.6326 -0.2275 -0.01768 -0.09131
## 9 0.04490 0.6385 -0.5511 0.2544 0.1748 0.1271 -0.2106 -0.1540
## # ... with 2 more variables: MMG <dbl>, NGE <dbl>
## ----------------------------------------------------------------------------------------------
## Variables with the largest weight in the eigenvalue of smallest magnitude
## ----------------------------------------------------------------------------------------------
## ALT_PLANT > ALT_ESP > MMG > COMPES > DIAM_SAB > DIAMES > NFIL > NGE > COMP_SAB
## ----------------------------------------------------------------------------------------------
## Direct (diagonal) and indirect (off-diagonal) effects
## ----------------------------------------------------------------------------------------------
## ALT_PLANT ALT_ESP COMPES DIAMES COMP_SAB
## ALT_PLANT -0.013315310 0.04002135 0.0138641226 0.015196362 -0.0047188442
## ALT_ESP -0.012407581 0.04294928 0.0132244307 0.014482662 -0.0057641364
## COMPES -0.005062428 0.01557572 0.0364657233 0.008850255 -0.0037065040
## DIAMES -0.008805612 0.02706905 0.0140445955 0.022979012 -0.0101216939
## COMP_SAB -0.004329670 0.01705918 0.0093135921 0.016027009 -0.0145121611
## DIAM_SAB -0.004199528 0.01204778 0.0332518264 0.008955216 -0.0043589255
## NFIL -0.004375497 0.01137319 -0.0005059176 0.012696697 -0.0038012563
## MMG -0.007570266 0.02415282 0.0161215357 0.014752226 -0.0089786759
## NGE -0.006103472 0.01666954 0.0169820638 0.011606259 -0.0007102577
## DIAM_SAB NFIL MMG NGE linear
## ALT_PLANT -0.0067498812 -0.0082032241 0.3909975 0.32635179 0.7534439
## ALT_ESP -0.0060034106 -0.0066105076 0.3867462 0.27632993 0.7029469
## COMPES -0.0195154051 0.0003463406 0.3040435 0.33156290 0.6685601
## DIAMES -0.0083404905 -0.0137932898 0.4415098 0.35960124 0.8241426
## COMP_SAB -0.0064282721 -0.0065388826 0.4254949 0.03484530 0.4709310
## DIAM_SAB -0.0214016323 0.0008949437 0.3048852 0.29590577 0.6259806
## NFIL 0.0007672451 -0.0249636726 -0.0747987 0.44575261 0.3621447
## MMG -0.0094878766 0.0027151156 0.6877240 -0.04639172 0.6730371
## NGE -0.0088948788 -0.0156293919 -0.0448120 0.71196770 0.6810756
## ----------------------------------------------------------------------------------------------
Para declarar características preditoras, use o argumento pred
# Path analysis with MGE as the response and an explicit set of four
# predictor traits supplied through `pred`.
pcoeff2 <-
path_coeff(df_ge,
resp = MGE,
pred = c(MMG, COMPES, DIAMES, NGE))
## Weak multicollinearity.
## Condition Number = 24.907
## You will probably have path coefficients close to being unbiased.
# Full report of the fitted path analysis.
print(pcoeff2)
## ----------------------------------------------------------------------------------------------
## Correlation matrix between the predictor traits
## ----------------------------------------------------------------------------------------------
## MMG COMPES DIAMES NGE
## MMG 1.00000 0.4421 0.6420 -0.06516
## COMPES 0.44210 1.0000 0.3851 0.46570
## DIAMES 0.64199 0.3851 1.0000 0.50508
## NGE -0.06516 0.4657 0.5051 1.00000
## ----------------------------------------------------------------------------------------------
## Vector of correlations between dependent and each predictor
## ----------------------------------------------------------------------------------------------
## MMG COMPES DIAMES NGE
## MGE 0.6730371 0.6685601 0.8241426 0.6810756
## ----------------------------------------------------------------------------------------------
## Multicollinearity diagnosis and goodness-of-fit
## ----------------------------------------------------------------------------------------------
## Condition number: 24.9068
## Determinant: 0.1311275
## R-square: 0.981
## Residual: 0.1379
## Response: MGE
## Predictors: MMG COMPES DIAMES NGE
## ----------------------------------------------------------------------------------------------
## Variance inflation factors
## ----------------------------------------------------------------------------------------------
## # A tibble: 4 x 2
## VAR VIF
## <chr> <dbl>
## 1 MMG 4.277
## 2 COMPES 2.183
## 3 DIAMES 4.245
## 4 NGE 3.529
## ----------------------------------------------------------------------------------------------
## Eigenvalues and eigenvectors
## ----------------------------------------------------------------------------------------------
## # A tibble: 4 x 5
## Eigenvalues MMG COMPES DIAMES NGE
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2.217 -0.4726 -0.5146 -0.5837 -0.4137
## 2 1.077 -0.6640 0.1302 -0.09487 0.7302
## 3 0.6170 -0.02383 -0.7914 0.5790 0.1947
## 4 0.08902 0.5790 -0.3032 -0.5613 0.5076
## ----------------------------------------------------------------------------------------------
## Variables with the largest weight in the eigenvalue of smallest magnitude
## ----------------------------------------------------------------------------------------------
## MMG > DIAMES > NGE > COMPES
## ----------------------------------------------------------------------------------------------
## Direct (diagonal) and indirect (off-diagonal) effects
## ----------------------------------------------------------------------------------------------
## MMG COMPES DIAMES NGE linear
## MMG 0.7207458 0.007125379 -0.007499726 -0.04733433 0.6730371
## COMPES 0.3186425 0.016117081 -0.004499286 0.33829981 0.6685601
## DIAMES 0.4627094 0.006207415 -0.011682054 0.36690786 0.8241426
## NGE -0.0469637 0.007505714 -0.005900382 0.72643393 0.6810756
## ----------------------------------------------------------------------------------------------
Para selecionar um conjunto de preditores com multicolinearidade mínima, use o argumento brutstep.
# Path analysis with brutstep = TRUE: the run reports a predictor selection
# step followed by a series of fitted models (see the log printed below).
pcoeff3 <-
path_coeff(df_ge,
resp = MGE,
brutstep = TRUE)
## --------------------------------------------------------------------------
## The algorithm has selected a set of 8 predictors with largest VIF = 8.634.
## Selected predictors: ALT_ESP COMP_SAB NFIL NGE MMG DIAM_SAB COMPES DIAMES
## A forward stepwise-based selection procedure will fit 6 models.
## --------------------------------------------------------------------------
## Adjusting the model 1 with 7 predictors (16.67% concluded)
## Adjusting the model 2 with 6 predictors (33.33% concluded)
## Adjusting the model 3 with 5 predictors (50% concluded)
## Adjusting the model 4 with 4 predictors (66.67% concluded)
## Adjusting the model 5 with 3 predictors (83.33% concluded)
## Adjusting the model 6 with 2 predictors (100% concluded)
## Done!
## --------------------------------------------------------------------------
## Summary of the adjusted models
## --------------------------------------------------------------------------
## Model AIC Numpred CN Determinant R2 Residual maxVIF
## MODEL_1 923 7 51.94 0.00291 0.982 0.135 7.21
## MODEL_2 921 6 42.05 0.01919 0.982 0.135 6.61
## MODEL_3 921 5 34.25 0.06367 0.982 0.136 5.15
## MODEL_4 924 4 24.91 0.13113 0.981 0.138 4.28
## MODEL_5 1234 3 4.00 0.56087 0.860 0.375 1.52
## MODEL_6 1267 2 2.25 0.85166 0.824 0.420 1.17
## --------------------------------------------------------------------------
# Report for one of the fitted models (the element named Model_4).
print(pcoeff3$Models$Model_4)
## ----------------------------------------------------------------------------------------------
## Correlation matrix between the predictor traits
## ----------------------------------------------------------------------------------------------
## DIAMES COMPES NGE MMG
## DIAMES 1.0000 0.3851 0.50508 0.64199
## COMPES 0.3851 1.0000 0.46570 0.44210
## NGE 0.5051 0.4657 1.00000 -0.06516
## MMG 0.6420 0.4421 -0.06516 1.00000
## ----------------------------------------------------------------------------------------------
## Vector of correlations between dependent and each predictor
## ----------------------------------------------------------------------------------------------
## DIAMES COMPES NGE MMG
## MGE 0.8241426 0.6685601 0.6810756 0.6730371
## ----------------------------------------------------------------------------------------------
## Multicollinearity diagnosis and goodness-of-fit
## ----------------------------------------------------------------------------------------------
## Condition number: 24.9068
## Determinant: 0.1311275
## R-square: 0.981
## Residual: 0.1379
## Response: MGE
## Predictors: DIAMES COMPES NGE MMG
## ----------------------------------------------------------------------------------------------
## Variance inflation factors
## ----------------------------------------------------------------------------------------------
## VIF
## DIAMES 4.244679
## COMPES 2.182940
## NGE 3.528542
## MMG 4.277252
## ----------------------------------------------------------------------------------------------
## Eigenvalues and eigenvectors
## ----------------------------------------------------------------------------------------------
## Eigenvalues DIAMES COMPES NGE MMG
## 1 2.21731979 -0.5836962 0.09487169 0.57897124 0.5613292
## 2 1.07668055 -0.5145820 -0.13022977 -0.79140140 0.3031986
## 3 0.61697486 -0.4137466 -0.73021132 0.19470094 -0.5076383
## 4 0.08902479 -0.4725652 0.66395105 -0.02382582 -0.5790367
## ----------------------------------------------------------------------------------------------
## Variables with the largest weight in the eigenvalue of smallest magnitude
## ----------------------------------------------------------------------------------------------
## COMPES > MMG > DIAMES > NGE
## ----------------------------------------------------------------------------------------------
## Direct (diagonal) and indirect (off-diagonal) effects
## ----------------------------------------------------------------------------------------------
## DIAMES COMPES NGE MMG linear
## DIAMES -0.011682054 0.006207415 0.36690786 0.4627094 0.8241426
## COMPES -0.004499286 0.016117081 0.33829981 0.3186425 0.6685601
## NGE -0.005900382 0.007505714 0.72643393 -0.0469637 0.6810756
## MMG -0.007499726 0.007125379 -0.04733433 0.7207458 0.6730371
## ----------------------------------------------------------------------------------------------
Também é possível calcular uma análise para cada nível de um determinado fator
# Same response and predictors as before, fitted separately for each level
# of ENV (by = ENV).
pcoeff4 <-
path_coeff(df_ge,
resp = MGE,
pred = c(MMG, COMPES, DIAMES, NGE),
by = ENV)
## Weak multicollinearity.
## Condition Number = 11.26
## You will probably have path coefficients close to being unbiased.
## Weak multicollinearity.
## Condition Number = 48.08
## You will probably have path coefficients close to being unbiased.
## Weak multicollinearity.
## Condition Number = 20.594
## You will probably have path coefficients close to being unbiased.
## Weak multicollinearity.
## Condition Number = 29.096
## You will probably have path coefficients close to being unbiased.
# path diagram
# The diagram package is attached here; it is needed for plotmat() below.
library(diagram)
## Carregando pacotes exigidos: shape
# Path analysis whose coefficients feed the diagram.
pcoeff5 <-
path_coeff(df_ge,
resp = MGE,
pred = c(MMG, COMPES, DIAMES, NGE))
## Weak multicollinearity.
## Condition Number = 24.907
## You will probably have path coefficients close to being unbiased.
# Coefficient table for the diagram: drop the `linear` column, round to
# three digits, then display the result.
coeffs <- round_cols(
  remove_cols(pcoeff5$Coefficients, linear),
  digits = 3
)
coeffs
## MMG COMPES DIAMES NGE
## MMG 0.721 0.007 -0.007 -0.047
## COMPES 0.319 0.016 -0.004 0.338
## DIAMES 0.463 0.006 -0.012 0.367
## NGE -0.047 0.008 -0.006 0.726
# Draw the diagram from the rounded coefficient matrix with plotmat();
# the remaining arguments only tune the look of the boxes and arrows.
plotmat(coeffs,
curve = 0,
box.size = 0.08,
box.prop = 0.5,
box.col = "gray90",
arr.type = "curved",
arr.pos = 0.35,
arr.lwd = 1,
arr.length = 0.4,
arr.width = 0.2)
Correlações canônicas
Em primeiro lugar, renomearemos as características relacionadas à espiga (COMPES, DIAMES e COMP_SAB) com o prefixo ESP_ e as relacionadas ao grão (MGE e MMG) com o prefixo GRAO_, para mostrar a usabilidade do select helper contains().
# Rename five traits so that each group shares a common prefix
# (ESP_ for three of them, GRAO_ for the other two).
data_cc <-
df_ge %>%
rename(ESP_COMPES = COMPES,
ESP_DIAMES = DIAMES,
ESP_COMPSAB = COMP_SAB,
GRAO_MGE = MGE,
GRAO_MMG = MMG)
# Type the variable names explicitly: FG holds the first group of variables
# and SG the second group.
cc1 <- can_corr(data_cc,
FG = c(GRAO_MGE, GRAO_MMG),
SG = c(ESP_COMPES, ESP_DIAMES, ESP_COMPSAB))
## ---------------------------------------------------------------------------
## Matrix (correlation/covariance) between variables of first group (FG)
## ---------------------------------------------------------------------------
## GRAO_MGE GRAO_MMG
## GRAO_MGE 1.0000000 0.6730371
## GRAO_MMG 0.6730371 1.0000000
## ---------------------------------------------------------------------------
## Collinearity within first group
## ---------------------------------------------------------------------------
## Weak multicollinearity in the matrix
## CN = 5.117
## Matrix determinant: 0.547021
## Largest correlation: GRAO_MGE x GRAO_MMG = 0.673
## Smallest correlation: GRAO_MGE x GRAO_MMG = 0.673
## Number of VIFs > 10: 0
## Number of correlations with r >= |0.8|: 0
## Variables with largest weight in the last eigenvalues:
## GRAO_MGE > GRAO_MMG
## ---------------------------------------------------------------------------
## Matrix (correlation/covariance) between variables of second group (SG)
## ---------------------------------------------------------------------------
## ESP_COMPES ESP_DIAMES ESP_COMPSAB
## ESP_COMPES 1.0000000 0.3851451 0.2554068
## ESP_DIAMES 0.3851451 1.0000000 0.6974629
## ESP_COMPSAB 0.2554068 0.6974629 1.0000000
## ---------------------------------------------------------------------------
## Collinearity within second group
## ---------------------------------------------------------------------------
## Weak multicollinearity in the matrix
## CN = 6.679
## Matrix determinant: 0.4371931
## Largest correlation: ESP_DIAMES x ESP_COMPSAB = 0.697
## Smallest correlation: ESP_COMPES x ESP_COMPSAB = 0.255
## Number of VIFs > 10: 0
## Number of correlations with r >= |0.8|: 0
## Variables with largest weight in the last eigenvalues:
## ESP_DIAMES > ESP_COMPSAB > ESP_COMPES
## ---------------------------------------------------------------------------
## Matrix (correlation/covariance) between FG and SG
## ---------------------------------------------------------------------------
## ESP_COMPES ESP_DIAMES ESP_COMPSAB
## GRAO_MGE 0.6685601 0.8241426 0.4709310
## GRAO_MMG 0.4421011 0.6419870 0.6187001
## ---------------------------------------------------------------------------
## Correlation of the canonical pairs and hypothesis testing
## ---------------------------------------------------------------------------
## Var Percent Sum Corr Lambda Chisq DF p_val
## U1V1 0.8430909 79.09135 79.09135 0.9181998 0.12194 319.84591 6 0
## U2V2 0.2228801 20.90865 100.00000 0.4721018 0.77712 38.32842 2 0
## ---------------------------------------------------------------------------
## Canonical coefficients of the first group
## ---------------------------------------------------------------------------
## U1 U2
## GRAO_MGE -0.98237574 0.9289894
## GRAO_MMG -0.02591325 -1.3518180
## ---------------------------------------------------------------------------
## Canonical coefficients of the second group
## ---------------------------------------------------------------------------
## V1 V2
## ESP_COMPES -0.4445443 0.1353212
## ESP_DIAMES -0.8650570 0.6712820
## ESP_COMPSAB 0.1955781 -1.3476588
## ---------------------------------------------------------------------------
## Canonical loads of the first group
## ---------------------------------------------------------------------------
## U1 U2
## GRAO_MGE -0.9998163 0.01916566
## GRAO_MMG -0.6870886 -0.72657362
## ---------------------------------------------------------------------------
## Canonical loads of the second group
## ---------------------------------------------------------------------------
## V1 V2
## ESP_COMPES -0.7277648 0.04966103
## ESP_DIAMES -0.8998626 -0.21654173
## ESP_COMPSAB -0.5213067 -0.84490257
# using select helpers: the groups are picked by prefix with contains()
# instead of listing each variable.
cc2 <- can_corr(data_cc,
FG = contains("GRAO_"),
SG = contains("ESP_"))
## ---------------------------------------------------------------------------
## Matrix (correlation/covariance) between variables of first group (FG)
## ---------------------------------------------------------------------------
## GRAO_MGE GRAO_MMG
## GRAO_MGE 1.0000000 0.6730371
## GRAO_MMG 0.6730371 1.0000000
## ---------------------------------------------------------------------------
## Collinearity within first group
## ---------------------------------------------------------------------------
## Weak multicollinearity in the matrix
## CN = 5.117
## Matrix determinant: 0.547021
## Largest correlation: GRAO_MGE x GRAO_MMG = 0.673
## Smallest correlation: GRAO_MGE x GRAO_MMG = 0.673
## Number of VIFs > 10: 0
## Number of correlations with r >= |0.8|: 0
## Variables with largest weight in the last eigenvalues:
## GRAO_MGE > GRAO_MMG
## ---------------------------------------------------------------------------
## Matrix (correlation/covariance) between variables of second group (SG)
## ---------------------------------------------------------------------------
## ESP_COMPES ESP_DIAMES ESP_COMPSAB
## ESP_COMPES 1.0000000 0.3851451 0.2554068
## ESP_DIAMES 0.3851451 1.0000000 0.6974629
## ESP_COMPSAB 0.2554068 0.6974629 1.0000000
## ---------------------------------------------------------------------------
## Collinearity within second group
## ---------------------------------------------------------------------------
## Weak multicollinearity in the matrix
## CN = 6.679
## Matrix determinant: 0.4371931
## Largest correlation: ESP_DIAMES x ESP_COMPSAB = 0.697
## Smallest correlation: ESP_COMPES x ESP_COMPSAB = 0.255
## Number of VIFs > 10: 0
## Number of correlations with r >= |0.8|: 0
## Variables with largest weight in the last eigenvalues:
## ESP_DIAMES > ESP_COMPSAB > ESP_COMPES
## ---------------------------------------------------------------------------
## Matrix (correlation/covariance) between FG and SG
## ---------------------------------------------------------------------------
## ESP_COMPES ESP_DIAMES ESP_COMPSAB
## GRAO_MGE 0.6685601 0.8241426 0.4709310
## GRAO_MMG 0.4421011 0.6419870 0.6187001
## ---------------------------------------------------------------------------
## Correlation of the canonical pairs and hypothesis testing
## ---------------------------------------------------------------------------
## Var Percent Sum Corr Lambda Chisq DF p_val
## U1V1 0.8430909 79.09135 79.09135 0.9181998 0.12194 319.84591 6 0
## U2V2 0.2228801 20.90865 100.00000 0.4721018 0.77712 38.32842 2 0
## ---------------------------------------------------------------------------
## Canonical coefficients of the first group
## ---------------------------------------------------------------------------
## U1 U2
## GRAO_MGE -0.98237574 0.9289894
## GRAO_MMG -0.02591325 -1.3518180
## ---------------------------------------------------------------------------
## Canonical coefficients of the second group
## ---------------------------------------------------------------------------
## V1 V2
## ESP_COMPES -0.4445443 0.1353212
## ESP_DIAMES -0.8650570 0.6712820
## ESP_COMPSAB 0.1955781 -1.3476588
## ---------------------------------------------------------------------------
## Canonical loads of the first group
## ---------------------------------------------------------------------------
## U1 U2
## GRAO_MGE -0.9998163 0.01916566
## GRAO_MMG -0.6870886 -0.72657362
## ---------------------------------------------------------------------------
## Canonical loads of the second group
## ---------------------------------------------------------------------------
## V1 V2
## ESP_COMPES -0.7277648 0.04966103
## ESP_DIAMES -0.8998626 -0.21654173
## ESP_COMPSAB -0.5213067 -0.84490257
Gráfico de barras
# Bar plot of MMG (y) for each GEN (x) in df_g; lab.bar supplies the
# values 1 to 13 as bar labels.
plot_bars(df_g,
x = GEN,
y = MMG,
lab.bar = 1:13)
# Bar plot of MMG in df_ge using the two factors ENV and GEN.
plot_factbars(df_ge, ENV, GEN, resp = MMG)
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Spectral is 11
## Returning the palette you asked for with that many colors
-
Olivoto, T., Souza, V. Q., Nardino, M., Carvalho, I. R., Ferrari, M., Pelegrin, A. J., Szareski, V. J., & Schmidt, D. (2017). Multicollinearity in path analysis: A simple method to reduce its effects. Agronomy Journal, 109(1), 131–142. https://doi.org/10.2134/agronj2016.04.0196 ↩︎