Modelos biométricos

library(metan)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## |=========================================================|
## | Multi-Environment Trial Analysis (metan) v1.15.0        |
## | Author: Tiago Olivoto                                   |
## | Type 'citation('metan')' to know how to cite metan      |
## | Type 'vignette('metan_start')' for a short tutorial     |
## | Visit 'https://bit.ly/pkgmetan' for a complete tutorial |
## |=========================================================|
library(rio)
# Render a table with knitr::kable() (used to produce the HTML tables).
#
# table:  object to be rendered; handed to kable() as its first argument.
# digits: number of digits forwarded to kable(); defaults to 3.
# ...:    any further arguments, forwarded unchanged to kable().
#
# booktabs = TRUE is always passed along to kable().
print_tbl <- function(table, digits = 3, ...) {
  knitr::kable(
    table,
    digits = digits,
    booktabs = TRUE,
    ...
  )
}
# Download the example dataset and read it with import() (rio package);
# setclass = "tbl" asks for the result to be returned as a tibble.
df_ge <- import("http://bit.ly/df_ge", setclass = "tbl")

Correlação linear

A função corr_coef() pode ser usada para calcular o coeficiente de correlação de Pearson com valores de p. Um mapa de calor de correlação pode ser criado com a função plot().

# All numeric variables: compute the Pearson correlation coefficients (with
# p-values) for df_ge and draw the result as a correlation heat map.
ccoef <- corr_coef(df_ge)
plot(ccoef)
## Warning: Removed 5 rows containing missing values (geom_text).

Podemos usar uma função auxiliar de seleção (select helper) para selecionar variáveis. Aqui, selecionaremos as variáveis cujos nomes contêm “A” usando contains(). (Para selecionar variáveis que começam com “C” **OU** terminam com “D”, pode-se usar union_var().)

# Restrict the correlation analysis to the columns of df_ge whose names
# contain "A" (select helper contains()).
ccoef2 <- corr_coef(df_ge, contains("A"))
# The original call passed `dígitos = 2`, a machine-translated argument name
# that the plot method does not define, so it was silently swallowed by `...`.
# digits.cor sets the number of decimals shown for the correlation
# coefficients in current metan releases.
# NOTE(review): older metan releases named this argument `digits` -- confirm
# against the installed version.
plot(ccoef2, digits.cor = 2)

A função corr_plot() pode ser usada para visualizar (gráfica e numericamente) uma matriz de correlação. Gráficos de dispersão em pares são produzidos e podem ser mostrados na diagonal superior ou inferior, o que pode ser visto como uma versão mais agradável e personalizável, baseada em ggplot2, da função pairs() do R base.

# Scatter-plot / correlation matrix for five traits of df_ge, using the
# default settings; the returned object is kept in `a`.
a <- corr_plot(df_ge, MMG, MGE, COMPES, DIAMES, NGE)


# Same traits with the lower panel switched off (lower = NULL) and the
# correlation coefficients requested for the upper panel (upper = "corr").
corr_plot(df_ge, MMG, MGE, COMPES, DIAMES, NGE,
          lower = NULL,
          upper = "corr")

# Customised appearance: point shape/size/transparency, no spacing between
# panels, boxplots on the diagonal (diag.type = "boxplot"), gray
# significance marks and axis labels turned on.
corr_plot(df_ge, MMG, MGE, COMPES, DIAMES, NGE,
          shape.point = 19,
          size.point = 2,
          alpha.point = 0.5,
          alpha.diag = 0,
          pan.spacing = 0,
          diag.type = "boxplot",
          col.sign = "gray",
          alpha.sign = 0.3,
          axis.labels = TRUE)

# Further customisation: filled points, a smoothing line (smooth = TRUE),
# black panel borders and label position "tl".
# NOTE(review): prob = 0.01 is presumably the significance threshold used to
# flag correlations, and maxsize/minsize the text-size range of the
# coefficients -- confirm in the corr_plot() documentation.
corr_plot(df_ge, MMG, MGE, COMPES, DIAMES, NGE,
          prob = 0.01,
          shape.point = 21,
          col.point = "black",
          fill.point = "orange",
          size.point = 2,
          alpha.point = 0.6,
          maxsize = 4,
          minsize = 2,
          smooth = TRUE,
          size.smooth = 1,
          col.smooth = "black",
          col.sign = "cyan",
          col.up.panel = "black",
          col.lw.panel = "black",
          col.dia.panel = "black",
          pan.spacing = 0,
          lab.position = "tl")

Também é possível usar uma variável categórica dos dados para mapear o gráfico de dispersão por cores.

# Map the scatter-plot colours to the categorical column ENV (col.by = ENV).
corr_plot(df_ge, MMG, MGE, COMPES, DIAMES, NGE, col.by = ENV)

Matrizes de correlação/covariância

A função covcor_design() pode ser usada para calcular matrizes de correlação e de covariância genéticas, fenotípicas e residuais por meio da Análise de Variância (ANOVA), usando um delineamento de blocos completos ao acaso (DBC) ou um delineamento inteiramente ao acaso (DIC).

As correlações fenotípicas ($r_p$), genotípicas ($r_g$) e residuais ($r_r$) são calculadas da seguinte forma:

$$
\begin{aligned}
r^p_{xy} &= \frac{cov^p_{xy}}{\sqrt{var^p_x \, var^p_y}} \\
r^g_{xy} &= \frac{cov^g_{xy}}{\sqrt{var^g_x \, var^g_y}} \\
r^r_{xy} &= \frac{cov^r_{xy}}{\sqrt{var^r_x \, var^r_y}}
\end{aligned}
$$

Usando os quadrados médios (MS) do método ANOVA, as variâncias (var) e as covariâncias (cov) são calculadas da seguinte forma:

$$
\begin{aligned}
cov^p_{xy} &= [(MST_{x+y} - MST_x - MST_y) / 2] / r \\
var^p_x &= MST_x / r \\
var^p_y &= MST_y / r \\
cov^r_{xy} &= (MSR_{x+y} - MSR_x - MSR_y) / 2 \\
var^r_x &= MSR_x \\
var^r_y &= MSR_y \\
cov^g_{xy} &= [(cov^p_{xy} \times r) - cov^r_{xy}] / r \\
var^g_x &= (MST_x - MSR_x) / r \\
var^g_y &= (MST_y - MSR_y) / r
\end{aligned}
$$

onde $MST$ é o quadrado médio para tratamento, $MSR$ é o quadrado médio para resíduos e $r$ é o número de repetições. A função covcor_design() retorna uma lista com as matrizes. Matrizes específicas podem ser retornadas usando o argumento type, conforme mostrado abaixo.

# Load a second example dataset (df_g) as a tibble, then compute the
# ANOVA-based correlation/covariance matrices for five response traits.
# GEN is passed as the genotype column and BLOCO as the replicate/block column.
df_g <- import("http://bit.ly/df_g", setclass = "tbl")
correl <- covcor_design(df_g,
                        gen = GEN,
                        rep = BLOCO,
                        resp = c(MMG, MGE, COMPES, DIAMES, NGE))

Correlações

# genetic correlation matrix
print_tbl(correl$geno_cor)

	MMG	MGE	COMPES	DIAMES	NGE
MMG	1.000	0.927	0.671	0.906	0.368
MGE	0.927	1.000	0.918	0.933	0.692
COMPES	0.671	0.918	1.000	0.752	0.992
DIAMES	0.906	0.933	0.752	1.000	0.561
NGE	0.368	0.692	0.992	0.561	1.000


# phenotypic correlation matrix
print_tbl(correl$phen_cor)

	MMG	MGE	COMPES	DIAMES	NGE
MMG	1.000	0.893	0.628	0.852	0.241
MGE	0.893	1.000	0.883	0.897	0.651
COMPES	0.628	0.883	1.000	0.661	0.854
DIAMES	0.852	0.897	0.661	1.000	0.496
NGE	0.241	0.651	0.854	0.496	1.000


# residual correlation matrix
print_tbl(correl$resi_cor)

	MMG	MGE	COMPES	DIAMES	NGE
MMG	1.000	0.431	0.348	0.223	-0.417
MGE	0.431	1.000	0.704	0.466	0.624
COMPES	0.348	0.704	1.000	0.050	0.408
DIAMES	0.223	0.466	0.050	1.000	0.275
NGE	-0.417	0.624	0.408	0.275	1.000

Covariâncias

# genetic covariance matrix
print_tbl(correl$geno_cov)

	MMG	MGE	COMPES	DIAMES	NGE
MMG	2941.365	1721.242	39.930	120.289	819.553
MGE	1721.242	1172.429	34.496	78.218	971.370
COMPES	39.930	34.496	1.205	2.020	44.672
DIAMES	120.289	78.218	2.020	5.992	56.286
NGE	819.553	971.370	44.672	56.286	1682.787


# phenotypic covariance matrix
print_tbl(correl$phen_cov)

	MMG	MGE	COMPES	DIAMES	NGE
MMG	3165.893	1780.539	42.506	122.810	657.574
MGE	1780.539	1256.627	37.692	81.442	1119.668
COMPES	42.506	37.692	1.449	2.038	49.899
DIAMES	122.810	81.442	2.038	6.560	61.649
NGE	657.574	1119.668	49.899	61.649	2354.128


# residual covariance matrix
print_tbl(correl$resi_cov)

	MMG	MGE	COMPES	DIAMES	NGE
MMG	673.583	177.892	7.730	7.564	-485.939
MGE	177.892	252.595	9.588	9.672	444.893
COMPES	7.730	9.588	0.734	0.056	15.681
DIAMES	7.564	9.672	0.056	1.703	16.090
NGE	-485.939	444.893	15.681	16.090	2014.021

Distância de Mahalanobis

A matriz de covariância residual e as médias podem ser usadas na função mahala() para calcular a distância de Mahalanobis.

# Mahalanobis distances computed from the means (correl$means) and the
# residual covariance matrix (correl$resi_cov), then shown as a table.
# NOTE(review): inverted = FALSE presumably states that `covar` is the plain
# (not yet inverted) covariance matrix -- confirm in the mahala() docs.
D2 <- mahala(.means = correl$means, covar = correl$resi_cov, inverted = FALSE)
print_tbl(D2)

	H1	H10	H11	H12	H13	H2	H3	H4	H5	H6	H7	H8	H9
H1	0.000	22.376	14.474	23.648	12.483	3.844	2.334	7.508	17.464	14.837	26.980	59.690	51.562
H10	22.376	0.000	2.334	8.772	9.100	35.607	18.127	10.696	2.903	18.311	5.290	18.563	13.522
H11	14.474	2.334	0.000	5.474	10.975	25.937	9.655	6.592	4.653	19.124	6.482	23.686	19.195
H12	23.648	8.772	5.474	0.000	18.393	42.726	20.782	22.110	17.671	37.538	4.956	13.675	10.465
H13	12.483	9.100	10.975	18.393	0.000	23.219	16.462	11.848	7.893	9.072	10.616	30.521	24.450
H2	3.844	35.607	25.937	42.726	23.219	0.000	5.541	10.274	24.275	14.521	44.357	85.575	77.169
H3	2.334	18.127	9.655	20.782	16.462	5.541	0.000	3.420	13.807	16.940	26.131	58.552	50.893
H4	7.508	10.696	6.592	22.110	11.848	10.274	3.420	0.000	5.399	9.827	21.315	51.358	44.707
H5	17.464	2.903	4.653	17.671	7.893	24.275	13.807	5.399	0.000	7.806	11.084	31.217	25.982
H6	14.837	18.311	19.124	37.538	9.072	14.521	16.940	9.827	7.806	0.000	25.371	54.420	49.052
H7	26.980	5.290	6.482	4.956	10.616	44.357	26.131	21.315	11.084	25.371	0.000	6.956	6.100
H8	59.690	18.563	23.686	13.675	30.521	85.575	58.552	51.358	31.217	54.420	6.956	0.000	1.796
H9	51.562	13.522	19.195	10.465	24.450	77.169	50.893	44.707	25.982	49.052	6.100	1.796	0.000

# Hierarchical clustering of the distance matrix: coerce D2 to a "dist"
# object, cluster it with hclust() (default settings) and plot the dendrogram.
D2 %>% 
  as.dist() %>% 
  hclust() %>% 
  plot()

Diagnóstico de colinearidade

Os códigos a seguir calculam um diagnóstico de colinearidade completo de uma matriz de correlação de características do preditor. Vários indicadores, como fator de inflação de variância, número de condição e determinante da matriz são considerados¹

# Collinearity diagnostic computed by colindiag() on df_ge; print() shows the
# summary (condition number, largest VIF, determinant, extreme correlations).
colin <- colindiag(df_ge)
print(colin)
## The multicollinearity in the matrix should be investigated.
## CN = 607.708
## Largest VIF = 55.4696961923099
## Matrix determinant: 1.7e-06 
## Largest correlation: ALT_PLANT x ALT_ESP = 0.932 
## Smallest correlation: COMPES x NFIL = -0.014 
## Number of VIFs > 10: 4 
## Number of correlations with r >= |0.8|: 3 
## Variables with largest weight in the last eigenvalues: 
## MGE > NGE > MMG > ALT_ESP > ALT_PLANT > DIAMES > DIAM_SAB > NFIL > COMPES > COMP_SAB

# Table of eigenvalues and the corresponding eigenvectors (element evalevet).
print_tbl(colin$evalevet)

Eigenvalues	ALT_PLANT	ALT_ESP	COMPES	DIAMES	COMP_SAB	DIAM_SAB	MGE	NFIL	MMG	NGE
5.275	-0.358	-0.346	-0.297	-0.382	-0.263	-0.283	-0.416	-0.178	-0.306	-0.265
1.669	0.106	0.066	-0.311	0.137	-0.119	-0.344	0.012	0.634	-0.398	0.420
1.448	-0.163	-0.219	0.464	-0.201	-0.389	0.452	0.085	-0.010	-0.335	0.440
0.914	0.498	0.487	-0.049	-0.260	-0.568	-0.163	0.035	-0.306	0.000	0.011
0.294	-0.155	-0.419	-0.208	0.301	-0.474	-0.194	0.421	-0.105	0.447	0.128
0.179	-0.175	0.111	-0.190	0.012	0.370	-0.215	0.221	-0.615	-0.249	0.498
0.092	0.194	-0.054	-0.504	0.465	-0.120	0.574	-0.239	-0.197	-0.220	-0.033
0.073	-0.216	0.201	0.457	0.644	-0.197	-0.317	-0.250	-0.151	-0.216	-0.135
0.047	0.668	-0.598	0.237	0.064	0.164	-0.237	-0.120	-0.147	-0.124	0.042
0.009	0.033	-0.038	-0.009	0.024	0.006	0.019	0.677	0.011	-0.514	-0.523

Diagnóstico para cada nível do fator ENV

# Same diagnostic computed separately for each level of ENV (by = ENV); the
# result prints as a tibble with one row per environment.
colin2 <- colindiag(df_ge, by = ENV)
print(colin2)         
## # A tibble: 4 x 2
##   ENV   data      
##   <chr> <list>    
## 1 A1    <colindig>
## 2 A2    <colindig>
## 3 A3    <colindig>
## 4 A4    <colindig>

Análise de trilha

Neste exemplo, a variável massa de grãos por espiga (MGE) será utilizada como resposta e todas as outras como explicativas.

# Path analysis with MGE as the response; no predictors are declared, so the
# remaining traits are used as predictors (see the "Predictors" line of the
# printed report).
pcoeff <- path_coeff(df_ge, resp = MGE)
## Weak multicollinearity. 
## Condition Number = 97.581
## You will probably have path coefficients close to being unbiased.
# Print the full path analysis report for pcoeff.
print(pcoeff)
## ----------------------------------------------------------------------------------------------
## Correlation matrix between the predictor traits
## ----------------------------------------------------------------------------------------------
##           ALT_PLANT ALT_ESP   COMPES DIAMES COMP_SAB DIAM_SAB     NFIL      MMG
## ALT_PLANT    1.0000  0.9318  0.38020 0.6613  0.32516  0.31539  0.32861  0.56854
## ALT_ESP      0.9318  1.0000  0.36265 0.6303  0.39719  0.28051  0.26481  0.56236
## COMPES       0.3802  0.3627  1.00000 0.3851  0.25541  0.91187 -0.01387  0.44210
## DIAMES       0.6613  0.6303  0.38515 1.0000  0.69746  0.38971  0.55253  0.64199
## COMP_SAB     0.3252  0.3972  0.25541 0.6975  1.00000  0.30036  0.26194  0.61870
## DIAM_SAB     0.3154  0.2805  0.91187 0.3897  0.30036  1.00000 -0.03585  0.44332
## NFIL         0.3286  0.2648 -0.01387 0.5525  0.26194 -0.03585  1.00000 -0.10876
## MMG          0.5685  0.5624  0.44210 0.6420  0.61870  0.44332 -0.10876  1.00000
## NGE          0.4584  0.3881  0.46570 0.5051  0.04894  0.41562  0.62609 -0.06516
##                NGE
## ALT_PLANT  0.45838
## ALT_ESP    0.38812
## COMPES     0.46570
## DIAMES     0.50508
## COMP_SAB   0.04894
## DIAM_SAB   0.41562
## NFIL       0.62609
## MMG       -0.06516
## NGE        1.00000
## ----------------------------------------------------------------------------------------------
## Vector of correlations between dependent and each predictor
## ----------------------------------------------------------------------------------------------
##     ALT_PLANT   ALT_ESP    COMPES    DIAMES COMP_SAB  DIAM_SAB      NFIL
## MGE 0.7534439 0.7029469 0.6685601 0.8241426 0.470931 0.6259806 0.3621447
##           MMG       NGE
## MGE 0.6730371 0.6810756
## ----------------------------------------------------------------------------------------------
## Multicollinearity diagnosis and goodness-of-fit
## ----------------------------------------------------------------------------------------------
## Condition number:  97.5813 
## Determinant:       9.241e-05 
## R-square:          0.982 
## Residual:          0.1343 
## Response:          MGE 
## Predictors:        ALT_PLANT ALT_ESP COMPES DIAMES COMP_SAB DIAM_SAB NFIL MMG NGE 
## ----------------------------------------------------------------------------------------------
## Variance inflation factors
## ----------------------------------------------------------------------------------------------
## # A tibble: 9 x 2
##   VAR          VIF
##   <chr>      <dbl>
## 1 ALT_PLANT 11.30 
## 2 ALT_ESP    9.302
## 3 COMPES     7.331
## 4 DIAMES     8.636
## 5 COMP_SAB   3.270
## 6 DIAM_SAB   6.814
## 7 NFIL       3.676
## 8 MMG        6.965
## 9 NGE        5.396
## ----------------------------------------------------------------------------------------------
## Eigenvalues and eigenvectors
## ----------------------------------------------------------------------------------------------
## # A tibble: 9 x 10
##   Eigenvalues ALT_PLANT  ALT_ESP   COMPES  DIAMES COMP_SAB DIAM_SAB     NFIL
##         <dbl>     <dbl>    <dbl>    <dbl>   <dbl>    <dbl>    <dbl>    <dbl>
## 1     4.382    -0.3957  -0.3860  -0.3207  -0.4210 -0.3025  -0.3069  -0.1968 
## 2     1.669    -0.1105  -0.07139  0.3116  -0.1422  0.1131   0.3450  -0.6359 
## 3     1.436    -0.1416  -0.1977   0.4834  -0.1783 -0.3687   0.4719   0.01084
## 4     0.9130    0.5057   0.4952  -0.03763 -0.2558 -0.5642  -0.1520  -0.3006 
## 5     0.2429    0.03453  0.3984   0.1010  -0.4234  0.5823   0.05879 -0.06332
## 6     0.1638   -0.1483   0.01160 -0.2195   0.2790  0.1830  -0.1836  -0.6531 
## 7     0.08619  -0.2376   0.08451  0.6328  -0.1318  0.03175 -0.6776   0.06203
## 8     0.06259  -0.2610   0.3031   0.2035   0.6326 -0.2275  -0.01768 -0.09131
## 9     0.04490   0.6385  -0.5511   0.2544   0.1748  0.1271  -0.2106  -0.1540 
## # ... with 2 more variables: MMG <dbl>, NGE <dbl>
## ----------------------------------------------------------------------------------------------
## Variables with the largest weight in the eigenvalue of smallest magnitude
## ----------------------------------------------------------------------------------------------
## ALT_PLANT > ALT_ESP > MMG > COMPES > DIAM_SAB > DIAMES > NFIL > NGE > COMP_SAB 
## ----------------------------------------------------------------------------------------------
## Direct (diagonal) and indirect (off-diagonal) effects
## ----------------------------------------------------------------------------------------------
##              ALT_PLANT    ALT_ESP        COMPES      DIAMES      COMP_SAB
## ALT_PLANT -0.013315310 0.04002135  0.0138641226 0.015196362 -0.0047188442
## ALT_ESP   -0.012407581 0.04294928  0.0132244307 0.014482662 -0.0057641364
## COMPES    -0.005062428 0.01557572  0.0364657233 0.008850255 -0.0037065040
## DIAMES    -0.008805612 0.02706905  0.0140445955 0.022979012 -0.0101216939
## COMP_SAB  -0.004329670 0.01705918  0.0093135921 0.016027009 -0.0145121611
## DIAM_SAB  -0.004199528 0.01204778  0.0332518264 0.008955216 -0.0043589255
## NFIL      -0.004375497 0.01137319 -0.0005059176 0.012696697 -0.0038012563
## MMG       -0.007570266 0.02415282  0.0161215357 0.014752226 -0.0089786759
## NGE       -0.006103472 0.01666954  0.0169820638 0.011606259 -0.0007102577
##                DIAM_SAB          NFIL        MMG         NGE    linear
## ALT_PLANT -0.0067498812 -0.0082032241  0.3909975  0.32635179 0.7534439
## ALT_ESP   -0.0060034106 -0.0066105076  0.3867462  0.27632993 0.7029469
## COMPES    -0.0195154051  0.0003463406  0.3040435  0.33156290 0.6685601
## DIAMES    -0.0083404905 -0.0137932898  0.4415098  0.35960124 0.8241426
## COMP_SAB  -0.0064282721 -0.0065388826  0.4254949  0.03484530 0.4709310
## DIAM_SAB  -0.0214016323  0.0008949437  0.3048852  0.29590577 0.6259806
## NFIL       0.0007672451 -0.0249636726 -0.0747987  0.44575261 0.3621447
## MMG       -0.0094878766  0.0027151156  0.6877240 -0.04639172 0.6730371
## NGE       -0.0088948788 -0.0156293919 -0.0448120  0.71196770 0.6810756
## ----------------------------------------------------------------------------------------------

Para declarar características preditoras, use o argumento pred

# Path analysis with MGE as the response and an explicitly declared set of
# predictor traits (pred).
pcoeff2 <-
  path_coeff(df_ge,
             resp = MGE,
             pred = c(MMG, COMPES, DIAMES, NGE))
## Weak multicollinearity. 
## Condition Number = 24.907
## You will probably have path coefficients close to being unbiased.
# Print the full path analysis report for pcoeff2.
print(pcoeff2)
## ----------------------------------------------------------------------------------------------
## Correlation matrix between the predictor traits
## ----------------------------------------------------------------------------------------------
##             MMG COMPES DIAMES      NGE
## MMG     1.00000 0.4421 0.6420 -0.06516
## COMPES  0.44210 1.0000 0.3851  0.46570
## DIAMES  0.64199 0.3851 1.0000  0.50508
## NGE    -0.06516 0.4657 0.5051  1.00000
## ----------------------------------------------------------------------------------------------
## Vector of correlations between dependent and each predictor
## ----------------------------------------------------------------------------------------------
##           MMG    COMPES    DIAMES       NGE
## MGE 0.6730371 0.6685601 0.8241426 0.6810756
## ----------------------------------------------------------------------------------------------
## Multicollinearity diagnosis and goodness-of-fit
## ----------------------------------------------------------------------------------------------
## Condition number:  24.9068 
## Determinant:       0.1311275 
## R-square:          0.981 
## Residual:          0.1379 
## Response:          MGE 
## Predictors:        MMG COMPES DIAMES NGE 
## ----------------------------------------------------------------------------------------------
## Variance inflation factors
## ----------------------------------------------------------------------------------------------
## # A tibble: 4 x 2
##   VAR      VIF
##   <chr>  <dbl>
## 1 MMG    4.277
## 2 COMPES 2.183
## 3 DIAMES 4.245
## 4 NGE    3.529
## ----------------------------------------------------------------------------------------------
## Eigenvalues and eigenvectors
## ----------------------------------------------------------------------------------------------
## # A tibble: 4 x 5
##   Eigenvalues      MMG  COMPES   DIAMES     NGE
##         <dbl>    <dbl>   <dbl>    <dbl>   <dbl>
## 1     2.217   -0.4726  -0.5146 -0.5837  -0.4137
## 2     1.077   -0.6640   0.1302 -0.09487  0.7302
## 3     0.6170  -0.02383 -0.7914  0.5790   0.1947
## 4     0.08902  0.5790  -0.3032 -0.5613   0.5076
## ----------------------------------------------------------------------------------------------
## Variables with the largest weight in the eigenvalue of smallest magnitude
## ----------------------------------------------------------------------------------------------
## MMG > DIAMES > NGE > COMPES 
## ----------------------------------------------------------------------------------------------
## Direct (diagonal) and indirect (off-diagonal) effects
## ----------------------------------------------------------------------------------------------
##               MMG      COMPES       DIAMES         NGE    linear
## MMG     0.7207458 0.007125379 -0.007499726 -0.04733433 0.6730371
## COMPES  0.3186425 0.016117081 -0.004499286  0.33829981 0.6685601
## DIAMES  0.4627094 0.006207415 -0.011682054  0.36690786 0.8241426
## NGE    -0.0469637 0.007505714 -0.005900382  0.72643393 0.6810756
## ----------------------------------------------------------------------------------------------

Para selecionar um conjunto de preditores com multicolinearidade mínima, use o argumento brutstep.

# Let path_coeff() pick the predictors itself (brutstep = TRUE): it selects a
# low-multicollinearity predictor set and fits a sequence of models with a
# decreasing number of predictors, as reported in the console output.
pcoeff3 <-
  path_coeff(df_ge,
             resp = MGE,
             brutstep = TRUE)
## --------------------------------------------------------------------------
## The algorithm has selected a set of 8 predictors with largest VIF = 8.634. 
## Selected predictors: ALT_ESP COMP_SAB NFIL NGE MMG DIAM_SAB COMPES DIAMES 
## A forward stepwise-based selection procedure will fit 6 models.
## --------------------------------------------------------------------------
## Adjusting the model 1 with 7 predictors (16.67% concluded)
## Adjusting the model 2 with 6 predictors (33.33% concluded)
## Adjusting the model 3 with 5 predictors (50% concluded)
## Adjusting the model 4 with 4 predictors (66.67% concluded)
## Adjusting the model 5 with 3 predictors (83.33% concluded)
## Adjusting the model 6 with 2 predictors (100% concluded)
## Done!
## --------------------------------------------------------------------------
## Summary of the adjusted models 
## --------------------------------------------------------------------------
##    Model  AIC Numpred    CN Determinant    R2 Residual maxVIF
##  MODEL_1  923       7 51.94     0.00291 0.982    0.135   7.21
##  MODEL_2  921       6 42.05     0.01919 0.982    0.135   6.61
##  MODEL_3  921       5 34.25     0.06367 0.982    0.136   5.15
##  MODEL_4  924       4 24.91     0.13113 0.981    0.138   4.28
##  MODEL_5 1234       3  4.00     0.56087 0.860    0.375   1.52
##  MODEL_6 1267       2  2.25     0.85166 0.824    0.420   1.17
## --------------------------------------------------------------------------
# Print the report of one fitted model (Model_4, the one with four predictors).
print(pcoeff3$Models$Model_4)
## ----------------------------------------------------------------------------------------------
## Correlation matrix between the predictor traits
## ----------------------------------------------------------------------------------------------
##        DIAMES COMPES      NGE      MMG
## DIAMES 1.0000 0.3851  0.50508  0.64199
## COMPES 0.3851 1.0000  0.46570  0.44210
## NGE    0.5051 0.4657  1.00000 -0.06516
## MMG    0.6420 0.4421 -0.06516  1.00000
## ----------------------------------------------------------------------------------------------
## Vector of correlations between dependent and each predictor
## ----------------------------------------------------------------------------------------------
##        DIAMES    COMPES       NGE       MMG
## MGE 0.8241426 0.6685601 0.6810756 0.6730371
## ----------------------------------------------------------------------------------------------
## Multicollinearity diagnosis and goodness-of-fit
## ----------------------------------------------------------------------------------------------
## Condition number:  24.9068 
## Determinant:       0.1311275 
## R-square:          0.981 
## Residual:          0.1379 
## Response:          MGE 
## Predictors:        DIAMES COMPES NGE MMG 
## ----------------------------------------------------------------------------------------------
## Variance inflation factors
## ----------------------------------------------------------------------------------------------
##             VIF
## DIAMES 4.244679
## COMPES 2.182940
## NGE    3.528542
## MMG    4.277252
## ----------------------------------------------------------------------------------------------
## Eigenvalues and eigenvectors
## ----------------------------------------------------------------------------------------------
##   Eigenvalues     DIAMES      COMPES         NGE        MMG
## 1  2.21731979 -0.5836962  0.09487169  0.57897124  0.5613292
## 2  1.07668055 -0.5145820 -0.13022977 -0.79140140  0.3031986
## 3  0.61697486 -0.4137466 -0.73021132  0.19470094 -0.5076383
## 4  0.08902479 -0.4725652  0.66395105 -0.02382582 -0.5790367
## ----------------------------------------------------------------------------------------------
## Variables with the largest weight in the eigenvalue of smallest magnitude
## ----------------------------------------------------------------------------------------------
## COMPES > MMG > DIAMES > NGE 
## ----------------------------------------------------------------------------------------------
## Direct (diagonal) and indirect (off-diagonal) effects
## ----------------------------------------------------------------------------------------------
##              DIAMES      COMPES         NGE        MMG    linear
## DIAMES -0.011682054 0.006207415  0.36690786  0.4627094 0.8241426
## COMPES -0.004499286 0.016117081  0.33829981  0.3186425 0.6685601
## NGE    -0.005900382 0.007505714  0.72643393 -0.0469637 0.6810756
## MMG    -0.007499726 0.007125379 -0.04733433  0.7207458 0.6730371
## ----------------------------------------------------------------------------------------------

Também é possível calcular uma análise para cada nível de um determinado fator

# Path analysis with declared predictors, fitted separately for each level
# of ENV (by = ENV).
pcoeff4 <-
  path_coeff(df_ge,
             resp = MGE,
             pred = c(MMG, COMPES, DIAMES, NGE),
             by = ENV)
## Weak multicollinearity. 
## Condition Number = 11.26
## You will probably have path coefficients close to being unbiased. 
## Weak multicollinearity. 
## Condition Number = 48.08
## You will probably have path coefficients close to being unbiased. 
## Weak multicollinearity. 
## Condition Number = 20.594
## You will probably have path coefficients close to being unbiased. 
## Weak multicollinearity. 
## Condition Number = 29.096
## You will probably have path coefficients close to being unbiased.


# diagrama de trilha
library(diagram)
## Carregando pacotes exigidos: shape

# Path analysis with four declared predictors; its coefficient matrix is the
# input for the path diagram.
pcoeff5 <-
  path_coeff(df_ge,
             resp = MGE,
             pred = c(MMG, COMPES, DIAMES, NGE))
## Weak multicollinearity. 
## Condition Number = 24.907
## You will probably have path coefficients close to being unbiased.


# Take the coefficient matrix of pcoeff5, drop its `linear` column and round
# the remaining values to three decimals; then display the result.
coeffs <- round_cols(
  remove_cols(pcoeff5$Coefficients, linear),
  digits = 3
)
coeffs
##           MMG COMPES DIAMES    NGE
## MMG     0.721  0.007 -0.007 -0.047
## COMPES  0.319  0.016 -0.004  0.338
## DIAMES  0.463  0.006 -0.012  0.367
## NGE    -0.047  0.008 -0.006  0.726

# Draw the path diagram from the rounded coefficient matrix with plotmat()
# (diagram package). curve = 0 requests straight arrows; the remaining
# arguments tune box size/colour and arrow type, position, width and length.
plotmat(coeffs,
        curve = 0,
        box.size = 0.08,
        box.prop = 0.5,
        box.col = "gray90",
        arr.type = "curved",
        arr.pos = 0.35,
        arr.lwd = 1,
        arr.length = 0.4,
        arr.width = 0.2)

Correlações canônicas

Em primeiro lugar, renomearemos as características relacionadas à espiga (COMPES, DIAMES e COMP_SAB) com o prefixo ESP_ e as relacionadas ao grão (MGE e MMG) com o prefixo GRAO_, para mostrar a usabilidade do select helper contains().

# Copy of df_ge in which the ear-related traits receive the prefix ESP_ and
# the grain-related traits the prefix GRAO_, so each group can later be
# selected by name pattern.
data_cc <- rename(
  df_ge,
  ESP_COMPES = COMPES,
  ESP_DIAMES = DIAMES,
  ESP_COMPSAB = COMP_SAB,
  GRAO_MGE = MGE,
  GRAO_MMG = MMG
)
  
# Canonical correlation analysis, typing the variable names explicitly:
# FG holds the first group of variables and SG the second group.
cc1 <- can_corr(data_cc,
                FG = c(GRAO_MGE, GRAO_MMG),
                SG = c(ESP_COMPES, ESP_DIAMES, ESP_COMPSAB))
## ---------------------------------------------------------------------------
## Matrix (correlation/covariance) between variables of first group (FG)
## ---------------------------------------------------------------------------
##           GRAO_MGE  GRAO_MMG
## GRAO_MGE 1.0000000 0.6730371
## GRAO_MMG 0.6730371 1.0000000
## ---------------------------------------------------------------------------
## Collinearity within first group 
## ---------------------------------------------------------------------------
## Weak multicollinearity in the matrix
## CN = 5.117
## Matrix determinant: 0.547021 
## Largest correlation: GRAO_MGE x GRAO_MMG = 0.673 
## Smallest correlation: GRAO_MGE x GRAO_MMG = 0.673 
## Number of VIFs > 10: 0 
## Number of correlations with r >= |0.8|: 0 
## Variables with largest weight in the last eigenvalues: 
## GRAO_MGE > GRAO_MMG 
## ---------------------------------------------------------------------------
## Matrix (correlation/covariance) between variables of second group (SG)
## ---------------------------------------------------------------------------
##             ESP_COMPES ESP_DIAMES ESP_COMPSAB
## ESP_COMPES   1.0000000  0.3851451   0.2554068
## ESP_DIAMES   0.3851451  1.0000000   0.6974629
## ESP_COMPSAB  0.2554068  0.6974629   1.0000000
## ---------------------------------------------------------------------------
## Collinearity within second group 
## ---------------------------------------------------------------------------
## Weak multicollinearity in the matrix
## CN = 6.679
## Matrix determinant: 0.4371931 
## Largest correlation: ESP_DIAMES x ESP_COMPSAB = 0.697 
## Smallest correlation: ESP_COMPES x ESP_COMPSAB = 0.255 
## Number of VIFs > 10: 0 
## Number of correlations with r >= |0.8|: 0 
## Variables with largest weight in the last eigenvalues: 
## ESP_DIAMES > ESP_COMPSAB > ESP_COMPES 
## ---------------------------------------------------------------------------
## Matrix (correlation/covariance) between FG and SG
## ---------------------------------------------------------------------------
##          ESP_COMPES ESP_DIAMES ESP_COMPSAB
## GRAO_MGE  0.6685601  0.8241426   0.4709310
## GRAO_MMG  0.4421011  0.6419870   0.6187001
## ---------------------------------------------------------------------------
## Correlation of the canonical pairs and hypothesis testing 
## ---------------------------------------------------------------------------
##            Var  Percent       Sum      Corr  Lambda     Chisq DF p_val
## U1V1 0.8430909 79.09135  79.09135 0.9181998 0.12194 319.84591  6     0
## U2V2 0.2228801 20.90865 100.00000 0.4721018 0.77712  38.32842  2     0
## ---------------------------------------------------------------------------
## Canonical coefficients of the first group 
## ---------------------------------------------------------------------------
##                   U1         U2
## GRAO_MGE -0.98237574  0.9289894
## GRAO_MMG -0.02591325 -1.3518180
## ---------------------------------------------------------------------------
## Canonical coefficients of the second group 
## ---------------------------------------------------------------------------
##                     V1         V2
## ESP_COMPES  -0.4445443  0.1353212
## ESP_DIAMES  -0.8650570  0.6712820
## ESP_COMPSAB  0.1955781 -1.3476588
## ---------------------------------------------------------------------------
## Canonical loads of the first group 
## ---------------------------------------------------------------------------
##                  U1          U2
## GRAO_MGE -0.9998163  0.01916566
## GRAO_MMG -0.6870886 -0.72657362
## ---------------------------------------------------------------------------
## Canonical loads of the second group 
## ---------------------------------------------------------------------------
##                     V1          V2
## ESP_COMPES  -0.7277648  0.04966103
## ESP_DIAMES  -0.8998626 -0.21654173
## ESP_COMPSAB -0.5213067 -0.84490257

# Same analysis using select helpers: each group is picked by the prefix
# contained in the column names instead of listing the columns.
cc2 <- can_corr(data_cc,
                FG = contains("GRAO_"),
                SG = contains("ESP_"))
## ---------------------------------------------------------------------------
## Matrix (correlation/covariance) between variables of first group (FG)
## ---------------------------------------------------------------------------
##           GRAO_MGE  GRAO_MMG
## GRAO_MGE 1.0000000 0.6730371
## GRAO_MMG 0.6730371 1.0000000
## ---------------------------------------------------------------------------
## Collinearity within first group 
## ---------------------------------------------------------------------------
## Weak multicollinearity in the matrix
## CN = 5.117
## Matrix determinant: 0.547021 
## Largest correlation: GRAO_MGE x GRAO_MMG = 0.673 
## Smallest correlation: GRAO_MGE x GRAO_MMG = 0.673 
## Number of VIFs > 10: 0 
## Number of correlations with r >= |0.8|: 0 
## Variables with largest weight in the last eigenvalues: 
## GRAO_MGE > GRAO_MMG 
## ---------------------------------------------------------------------------
## Matrix (correlation/covariance) between variables of second group (SG)
## ---------------------------------------------------------------------------
##             ESP_COMPES ESP_DIAMES ESP_COMPSAB
## ESP_COMPES   1.0000000  0.3851451   0.2554068
## ESP_DIAMES   0.3851451  1.0000000   0.6974629
## ESP_COMPSAB  0.2554068  0.6974629   1.0000000
## ---------------------------------------------------------------------------
## Collinearity within second group 
## ---------------------------------------------------------------------------
## Weak multicollinearity in the matrix
## CN = 6.679
## Matrix determinant: 0.4371931 
## Largest correlation: ESP_DIAMES x ESP_COMPSAB = 0.697 
## Smallest correlation: ESP_COMPES x ESP_COMPSAB = 0.255 
## Number of VIFs > 10: 0 
## Number of correlations with r >= |0.8|: 0 
## Variables with largest weight in the last eigenvalues: 
## ESP_DIAMES > ESP_COMPSAB > ESP_COMPES 
## ---------------------------------------------------------------------------
## Matrix (correlation/covariance) between FG and SG
## ---------------------------------------------------------------------------
##          ESP_COMPES ESP_DIAMES ESP_COMPSAB
## GRAO_MGE  0.6685601  0.8241426   0.4709310
## GRAO_MMG  0.4421011  0.6419870   0.6187001
## ---------------------------------------------------------------------------
## Correlation of the canonical pairs and hypothesis testing 
## ---------------------------------------------------------------------------
##            Var  Percent       Sum      Corr  Lambda     Chisq DF p_val
## U1V1 0.8430909 79.09135  79.09135 0.9181998 0.12194 319.84591  6     0
## U2V2 0.2228801 20.90865 100.00000 0.4721018 0.77712  38.32842  2     0
## ---------------------------------------------------------------------------
## Canonical coefficients of the first group 
## ---------------------------------------------------------------------------
##                   U1         U2
## GRAO_MGE -0.98237574  0.9289894
## GRAO_MMG -0.02591325 -1.3518180
## ---------------------------------------------------------------------------
## Canonical coefficients of the second group 
## ---------------------------------------------------------------------------
##                     V1         V2
## ESP_COMPES  -0.4445443  0.1353212
## ESP_DIAMES  -0.8650570  0.6712820
## ESP_COMPSAB  0.1955781 -1.3476588
## ---------------------------------------------------------------------------
## Canonical loads of the first group 
## ---------------------------------------------------------------------------
##                  U1          U2
## GRAO_MGE -0.9998163  0.01916566
## GRAO_MMG -0.6870886 -0.72657362
## ---------------------------------------------------------------------------
## Canonical loads of the second group 
## ---------------------------------------------------------------------------
##                     V1          V2
## ESP_COMPES  -0.7277648  0.04966103
## ESP_DIAMES  -0.8998626 -0.21654173
## ESP_COMPSAB -0.5213067 -0.84490257

Gráfico de barras

# Bar plot of MMG (y) for each level of GEN (x) in df_g.
# NOTE(review): lab.bar = 1:13 presumably supplies one label per bar (13
# genotypes) -- confirm in the plot_bars() documentation.
plot_bars(df_g,
          x = GEN,
          y = MMG,
          lab.bar = 1:13)

# Bar plot of MMG for the combination of two factors, ENV and GEN.
plot_factbars(df_ge, ENV, GEN, resp = MMG)

## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Spectral is 11
## Returning the palette you asked for with that many colors

Olivoto, T., Souza, V. Q., Nardino, M., Carvalho, I. R., Ferrari, M., Pelegrin, A. J., Szareski, V. J., & Schmidt, D. (2017). Multicollinearity in path analysis: A simple method to reduce its effects. Agronomy Journal, 109(1), 131–142. https://doi.org/10.2134/agronj2016.04.0196 ↩︎

Last updated on Jul 17, 2021

Edit this page