###------ INSTALLATION AND PACKAGE IMPORT -------------------------------------- # put the full path to the source file mvlognCorrEst_1.0.0.tar.gz devtools::install_local("C:\\Users\\AlessandroDeCarlo\\Desktop\\DT\\projs\\Rpack\\versioneFinale\\mvLognCorrEst_1.0.0.tar.gz", upgrade = "never") library(mvLognCorrEst) library(corrplot)#plot of matrices library(RColorBrewer) ###---- SCENARIO 1: ESTIMATION OF INDIRECT CORRELATIONS & SAMPLING (full workflow) ------------- #useful command for setting the wd as the folder in which Rscript is saved #setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) #define initial correlation matrix - this is the input correlation matrix used in the debtgi case study # NA values represent the indirect correlations to be estimated values_corr <- c(1.00,-0.60,-0.75,NA,NA,NA,NA,NA,0,0,0, -0.60,1.00,0.95,0.75,NA,-0.6,NA,0.75,0,0,0, -0.75,0.95,1.00,0.60,NA,NA,NA,0.75,0,0,0, NA,0.75,0.60,1.00,NA,NA,0.6,0.75,0,0,0, NA,NA,NA,NA,1.0,NA,0.6,NA,0,0,0, NA,-0.60,NA,NA,NA,1.0,NA,NA,0,0,0, NA,NA,NA,0.60,0.6,NA,1.0,NA,0,0,0, NA,0.75,0.75,0.75,NA,NA,NA,1.00,0,0,0, 0,0,0,0,0,0,0,0,1.0,0.6,0, 0,0,0,0,0,0,0,0,0.6,1.0,0, 0,0,0,0,0,0,0,0,0,0,1) varNames <- c("$m[u]","$mu[u]","$g[u]","$delta[Vmax]","$R[b]","$Vu[10]" ,"$W[0]","$IV[u50]","$k[2]","$IC[50]","$k[1]") c_start <- matrix(values_corr, ncol = 11, nrow = 11) rownames(c_start) <- varNames colnames(c_start)<- varNames colori <- brewer.pal(7, "Set3") pal <- colorRampPalette(colori) #plot correlation graph plot_graph_corr(c_start,"Graph of Initial Correlation Matrix") #alternative correlation matrix plot #COL1(sequential = c("Oranges", "Purples", "Reds", "Blues", "Greens", # "Greys", "OrRd", "YlOrRd", "YlOrBr", "YlGn"), n = 200) corrplot(c_start, method="color",number.digits = 2, addCoef.col = "black", # Add coefficient of correlation tl.col="black", tl.srt=45, #Text label color and rotation # hide correlation coefficient on the principal diagonal diag=T,col.lim = c(20, 30), col = pal(7))#COL1('YlGn')) #get bounds of correlations estimate_corr_bounds(c_start, widen_factor = 0.3) #estimate indirect correlations corr_est_results <- estimate_indirect_corr(c_start,widen_factor = 0.3) #show final correlation matrix with estimated values c_final <- corr_est_results$corrMatFinal plot_graph_corr(c_final,"Graph of Final Correlation Matrix") #plot correlations estimated corrplot(c_final, method="color",number.digits = 2, addCoef.col = "black", # Add coefficient of correlation tl.col="black", tl.srt=45, #Text label color and rotation # hide correlation coefficient on the principal diagonal diag=T,col.lim = c(20, 30), col = pal(7))#COL1('YlGn')) # use the obtained correlation matrix as correlation structure for a set of 11 # Log-Normal Variables. Define mean and sd of Log-Normal variables, and, as preliminary step # check that these values are compatibile with correlations imposed according with theorical bounds # type ?validate_logN_corrMatrix for more info or check PDF documentation. means <- as.array(c(0.01459310, 13.32131000, 10.60795000, 0.04146360, 0.07309954, 0.00116000 ,22.23622000, 5.30000000 ,0.12929000 ,0.02530008 ,0.38162000 )) # sd and cv of scenario 1 with low variability sd1 <- as.array(c(0.004377930, 0.666065500 ,0.530397500 ,0.008292720 ,0.014619908, 0.000232000, 0.444724400, 0.530000000, 0.025858000, 0.002530008, 0.114486000)) cv1 <- as.array(c(0.30, 0.05, 0.05, 0.20, 0.20, 0.20, 0.02, 0.10, 0.20, 0.10, 0.30)) # sd and cv of scenario 2 with higher variability cv2 <- as.array(c( 0.80, 0.50, 0.50, 0.60, 0.80, 0.60, 0.05, 0.60, 0.40, 0.60, 0.80 )) #6.66065500 sd2 <- as.array(c(0.01167448,20 , 5.30397500, 0.02487816, 0.05847963, 0.00069600, 1.11181100, 3.18000000, 0.05171600, 0.01518005, 0.30529600)) #choose scenario (high or low variability) cv_used <- cv2 #cv1 sd_used <-as.array(means*cv2)#sd2 #cv1 validation_of_logn <- validate_logN_corrMatrix(means,sd_used,c_final) #rapid check of tested condition validation_of_logn$is_valid_logN_corrMat #does each couple of variables respect constrains on correlation values? validation_of_logn$can_log_transf_covMat #can normal covariance matrix be computed with a non linear transformation? validation_of_logn$validation_res #complete results of tests: in this way one can check for each couple of variables the two # conditions tested covMatrixLogN <- sd_used%*%t(sd_used)*c_final # the conditions above described are done also in this function that converts Log-Normal parameters to Normal ones normalParams <- logn_to_normal(means,covMatrixLogN) #get parameters of normal distributions #sampling from multivariate lognormal distribution samples <- mvlogn(100000,mu=means,covMatrix=covMatrixLogN) #plot histogram of parameters and print means and sd for (i in 1:ncol(c_final)) { hist(samples$samples[,i],breaks = 30,main=paste(varNames[i],"mean", mean(samples$samples[,i]), "sd", sd(samples$samples[,i]))) } print("mean") apply(samples$samples, 2, mean) print("sd") apply(samples$samples, 2, sd) #plot correlations of generated samples corrplot(cor(samples$samples), method="color",number.digits = 2, addCoef.col = "black", # Add coefficient of correlation tl.col="black", tl.srt=45, #Text label color and rotation # hide correlation coefficient on the principal diagonal diag=T) #plot corrMat after approximation comment if running scenario 1 with low cvs corrplot(samples$logNcorr_adj, method="color",number.digits = 2, addCoef.col = "black", # Add coefficient of correlation tl.col="black", tl.srt=45, #Text label color and rotation # hide correlation coefficient on the principal diagonal diag=T,col.lim = c(20, 30), col = pal(7))#COL1('YlGn')) ###---- ESTIMATE_CORR_BOUNDS EXAMPLE-------------------------------------------- # create a correlation matrix, define some correlations c_bounds <- diag(rep(1,6)) c_bounds [1,2] <- -0.6 c_bounds [1,3] <- -0.75 c_bounds [2,3] <-0.95 c_bounds [2,4] <- 0.75 c_bounds [2,6] <- -0.6 c_bounds <- c_bounds +t(c_bounds )-diag(rep(1,ncol(c_bounds))) #set to NA indirect correlations c_bounds[c_bounds ==0]<-NA #plot correlation graph plot_graph_corr(c_bounds ,"Graph of Correlation Matrix") #get bounds of correlations estimate_corr_bounds(c_bounds ) ###---- ESTIMATE_INDIRECT_CORR EXAMPLE------------------------------------------ c_est <- diag(rep(1,8)) c_est[1,2] <- 0.9 c_est[1,3] <- 0.2 c_est[2,3] <- -0.2 c_est[2,4] <- 0.75 c_est[2,6]<- -0.5 c_est[4,7]<- 0.1 c_est[5,6] <- 0.6 c_est[3,8] <- -0.2 c_est <- c_est+t(c_est)-diag(rep(1,8)) c_est[c_est==0] <- NA #i want to estimate all indirect correlations plot_graph_corr(c_est ,"Graph of Initial Correlation Matrix") z<-estimate_indirect_corr(c_est) #show final values plot_graph_corr(z$corrMatFinal ,"Graph of Final Correlation Matrix") ###--- GET_LOGN_BOUNDS EXAMPLE ------------------------------------------------- sd_bounds <- array(c(1,2,0.5,3)) #array with standard deviations of variables mu_bounds <- array(c(3,0.2,1,8)) #array with means of variables get_logNcorr_bounds(mu_bounds,sd_bounds) ###--- LOGN_TO_NORMAL and NORMAL TO LOGNORMAL ---------------------------------- #define correlations corr<- diag(rep(1,4)) corr[1,4] <- 0.9 corr[4,1]<-corr[1,4] corr[2,4] <- -0.3 corr[4,2] <- corr[2,4] corr[3,2] <- -0.2 corr[2,3] <- corr[3,2] #define sd of variables sdLN <- array(c(rep(1,4))) #obtain covariance matrix covMatrixLN <- sdLN%*%t(sdLN)*corr #define mean vector muLN <- array(rep(2.5,4)) #get normal params normal_params<-logn_to_normal(muLN,covMatrixLN) #go back to logN logNormal_params <- normal_to_logn(normal_params$muN,normal_params$sigmaN) # it will return the same values stored in muLN and covMatrixLN ###----- VALIDATE LOGN CORR MATRIX -------------------------------------------- c_valid<- diag(rep(1,4)) c_valid[1,4] <- 0.9 c_valid[4,1]<-c_valid[1,4] c_valid[2,4] <- -0.2 c_valid[4,2] <- c_valid[2,4] c_valid[3,2] <- -0.1 c_valid[2,3] <- c_valid[3,2] #input standard deviations sd_valid <- array(c(rep(2,4))) #input means mu_valid <- array(rep(2.5,4)) validate_logN_corrMatrix(mu_valid,sd_valid,c_valid) ###------ MVLOGN EXAMPLES ------------------------------------------------------ #different ways to run this function #define correlations c_samps<- diag(rep(1,4)) c_samps[1,4] <- 0.9 c_samps[4,1]<-c_samps[1,4] c_samps[2,4] <- -0.3 c_samps[4,2] <- c_samps[2,4] c_samps[3,2] <- -0.2 c_samps[2,3] <- c_samps[3,2] #define sd of variables sd_samps <- array(c(rep(3,4))) #obtain covariance matrix covMatrix_samps <- sd_samps%*%t(sd_samps)*c_samps #define mean vector mu_samps <- array(rep(5,4)) cv_samps <- sd_samps/mu_samps Nsamples <- 1000 #call 1 call1<-mvlogn(Nsamples,mu=mu_samps,covMatrix=covMatrix_samps) #call 2 call2<-mvlogn(Nsamples,mu=mu_samps, sd=sd_samps, corrMatrix=c_samps) #call 3 call3<-mvlogn(Nsamples, mu=mu_samps, cv=cv_samps, corrMatrix=c_samps) # call 4 call4<-mvlogn(Nsamples, sd = sd_samps, cv=cv_samps, corrMatrix=c_samps) #call 5 call5<-mvlogn(Nsamples, sd=sd_samps, cv=cv_samps, covMatrix=covMatrix_samps)