############################################################## ############# code used for GCTA GREML analysis ############## ############################################################# ## GCTA software was developed by Jian Yang with supports from Peter Visscher, Mike Goddard and Hong Lee (https://cnsgenomics.com/software/gcta/#Overview) ## the software supports the use of multiple threads using the option --threads or --thread-num # create the genetic relationship matrix using 10 subsets of the genetic data in plink binary format for i in $(seq 1 10); do ~/gcta64 \ --make-grm-part 10 ${i} \ --bfile ~/plink_data \ --keep ~/cases.vs.controls \ ## keep subset of subjects of interest passing quality control --extract ~/post_qc_snp_list.txt \ ## extract SNPs that passed quality control --out data_gcta done # concatenate the different subsets: cat data_gcta.part_10*.grm.id > data_GCTA.grm.id cat data_gcta.part_10*.grm.bin > data_GCTA.grm.bin cat data_gcta.part_10*.grm.N.bin > data_GCTA.grm.N.bin # remove the subsets: rm data_gcta.part_10* # adjust GRM for incomplete tagging of causal SNPs: ~/gcta64 \ --grm data_GCTA \ --grm-adj 0 \ --make-grm \ --out data_GTCA.adjusted \ # remove the previous data rm data_GCTA.grm.* # Remove related subjects using grm-cutoff 0.05 ~/gcta64 \ --grm data_GTCA.adjusted \ --grm-cutoff 0.05 \ --make-grm \ --out data_GTCA.adjusted.unrel \ # Run GREML-SC ~/gcta64 \ --grm data_GTCA.adjusted.unrel \ --reml \ --reml-no-constrain \ --pheno ~/pheno \ --covar ~/categorical_cov \ ## file with categorical covariates --qcovar ~/quantitative_cov \ ## file with quantitative covariates --out data_GCTA.adjust.unrel.GREML_SC ## output file ############################################################## ############# code used for GCTB analysis ############## ############################################################# ## GCTB software was developed by Jian Zeng with supports from Jian Yang, Futao Zhang and Zhili Zheng (https://cnsgenomics.com/software/gctb/#Overview) ## MPI version 2.0 of GCTB (https://cnsgenomics.com/software/gctb/#Download) ## This requires MPI and two C++ libraries i.e. Eigen3 and Boost mpirun -np 5 ~/gctb \ # np is the number of CPU cores used for the analysis --bfile ~/plink_data \ # plink format input data --pheno ~/phenotype_file \ # phenotype file including only subjects after quality control --extract ~/post_qc_snp_list.txt \ # extract SNPs that passed quality control --covar ~/cov_file \ # name of covariate file, categorical covariates were converted to dummy variables --bayes S \ # Bayesian alphabet for the analysis --pi 0.05 \ # initial value of pi (standard value) --hsq 0.5 \ # initial value of hsq (standard value) --S 0 \ # initial value of S (standard value) --chain-length 21000 \ # number of simulations --burn-in 1000 \ # number of burn-in simulations --out-freq 500 \ # frequency of output printing in the .log file --seed 23 \ # set seed for reproducibility of the results --out ~/dep_heritability # name of output ################################################################################## ################# heritability conversion to liability scale ##################### ################################################################################## #### see Yap et al. 2018, https://doi.org/10.1038/s41467-018-04807-3 h2liab_twothresholds <- function(Ku, Kl, h2o, p) { #Ku = population prevalence of cases #Kl = population prevalence of controls #h2o = observed scale heritability #p = sample prevalence #case_threshold = the position of the case threshold on the standard normal distribution #control_threshold = the position of the control threshold on the standard normal distribution #Zu = height of the standard normal distribution at the case prevalence #Zl = height of the standard normal distribution at the control prevalence #C = adjustment factor case_threshold = qnorm(1-Ku) control_threshold = qnorm(Kl) Zu = dnorm(case_threshold) Zl = dnorm(control_threshold) C = p*(1-p)*(((Zu/Ku)+(Zl/Kl))^2) h2l = h2o/C }