##############################################################
############# code used for GCTA GREML analysis ##############
#############################################################

## GCTA software was developed by Jian Yang with supports from Peter Visscher, Mike Goddard and Hong Lee (https://cnsgenomics.com/software/gcta/#Overview)
## the software supports the use of multiple threads using the option --threads or --thread-num 


# create the genetic relationship matrix using 10 subsets of the genetic data in plink binary format
for i in $(seq 1 10); do 
~/gcta64 \
--make-grm-part 10 ${i} \
--bfile ~/plink_data \
--keep ~/cases.vs.controls \ ## keep subset of subjects of interest passing quality control
--extract ~/post_qc_snp_list.txt \ ## extract SNPs that passed quality control
--out data_gcta
done

# concatenate the different subsets:
cat data_gcta.part_10*.grm.id > data_GCTA.grm.id
cat data_gcta.part_10*.grm.bin > data_GCTA.grm.bin
cat data_gcta.part_10*.grm.N.bin > data_GCTA.grm.N.bin

# remove the subsets:
rm data_gcta.part_10*

# adjust GRM for incomplete tagging of causal SNPs:
~/gcta64 \
--grm data_GCTA \
--grm-adj 0 \
--make-grm \
--out data_GTCA.adjusted \

# remove the previous data
rm data_GCTA.grm.*

# Remove related subjects using grm-cutoff 0.05
~/gcta64 \
--grm data_GTCA.adjusted \
--grm-cutoff 0.05 \
--make-grm \
--out data_GTCA.adjusted.unrel \

# Run GREML-SC
~/gcta64 \
--grm data_GTCA.adjusted.unrel \
--reml \
--reml-no-constrain \
--pheno ~/pheno \
--covar ~/categorical_cov \ ## file with categorical covariates
--qcovar ~/quantitative_cov \ ## file with quantitative covariates
--out data_GCTA.adjust.unrel.GREML_SC ## output file


##############################################################
############# code used for GCTB analysis ##############
#############################################################

## GCTB software was developed by Jian Zeng with supports from Jian Yang, Futao Zhang and Zhili Zheng (https://cnsgenomics.com/software/gctb/#Overview)

## MPI version 2.0 of GCTB (https://cnsgenomics.com/software/gctb/#Download)
## This requires MPI and two C++ libraries i.e. Eigen3 and Boost

mpirun -np 5 ~/gctb \ # np is the number of CPU cores used for the analysis
--bfile ~/plink_data \ # plink format input data
--pheno ~/phenotype_file \ # phenotype file including only subjects after quality control
--extract ~/post_qc_snp_list.txt \ # extract SNPs that passed quality control
--covar ~/cov_file \ # name of covariate file, categorical covariates were converted to dummy variables
--bayes S \ # Bayesian alphabet for the analysis
--pi 0.05 \ # initial value of pi (standard value) 
--hsq 0.5 \ # initial value of hsq (standard value)
--S 0 \ # initial value of S (standard value)
--chain-length 21000 \ # number of simulations
--burn-in 1000 \ # number of burn-in simulations
--out-freq 500 \ # frequency of output printing in the .log file
--seed 23 \ # set seed for reproducibility of the results
--out ~/dep_heritability # name of output


##################################################################################
################# heritability conversion to liability scale #####################
##################################################################################

#### see Yap et al. 2018, https://doi.org/10.1038/s41467-018-04807-3

h2liab_twothresholds <- function(Ku, Kl, h2o, p) {
  #Ku = population prevalence of cases
  #Kl = population prevalence of controls
  #h2o = observed scale heritability
  #p = sample prevalence
  #case_threshold = the position of the case threshold on the standard normal distribution
  #control_threshold = the position of the control threshold on the standard normal distribution
  #Zu = height of the standard normal distribution at the case prevalence
  #Zl = height of the standard normal distribution at the control prevalence
  #C = adjustment factor
  case_threshold = qnorm(1-Ku)

  control_threshold = qnorm(Kl)

  Zu = dnorm(case_threshold)

  Zl = dnorm(control_threshold)

  C = p*(1-p)*(((Zu/Ku)+(Zl/Kl))^2)

  h2l = h2o/C
}