Skip to content

Commit

Permalink
Merge pull request #200 from hakyimlab/adj_inflation
Browse files Browse the repository at this point in the history
SprediXcan variance control for inflation
  • Loading branch information
hakyim authored Nov 12, 2024
2 parents e7ecd1e + 1a7f5f2 commit 7b5ee74
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 3 deletions.
47 changes: 46 additions & 1 deletion software/M04_zscores.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@

import logging
import os

import sqlite3
import pandas as pd
import numpy as np
from scipy.stats import chi2
from timeit import default_timer as timer

from metax import Logging
Expand All @@ -14,6 +17,42 @@
from metax.metaxcan import AssociationCalculation
from metax.metaxcan import Utilities as MetaxcanUtilities

#calibration functions
def get_phi(db):
# get phi from SQLite database
try:
conn = sqlite3.connect(db)
extras = pd.read_sql_query("SELECT gene, phi FROM extra", conn)
extras = extras.dropna(subset=['phi'])
return extras
except (sqlite3.DatabaseError, sqlite3.OperationalError) as e:
# Log any database-related errors
logging.error(f"An error occurred while accessing the database: {e}")
finally:
# Ensure the connection is closed
if conn:
conn.close()

def correct_inf_phi(xcan_df, predict_db, N, h2):

extras = get_phi(predict_db)
xcan_df = xcan_df.merge(extras, on='gene', how='inner')

xcan_df['uncalibrated_pvalue'] = xcan_df['pvalue']
xcan_df['uncalibrated_zscore'] = xcan_df['zscore']

#QC: Replace negative phi values with 0
xcan_df['phi'] = np.where(xcan_df['phi'] < 0, 0, xcan_df['phi'])

# Calibration value
denominator = 1 + (xcan_df['phi'] * N * h2)

# calibrated p-value
xcan_df['pvalue'] = chi2.sf(xcan_df['zscore']**2 / denominator, df=1)
# calibrated z-score
xcan_df['zscore'] = np.sqrt(chi2.ppf(xcan_df['pvalue'],df = 1)) * np.sign(xcan_df['zscore'])
logging.info("The pvalue and zscore have been calibrated successfully")
return xcan_df

def run_metaxcan(args, context):
logging.info("Started metaxcan association")
Expand Down Expand Up @@ -48,6 +87,12 @@ def run_metaxcan(args, context):
additional = AssociationCalculation.dataframe_from_aditional_stats(additional)
results = MetaxcanUtilities.merge_additional_output(results, additional, context, args.remove_ens_version)

if args.gwas_h2 is not None and args.gwas_N is not None:
logging.info("Calibrating pvalue and zscore using phi in model, N and h2")
results = correct_inf_phi(results, args.model_db_path, args.gwas_N, args.gwas_h2)
else:
logging.warning("IMPORTANT: The pvalue and zscore are uncalibrated for inflation")

if args.output_file:
Utilities.ensure_requisite_folders(args.output_file)
results.to_csv(args.output_file, index=False, na_rep="NA")
Expand Down
6 changes: 5 additions & 1 deletion software/SPrediXcan.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def run(args):
parser.add_argument("--model_db_snp_key", help="Specify a key to use as snp_id")
#GWAS betas
parser.add_argument("--gwas_file", help="Load a single GWAS file. (Alternative to providing a gwas_folder and gwas_file_pattern)")

parser.add_argument("--gwas_h2", help="GWAS heritability (h2)", type=float, default=None, required=False)
parser.add_argument("--gwas_N", help="GWAS sample size (N)", type=int, default=None, required=False)
parser.add_argument("--gwas_folder", help="name of folder containing GWAS data. All files in the folder are assumed to belong to a single study.")
parser.add_argument("--gwas_file_pattern", help="Pattern to recognice GWAS files in folders (in case there are extra files and you don't want them selected).")

Expand All @@ -61,6 +62,9 @@ def run(args):

Logging.configureLogging(int(args.verbosity))

if args.gwas_h2 is None or args.gwas_N is None:
logging.warning("Missing --gwas_h2 and --gwas_N are required to calibrate the pvalue and zscore.")

if args.throw:
run(args)
else:
Expand Down
13 changes: 12 additions & 1 deletion software/metax/Logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,21 @@ def configureLogging(level=5, target=sys.stderr):
kludge=True
logger.handlers.clear()

# create console handler and set level to info
RED = "\033[91m"
RESET = "\033[0m"

# A custom logging formatter for warning
class CustomFormatter(logging.Formatter):
def format(self, record):
if record.levelno == logging.WARNING:
record.msg = f"{RED}{record.msg}{RESET}"
return super().format(record)

# create console handler and set level to info
handler = logging.StreamHandler(target)
handler.setLevel(level)
formatter = logging.Formatter("%(levelname)s - %(message)s")
formatter = CustomFormatter("%(levelname)s - %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)

Expand Down

0 comments on commit 7b5ee74

Please sign in to comment.