Skip to content

Commit

Permalink
initial inflation adjustment feature
Browse files Browse the repository at this point in the history
  • Loading branch information
Fnyasimi authored Oct 8, 2024
1 parent 4b3ad95 commit 5c478f9
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 2 deletions.
47 changes: 46 additions & 1 deletion software/M04_zscores.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@

import logging
import os

import sqlite3
import pandas as pd
import numpy as np
from scipy.stats import chi2
from timeit import default_timer as timer

from metax import Logging
Expand All @@ -14,6 +17,42 @@
from metax.metaxcan import AssociationCalculation
from metax.metaxcan import Utilities as MetaxcanUtilities

#calibration functions
def get_phi(db):
# get phi from SQLite database
try:
conn = sqlite3.connect(db)
extras = pd.read_sql_query("SELECT gene, phi FROM extra", conn)
extras = extras.dropna(subset=['phi'])
return extras
except (sqlite3.DatabaseError, sqlite3.OperationalError) as e:
# Log any database-related errors
logging.error(f"An error occurred while accessing the database: {e}")
finally:
# Ensure the connection is closed
if conn:
conn.close()

def correct_inf_phi(xcan_df, predict_db, N, h2):

extras = get_phi(predict_db)
xcan_df = xcan_df.merge(extras, on='gene', how='inner')

xcan_df['uncalibrated_pvalue'] = xcan_df['pvalue']
xcan_df['uncalibrated_zscore'] = xcan_df['zscore']

#QC: Replace negative phi values with 0
xcan_df['phi'] = np.where(xcan_df['phi'] < 0, 0, xcan_df['phi'])

# Calibration value
denominator = 1 + (xcan_df['phi'] * N * h2)

# calibrated p-value
xcan_df['pvalue'] = chi2.sf(xcan_df['zscore']**2 / denominator, df=1)
# calibrated z-score
xcan_df['zscore'] = np.sqrt(chi2.ppf(xcan_df['pvalue'],df = 1)) * np.sign(xcan_df['zscore'])
logging.info("The pvalue and zscore have been calibrated successfully")
return xcan_df

def run_metaxcan(args, context):
logging.info("Started metaxcan association")
Expand Down Expand Up @@ -48,6 +87,12 @@ def run_metaxcan(args, context):
additional = AssociationCalculation.dataframe_from_aditional_stats(additional)
results = MetaxcanUtilities.merge_additional_output(results, additional, context, args.remove_ens_version)

if args.gwas_h2 is not None and args.gwas_N is not None:
logging.info("Calibrating pvalue and zscore using phi in model, N and h2")
results = correct_inf_phi(results, args.model_db_path, args.gwas_N, args.gwas_h2)
else:
logging.warning("IMPORTANT: The pvalue and zscore are uncalibrated for inflation")

if args.output_file:
Utilities.ensure_requisite_folders(args.output_file)
results.to_csv(args.output_file, index=False, na_rep="NA")
Expand Down
6 changes: 5 additions & 1 deletion software/SPrediXcan.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def run(args):
parser.add_argument("--model_db_snp_key", help="Specify a key to use as snp_id")
#GWAS betas
parser.add_argument("--gwas_file", help="Load a single GWAS file. (Alternative to providing a gwas_folder and gwas_file_pattern)")

parser.add_argument("--gwas_h2", help="GWAS heritability (h2)", type=float, default=None, required=False)
parser.add_argument("--gwas_N", help="GWAS sample size (N)", type=int, default=None, required=False)
parser.add_argument("--gwas_folder", help="name of folder containing GWAS data. All files in the folder are assumed to belong to a single study.")
parser.add_argument("--gwas_file_pattern", help="Pattern to recognice GWAS files in folders (in case there are extra files and you don't want them selected).")

Expand All @@ -61,6 +62,9 @@ def run(args):

Logging.configureLogging(int(args.verbosity))

if args.gwas_h2 is None or args.gwas_N is None:
logging.warning("Missing --gwas_h2 and --gwas_N are required to calibrate the pvalue and zscore.")

if args.throw:
run(args)
else:
Expand Down

0 comments on commit 5c478f9

Please sign in to comment.