Source code for fermi_stacking.preprocessing.Preprocess

############################################################
# 
# Written by Chris Karwin; April 2022; Clemson University.
# 
# Significant restructuring by Chris Karwin Feb 2024; 
# NASA Goddard Space Flight Center. 
#
# Based on original code from Marco Ajello, Vaidehi Paliya, and Abhishek Desai.
#
# Purpose: Main script for Fermi-LAT stacking analysis.
#
###########################################################

########################################
# Imports
from time import sleep
from random import randint
import resource
import random,shutil,yaml
import os,sys
from math import *
import numpy as np
import matplotlib 
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from fermipy.gtanalysis import GTAnalysis
import gc
import pyLikelihood 
from BinnedAnalysis import *
from astropy.io import fits
import pandas as pd
import math
from scipy.stats import norm
import matplotlib.mlab as mlab
from scipy.ndimage.filters import gaussian_filter
from astropy.convolution import convolve, Gaussian2DKernel
from scipy import stats, interpolate, optimize
from matplotlib import ticker, cm
from scipy.stats.contingency import margins
import fermi_stacking.pop_studies.PopStudies as PS
from IntegralUpperLimit import calc_int
from UpperLimits import UpperLimits
from SummedLikelihood import *
from astropy.io import fits
#######################################

# Superclass:
[docs] class StackingAnalysis: """Superclass for conducting Fermi-LAT stacking. Parameters ---------- sample_file : str Full path to sample file. file_type : str Type for the sample file. Must be either 'csv', 'fits', or 'tab'. column_name : str Header of name column in sample file. column_ra : str Header of ra column in sample file. column_dec : str Header of dec column in sample file. run_list : list List of names to run (subsample of full sample). For full sample use "default". psf_low : int Lower PSF iterator for JLA stacking (0-3). Default is 0 for standard analysis and preprocessing. psf_high : int Upper PSF iterator for JLA stacking (1-4). Default is 1 for standard analysis and preprocessing. run_name : str Main name of run. job_type : str "p" for preprocessing or "s" for stacking. ft1 : str Full path to ft1 photon data file. ft2 : str Full path to spacecraft file. galdiff : str Full path to Galactic diffuse model. isodiff : str Full path to isotropic model. ltcube : str, optional Full path to precomputed ltcube (the default is 'None', in which case the ltcube is calculated on the fly). use_scratch : bool If True, will perform analysis in scratch directory. scratch : str Full path to sratch directory. JLA : bool If True will perform joint likelihood analysis, using 4 different PSF classes. If False, a standard analysis will be performed. irfs : str Name of LAT instrument response functions. evclass : int Event class, i.e. source, clean, etc. evtype : int Event type, i.e. back/front conversion, etc. emin : float or int Miminum energy of analysis in MeV. emax : float or int Maximum energy of analysis in MeV. tmin : float or int Minimum time of analysis in mission elapsed time (MET). tmax : float or int Maximum time of analysis in mission elapsed time (MET). zmax : float or int Maximun zenith angle to use in the analysis in degrees. index_min : float Minimum index for stacking scan (absolute value). index_max : float Maximum index for stacking scan (absolute value). flux_min : float Power of min flux for stacking scan. flux_max : float Power of max flux for stacking scan. num_flux_bins : int Number of flux bins to use. alpha_low : float Lower bound of alpha range. alpha_high : float Upper bound of alpha range. alpha_step : float Step size of alpha list beta_low : float Log of Lower bound of beta range, beta_high : float Log of Upper bound of beta range (non-inclusive) beta_step : float step size of beta list calc_sed : bool If True will calculate SED. sed_logEbins : list Log of energy bin edges for SED calculation. delete_4fgl : bool Option to run 4FGL source. If True, must provide "remove_list.csv" file in run directory, with col1=sample_name, col2=4fgl_name, and no header. show_plots : bool If True will show plots. Set to False for submitted batch jobs. Note ---- All inputs are passed with inputs.yaml file. """ def __init__(self,input_yaml): # Get home directory: self.home = os.getcwd() # Load main inputs from yaml file: with open(input_yaml,"r") as file: inputs = yaml.load(file,Loader=yaml.FullLoader) # Main default inputs: self.ft1 = inputs["ft1"] # data file self.ft2 = inputs["ft2"] # spacecraft file self.galdiff = inputs["galdiff"] # Galactic diffuse model self.isodiff = inputs["isodiff"] # isotropic model # ltcube: self.ltcube = inputs["ltcube"] # Scratch directory: self.use_scratch = inputs["use_scratch"] if self.use_scratch == True: self.scratch = inputs["scratch"] if(os.path.isdir(self.scratch)==False): os.system('mkdir %s' %self.scratch) # Perform joint likelihood analysis (True) or standard analysis (False): self.JLA = inputs["JLA"] # Main analysis parameters: self.run_name = inputs["run_name"] self.irfs = inputs["irfs"] self.evclass = inputs["evclass"] self.evtype = inputs["evtype"] self.emin = inputs["emin"] self.emax = inputs["emax"] self.tmin = inputs["tmin"] self.tmax = inputs["tmax"] self.zmax = inputs["zmax"] self.index_min = inputs["index_min"] self.index_max = inputs["index_max"] self.flux_min = inputs["flux_min"] self.flux_max = inputs["flux_max"] self.num_flux_bins = inputs["num_flux_bins"] # Parameters for alpha-beta stack self.alpha_low = inputs["alpha_low"] self.alpha_high = inputs["alpha_high"] self.alpha_step = inputs["alpha_step"] self.beta_low = inputs["beta_low"] self.beta_high = inputs["beta_high"] self.beta_step = inputs["beta_step"] self.alpha_range = np.arange(self.alpha_low,self.alpha_high,self.alpha_step) self.beta_range = np.arange(self.beta_low,self.beta_high,self.beta_step) # Option to run 4FGL sources: # Note: If True, must provide "remove_list.csv" file in run directory, with col1=sample_name, col2=4fgl_name, and no header. self.delete_4fgl = inputs["delete_4fgl"] if self.delete_4fgl == True: df = pd.read_csv("remove_list.csv",names=["col0","col1"]) self.delete_sample_name = np.array(df["col0"].tolist()) self.delete_4fgl_name = np.array(df["col1"].tolist()) # Additional options: self.show_plots = inputs["show_plots"] self.calc_sed = inputs["calc_sed"] self.logEbins = inputs["sed_logEbins"] # Sample data: self.sample_file = inputs["sample_file"] self.file_type = inputs["file_type"] self.column_name = inputs["column_name"] if self.file_type == "fits": hdu = fits.open(self.sample_file) data = hdu[1].data self.sample_name_list = data[self.column_name].strip().tolist() if self.file_type == "csv": df = pd.read_csv(self.sample_file) self.sample_name_list = df[self.column_name].tolist() if self.file_type == "tab": df = pd.read_csv(self.sample_file, delim_whitespace=True) self.sample_name_list = df[self.column_name].tolist() # Science Tools: self.nxpix = inputs["nxpix"] self.nypix = inputs["nypix"] self.xref = inputs["xref"] self.yref = inputs["yref"] self.binsz = inputs["binsz"] self.coordsys = inputs["coordsys"] self.enumbins = inputs["enumbins"] self.proj = inputs["proj"] self.reduced_x = inputs["reduced_x"] self.reduced_y = inputs["reduced_y"] self.path = inputs["path"] self.ROI_RA = inputs["ROI_RA"] self.ROI_DEC = inputs["ROI_DEC"] self.ROI_radius = inputs["ROI_radius"]
[docs] def ang_sep(self,ra0, dec0, ra1, dec1): """Calculate angular distance between two points on the sky. Parameters ---------- ra0 : float Right ascension of first source in degrees. dec0 : float Declination of first source in degrees. ra1 : float Right ascension of second source in degrees. dec1 : float Declination of first source in degrees. Returns ------- float Angular distance between two sources in degrees. """ C = np.pi/180. d0 = C * dec0 d1 = C * dec1 r12 = C * (ra0 - ra1) cd0 = np.cos(d0) sd0 = np.sin(d0) cd1 = np.cos(d1) sd1 = np.sin(d1) cr12 = np.cos(r12) sr12 = np.sin(r12) num = np.sqrt((cd0 * sr12) ** 2 + (cd1 * sd0 - sd1 * cd0 * cr12) ** 2) den = sd0 * sd1 + cd0 * cd1 * cr12 return np.arctan2(num, den) / C
[docs] def run_preprocessing(self,srcname,ra,dec,components=None): """Perform preprocessing of source. Parameters --------- srcname : str Name of source. ra : float Right ascension of source. dec : float Declination of source. components : list of strings, optional Specify components of analysis via a list, where each entry in the list is a line that is to be written in the yaml file. Must include new line command at the end of each line. """ # Make print statement: print() print("Running preprocessing...") print() # Need to specify coordinates as type float ra = float(ra) dec = float(dec) # Define main output directory for source: preprocess_output = os.path.join(self.home,"Preprocessed_Sources") if(os.path.isdir(preprocess_output)==False): os.system('mkdir %s' %preprocess_output) # Remove src output directory if it already exists: src_output_main = os.path.join(preprocess_output,srcname) if(os.path.isdir(src_output_main)==True): shutil.rmtree(src_output_main) # Make src output directory: os.system('mkdir %s' %src_output_main) # Define scratch directory for source: if self.use_scratch == True: preprocess_scratch = os.path.join(self.scratch,"Preprocessing") if(os.path.isdir(preprocess_scratch)==False): os.system('mkdir %s' %preprocess_scratch) src_scratch = os.path.join(preprocess_scratch,srcname) if(os.path.isdir(src_scratch)==True): shutil.rmtree(src_scratch) os.system('mkdir %s' %src_scratch) os.chdir(src_scratch) if self.use_scratch == False: os.chdir(src_output_main) with open('%s.yaml' % srcname, 'w') as yml: yml.write("logging:\n") yml.write(" verbosity : 3\n") yml.write(" chatter : 3\n") yml.write("#--------#\n") yml.write("fileio:\n") yml.write(" outdir : output\n") yml.write(" logfile : %s\n" %srcname) yml.write(" usescratch : False\n") yml.write(" scratchdir : scratch\n") yml.write("#--------#\n") yml.write("data:\n") yml.write(" evfile : '%s'\n" %self.ft1) yml.write(" scfile : '%s'\n" %self.ft2) if self.ltcube != "None": yml.write(" ltcube : '%s'\n" %self.ltcube) yml.write("#--------#\n") yml.write("binning:\n") yml.write(" roiwidth : 10\n") yml.write(" binsz : 0.08\n") yml.write(" binsperdec : 8\n") yml.write("#--------#\n") yml.write("selection:\n") yml.write(" emin : %s\n" %self.emin) yml.write(" emax : %s\n" %self.emax) yml.write(" zmax : %s\n" %self.zmax) yml.write(" target : '%s'\n" %srcname) yml.write(" radius : 15\n") yml.write(" tmin : %s\n" %self.tmin) yml.write(" tmax : %s\n" %self.tmax) yml.write(" evclass : %s\n" %self.evclass) yml.write(" evtype : %s\n" %self.evtype) yml.write(" filter : 'DATA_QUAL>0 && LAT_CONFIG==1'\n") yml.write("#--------#\n") yml.write("gtlike:\n") yml.write(" edisp : True\n") yml.write(" edisp_disable : ['isodiff']\n") yml.write(" irfs : '%s'\n" %self.irfs) yml.write("#--------#\n") yml.write("model:\n") yml.write(" src_radius : 15\n") yml.write(" src_roiwidth : 15\n") yml.write(" galdiff : '%s'\n" %self.galdiff) yml.write(" isodiff : '%s'\n" %self.isodiff) yml.write(" catalogs :\n") yml.write(" - '4FGL-DR3'\n") yml.write(" extdir : '/zfs/astrohe/Software/fermipy_source/lib/python3.9/site-packages/fermipy-1.1.3+2.g21485-py3.9.egg/fermipy/data/catalogs/Extended_12years/'\n") yml.write(" sources :\n") yml.write(" - { 'name' : '%s', 'ra' : %s, 'dec' : %s, 'SpectrumType' : PowerLaw }\n" %(srcname,ra,dec)) yml.write("#--------#\n") yml.write("plotting:\n") yml.write(" format : png\n") yml.write("#--------#\n") yml.write("sed:\n") yml.write(" use_local_index : True\n") # Include components for joint likelihood analysis (JLA): if self.JLA == True and components == None: yml.write("components:\n") yml.write(" - { model: {isodiff: iso_P8R3_SOURCE_V3_PSF0_v1.txt},\n") yml.write(" selection : { evtype : 4 } }\n") yml.write(" - { model: {isodiff: iso_P8R3_SOURCE_V3_PSF1_v1.txt},\n") yml.write(" selection : { evtype : 8 } }\n") yml.write(" - { model: {isodiff: iso_P8R3_SOURCE_V3_PSF2_v1.txt},\n") yml.write(" selection : { evtype : 16 } }\n") yml.write(" - { model: {isodiff: iso_P8R3_SOURCE_V3_PSF3_v1.txt},\n") yml.write(" selection : { evtype : 32 } }\n") # Option to write components from list: # Require user to specify JLA = True when passing components: if self.JLA == False and components != None: print("ERROR: JLA must be True to pass component list.") sys.exit() # Read components from list: if self.JLA == True and components != None: yml.write("components:\n") for each in components: yml.write(each) yml.close() gta = GTAnalysis('%s.yaml' % srcname,logging={'verbosity' : 3}) gta.setup() # if rerunning 4fgl source, first delete source from model: if self.delete_4fgl == True: delete_sample_name = self.delete_sample_name delete_4fgl_name = self.delete_4fgl_name if srcname in delete_sample_name: this_index = srcname == delete_sample_name this_name = delete_4fgl_name[this_index][0] gta.delete_source(this_name) gta.optimize() gta.print_roi() gta.free_source('galdiff') gta.free_source('isodiff') gta.free_sources(minmax_ts=[25,None],pars='norm', distance=5.0) gta.free_sources(minmax_ts=[500,None],distance=7.0) gta.free_source(srcname) gta.fit() gta.write_roi('fit_model_1') gta.delete_source(srcname) model2 = {'Index' : 2.0, 'SpatialModel' : 'PointSource'} finder2 = gta.find_sources(prefix='find2',model=model2,sqrt_ts_threshold=4.0, min_separation=0.5,max_iter=10,sources_per_iter=20, tsmap_fitter='tsmap') gta.write_roi('fit_model_2') names2 = np.array([finder2['sources'][i]['name'] for i in range(len(finder2['sources']))]) ra_ns2 = np.array([finder2['sources'][i]['ra'] for i in range(len(finder2['sources']))]) dec_ns2 = np.array([finder2['sources'][i]['dec'] for i in range(len(finder2['sources']))]) r95_ns2 = np.array([finder2['sources'][i]['pos_r95'] for i in range(len(finder2['sources']))]) dist_sep = '%.2f' %0 namee='' # Check ang separation if more than one source is found close to ra and dec if len(names2)>0: for i in range(len(names2)): sepp2=self.ang_sep(ra_ns2[i],dec_ns2[i],ra,dec) if sepp2 < r95_ns2[i] and sepp2 < 0.2: print(names2[i],sepp2,r95_ns2[i]) dist_sep = '%.2f' %sepp2 namee = names2[i] if namee: # Write name of replaced source: f = open("output/replaced_source_name.txt","w") f.write(str([srcname,namee])) f.close() srcname=namee else: gta.add_source(srcname,{ 'ra' : ra, 'dec' : dec, 'SpectrumType' : 'PowerLaw', 'Index' : 2.0, 'Scale' : 1000, 'Prefactor' : 1e-11, 'SpatialModel' : 'PointSource' }) gta.optimize() gta.print_roi() gta.free_source('galdiff') gta.free_sources(minmax_ts=[500,None],distance=7.0) gta.free_source(srcname) gta.fit() gta.write_roi('fit_model_3') # Calculate SED: if self.calc_sed == True: gta.sed(srcname,loge_bins=self.logEbins) p = np.load('output/fit_model_3.npy',allow_pickle=True).flat[0] src = p['sources'][srcname] if str(src['ts'])=='nan': # To check non-convergence print('****************************') print('Fit has not converged') print('****************************') gta.free_sources(minmax_ts=[None,100],free=False) gta.free_source(srcname) gta.fit(tol=1e-8) gta.write_roi('fit_model_3') p = np.load('output/fit_model_3.npy',allow_pickle=True).flat[0] src = p['sources'][srcname] else: print('****************************') print('Fit has converged') print('****************************') if src['ts'] > 15 and np.fabs(src['param_values'][1]) > 3.0: model4 = {'Index' : 2.8, 'SpatialModel' : 'PointSource'} print('****************************') print('Source still bad TS=%s Index=%s?' %(src['ts'],src['param_values'][1])) print('****************************') gta.delete_source(srcname) mapp=gta.tsmap('fit_no_source_final',model=model4) finder4 = gta.find_sources(prefix='find4',model=model4,sqrt_ts_threshold=4.0, min_separation=1.0,max_iter=10,sources_per_iter=20, tsmap_fitter='tsmap') gta.add_source(srcname,{ 'ra' : ra, 'dec' : dec, 'SpectrumType' : 'PowerLaw', 'Index' : 2.0, 'Scale' : 1000, 'Prefactor' : 1e-11, 'SpatialModel' : 'PointSource' }) gta.free_sources(minmax_ts=[100,None],pars='norm',distance=5.0) gta.free_sources(minmax_ts=[200,None],distance=7.0) gta.free_source(srcname) gta.fit() gta.write_roi('fit_model_3') p = np.load('output/fit_model_3.npy',allow_pickle=True).flat[0] src = p['sources'][srcname] TS = '%.2f' %src['ts'] Flux = '%.2e' %src['eflux'] Flux_err = '%.2e' %src['eflux_err'] Flux_UL = '%.2e' %src['eflux_ul95'] Index = '%.2f' %(np.fabs(src['param_values'][1])) Index_err = '%.2f' %src['param_errors'][1] f = open('%s_Param.txt' % srcname, 'w') f.write(str(srcname) + "\t" + str(dist_sep) + "\t" + str(Flux) + "\t" + str(Flux_err) + "\t" + str(Index) + "\t" + str(Index_err) + "\t" + str(Flux_UL) + "\t" + str(TS) + "\n") f.close() # Calculate likelihood for null hypothesis: if self.JLA == False: iteration_list = [0] if self.JLA == True: iteration_list = [0,1,2,3] for j in iteration_list: srcmap = 'output/srcmap_0%s.fits' %j bexpmap = 'output/bexpmap_0%s.fits' %j xmlfile = 'output/fit_model_3_0%s.xml' %j savexml = 'output/null_likelihood_%s.xml'%j savetxt = 'output/null_likelihood_%s.txt' %j # Check for ltcube: if os.path.isfile(self.ltcube) == False: self.ltcube = 'output/ltcube_0%s.fits' %str(j) if os.path.isfile(self.ltcube) == False: print("ERROR: no ltcube found.") sys.exit() obs = BinnedObs(srcMaps=srcmap,expCube=self.ltcube,binnedExpMap=bexpmap,irfs='%s' %self.irfs) like = BinnedAnalysis(obs,xmlfile,optimizer='Minuit') like.deleteSource(srcname) freeze=like.freeze for k in range(len(like.model.params)): freeze(k) like.model['galdiff'].funcs['Spectrum'].getParam('Prefactor').setFree(True) like.model['galdiff'].funcs['Spectrum'].getParam('Index').setFree(True) like.model['isodiff'].funcs['Spectrum'].getParam('Normalization').setFree(True) likeobj = pyLike.Minuit(like.logLike) thisL = like.fit(verbosity=1,covar=True,optObject=likeobj) #this returns -logL value = like.logLike.value() #this returns logL like.logLike.writeXml(savexml) print("*********") print("-logL: " + str(thisL)) print() f = open(savetxt,'w') f.write(str(value)) f.close() if self.use_scratch == True: shutil.copytree(src_scratch,src_output_main,dirs_exist_ok=True) os.chdir(self.home) return
[docs] def make_preprocessing_summary(self): """Makes sumarray of preprocessing.""" # Construct empty dataframe: df_full = pd.DataFrame(data=None, columns=["name","dist_sep","flux","flux_err","index","index_err","flux_ul","TS"]) # Fill dataframe: missing_list = [] for each in self.sample_name_list: srcname = each # Check for updated name: indir = os.path.join(self.home,"Preprocessed_Sources",srcname,"output") new_name_file = os.path.join(indir,"replaced_source_name.txt") replace_name = False if os.path.exists(new_name_file) == True: f = open(new_name_file,"r") this_list = eval(f.read()) newsrcname = this_list[1] replace_name = True src_file = "%s/%s_Param.txt" % (srcname,srcname) wdir = os.path.join(self.home,"Preprocessed_Sources",src_file) if os.path.exists(wdir) == False and replace_name == True: src_file = "%s/%s_Param.txt" % (srcname,newsrcname) wdir = os.path.join(self.home,"Preprocessed_Sources",src_file) if os.path.exists(wdir) == False: missing_list.append(srcname) print() print("Does not exists: " + srcname) print() if os.path.exists(wdir) == True: this_file = wdir df = pd.read_csv(this_file,sep='\s+',names=["name","dist_sep","flux","flux_err","index","index_err","flux_ul","TS"]) df_full = pd.concat([df,df_full]).reset_index(drop=True) df_full = df_full.sort_values(by=["TS"],ascending=False).reset_index(drop=True) print() print(df_full) print() # Write dataframe to text file: f = open("Preprocessed_Sources/preprocessing_summary_LLAGN.txt","w") f.write("\n") f.write("*****************\n") f.write("preprocessing summary:\n\n") f.write(df_full.to_string(index=True)) f.write("\n\n") f.write("****************\n") f.write("Missing sources:") f.write(str(missing_list)) f.close() df_full.to_csv("Preprocessed_Sources/preprocessing_summary_LLAGN.csv",sep ="\t",index=False) return