Source code for slrealizer.realize_sl

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import slrealizer.utils.utils as utils
import slrealizer.utils.constants as constants
import pandas as pd
import random
import galsim

[docs]class SLRealizer(object): """ Worker class equipped with functions for making LSST-like "Object" and "Source" tables. These are inherited by child classes which are associated with a specific non-LSST catalog, e.g. the child class OM10Realizer realizes the lenses in the OM10 catalog into mock LSST tables. :class:`SLRealizer` objects are instantiated with an observation history, provided as a `pandas` dataframe. """ def __init__(self, observation, add_moment_noise, add_flux_noise): # Observation history self.observation = observation self.num_obs = len(self.observation) # GalSim drawImage params self.fft_params = galsim.GSParams(maximum_fft_size=10240) self.pixel_scale = 0.1 self.nx, self.ny = 49, 49 # Source table df self.source_table = None # Source table column list self.source_columns = ['MJD', 'ccdVisitId', 'objectId', 'filter', 'psf_fwhm', 'x', 'y', 'apFlux', 'apFluxErr', 'apMag', 'apMagErr', 'trace', 'e1', 'e2', 'e_final', 'phi_final', ] # Controlling randomness self.add_moment_noise = add_moment_noise self.add_flux_noise = add_flux_noise if not self.add_flux_noise and not self.add_moment_noise: self.remove_random = True else: self.remove_random = False self.seed = 123 np.random.seed(self.seed)
[docs] def get_obs_info(self, obsID=None, rownum=None): if obsID is not None and rownum is not None: raise ValueError("Need to define either obsID or rownum, not both.") if obsID is not None: return self.observation.loc[self.observation['obsHistID']==obsID] elif rownum is not None: return self.observation.loc[rownum]
def _get_lens_info(self, lensID): ''' This function will depend on the format of each lens catalog. ''' raise NotImplementedError
[docs] def draw_system(self, lens_info, obs_info, save_path=None): ''' Draws all objects of the given lens system in the given observation conditions, using GalSim. Parameters ========== lens_info: pandas.Dataframe a row of the OM10 DB obs_info: pandas.Dataframe a row of the observation history df save_path: string path in which to save the image Returns ======= galsim_img: galsim.Image A GalSim image object of the aggregate system ''' # Old docstring, for comparison - to be deleted! # For more help writing nice docstrings, check out # https://python-sprints.github.io/pandas/guide/pandas_docstring.html ''' Draws all objects of the given lens system in the given observation conditions, using GalSim. Keyword arguments: lens_info -- a row of the OM10 DB obs_info -- a row of the observation history df save_path -- path in which to save the image Returns: A GalSim object of the aggregate system used to render the image ''' histID, MJD, band, PSF_FWHM, sky_mag = obs_info # Construct a "scene" of image components: # i) Lens galaxy #half_light_radius=lens_info['half_light_radius'],\ scene = galsim.Gaussian(sigma=1.0, flux=lens_info['flux'])\ .shear(e=lens_info['e'], beta=lens_info['beta']) # ii) Lensed quasar images for i in xrange(lens_info['num_objects']): quasar = galsim.Gaussian(flux=lens_info['flux_'+str(i)], sigma=1.e-5)\ .shift(lens_info['xy_'+str(i)]) scene += quasar # Convolve the scene with the PSF: psf = galsim.Gaussian(flux=1.0, fwhm=PSF_FWHM) galsim_obj = galsim.Convolve([scene, psf], gsparams=self.fft_params) galsim_img = galsim_obj.drawImage(nx=self.nx, ny=self.ny, scale=self.pixel_scale, method='no_pixel') if save_path is not None: plt.imshow(galsim_img.array, interpolation='none', aspect='auto') plt.savefig(save_path) plt.close() return galsim_img
[docs] def estimate_parameters(self, galsim_img, method="raw_numerical"): """ Performs shape estimati on on the galsim_img using either GalSim's HSM shape estimator or a native numerical moment calculator under the observation conditions obs_info Keyword arguments: galsim_img -- GalSim's Image object on which parameters will be estimated method -- one of "hsm" (GalSim's HSM shape estimator) or "raw_numerical" (a native numerical moment calculator) [default: "raw"] Returns a dictionary of the lens properties, which can be used to draw the emulated image """ estimated_params = {} if method == "hsm": try: shape_info = galsim_img.FindAdaptiveMom(guess_sig=self.pixel_scale*10.0) except: #print "HSM failed" return None # Calculate the real position from the arbitrary pixel position pixelCenter = galsim.PositionD(x=shape_info.moments_centroid.x, y=shape_info.moments_centroid.y) estimated_params['x'], estimated_params['y'] = utils.pixel_to_physical(shape_info.moments_centroid.x, self.nx, self.pixel_scale),\ utils.pixel_to_physical(shape_info.moments_centroid.y, self.ny, self.pixel_scale) estimated_params['apFlux'] = float(np.sum(galsim_img.array)) if self.DEBUG: estimated_params['hlr'] = galsim_img.calculateHLR(center=pixelCenter) estimated_params['det'] = (shape_info.moments_sigma * self.pixel_scale)**4.0 estimated_params['trace'] = 2.0*galsim_img.calculateMomentRadius(center=pixelCenter, rtype='trace')**2.0 estimated_params['e1'] = shape_info.observed_shape.e1 estimated_params['e2'] = shape_info.observed_shape.e2 estimated_params['e_final'] = shape_info.observed_shape.e estimated_params['phi_final'] = shape_info.observed_shape.beta elif method == "raw_numerical": image_array = galsim_img.array Ix, Iy = utils.get_first_moments_from_image(image_array, self.pixel_scale) Ixx, Ixy, Iyy = utils.get_second_moments_from_image(image_array, self.pixel_scale) estimated_params['apFlux'] = np.sum(image_array) estimated_params['x'] = Ix estimated_params['y'] = Iy trace = Ixx + Iyy estimated_params['e1'] = (Ixx - Iyy)/trace estimated_params['e2'] = 2.0*Ixy/trace estimated_params['trace'] = trace estimated_params['e_final'], estimated_params['phi_final'] = utils.e1e2_to_ephi(estimated_params['e1'], estimated_params['e2']) else: raise ValueError("Please enter a valid method, either 'hsm' or 'raw_numerical'") return estimated_params
[docs] def draw_emulated_system(self, estimated_params): """ Draws the emulated system, i.e. draws the aggregate system from properties HSM derived from the image, which was in turn drawn from the catalog's truth properties. Only runs when DEBUG == True. Returns a GalSim Image object of the emulated system """ if not self.DEBUG: raise ValueError("Only runs in debug mode") system = galsim.Gaussian(flux=estimated_params['apFlux'], half_light_radius=estimated_params['hlr'])\ .shift(float(estimated_params['x']), float(estimated_params['y']))\ .shear(e1=estimated_params['e1'], e2=estimated_params['e2']) emulatedImg = system.drawImage(nx=self.nx, ny=self.ny, scale=self.pixel_scale, method='no_pixel') return emulatedImg
[docs] def create_source_row(self, derived_params, objectId, obs_info): ''' Returns a dictionary of lens system's properties computed the image of one lens system and the observation conditions, which makes up a row of the source table. Keyword arguments: derived_params -- derived lens properties obs_info -- a row of the observation history df Returns A dictionary with properties derived from HSM estimation (See code for which properties) ''' histID, MJD, band, PSF_FWHM, sky_mag = obs_info derived_params['apFluxErr'] = utils.mag_to_flux(sky_mag-22.5)/5.0 # because Fb = 5 \sigma_b if self.add_moment_noise: derived_params['trace'] += utils.add_noise(mean=constants.get_second_moment_err(), stdev=constants.get_second_moment_err_std(), measurement=derived_params['trace']) derived_params['x'] += utils.add_noise(mean=constants.get_first_moment_err(), stdev=constants.get_first_moment_err_std(), measurement=derived_params['x']) derived_params['y'] += utils.add_noise(mean=constants.get_first_moment_err(), stdev=constants.get_first_moment_err_std(), measurement=derived_params['y']) if self.add_flux_noise: derived_params['apFlux'] += utils.add_noise(mean=0.0, stdev=derived_params['apFluxErr']) # flux rms not skyErr derived_params['apMag'] = utils.flux_to_mag(derived_params['apFlux'], from_unit='nMgy') derived_params['apMagErr'] = (2.5/np.log(10.0)) * derived_params['apFluxErr']/derived_params['apFlux'] row = {'MJD': MJD, 'ccdVisitId': histID, 'filter': band, 'x': derived_params['x'], 'y': derived_params['y'], 'apFlux': derived_params['apFlux'], 'apFluxErr': derived_params['apFluxErr'], 'apMag': derived_params['apMag'], 'apMagErr': derived_params['apMagErr'], 'trace': derived_params['trace'], 'e1': derived_params['e1'], 'e2': derived_params['e2'], 'e_final': derived_params['e_final'], 'phi_final': derived_params['phi_final'], 'psf_fwhm': PSF_FWHM, 'objectId': objectId} return row
[docs] def make_source_table_rowbyrow(self, save_file, method="analytical"): import time """ Returns a source table generated from all the lens systems in the catalog under all the observation conditions in the observation history, and saves it as a .csv file. Keyword arguments: save_file -- path into which output source table will be saved method -- how to calculate moments for each row (See method estimate_parameters for details about each option) """ start = time.time() print("Began making the source catalog.") df = pd.DataFrame(columns=self.source_columns) #ellipticity_upper_limit = desc.slrealizer.get_ellipticity_cut() print("Number of systems: %d, number of observations: %d" %(self.num_systems, self.num_obs)) hsm_failed = 0 for j in xrange(self.num_obs): for i in xrange(self.num_systems): row = self.create_source_row(lens_info=self.get_lens_info(rownum=i), obs_info=self.observation.loc[j], method=method) if row == None: hsm_failed += 1 else: df = df.append(row, ignore_index=True) df = df.infer_objects() df = df[self.source_columns] df.set_index('objectId', inplace=True) df.to_csv(save_file, index=True) end = time.time() if method == 'hsm': print("Done making the source table which has %d row(s) in %0.2f hours, after getting %d errors from HSM failure." %(len(df), (end - start)/3600.0, hsm_failed)) else: print("Done making the source table with %s method in %0.2f minutes." %(method, (end - start)/60.0)) # desc.slrealizer.dropbox_upload(dir, 'source_catalog_new.csv') self.sourceTable = df if self.DEBUG: return df
[docs] def make_object_table(self, object_table_path, source_table_path=None, include_std=False): """ Generates the object table from the given source table at source_table_path by averaging the properties for each filter, and saves it as object_table_path. """ import time import gc if object_table_path is None: raise ValueError("Must provide save path of the output object table.") if source_table_path is not None: print("Reading in the source table at %s ..." %source_table_path) obj = pd.read_csv(source_table_path) obj.set_index('objectId', inplace=True) elif self.sourceTable is not None: print("Reading in Pandas Dataframe of most recent source table generated... ") obj = self.sourceTable.copy() else: raise ValueError("Must provide a source table path or generate a source table at least once using this Realizer object.") start = time.time() obj.drop(['ccdVisitId', 'psf_fwhm'], axis=1, inplace=True) # Define (filter-nonspecific) properties to go in object table columns keepCols = list(obj.columns.values) keepCols.remove('filter') # Pivot the filter values to the column axis obj = obj.pivot_table(values=keepCols, index=[obj.index, 'MJD'], columns='filter', aggfunc='first') # Collapse multi-indexed column using filter_property formatting obj.columns = obj.columns.map('{0[1]}_{0[0]}'.format) gc.collect() #if self.DEBUG: print(obj.columns.values) # Take mean, optional std of properties across observed times for each object obj.reset_index(inplace=True) obj.drop('MJD', axis=1, inplace=True) obj = obj.groupby('objectId', sort=False) means = obj.mean() if include_std: stds = obj.std() obj = means.join(stds, lsuffix='', rsuffix='-std') else: obj = means gc.collect() # Drop examples with missing values obj.dropna(how='any', inplace=True) # Get x, y values relative to the r-band for b in 'ugriz': obj[b + '_' + 'x'] = obj[b + '_' + 'x'] - obj['r_x'] obj[b + '_' + 'y'] = obj[b + '_' + 'y'] - obj['r_y'] end = time.time() # Save as csv file obj.to_csv(object_table_path, index=False) print("Done making the object table in %0.2f seconds." %(end-start))
#if self.DEBUG: #print("Object table columns: ", obj.columns) #desc.slrealizer.dropbox_upload(save_dir, 'object_catalog_new.csv') #this uploads to the desc account
[docs] def include_quasar_variability(self, save_output=False, input_source_path=None, output_source_path=None): """ Takes a source table and adds the intrinsic variability of the quasar images using the generative model introduced in MacLeod et al (2010) Keyword arguments: save_output -- whether to save the output to disk [default: False] input_source_path -- path of input source table to be altered [default: None] output_source_path -- path of output source table containing time variability [default: None] """ import gc import time start = time.time() if input_source_path is None: try: print("Reading in the most recent source table...") src = self.source_table except ValueError: print("Realizer has not generated a source table yet.") else: try: print("Reading in the source table at %s" %input_source_path) src = pd.read_csv(input_source_path) except ValueError: print("Please input a valid path to the source table.") if self.DEBUG: NUM_TIMES = src['MJD'].nunique() NUM_OBJECTS = src['objectId'].nunique() print(NUM_TIMES, NUM_OBJECTS) for q in range(4): magnitude_type = 'q_mag_' + str(q) # Filter-specific dictionary of apMag values to replace src apMag apMag_filter = {} for b in 'ugriz': ######################### # Adding useful columns # ######################### # Filter-specific version of src with only the columns we need src_timevar = src.query('filter == @b')[['filter', 'objectId', 'MJD', magnitude_type, ]].copy() # Total number of time steps in this filter NUM_TIMES_FILTER = src_timevar['MJD'].nunique() # Set MJD relative to zero src_timevar['MJD_diff'] = src_timevar['MJD'] - np.min(src_timevar['MJD'].values) # Map ccdVisitId to integers starting from 0 sorted_obs_id = np.sort(src_timevar['MJD'].unique()) time_map = dict(zip(sorted_obs_id, range(NUM_TIMES_FILTER))) src_timevar['time_index'] = src_timevar['MJD'].map(time_map).astype(int) # Add a column of time elapsed since last observation, d_time src_timevar.sort_values(['objectId', 'filter', 'MJD', 'MJD_diff'], axis=0, inplace=True) src_timevar['d_time'] = src_timevar['MJD_diff'] - src_timevar['MJD_diff'].shift(+1) src_timevar['d_time'].fillna(0.0, inplace=True) src_timevar['d_time'] = np.clip(src_timevar['d_time'], a_min=0.0, a_max=None) gc.collect() ############################## # Computing time variability # ############################## # Parameters of the generative model (hand-picked) MU = 0.0 TAU = 20.0 #np.power(10.0, 2.4) # days S_INF = 0.14 # mag # Add a column to store the variability and initialize to zero src_timevar['intrinsic_mag'] = 0.0 # Pivot to get time sequence to be horizontal src_timepivot = src_timevar.pivot_table(index=['filter', 'objectId'], columns=['time_index',], values=['d_time', magnitude_type, 'intrinsic_mag', ]) # Update 'intrinsic_mag' column for t in range(1, NUM_TIMES_FILTER): src_timepivot.loc[b, :]['intrinsic_mag'][t] = np.random.normal(loc=src_timepivot['intrinsic_mag'][t - 1].values*np.exp(-src_timepivot['d_time'][t].values/TAU) + MU*(1.0 - np.exp(-src_timepivot['d_time'][t].values/TAU)), scale=0.5*S_INF**2.0*(1.0 - np.exp(-2.0*src_timepivot['d_time'][t].values/TAU))) ######################## # Final column sorting # ######################## # Add computed variability to magnitude_type column src_timepivot[magnitude_type] = src_timepivot[magnitude_type] + src_timepivot['intrinsic_mag'] # Pivot the time sequence back, to be vertical src_timepivot = src_timepivot.stack(dropna=False) # Drop columns we'll no longer use src_timepivot.drop(['intrinsic_mag', 'd_time'], axis=1, inplace=True) # As precaution, sort before storing updated apMag src_timepivot.reset_index(inplace=True) src_timepivot.sort_values(by=['filter', 'objectId', 'time_index'], inplace=True) # Store filter-specific apMag values apMag_filter[b] = src_timepivot[magnitude_type].values gc.collect() # Update apMag in source table, filter by filter for b in 'ugriz': src.loc[src['filter']==b, magnitude_type] = apMag_filter[b] gc.collect() if self.DEBUG: print("Result of adding time variability: ") print("Number of observations: ", src['MJD'].nunique()) print("Number of objects: ", src['objectId'].nunique()) src.set_index('objectId', inplace=True) end = time.time() print("Done adding time variability with %d row(s) in %0.2f seconds using vectorization." %(len(src), end-start)) if save_output: print("Saving the new source table with time variability at %s" %output_source_path) src.to_csv(output_source_path) self.source_table = src
def _include_moments(self, inplace=True, input_dict=None): """ Adds columns of first and second moments (analytically computed) to self.source_table (if inplace=True) or some other dictionary Keyword arguments: inplace -- whether to alter the self.source_table. If input_dict is None, will be assumed to be True. [default: True] input_dict -- object of type dict that will be used to compute the moments. If inplace is True, will be ignored. [default: None] Returns (only if inplace is False): a new dictionary that is a version of the original input_dict with moment-related columns added """ return_dict = False if inplace or input_dict is None: src = self.source_table elif input_dict is not None: is_dictionary = (type(input_dict) is dict) valid_columns = ['lens_flux', 'apFlux', 'e', 'beta', 'psf_fwhm'] valid_columns += [value + '_%d' %idx for value in ['q_flux', 'XIMG', 'YIMG'] for idx in range(4)] has_valid_columns = all(col in input_dict for col in valid_columns) if is_dictionary and has_valid_columns: src = input_dict return_dict = True else: raise ValueError("Keyword input_dict must be a dictionary and contain all the required keys.") # Calculate flux ratios (for weighted moments) src['lensFluxRatio'] = src['lens_flux']/src['apFlux'] for q in range(4): src['qFluxRatio_' + str(q)] = src['q_flux_' + str(q)]/src['apFlux'] if not return_dict: src.drop(['lens_flux'] + ['q_flux_' + str(q) for q in range(4)], axis=1, inplace=True) ################# # FIRST MOMENTS # ################# src['x'], src['y'] = 0.0, 0.0 for q in range(4): src['x'] += src['qFluxRatio_' + str(q)]*src['XIMG_' + str(q)] src['y'] += src['qFluxRatio_' + str(q)]*src['YIMG_' + str(q)] if self.add_moment_noise: src['x'] += utils.add_noise(mean=get_first_moment_err(), stdev=get_first_moment_err_std(), shape=src['x'].shape, measurement=src['x']) src['y'] += utils.add_noise(mean=get_first_moment_err(), stdev=get_first_moment_err_std(), shape=src['y'].shape, measurement=src['y']) ################## # SECOND MOMENTS # ################## # Read in lens shape parameters src['minor_to_major'] = np.power((1.0 - src['e'])/(1.0 + src['e']), 0.5) # q parameter in galsim.shear src['beta'] = np.radians(src['beta']) # beta parameter in galsim.shear # Arbitrarily set REFF_T to 1.0 #src['sigmasq_lens'] = np.power(utils.hlr_to_sigma(src['REFF_T']), 2.0) src['sigmasq_lens'] = np.power(utils.hlr_to_sigma(1.0), 2.0) # Initialize with lens contributions src['lam1'] = src['sigmasq_lens']/src['minor_to_major'] src['lam2'] = src['sigmasq_lens']*src['minor_to_major'] src['lens_Ixx'] = src['lam1']*np.power(np.cos(src['beta']), 2.0) + src['lam2']*np.power(np.sin(src['beta']), 2.0) src['lens_Iyy'] = src['lam1']*np.power(np.sin(src['beta']), 2.0) + src['lam2']*np.power(np.cos(src['beta']), 2.0) src['lens_Ixy'] = (src['lam1'] - src['lam2'])*np.cos(src['beta'])*np.sin(src['beta']) src['Ixx'] = src['lensFluxRatio']*(src['lens_Ixx'] + np.power(src['x'], 2.0)) src['Iyy'] = src['lensFluxRatio']*(src['lens_Iyy'] + np.power(src['y'], 2.0)) src['Ixy'] = src['lensFluxRatio']*(src['lens_Ixy'] - src['x']*src['y']) if not return_dict: src.drop(['lam1', 'lam2', 'lens_Ixx', 'lens_Iyy', 'lens_Ixy'], axis=1, inplace=True) # Add quasar contributions for q in range(4): src['Ixx'] += src['qFluxRatio_' + str(q)]*(np.power(src['XIMG_' + str(q)] - src['x'], 2.0)) src['Iyy'] += src['qFluxRatio_' + str(q)]*(np.power(src['YIMG_' + str(q)] - src['y'], 2.0)) src['Ixy'] += src['qFluxRatio_' + str(q)]*(src['XIMG_' + str(q)] - src['x'])\ *(src['YIMG_' + str(q)] - src['y']) # Add PSF src['sigmasq_psf'] = np.power(utils.fwhm_to_sigma(src['psf_fwhm']), 2.0) src['Ixx'] += src['sigmasq_psf'] src['Iyy'] += src['sigmasq_psf'] # Get trace and ellipticities src['trace'] = src['Ixx'] + src['Iyy'] if self.add_moment_noise: src['trace'] += utils.add_noise(mean=get_second_moment_err(), stdev=get_second_moment_err_std(), shape=src['trace'].shape, measurement=src['trace']) src['e1'] = (src['Ixx'] - src['Iyy'])/src['trace'] src['e2'] = 2.0*src['Ixy']/src['trace'] src['e_final'], src['phi_final'] = utils.e1e2_to_ephi(src['e1'], src['e2']) if inplace: self.source_table = src if return_dict: return src
[docs] def compare_truth_vs_emulated(self, lensID=None, rownum=None, save_dir=None): """ Draws two images of the lens system with the given rownum under randomly chosen observation conditions, one from the catalog info (truth image) and another from HSM's estimation of the truth image Keyword arguments: - lensID: an integer specifying the ID of the lens system in the OM10 catalog Returns: A tuple of - the truth image drawn from the catalog info - the emulated image drawn from HSM's derived info of aggregate system """ # Only works in debug mode. self.debug = True # Randomly select observation ID obs_rownum = random.randint(0, self.num_obs) # Render the truth image truth_img = self.draw_system(self.get_lens_info(lensID=lensID, rownum=rownum),\ self.get_observation(rownum=obs_rownum),\ save_dir) # Render the emulated image under observation conditions indexed by obs_rownum fig, axes = plt.subplots(2, figsize=(5, 10)) axes[0].imshow(truth_img.array, interpolation='none', aspect='auto') axes[0].set_title("TRUE MODEL IMAGE") estimated_params = self.estimate_hsm(image=truth_img, observation=self.observation.loc[obs_rownum]) emulated_img = self.draw_emulated_system(estimated_params) axes[1].imshow(img.array, interpolation='none', aspect='auto') axes[1].set_title("EMULATED IMAGE") if save_dir is not None: plt.savefig(save_dir + 'truth_vs_emulated.png') return truth_img, emulated_img