Source code for opsimsummary.opsim_out

"""
This module deals with representing the data in an OpSim output (to the extent
we will care about it). A description of the OpSim output can be found at
[opsim description](https://www.lsst.org/scientists/simulations/opsim/summary-table-column-descriptions-v335)

In brief, we will use two tables from the OpSim output:
    - A Summary Table which has the desired information
    - A Proposal Table which contains a dictionary for interpreting the
        `propID` column of the Summary Table.
"""
from __future__ import division, print_function, unicode_literals
__all__ = ['OpSimOutput']
import sys
import traceback
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
import collections


class OpSimOutput(object):
    """
    Class representing a subset of the output of the OpSim including
    information from the Summary and Proposal Tables with the subset taken
    over the proposals

    Attributes
    ----------
    opsimversion: {'lsstv3'|'sstf'|'lsstv4'}
        version of OpSim corresponding to the output format.
    summary: `pd.DataFrame`
        selected records from the Summary Table of pointings
    propIDDict: dict
        dictionary with strings as keys and integers used in the Summary
        Table to denote these proposals
    proposalTable: `pd.DataFrame`
        the proposal table in the output
    subset: string
        subset of proposals included in this class
    propIDs : list of integers
        integers corresponding to the subset selected through proposals
    zeroDDFDithers : bool, defaults to True
        if True, set dithers in DDF to 0, by setting ditheredRA,
        ditheredDec to fieldRA, fieldDec. This should only be used for
        opsimversion='lsstv3'. For opsimversion='sstf' or 'lsstv4', this
        will be set to False despite inputs, since this is already done,
        and cannot be done with the inputs.
    """

    def __init__(self, summary, propIDDict=None, proposalTable=None,
                 subset=None, propIDs=None, zeroDDFDithers=True,
                 opsimversion='lsstv3'):
        """
        Constructor for the `OpSimOutput` class

        Parameters
        ----------
        summary: `pd.DataFrame`
            a table describing a summary of observations which should have
            the following minimal columns (`ditheredRA`, `ditheredDec`,
            `expMJD`) as columns holding information on the pointing RA and
            dec of the telescope and the mjd of the observation and
            `obsHistID` a unique index for each entry the name of the index.
            Other columns which are essential in the context of use in
            simulations are (`FWHMeff`, `filtSkyBrightness`,
            `fiveSigmaDepth`, `propID`) describing the FWHM seeing, sky
            brightness in the filter of observation, and the five sigma
            depth of the observation and an integer index for different
            programs that an observation belongs to. The table is modified
            in place (columns `_ra`, `_dec` are added).
        proposalTable: `pd.DataFrame`, defaults to `None`
            modified table of proposals from the OpSim output
        propIDDict: dict, defaults to None
            dictionary with keys giving strings describing the program and
            values giving the integers that identify proposals. Required
            (with a 'ddf' key) whenever `zeroDDFDithers` is in effect.
        subset: {'wfd'|'ddf'|'combined'| '_all' | 'unique_all'}, defaults to None
            a string that defines a subset of observations to be chosen
            based on the choice of proposals. `wfd` is the LSST `WFD`,
            `ddf` is the LSST `ddf`, `combined` is the combination of `WFD`
            and `DDF` while, `unique_all` keeps all of the unique
            observations. `_all` is the entire table of observations from
            OpSim outputs.
        propIDs: sequence of integers
        zeroDDFDithers: bool, defaults to `True`
            If `True` changes the dithers in DDF fields to zero by setting
            the columns `ditheredRA`, `ditheredDec` the same as `fieldRA`
            and `fieldDec`
        opsimversion: {'lsstv3'|'sstf'|'lsstv4'} , defaults to 'lsstv3'
            a string to denote the version of OpSim outputs. `lsstv3`
            refers to outputs from OpSim version 3 (eg. enigma_1189,
            minion_1016). `sstf` refers to outputs created at the start of
            the Observing Strategy Task Force available at the following
            [website](http://altsched.rothchild.me:8080), and `lsstv4`
            refers to outputs available from OpSim version 4.
        """
        self.opsimversion = opsimversion
        self.allowed_subsets = self.get_allowed_subsets()
        self.subset = subset
        self.propIDDict = propIDDict
        self.proposalTable = proposalTable
        self._opsimvars = None

        # Only warn when the request is actually being overridden.
        if opsimversion in ('sstf', 'lsstv4') and zeroDDFDithers:
            ss = 'Warning: Input is zeroDDFDithers = True. But opsimversion is'
            ss += ' {} for which this must be False. Setting to False and proceeding\n'.format(opsimversion)
            print(ss)
            zeroDDFDithers = False

        # Check `summary` does not have `nan`s
        if not self.validate_pointings(summary, opsimVars=None):
            print('summary table has nans, exiting\n')
            sys.exit(1)

        if zeroDDFDithers:
            # `isin` copes with a single DDF id as well as an array of ids
            is_ddf = summary['propID'].isin(
                np.atleast_1d(self.propIDDict['ddf']))
            summary.loc[is_ddf, 'ditheredRA'] = summary.loc[is_ddf, 'fieldRA']
            summary.loc[is_ddf, 'ditheredDec'] = summary.loc[is_ddf, 'fieldDec']

        # Have a clear unambiguous ra, dec in radians following LSST
        # convention. These are the columns `_ra`, `_dec` which should have
        # the dithered values in radians
        angleUnit = self.opsimVars['angleUnit']
        if angleUnit == 'degrees':
            summary['_ra'] = np.radians(summary['ditheredRA'])
            summary['_dec'] = np.radians(summary['ditheredDec'])
            print('Changing units for {0} from {1}'.format(opsimversion,
                                                           'degrees'))
        elif angleUnit == 'radians':
            # Already in radians, make a copy
            print('Keeping units for {0} from {1}'.format(opsimversion,
                                                          'radians'))
            summary['_ra'] = summary['ditheredRA']
            summary['_dec'] = summary['ditheredDec']
        else:
            raise ValueError('angle unit of ra and dec Columns not recognized\n')

        # Validate the format of the pointings to expectations given the
        # version of the OpSim output
        if self.validate_pointings(summary, self.opsimVars):
            self.summary = summary
        else:
            raise AssertionError('Pointings are not in required format')

        # Set the attribute `_propID`
        self._propID = propIDs

    @staticmethod
    def get_opsimVariablesForVersion(opsimversion='lsstv3'):
        """Static method to returns a dictionary for the opsim version where
        the keys are names of quantities used in this codebase, and the
        values are the names of quantities in the OpSim output database

        Parameters
        ----------
        opsimversion: string, defaults to `lsstv3`
            can be {`lsstv3`|`lsstv4`|`sstf`}

        Returns
        -------
        dictionary: key, value pairs where keys are variable names used in
        `OpSimSummary` and values are variable names used in the OpSim
        database with the given version. A new dictionary is built on each
        call.

        Raises
        ------
        NotImplementedError : if `opsimversion` is not one of the above.

        Examples
        --------
        >>> from opsimsummary import OpSimOutput
        >>> OpSimOutput.get_opsimVariablesForVersion('lsstv4')
        {'summaryTableName': 'SummaryAllProps', 'obsHistID': 'observationId', 'propName': 'propName', 'propIDName': 'propId', 'propIDNameInSummary': 'proposalId', 'ops_wfdname': 'WideFastDeep', 'ops_ddfname': 'DeepDrillingCosmology1', 'expMJD': 'observationStartMJD', 'FWHMeff': 'seeingFwhmEff', 'pointingRA': 'ditheredRA', 'pointingDec': 'ditheredDec', 'filtSkyBrightness': 'skyBrightness', 'angleUnit': 'degrees'}
        """
        if opsimversion == 'lsstv3':
            return dict(summaryTableName='Summary',
                        obsHistID='obsHistID',
                        propName='propConf',
                        propIDName='propID',
                        propIDNameInSummary='propID',
                        ops_wfdname='conf/survey/Universal-18-0824B.conf',
                        ops_ddfname='conf/survey/DDcosmology1.conf',
                        expMJD='expMJD',
                        FWHMeff='FWHMeff',
                        pointingRA='ditheredRA',
                        pointingDec='ditheredDec',
                        filtSkyBrightness='filtSkyBrightness',
                        angleUnit='radians')
        if opsimversion == 'sstf':
            return dict(summaryTableName='SummaryAllProps',
                        obsHistID='observationId',
                        propName='propName',
                        propIDName='propId',
                        propIDNameInSummary='proposalId',
                        ops_wfdname='WideFastDeep',
                        ops_ddfname='Deep Drilling',
                        expMJD='observationStartMJD',
                        FWHMeff='seeingFwhmEff',
                        pointingRA='fieldRA',
                        pointingDec='fieldDec',
                        filtSkyBrightness='skyBrightness',
                        angleUnit='degrees')
        if opsimversion == 'lsstv4':
            return dict(summaryTableName='SummaryAllProps',
                        obsHistID='observationId',
                        propName='propName',
                        propIDName='propId',
                        propIDNameInSummary='proposalId',
                        ops_wfdname='WideFastDeep',
                        ops_ddfname='DeepDrillingCosmology1',
                        expMJD='observationStartMJD',
                        FWHMeff='seeingFwhmEff',
                        pointingRA='ditheredRA',
                        pointingDec='ditheredDec',
                        filtSkyBrightness='skyBrightness',
                        angleUnit='degrees')
        raise NotImplementedError('`get_propIDDict` is not implemented for this `opsimversion`')

    @property
    def opsimVars(self):
        """Dictionary where the keys are names of quantities used in
        `OpSimSummary`, and the values are the names of quantities in the
        OpSim output database used. Computed on first access and cached.
        """
        if self._opsimvars is None:
            self._opsimvars = self.get_opsimVariablesForVersion(self.opsimversion)
        return self._opsimvars

    @staticmethod
    def validate_pointings(summary, opsimVars=None, check_anycols=False):
        """
        Validate a dataframe of pointings for further use. If `opsimVars` is
        `None` then only check that there are no `np.nan`s, else also check
        that the columns `_ra`, `_dec` exist and have values plausible for
        radians.

        Parameters
        ----------
        summary: `pd.DataFrame` of pointings
        opsimVars: dictionary, defaults to `None`
            should be dictionary for each supported OpSim version available
            from `OpSimOutput.get_opsimVariablesForVersion(opsimversion)`.
            Only whether it is `None` is used here.
        check_anycols: Bool, defaults to False
            if True, this will check all columns rather than only
            `fiveSigmaDepth` for nans

        Returns
        -------
        True if all checks pass. On a failed check the offending statement
        is printed and the process exits through `sys.exit(1)`.
        """
        try:
            if opsimVars is not None:
                assert '_ra' in summary.columns
                assert '_dec' in summary.columns
                assert np.fabs(summary['_ra'].max()) <= 2.0 * np.pi
                # dec is signed: compare the raw minimum (taking the absolute
                # value first would make this check always pass)
                assert summary['_dec'].min() >= -1.0 * np.pi
            if check_anycols:
                assert not summary.isnull().values.any()
            else:
                # We only check the fiveSigmaDepth column
                assert 'fiveSigmaDepth' in summary.columns.values
                assert not summary['fiveSigmaDepth'].isnull().values.any()
        except AssertionError:
            _, _, tb = sys.exc_info()
            traceback.print_tb(tb)  # Fixed format
            tb_info = traceback.extract_tb(tb)
            filename, line, func, text = tb_info[-1]
            print('pointings are not in required format')
            print(summary.head())
            print('An error occurred on line {} in statement {}'.format(line, text))
            sys.exit(1)
        return True

    @staticmethod
    def get_dithercolumns(summary, opsimversion, method='default', ddfId=5,
                          rng=None, wfd_ditherscale=1.75,
                          ddf_ditherscale=0.2):
        """
        Use a `method` prescription to obtain dithered values of pointings
        starting from a fixed pointing.

        Parameters
        ----------
        summary : `pd.DataFrame`
            must have the columns `fieldRA`, `fieldDec`, `propID`. It is
            not modified.
        opsimversion : string
            version of the OpSim producing the database; determines whether
            angles are in degrees or radians.
        method : string {'default'|'FlatSky'}
            'default' copies the field positions; 'FlatSky' offsets each
            pointing by a fixed distance in a random direction using the
            flat sky approximation.
        ddfId : integer or sequence of integers, defaults to 5
            proposal ID(s) of the deep drilling fields
        rng : `np.random.RandomState`, defaults to `None`
            if `None`, a fresh `np.random.RandomState(1)` is used so that
            repeated calls give identical results.
        wfd_ditherscale : float, defaults to 1.75
            dither size in degrees for non-DDF pointings
        ddf_ditherscale : float, defaults to 0.2
            dither size in degrees for DDF pointings

        Returns
        -------
        `pd.DataFrame` with columns `ditheredRA`, `ditheredDec` and the
        index of `summary`.
        """
        if rng is None:
            rng = np.random.RandomState(1)

        # Work on a copy so that the caller's table is left untouched
        df = summary[['fieldRA', 'fieldDec', 'propID']].copy()
        angleUnit = OpSimOutput.get_opsimVariablesForVersion(opsimversion)['angleUnit']

        if method == 'default':
            # Simply write the fieldRA to ditheredRA
            df = df.rename(columns=dict(fieldRA='ditheredRA',
                                        fieldDec='ditheredDec'))
        elif method == 'FlatSky':
            # Choose chip size, random directional DDF dithers
            # Choose focal plane radius size, random directional dithers
            # elsewhere. Very roughly these scales are 1.75 deg, and 0.2 deg
            is_ddf = df['propID'].isin(np.atleast_1d(ddfId)).values
            factor = np.where(is_ddf, ddf_ditherscale, wfd_ditherscale)
            if angleUnit == 'degrees':
                pass
            elif angleUnit == 'radians':
                factor = np.radians(factor)
            else:
                raise NotImplementedError("Don't recognize angleUnit")

            # Random directions
            random_angs = rng.uniform(high=2.0 * np.pi, size=len(df))

            # Use the flat sky approximation
            df['ditheredRA'] = df['fieldRA'] + factor * np.cos(random_angs)
            df['ditheredDec'] = df['fieldDec'] + factor * np.sin(random_angs)
        else:
            raise NotImplementedError('method {} has not been implemented yet\n'.format(method))

        if angleUnit == 'degrees':
            assert (df['ditheredRA'].values < 370.0).all()
            maxval = 360.
        elif angleUnit == 'radians':
            maxval = 2.0 * np.pi
            assert (df['ditheredRA'].values < maxval + 0.2).all()

        # Wrap values that were pushed past a full turn
        mask = df['ditheredRA'].values > maxval
        df.loc[mask, 'ditheredRA'] = df.loc[mask, 'ditheredRA'] - maxval
        return df[['ditheredRA', 'ditheredDec']]

    @staticmethod
    def _join_dithercolumns(summary, dithercolumns):
        """Join `dithercolumns` to `summary`, replacing dither columns that
        `summary` already has."""
        existing = [col for col in summary.columns
                    if col.lower() in ('ditheredra', 'dithereddec')]
        if existing:
            summary = summary.drop(columns=existing)
        return summary.join(dithercolumns)

    @classmethod
    def fromOpSimDB(cls, dbname, subset='combined', opsimversion='lsstv3',
                    zeroDDFDithers=True, user_propIDs=None, dithercolumns=None,
                    add_dithers=False, tableNames=('Summary', 'Proposal'),
                    filterNull=False, **kwargs):
        """
        Convenience method to instantiate the `OpSimOutput` class directly
        from an `OpSim` output rather than providing the elementary inputs
        in the class constructor.

        Parameters
        ----------
        dbname : string
            absolute path to database
        subset : string, optional, defaults to 'combined'
            one of {'_all', 'unique_all', 'wfd', 'ddf', 'combined'}
            determines a sequence of propIDs for selecting observations
            appropriate for the OpSim database in use
        opsimversion : {'lsstv3'|'sstf'|'lsstv4'}
            version of OpSim corresponding to the output format.
        zeroDDFDithers : bool, defaults to True
            if True, set dithers in DDF to 0, by setting ditheredRA,
            ditheredDec to fieldRA, fieldDec
        dithercolumns: `pd.DataFrame`, defaults to `None`
            a pandas dataframe with the columns `ditheredRA`, `ditheredDec`
            and index `obsHistID`. When not `None` these are joined to the
            summary table, replacing such columns if they existed.
        add_dithers : Bool, defaults to `False`
            if `True` and `dithercolumns` is `None`, generate dithers by
            invoking `cls.get_dithercolumns` with options through
            `**kwargs`. Even if `False`, becomes `True` if the summary
            table has no `ditheredRA` column.
        user_propIDs : sequence of integers, defaults to `None`
            proposal ID values. If not `None`, overrides the use of subset
        tableNames : tuple of strings, defaults to ('Summary', 'Proposal')
            currently ignored: the table names are derived from
            `opsimversion`.
        filterNull : Bool, defaults to False
            if True, the summary table should be filtered to rows that do
            not contain `NULL` values in the `fiveSigmaDepth` column.
        kwargs: dict of options relating to changing the methods of adding
            dithers. keywords are rng of type `np.random.RandomState`,
            `ddf_ditherscale`, `wfd_ditherscale`, `method`, `ddfID`. If not
            provided, the parameters take default values.

        Raises
        ------
        NotImplementedError : if `subset` or `opsimversion` is unsupported.
        """
        # Because this is in the class method, I am using the staticmethod
        # rather than the property, but note that the property is calculated
        # through this method. So this gives the same thing
        opsimVars = cls.get_opsimVariablesForVersion(opsimversion)

        # The table names depend only on the version; the argument is ignored
        tableNames = (opsimVars['summaryTableName'], 'Proposal')

        # Check that subset parameter is legal
        subset = subset.lower()
        if subset not in cls.get_allowed_subsets():
            raise NotImplementedError('subset {} not implemented'.format(subset))

        engine = cls._get_sql_engine(dbname)
        propDict, propIDs, proposals = cls._get_propIDs(
            tableNames, engine, opsimversion, subset,
            user_propIDs=user_propIDs)
        summary = cls._read_summary_table_raw(engine, opsimVars, propIDs,
                                              subset)

        if len(summary) == 0:
            return cls(propIDDict=propDict, summary=summary,
                       zeroDDFDithers=zeroDDFDithers,
                       proposalTable=proposals, subset=subset,
                       opsimversion=opsimversion)

        # filter read in summary table
        print('We have filterNull set to', filterNull)
        if filterNull:
            print('With given option, filtering the raw summary table of NaNs')
            num_orig = len(summary)
            summary = summary[np.isfinite(summary['fiveSigmaDepth'])]
            print('This option reduced the number of rows from {0} to {1}'.format(num_orig, len(summary)))
        print('checking that summary table read in\n')
        if cls.validate_pointings(summary, opsimVars=None):
            print('Reading in raw tables successful')

        # Standardize names of summary table columns
        replacedict = {opsimVars['obsHistID']: 'obsHistID',
                       opsimVars['propIDNameInSummary']: 'propID',
                       opsimVars['expMJD']: 'expMJD',
                       opsimVars['FWHMeff']: 'FWHMeff',
                       opsimVars['filtSkyBrightness']: 'filtSkyBrightness'}
        summary = summary.rename(columns=replacedict)
        if cls.validate_pointings(summary, opsimVars=None):
            print('replacing names works')

        # Drop duplicates unless this is to write out the entire OpSim
        if subset != '_all':
            summary = cls.dropDuplicates(summary, propDict, opsimversion)

        # Set Standard Index
        summary = summary.set_index('obsHistID')
        if cls.validate_pointings(summary, opsimVars=None):
            print('dropping duplicates works')

        # At this stage the summary table is read in,
        # and the standard index is set

        # In `lsstv3` minion like baselines, the pointingRA are `ditheredRA`
        # etc. In `sstf` versions, the pointing coordinates are `fieldRA`
        # etc. In `lsstv4`, the dithered coordinates are not supplied, so
        # they have to be created unless they are passed in.
        if 'ditheredra' not in [col.lower() for col in summary.columns]:
            add_dithers = True

        if dithercolumns is not None:
            print('Trying to join input dithercolumns\n')
            # Dithers provided in a dataFrame with minimal columns
            # `ditheredRA` and `ditheredDec` and index name `obsHistID`
            # which indexes the visits in the Summary Table
            assert 'ditheredRA' in dithercolumns.columns
            assert 'ditheredDec' in dithercolumns.columns
            assert 'obsHistID' == dithercolumns.index.name
            summary = cls._join_dithercolumns(summary, dithercolumns)
        elif add_dithers:
            print('creating dither columns \n')
            # No dither column provided: defaults match `get_dithercolumns`,
            # any of which may be overridden through `kwargs`
            ditherdict = dict(method='default',
                              ddfID=propDict['ddf'],
                              ddf_ditherscale=0.2,
                              wfd_ditherscale=1.75,
                              rng=np.random.RandomState(1))
            ditherdict.update(kwargs)
            dithercolumns = cls.get_dithercolumns(
                summary[['fieldRA', 'fieldDec', 'propID']],
                opsimversion=opsimversion,
                method=ditherdict['method'],
                ddfId=ditherdict['ddfID'],
                ddf_ditherscale=ditherdict['ddf_ditherscale'],
                wfd_ditherscale=ditherdict['wfd_ditherscale'],
                rng=ditherdict['rng'])
            print(dithercolumns.ditheredRA.max())
            if cls.validate_pointings(dithercolumns, opsimVars=None,
                                      check_anycols=True):
                print('dithercolumns good!')
            summary = cls._join_dithercolumns(summary, dithercolumns)
            print(len(summary), len(dithercolumns))
            if cls.validate_pointings(summary, opsimVars=None):
                print('join good!')

        if cls.validate_pointings(summary, opsimVars=None):
            print('joining dithers works')
        return cls(propIDDict=propDict, summary=summary,
                   zeroDDFDithers=zeroDDFDithers,
                   proposalTable=proposals, subset=subset,
                   opsimversion=opsimversion)

    @staticmethod
    def _read_summary_table_raw(engine, opsimVars, propIDs, subset):
        """Read the summary table, restricted to `propIDs` through a SQL
        query for the subsets 'ddf', 'wfd' and 'combined', or in full for
        '_all' and 'unique_all'."""
        summaryTableName = opsimVars['summaryTableName']
        # Note OpSim version 4 has different names for the same variable
        # in the Proposal Table and Summary Table.
        propIDNameInSummary = opsimVars['propIDNameInSummary']

        if subset in ('_all', 'unique_all'):
            # In this case read everything (ie. table read)
            return pd.read_sql_table(summaryTableName, con=engine)
        if subset in ('ddf', 'wfd', 'combined'):
            print('Not doing all observations here ')
            # Use a sql query rather than reading the whole table. The ids
            # are coerced to int so only integers enter the statement.
            pidString = ', '.join(str(int(pid)) for pid in propIDs)
            print(pidString, subset)
            sql_query = 'SELECT * FROM {0} WHERE {1}'.format(
                summaryTableName, propIDNameInSummary)
            sql_query += ' in ({})'.format(pidString)
            print(sql_query)
            return pd.read_sql_query(sql_query, con=engine)
        raise NotImplementedError()

    @staticmethod
    def _get_propIDs(tableNames, engine, opsimversion, subset,
                     user_propIDs=None):
        """return a sequence of `proposalId` which determine the subset of
        observations from the `summary` table.

        Parameters
        ----------
        tableNames : sequence of strings
            the second entry is the name of the proposal table
        engine : sqlalchemy engine connected to the OpSim database
        opsimversion : {'lsstv3'|'sstf'|'lsstv4'}
        subset : string, one of `OpSimOutput.get_allowed_subsets()`
        user_propIDs : sequence of integers, defaults to `None`
            if not `None` these are used instead of the ones from `subset`

        Returns
        -------
        propDict : dict from `get_propIDDict`
        propIDs : a sequence of integers
        proposals : `pd.DataFrame`, the proposal table

        Notes: The `proposalId` in the `proposal` table indexes science
        programs.
        """
        # Read the proposal table to find out which propID corresponds to
        # the subsets requested
        proposals = pd.read_sql_table(tableNames[1], con=engine)
        propDict = OpSimOutput.get_propIDDict(proposals,
                                              opsimversion=opsimversion)

        # Seq of propIDs consistent with subset
        _propIDs = OpSimOutput.propIDVals(subset, propDict, proposals)

        # If propIDs and subset were both provided, override subset propIDs
        propIDs = OpSimOutput._overrideSubsetPropID(user_propIDs, _propIDs)
        return propDict, propIDs, proposals

    @staticmethod
    def _get_sql_engine(dbname):
        """Return a sqlalchemy engine for `dbname`, treating a bare path as
        a sqlite database."""
        # Prepend the abs path with sqlite for use with sqlalchemy
        if not dbname.startswith('sqlite'):
            dbname = 'sqlite:///' + dbname
        print(' reading from database {}'.format(dbname))
        return create_engine(dbname, echo=False)

    @staticmethod
    def dropDuplicates(df, propIDDict, opsimversion):
        """
        drop duplicate visits, giving priority to the WFD and then the DDF
        record of a visit

        Parameters
        ----------
        df : `pd.DataFrame`
            must have the columns `obsHistID`, `propID` and `expMJD`. It is
            not modified.
        propIDDict : dict
            must have the keys 'ddf' and 'wfd'
        opsimversion : {'lsstv3'|'sstf'|'lsstv4'}
            for 'sstf' the input is returned unchanged

        Returns
        -------
        `pd.DataFrame` with one row per `obsHistID`, the original propID
        values, sorted by `expMJD`
        """
        if opsimversion == 'sstf':
            return df

        propID = df['propID'].values
        # Rank the records so that WFD comes first, DDF next and everything
        # else keeps the order of its propID
        minPropID = df['propID'].min()
        priority = propID.copy()
        priority[np.isin(propID, propIDDict['ddf'])] = minPropID - 1
        priority[np.isin(propID, propIDDict['wfd'])] = minPropID - 2

        # `drop_duplicates` keeps the first row encountered, so the rows
        # must be ordered by priority first (stable to preserve ties)
        order = np.argsort(priority, kind='mergesort')
        unique = df.iloc[order].drop_duplicates(subset='obsHistID',
                                                keep='first')
        return unique.sort_values(by='expMJD')

    @classmethod
    def _fromOpSimHDF(cls, hdfName, subset='combined',
                      tableNames=('Summary', 'Proposal'),
                      propIDs=None):
        """
        Construct an instance of a subset of the OpSim Output from a
        serialization in the format of hdf. Not implemented at present.

        Parameters
        ----------
        hdfName :
        subset :
        tableNames :
        propIDs :

        Raises
        ------
        NotImplementedError : always
        """
        raise NotImplementedError('Not quite working at this moment')

    @property
    def propIds(self):
        """
        list of values in propID Column of the Summary Table of OpSim to be
        considered for this class, either because they were directly
        provided or through the subset argument. `None` if neither is
        available.
        """
        if self._propID is not None:
            return self._propID
        elif self.subset is not None and self.propIDDict is not None:
            return self.propIDVals(self.subset, self.propIDDict,
                                   self.proposalTable)

    def _writeOpSimHDF(self, hdfName):
        """
        Serialize the OpSim output to hdf format in a well defined way
        The output hdf file has two keys: 'Summary' and 'Proposal'

        Raises
        ------
        ValueError : if `self.subset` is not '_all'
        """
        if self.subset != '_all':
            raise ValueError('Should be Done only for self.subset == _all')
        self.summary.to_hdf(hdfName, key='Summary', append=False)
        self.proposalTable.to_hdf(hdfName, key='Proposal', append=False)

    @staticmethod
    def _overrideSubsetPropID(propIDs, _propIDs):
        """Return `propIDs` if provided, else `_propIDs`. A warning is
        issued when both are available and they differ."""
        import warnings

        if propIDs is None:
            return _propIDs
        if _propIDs is not None:
            requested = np.sort(np.ravel(np.asarray(propIDs)))
            implied = np.sort(np.ravel(np.asarray(_propIDs)))
            if not np.array_equal(requested, implied):
                warnings.warn('argument propIDs and _propIDs do not match')
        return propIDs

    @staticmethod
    def get_allowed_subsets():
        """Provide a sequence of implemented subset values"""
        return ('_all', 'ddf', 'wfd', 'combined', 'unique_all')

    @staticmethod
    def get_propIDDict(proposalDF, opsimversion='lsstv3'):
        """
        Return a dictionary with keys 'ddf', and 'wfd' with the proposal IDs
        corresponding to deep drilling fields (ddf) and universal cadence
        (wfd)

        Parameters
        ----------
        proposalDF : `pd.DataFrame`, mandatory
            a dataframe with the Proposal Table of the OpSim Run. The
            proposal names of the WFD and DDF proposals are replaced in
            place by 'wfd' and 'ddf'.
        opsimversion: {'lsstv3'|'sstf'|'lsstv4'}, defaults to 'lsstv3'
            version of opsim from which output is drawn

        Returns
        -------
        dictionary with proposal names as keys (in particular 'wfd' and
        'ddf') and values given by integers corresponding to propIDs for
        these proposals, or arrays of integers if several proposals share
        a name

        Raises
        ------
        NotImplementedError : if `opsimversion` is not supported
        """
        opsimVars = OpSimOutput.get_opsimVariablesForVersion(opsimversion)
        propName = opsimVars['propName']
        propIDName = opsimVars['propIDName']

        df = proposalDF
        # Rename version based proposal names to internal values
        # remember in enigma outputs, these came with `..` in the beginning
        names = df[propName].astype(str)
        is_wfd = names.str.contains(opsimVars['ops_wfdname'], regex=False)
        is_ddf = names.str.contains(opsimVars['ops_ddfname'], regex=False)
        is_ddf = is_ddf & ~is_wfd
        df.loc[is_wfd, propName] = 'wfd'
        df.loc[is_ddf, propName] = 'ddf'

        # To support multiple proposals with the same name, such names map
        # to an array of ids
        pdict = dict()
        for name, ids in df.groupby(propName, sort=False)[propIDName]:
            vals = ids.values
            pdict[name] = vals[0] if len(vals) == 1 else vals
        return pdict

    @staticmethod
    def propIDVals(subset, propIDDict, proposalTable):
        """
        Parameters:
        ----------
        subset : string
            must be member of OpSimOutput.get_allowed_subsets()
        propIDDict : dictionary, mandatory
            must have 'ddf' and 'wfd' as keys as required by `subset`, and
            an integer or seq of ints as values
        proposalTable : `pd.DataFrame`
            Dataframe representing the proposal table in the OpSim database
            output. Only used for the subsets '_all' and 'unique_all'.

        Returns:
        -------
        list of propID values (integers) associated with the subset, or
        `None` for '_all' and 'unique_all' if `proposalTable` is `None`

        Raises
        ------
        ValueError : if `subset` is `None`
        NotImplementedError : if `subset` is not recognized
        """
        if subset is None:
            raise ValueError('subset arg in propIDVals cannot be None')

        key = subset.lower()
        if key in ('ddf', 'wfd'):
            x = [propIDDict[key]]
        elif key == 'combined':
            x = [propIDDict['ddf'], propIDDict['wfd']]
        elif key in ('_all', 'unique_all'):
            if proposalTable is None:
                return None
            # OpSim v3 names the column `propID`, later versions `propId`
            if 'propID' in proposalTable.columns:
                x = proposalTable['propID'].values
            else:
                x = proposalTable['propId'].values
        else:
            raise NotImplementedError('value of subset Not recognized')

        # unroll lists
        flat = list()
        for elem in x:
            if np.ndim(elem) > 0:
                flat.extend(elem)
            else:
                flat.append(elem)
        return flat
def OpSimDfFromFile(fname, ftype='hdf', subset='Combined'):
    """
    read a serialized form of the OpSim output into `pd.DataFrame`
    and return a subset of interest

    This function is not implemented at present: it prints a notice and
    raises `NotImplementedError` for every input. The parameters document
    the intended interface.

    Parameters
    ----------
    fname : string, mandatory
        absolute path to serialized form of the OpSim database
    ftype : {'sqliteDB', 'ASCII', 'hdf'}
        The kind of serialized version being read from.
            'sqliteDB' : `LSST` project supplied OpSim output format for
                baseline cadences (eg. enigma_1189, minion_1016, etc.)
            'ASCII' : `LSST` project supplied OpSim output format used in
                older OpSim outputs eg. OpSim v 2.168 output
            'hdf' : `hdf` files written out by `OpSimSummary`
    subset : {'Combined', 'DDF', 'WFD' , 'All'}, defaults to 'Combined'
        Type of OpSim output desired in the dataframe
        'Combined' : unique pointings in WFD + DDF
        'WFD' : Unique pointings in WFD
        'DDF' : Unique pointings in DDF Cosmology
        'All' : Entire Summary Table From OpSim

    Raises
    ------
    NotImplementedError : always
    """
    print('This seems to have changed since first written, fixing not a priority')
    raise NotImplementedError('This seems to have changed since first written')