"""
This module deals with representing the data in an OpSim output (to the extent
we will care about it). A description of the OpSim output can be found at
(opsim description)[https://www.lsst.org/scientists/simulations/opsim/summary-table-column-descriptions-v335]
In brief, we will use two tables from the OpSim output:
- A Summary Table which has the desired information
- A Proposal Table which contains a dictionary to interpreting the `propID` column
of Summary.
"""
from __future__ import division, print_function, unicode_literals
__all__ = ['OpSimOutput']
import sys
import traceback
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
import collections
class OpSimOutput(object):
    """
    Class representing a subset of the output of the OpSim including
    information from the Summary and Proposal Tables with the subset taken over
    the proposals

    Attributes
    ----------
    opsimversion: {'lsstv3'|'sstf'|'lsstv4'}
        version of OpSim corresponding to the output format.
    summary: `pd.DataFrame`
        selected records from the Summary Table of pointings
    propIDDict: dict
        dictionary with strings as keys and integers used in the Summary
        Table to denote these proposals
    proposalTable: `pd.DataFrame`
        the proposal table in the output
    subset: string
        subset of proposals included in this class
    propIDs : list of integers
        integers corresponding to the subset selected through proposals
    zeroDDFDithers : bool, defaults to True
        if True, set dithers in DDF to 0, by setting ditheredRA,
        ditheredDec to fieldRA, fieldDec. This should only be used for
        opsimversion='lsstv3'. For opsimversion='sstf' or 'lsstv4', this
        will be set to False despite inputs, since this is already done, and
        cannot be done with the inputs.
    """

    # Mapping from the names used in this codebase to the names used in the
    # OpSim output database, one entry per supported OpSim version.
    _OPSIM_VARIABLES = {
        'lsstv3': dict(summaryTableName='Summary',
                       obsHistID='obsHistID',
                       propName='propConf',
                       propIDName='propID',
                       propIDNameInSummary='propID',
                       ops_wfdname='conf/survey/Universal-18-0824B.conf',
                       ops_ddfname='conf/survey/DDcosmology1.conf',
                       expMJD='expMJD',
                       FWHMeff='FWHMeff',
                       pointingRA='ditheredRA',
                       pointingDec='ditheredDec',
                       filtSkyBrightness='filtSkyBrightness',
                       angleUnit='radians'),
        'sstf': dict(summaryTableName='SummaryAllProps',
                     obsHistID='observationId',
                     propName='propName',
                     propIDName='propId',
                     propIDNameInSummary='proposalId',
                     ops_wfdname='WideFastDeep',
                     ops_ddfname='Deep Drilling',
                     expMJD='observationStartMJD',
                     FWHMeff='seeingFwhmEff',
                     pointingRA='fieldRA',
                     pointingDec='fieldDec',
                     filtSkyBrightness='skyBrightness',
                     angleUnit='degrees'),
        'lsstv4': dict(summaryTableName='SummaryAllProps',
                       obsHistID='observationId',
                       propName='propName',
                       propIDName='propId',
                       propIDNameInSummary='proposalId',
                       ops_wfdname='WideFastDeep',
                       ops_ddfname='DeepDrillingCosmology1',
                       expMJD='observationStartMJD',
                       FWHMeff='seeingFwhmEff',
                       pointingRA='ditheredRA',
                       pointingDec='ditheredDec',
                       filtSkyBrightness='skyBrightness',
                       angleUnit='degrees'),
    }

    def __init__(self, summary, propIDDict=None, proposalTable=None,
                 subset=None, propIDs=None, zeroDDFDithers=True,
                 opsimversion='lsstv3'):
        """
        Constructor for the `OpSimOutput` class

        Note that `summary` is modified in place: the columns `_ra` and
        `_dec` are added, and the dithered columns of DDF visits are
        overwritten when `zeroDDFDithers` is in effect.

        Parameters
        ----------
        summary: `pd.DataFrame`
            a table describing a summary of observations which should have the
            following minimal columns (`ditheredRA`, `ditheredDec`, `expMJD`)
            as columns holding information on the pointing RA and dec of the
            telescope and the mjd of the observation and `obsHistID` a unique
            index for each entry the name of the index. Other columns
            which are essential in the context of use in simulations are
            (`FWHMeff`, `filtSkyBrightness`, `fiveSigmaDepth`, `propID`)
            describing the FWHM seeing, sky brightness in the filter of
            observation, the five sigma depth of the observation and
            an integer index for the program an observation belongs to.
        proposalTable: `pd.DataFrame`, defaults to `None`
            modified table of proposals from the OpSim output
        propIDDict: dict, defaults to None
            dictionary with strings ('wfd', 'ddf', ...) describing the
            program as keys and the integers (or arrays of integers) that
            identify the proposals as values. Required when
            `zeroDDFDithers` is in effect.
        subset: {'wfd'|'ddf'|'combined'| '_all' | 'unique_all'}, defaults to
            `None`. A string that defines a subset of observations to be
            chosen based on the choice of proposals. `wfd` is the LSST `WFD`,
            `ddf` is the LSST `ddf`, `combined` is the combination of
            `WFD` and `DDF` while, `unique_all` keeps all of the unique
            observations. `_all` is the entire table of observations from
            OpSim outputs.
        propIDs: sequence of integers
        zeroDDFDithers: bool, defaults to `True`
            If `True` changes the dithers in DDF fields to zero by setting the
            columns `ditheredRA`, `ditheredDec` the same as `fieldRA` and
            `fieldDec`
        opsimversion: {'lsstv3'|'sstf'|'lsstv4'} , defaults to 'lsstv3'
            a string to denote the version of OpSim outputs. `lsstv3`
            refers to outputs from OpSim version 3 (eg. enigma_1189, minion_1016).
            `sstf` refers to outputs created at the start of the Observing
            Strategy Task Force available at the following
            [website](http://altsched.rothchild.me:8080),
            and `lsstv4` refers to outputs available from OpSim version 4.

        Raises
        ------
        ValueError
            if DDF dithers must be zeroed but `propIDDict` is `None`, or if
            the angle unit of the OpSim version is not recognized.
        """
        self.opsimversion = opsimversion
        self.allowed_subsets = self.get_allowed_subsets()
        self.subset = subset
        self.propIDDict = propIDDict
        self.proposalTable = proposalTable
        self._opsimvars = None

        # Only complain when the caller actually asked for something that
        # cannot be honoured for this version.
        if opsimversion in ('sstf', 'lsstv4') and zeroDDFDithers:
            ss = 'Warning: Input is zeroDDFDithers = True. But opsimversion is '
            ss += '{} for which this must be False. Setting to False and proceeding\n'.format(opsimversion)
            print(ss)
            zeroDDFDithers = False

        # Check `summary` does not have `nan`s (exits the interpreter on
        # failure, see `validate_pointings`).
        self.validate_pointings(summary, opsimVars=None)

        if zeroDDFDithers:
            if self.propIDDict is None:
                raise ValueError('propIDDict with a `ddf` key is required '
                                 'when zeroDDFDithers is True')
            # `isin` copes with one or with several DDF proposal IDs
            ddfmask = summary['propID'].isin(np.atleast_1d(self.propIDDict['ddf']))
            summary.loc[ddfmask, 'ditheredRA'] = summary.loc[ddfmask, 'fieldRA']
            summary.loc[ddfmask, 'ditheredDec'] = summary.loc[ddfmask, 'fieldDec']

        # Have a clear unambiguous ra, dec in radians following LSST
        # convention. These are the columns `_ra`, `_dec` which hold the
        # dithered values in radians.
        angleUnit = self.opsimVars['angleUnit']
        if angleUnit == 'degrees':
            summary['_ra'] = np.radians(summary['ditheredRA'])
            summary['_dec'] = np.radians(summary['ditheredDec'])
            print('Changing units for {0} from {1}'.format(opsimversion, 'degrees'))
        elif angleUnit == 'radians':
            print('Keeping units for {0} from {1}'.format(opsimversion, 'radians'))
            summary['_ra'] = summary['ditheredRA']
            summary['_dec'] = summary['ditheredDec']
        else:
            raise ValueError('angle unit of ra and dec Columns not recognized\n')

        # Validate the format of the pointings to expectations given the
        # version of the OpSim output
        self.validate_pointings(summary, self.opsimVars)
        self.summary = summary

        # Set the attribute `_propID`
        self._propID = propIDs

    @staticmethod
    def get_opsimVariablesForVersion(opsimversion='lsstv3'):
        """Static method to return a dictionary for the opsim version where
        the keys are names of quantities used in this codebase, and the values
        are the names of quantities in the OpSim output database

        Parameters
        ----------
        opsimversion: string, defaults to `lsstv3`
            can be {`lsstv3`|`lsstv4`|`sstf`}

        Returns
        -------
        dictionary: key, value pairs where keys are variable names used in
            `OpSimSummary` and values are variable names used in the OpSim
            database with the given version. A fresh copy is returned on
            every call.

        Raises
        ------
        NotImplementedError
            if `opsimversion` is not one of the supported versions.

        Examples
        --------
        >>> from opsimsummary import OpSimOutput
        >>> OpSimOutput.get_opsimVariablesForVersion('lsstv4')
        {'summaryTableName': 'SummaryAllProps', 'obsHistID': 'observationId',
         'propName': 'propName', 'propIDName': 'propId',
         'propIDNameInSummary': 'proposalId', 'ops_wfdname': 'WideFastDeep',
         'ops_ddfname': 'DeepDrillingCosmology1',
         'expMJD': 'observationStartMJD', 'FWHMeff': 'seeingFwhmEff',
         'pointingRA': 'ditheredRA', 'pointingDec': 'ditheredDec',
         'filtSkyBrightness': 'skyBrightness', 'angleUnit': 'degrees'}
        """
        try:
            variables = OpSimOutput._OPSIM_VARIABLES[opsimversion]
        except KeyError:
            raise NotImplementedError('opsimversion `{}` is not supported'.format(opsimversion))
        return dict(variables)

    @property
    def opsimVars(self):
        """Dictionary where the keys are names of quantities used in
        `OpSimSummary`, and the values are the names of quantities in
        the OpSim output database used.
        """
        if self._opsimvars is None:
            self._opsimvars = self.get_opsimVariablesForVersion(self.opsimversion)
        return self._opsimvars

    @staticmethod
    def validate_pointings(summary, opsimVars=None, check_anycols=False):
        """
        Validate a dataframe of pointings for further use. If
        `opsimVars` is `None` then only check that there are no `np.nan`s,
        else also check that the table of pointings has the columns `_ra` and
        `_dec` with values that are sensible for angles in radians.

        Parameters
        ----------
        summary: `pd.DataFrame` of pointings
        opsimVars: dictionary, defaults to `None`
            should be dictionary for each supported OpSim version available
            from `OpSimOutput.get_opsimVariablesForVersion(opsimversion)`
        check_anycols: Bool, defaults to False
            if True, this will check all columns rather than fiveSigmaDepth
            for nans

        Returns
        -------
        `True` if the table is valid. On failure a diagnostic is printed and
        the interpreter exits through `sys.exit(1)`.
        """
        problem = None
        if opsimVars is not None:
            if '_ra' not in summary.columns:
                problem = 'column `_ra` is missing'
            elif '_dec' not in summary.columns:
                problem = 'column `_dec` is missing'
            elif len(summary) > 0:
                # Range checks are meaningless for an empty table. The
                # negated comparisons also reject a `nan` extremum.
                if not np.fabs(summary['_ra'].max()) <= 2.0 * np.pi:
                    problem = '`_ra` has values beyond 2 pi radians'
                elif not (summary['_dec'].min() >= -1.0 * np.pi and
                          summary['_dec'].max() <= np.pi):
                    problem = '`_dec` has values outside [-pi, pi] radians'

        if problem is None:
            if check_anycols:
                if summary.isnull().values.any():
                    problem = 'table contains null values'
            elif 'fiveSigmaDepth' not in summary.columns.values:
                # We only check the fiveSigmaDepth column
                problem = 'column `fiveSigmaDepth` is missing'
            elif summary['fiveSigmaDepth'].isnull().values.any():
                problem = 'column `fiveSigmaDepth` contains null values'

        if problem is not None:
            print('pointings are not in required format')
            print(summary.head())
            print('Validation failed: {}'.format(problem))
            sys.exit(1)
        return True

    @staticmethod
    def get_dithercolumns(summary,
                          opsimversion,
                          method='default',
                          ddfId=5,
                          rng=None,
                          wfd_ditherscale=1.75,
                          ddf_ditherscale=0.2):
        """
        Use a `method` prescription to obtain dithered values of pointings
        starting from a fixed pointing.

        Parameters
        ----------
        summary : `pd.DataFrame`
            indexed by `obsHistID` and having the columns `fieldRA`,
            `fieldDec` and `propID`. It is not modified.
        opsimversion : string
            version of the OpSim producing the database; determines whether
            angles are in degrees or radians.
        method : {'default'|'FlatSky'}
            `default` copies the field positions. `FlatSky` displaces each
            field by a fixed distance in a random direction, using the flat
            sky approximation.
        ddfId : integer or sequence of integers, defaults to 5
            proposal ID(s) of the deep drilling fields
        rng : `np.random.RandomState`, defaults to `None`
            random state used for the directions; if `None` a new
            `np.random.RandomState(1)` is created for each call.
        wfd_ditherscale : float, defaults to 1.75
            size in degrees of the dither for visits that are not DDF
        ddf_ditherscale : float, defaults to 0.2
            size in degrees of the dither for DDF visits

        Returns
        -------
        `pd.DataFrame` with the columns `ditheredRA` and `ditheredDec` in the
        angle unit of `opsimversion` with `ditheredRA` wrapped to a single
        turn.

        Raises
        ------
        NotImplementedError
            for an unknown `method` or angle unit
        ValueError
            if the dithered RA is implausibly beyond a full turn
        """
        if rng is None:
            rng = np.random.RandomState(1)

        # start off with a private copy of fieldRA, fieldDec, propID
        df = summary[['fieldRA', 'fieldDec', 'propID']].copy()
        angleUnit = OpSimOutput.get_opsimVariablesForVersion(opsimversion)['angleUnit']
        if angleUnit == 'degrees':
            maxval, tolerance = 360., 10.
        elif angleUnit == 'radians':
            maxval, tolerance = 2.0 * np.pi, 0.2
        else:
            raise NotImplementedError("Don't recognize angleUnit")

        if method == 'default':
            # Simply write the fieldRA to ditheredRA
            df = df.rename(columns=dict(fieldRA='ditheredRA',
                                        fieldDec='ditheredDec'))
        elif method == 'FlatSky':
            # Choose chip size, random directional DDF dithers
            # Choose focal plane radius size, random directional dithers
            # elsewhere. Very roughly these scales are 1.75 deg, and 0.2 deg
            isddf = df['propID'].isin(np.atleast_1d(ddfId)).values
            factor = np.where(isddf, ddf_ditherscale, wfd_ditherscale)
            if angleUnit == 'radians':
                factor = np.radians(factor)
            # Random directions
            random_angs = rng.uniform(high=2.0 * np.pi, size=len(df))
            # Use the flat sky approximation
            df['ditheredRA'] = df['fieldRA'] + factor * np.cos(random_angs)
            df['ditheredDec'] = df['fieldDec'] + factor * np.sin(random_angs)
        else:
            raise NotImplementedError('method {} has not been implemented yet\n'.format(method))

        ra = df['ditheredRA'].values.copy()
        if not np.all(ra < maxval + tolerance):
            raise ValueError('dithered RA values exceed a full turn by more '
                             'than the expected dither size')
        # Wrap values that the dither pushed past either end of the range
        ra[ra > maxval] -= maxval
        ra[ra < 0.] += maxval
        df['ditheredRA'] = ra
        return df[['ditheredRA', 'ditheredDec']]

    @staticmethod
    def _drop_dither_columns(summary):
        """Return `summary` without any (case insensitive) dithered columns."""
        existing = [col for col in summary.columns
                    if col.lower() in ('ditheredra', 'dithereddec')]
        if existing:
            return summary.drop(existing, axis=1)
        return summary

    @classmethod
    def fromOpSimDB(cls, dbname,
                    subset='combined',
                    opsimversion='lsstv3',
                    zeroDDFDithers=True,
                    user_propIDs=None,
                    dithercolumns=None,
                    add_dithers=False,
                    tableNames=('Summary', 'Proposal'),
                    filterNull=False,
                    **kwargs):
        """
        Convenience method to instantiate the `OpSimOutput` class directly
        from an `OpSim` output rather than providing the elementary inputs in
        the class constructor.

        Parameters
        ----------
        dbname : string
            absolute path to database
        subset : string, optional, defaults to 'combined'
            one of {'_all', 'unique_all', 'wfd', 'ddf', 'combined'}
            determines a sequence of propIDs for selecting observations
            appropriate for the OpSim database in use
        opsimversion : {'lsstv3'|'sstf'|'lsstv4'}
            version of OpSim corresponding to the output format.
        zeroDDFDithers : bool, defaults to True
            if True, set dithers in DDF to 0, by setting ditheredRA,
            ditheredDec to fieldRA, fieldDec
        user_propIDs : sequence of integers, defaults to `None`
            proposal ID values. If not `None`, overrides the use of subset
        dithercolumns: `pd.DataFrame`, defaults to `None`
            a pandas dataframe with the columns `ditheredRA`, `ditheredDec`
            and index `obsHistID`. When not `None` these replace any dithered
            columns already present in the summary table.
        add_dithers : Bool, defaults to `False`
            if `True` and `dithercolumns` is `None`, generate the dithers by
            invoking `cls.get_dithercolumns` with options from `**kwargs`,
            replacing any dithered columns already present. Becomes `True`
            when the summary table has no `ditheredRA` column.
        tableNames : tuple of strings, defaults to ('Summary', 'Proposal')
            retained for backward compatibility. The value is ignored: the
            summary table name is taken from `opsimversion` and the proposal
            table is always `Proposal`.
        filterNull : Bool, defaults to False
            if True, the summary table is filtered to rows that do not
            contain `NULL` values in the `fiveSigmaDepth` column.
        kwargs: dict
            of options relating to changing the methods of adding dithers.
            keywords are rng of type `np.random.RandomState`, `ddfID`,
            `ddf_ditherscale`, `wfd_ditherscale`, `method`. If not provided,
            the parameters take the default values of `get_dithercolumns`.

        Returns
        -------
        instance of `OpSimOutput`

        Raises
        ------
        NotImplementedError
            if `subset` is not an allowed subset
        ValueError
            if `dithercolumns` does not have the required columns and index
        """
        # Because this is in the class method, the staticmethod is used
        # rather than the property; both give the same dictionary.
        opsimVars = cls.get_opsimVariablesForVersion(opsimversion)
        tableNames = (opsimVars['summaryTableName'], 'Proposal')

        # Check that subset parameter is legal
        subset = subset.lower()
        if subset not in cls.get_allowed_subsets():
            raise NotImplementedError('subset {} not implemented'.format(subset))

        engine = cls._get_sql_engine(dbname)
        propDict, propIDs, proposals = cls._get_propIDs(tableNames, engine,
                                                        opsimversion,
                                                        subset,
                                                        user_propIDs=user_propIDs)
        summary = cls._read_summary_table_raw(engine, opsimVars, propIDs, subset)
        if len(summary) == 0:
            return cls(propIDDict=propDict,
                       summary=summary,
                       zeroDDFDithers=zeroDDFDithers,
                       proposalTable=proposals, subset=subset,
                       opsimversion=opsimversion)

        # filter read in summary table
        print('We have filterNull set to', filterNull)
        if filterNull:
            print('With given option, filtering the raw summary table of NaNs')
            num_orig = len(summary)
            # copy so that later in-place operations do not act on a slice
            summary = summary[np.isfinite(summary['fiveSigmaDepth'])].copy()
            print('This option reduced the number of rows from {0} to {1}'.format(num_orig, len(summary)))
        print('checking that summary table read in\n')
        if cls.validate_pointings(summary, opsimVars=None):
            print('Reading in raw tables successful')

        # Standardize names of summary table columns
        replacedict = dict()
        for standard_name in ('obsHistID', 'expMJD', 'FWHMeff',
                              'filtSkyBrightness'):
            replacedict[opsimVars[standard_name]] = standard_name
        replacedict[opsimVars['propIDNameInSummary']] = 'propID'
        summary = summary.rename(columns=replacedict)
        if cls.validate_pointings(summary, opsimVars=None):
            print('replacing names works')

        if subset != '_all':
            # Drop duplicates unless this is to write out the entire OpSim
            summary = cls.dropDuplicates(summary, propDict, opsimversion)

        # Set Standard Index
        summary.set_index('obsHistID', inplace=True)
        if cls.validate_pointings(summary, opsimVars=None):
            print('dropping duplicates works')

        # At this stage the summary table is read in, and the standard index
        # is set.
        # In `lsstv3` minion like baselines, the pointingRA are `ditheredRA`.
        # In `sstf` versions, the pointing coordinates are `fieldRA` etc.
        # In `lsstv4`, `ditheredRA` etc. are not supplied.
        if 'ditheredra' not in [col.lower() for col in summary.columns]:
            # eg. has to be done in `lsstv4` and `sstf` unless supplied
            add_dithers = True

        if dithercolumns is not None:
            print('Trying to join input dithercolumns\n')
            # Dithers provided in a `pd.DataFrame` with minimal columns
            # `ditheredRA` and `ditheredDec` and index name `obsHistID`
            # which indexes the visits in the Summary Table
            if ('ditheredRA' not in dithercolumns.columns or
                    'ditheredDec' not in dithercolumns.columns or
                    dithercolumns.index.name != 'obsHistID'):
                raise ValueError('dithercolumns must have the columns '
                                 '`ditheredRA`, `ditheredDec` and an index '
                                 'named `obsHistID`')
            # if the column names already exist in the table remove them
            summary = cls._drop_dither_columns(summary)
            summary = summary.join(dithercolumns)
        elif add_dithers:
            print('creating dither columns \n')
            # No dither column provided: defaults match `get_dithercolumns`
            ditherdict = dict(method='default',
                              ddfID=propDict['ddf'],
                              ddf_ditherscale=0.2,
                              wfd_ditherscale=1.75,
                              rng=np.random.RandomState(1))
            ditherdict.update(kwargs)
            dithercolumns = cls.get_dithercolumns(summary[['fieldRA',
                                                           'fieldDec',
                                                           'propID']],
                                                  opsimversion=opsimversion,
                                                  method=ditherdict['method'],
                                                  ddfId=ditherdict['ddfID'],
                                                  ddf_ditherscale=ditherdict['ddf_ditherscale'],
                                                  wfd_ditherscale=ditherdict['wfd_ditherscale'],
                                                  rng=ditherdict['rng'])
            if cls.validate_pointings(dithercolumns, opsimVars=None,
                                      check_anycols=True):
                print('dithercolumns good!')
            # Replace rather than collide with pre-existing dither columns
            summary = cls._drop_dither_columns(summary)
            summary = summary.join(dithercolumns)
            print(len(summary), len(dithercolumns))

        if cls.validate_pointings(summary, opsimVars=None):
            print('joining dithers works')
        return cls(propIDDict=propDict,
                   summary=summary,
                   zeroDDFDithers=zeroDDFDithers,
                   proposalTable=proposals, subset=subset,
                   opsimversion=opsimversion)

    @staticmethod
    def _read_summary_table_raw(engine, opsimVars, propIDs, subset):
        """
        Read the rows of the summary table relevant for `subset`.

        Parameters
        ----------
        engine : sqlalchemy engine connected to the OpSim database
        opsimVars : dict
            as returned by `get_opsimVariablesForVersion`
        propIDs : sequence of integers
            proposal IDs selecting the rows, used for the subsets `ddf`,
            `wfd` and `combined`
        subset : string
            one of the allowed subsets

        Returns
        -------
        `pd.DataFrame` of the selected rows with the column names of the
        database.
        """
        summaryTableName = opsimVars['summaryTableName']
        # Note OpSim version 4 has different names for the same variable
        # in the Proposal Table and Summary Table.
        propIDNameInSummary = opsimVars['propIDNameInSummary']
        if subset in ('_all', 'unique_all'):
            # In this case read everything (ie. table read)
            return pd.read_sql_table(summaryTableName, con=engine)
        if subset in ('ddf', 'wfd', 'combined'):
            print('Not doing all observations here ')
            # Use a sql query rather than reading the whole table. The IDs
            # are coerced to `int` so that nothing but integer literals can
            # end up in the query text.
            pidString = ', '.join(str(int(pid)) for pid in propIDs)
            print(pidString, subset)
            sql_query = 'SELECT * FROM {0} WHERE {1} in ({2})'.format(
                summaryTableName, propIDNameInSummary, pidString)
            print(sql_query)
            return pd.read_sql_query(sql_query, con=engine)
        raise NotImplementedError()

    @staticmethod
    def _get_propIDs(tableNames, engine, opsimversion, subset,
                     user_propIDs=None):
        """return the sequence of `proposalId` which determine the subset of
        observations from the `summary` table.

        Parameters
        ----------
        tableNames : sequence of strings
            the second entry is the name of the proposal table
        engine : sqlalchemy engine connected to the OpSim database
        opsimversion : string
            version of the OpSim output
        subset : string
            one of the allowed subsets
        user_propIDs : sequence of integers, defaults to `None`
            if not `None`, used in place of the propIDs implied by `subset`

        Returns
        -------
        propDict : dict, as returned by `get_propIDDict`
        propIDs : a sequence of integers
        proposals : `pd.DataFrame`, the proposal table as modified by
            `get_propIDDict`

        Notes: The `proposalId` in the `proposal` table indexes science
        programs.
        """
        # Read the proposal table to find out which propID corresponds to
        # the subsets requested
        proposals = pd.read_sql_table(tableNames[1], con=engine)
        propDict = OpSimOutput.get_propIDDict(proposals, opsimversion=opsimversion)
        # Seq of propIDs consistent with subset
        _propIDs = OpSimOutput.propIDVals(subset, propDict, proposals)
        # If propIDs and subset were both provided, override subset propIDs
        propIDs = OpSimOutput._overrideSubsetPropID(user_propIDs, _propIDs)
        return propDict, propIDs, proposals

    @staticmethod
    def _get_sql_engine(dbname):
        """Return a sqlalchemy engine for the sqlite database `dbname`."""
        # Prepend the abs path with sqlite for use with sqlalchemy
        if not dbname.startswith('sqlite'):
            dbname = 'sqlite:///' + dbname
        print(' reading from database {}'.format(dbname))
        return create_engine(dbname, echo=False)

    @staticmethod
    def dropDuplicates(df, propIDDict, opsimversion):
        """
        Drop rows with a repeated `obsHistID`, preferring the row recorded
        for WFD, then the row recorded for DDF, then the row with the lowest
        remaining `propID`.

        Parameters
        ----------
        df : `pd.DataFrame`
            with columns `obsHistID`, `propID` and `expMJD`. It is not
            modified.
        propIDDict : dict
            with keys `ddf` and `wfd` giving the corresponding propIDs
        opsimversion : string
            for `sstf` the table is returned unchanged

        Returns
        -------
        `pd.DataFrame` with the original propID values, duplicates dropped
        and sorted by `expMJD`
        """
        if opsimversion == 'sstf' or len(df) == 0:
            return df

        # Duplicates are dropped in order of a priority derived from propID:
        # WFD is made lowest, DDF next lowest, everything else as is
        minPropID = df['propID'].min()
        priority = df['propID'].copy()
        priority[df['propID'].isin(np.atleast_1d(propIDDict['ddf'])).values] = minPropID - 1
        priority[df['propID'].isin(np.atleast_1d(propIDDict['wfd'])).values] = minPropID - 2

        # A stable sort keeps the table order among rows of equal priority.
        # Without the sort `keep='first'` would depend on the table order
        # alone and the priorities would have no effect.
        order = np.argsort(priority.values, kind='mergesort')
        unique = df.iloc[order].drop_duplicates(subset='obsHistID', keep='first')
        return unique.sort_values(by='expMJD')

    @classmethod
    def _fromOpSimHDF(cls, hdfName, subset='combined',
                      tableNames=('Summary', 'Proposal'),
                      propIDs=None):
        """
        Construct an instance of a subset of the OpSim
        Output from a serialization in the format of hdf

        This is not implemented at the moment; calling it always raises.

        Parameters
        ----------
        hdfName :
        subset :
        tableNames :
        propIDs :

        Raises
        ------
        NotImplementedError
            always
        """
        raise NotImplementedError('Not quite working at this moment')

    @property
    def propIds(self):
        """
        list of values in propID Column of the Summary Table of OpSim
        to be considered for this class, either because they were directly
        provided or through the subset argument. `None` if neither is
        available.
        """
        if self._propID is not None:
            return self._propID
        if self.subset is not None and self.propIDDict is not None:
            return self.propIDVals(self.subset, self.propIDDict, self.proposalTable)
        return None

    def _writeOpSimHDF(self, hdfName):
        """
        Serialize the OpSim output to hdf format in a well defined way
        The output hdf file has two keys: 'Summary' and 'Proposal'

        Raises
        ------
        ValueError
            if `self.subset` is not `_all`
        """
        if self.subset != '_all':
            raise ValueError('Should be Done only for self.subset == _all')
        self.summary.to_hdf(hdfName, key='Summary', append=False)
        self.proposalTable.to_hdf(hdfName, key='Proposal', append=False)

    @staticmethod
    def _overrideSubsetPropID(propIDs, _propIDs):
        """
        Return the propIDs to use: the user supplied `propIDs` if not `None`,
        else the `_propIDs` implied by the subset. A warning is issued when
        both are available and differ.
        """
        import warnings

        if propIDs is None:
            return _propIDs
        if _propIDs is not None:
            requested = np.sort(np.atleast_1d(propIDs))
            implied = np.sort(np.atleast_1d(_propIDs))
            # `np.sort` returns the sorted array (`ndarray.sort` returns
            # `None`). Warn instead of raising, since overriding the subset
            # is a supported use.
            if not np.array_equal(requested, implied):
                warnings.warn('argument propIDs and _propIDs do not match')
        return propIDs

    @staticmethod
    def get_allowed_subsets():
        """Provide a sequence of implemented subset values"""
        return ('_all', 'ddf', 'wfd', 'combined', 'unique_all')

    @staticmethod
    def get_propIDDict(proposalDF, opsimversion='lsstv3'):
        """
        Return a dictionary with keys 'ddf', and 'wfd' with the proposal IDs
        corresponding to deep drilling fields (ddf) and universal cadence (wfd)

        Parameters
        ----------
        proposalDF : `pd.DataFrame`, mandatory
            a dataframe with the Proposal Table of the OpSim Run. It is
            modified in place: the names of the WFD and DDF proposals are
            replaced by `wfd` and `ddf`.
        opsimversion: {'lsstv3'|'sstf'|'lsstv4'}, defaults to 'lsstv3'
            version of opsim from which output is drawn

        Returns
        -------
        dictionary with the proposal names as keys (`wfd` and `ddf` for the
        recognized proposals) and the propIDs as values. The value is an
        array when several proposals share a name.

        Raises
        ------
        NotImplementedError
            if `opsimversion` is not supported
        """
        opsimVars = OpSimOutput.get_opsimVariablesForVersion(opsimversion)
        propName = opsimVars['propName']
        propIDName = opsimVars['propIDName']
        df = proposalDF

        # Rename version based proposal names to internal values. A
        # substring match is used because in enigma outputs the names came
        # with `..` in the beginning.
        names = df[propName].astype(str)
        iswfd = names.str.contains(opsimVars['ops_wfdname'], regex=False)
        isddf = names.str.contains(opsimVars['ops_ddfname'], regex=False) & ~iswfd
        df.loc[iswfd, propName] = 'wfd'
        df.loc[isddf, propName] = 'ddf'

        ids = df.set_index(propName)[propIDName]
        pdict = dict()
        for name in ids.index.unique():
            val = ids.loc[name]
            # To support multiple proposals with the same name
            pdict[name] = val.values if isinstance(val, pd.Series) else val
        return pdict

    @staticmethod
    def propIDVals(subset, propIDDict, proposalTable):
        """
        Parameters:
        ----------
        subset : string
            must be member of OpSimOutput.get_allowed_subsets()
        propIDDict : dictionary, mandatory
            must have `ddf` and/or `wfd` as keys as needed by subset, and an
            integer or seq of ints as values
        proposalTable : `pd.DataFrame`
            Dataframe representing the proposal table in the OpSim database
            output, with the propIDs in a column `propID` or `propId`

        Returns:
        -------
        list of propID values (integers) associated with the subset, or
        `None` for the subsets `_all` and `unique_all` if `proposalTable` is
        `None`.

        Raises
        ------
        ValueError
            if subset is `None` or the proposal table has no propID column
        NotImplementedError
            if subset is not recognized
        """
        if subset is None:
            raise ValueError('subset arg in propIDVals cannot be None')
        key = subset.lower()
        if key in ('ddf', 'wfd'):
            x = [propIDDict[key]]
        elif key == 'combined':
            x = [propIDDict['ddf'], propIDDict['wfd']]
        elif key in ('_all', 'unique_all'):
            if proposalTable is None:
                return None
            # The column is `propID` in OpSim v3 and `propId` later
            for colname in ('propID', 'propId'):
                if colname in proposalTable.columns:
                    x = proposalTable[colname].values
                    break
            else:
                raise ValueError('proposalTable has no propID column')
        else:
            raise NotImplementedError('value of subset Not recognized')

        # unroll lists
        l = list()
        for elem in x:
            if np.iterable(elem):
                l.extend(elem)
            else:
                l.append(elem)
        return l
def OpSimDfFromFile(fname, ftype='hdf', subset='Combined'):
    """
    read a serialized form of the OpSim output into `pd.DataFrame`
    and return a subset of interest

    This function is currently not implemented: it prints a notice and
    always raises `NotImplementedError`. The parameters describe the
    intended interface.

    Parameters
    ----------
    fname : string, mandatory
        absolute path to serialized form of the OpSim database
    ftype : {'sqliteDB', 'ASCII', 'hdf'}
        The kind of serialized version being read from.
            'sqliteDB' : `LSST` project supplied OpSim output format for
                baseline cadences (eg. enigma_1189, minion_1016, etc.)
            'ASCII' : `LSST` project supplied OpSim output format used in
                older OpSim outputs eg. OpSim v 2.168 output
            'hdf' : `hdf` files written out by `OpSimSummary`
    subset : {'Combined', 'DDF', 'WFD' , 'All'}, defaults to 'Combined'
        Type of OpSim output desired in the dataframe
        'Combined' : unique pointings in WFD + DDF
        'WFD' : Unique pointings in WFD
        'DDF' : Unique pointings in DDF Cosmology
        'All' : Entire Summary Table From OpSim

    Raises
    ------
    NotImplementedError
        always
    """
    print('This seems to have changed since first written, fixing not a priority')
    raise NotImplementedError('This seems to have changed since first written')