# Source code for imaginglss.model.datarelease

"""
Python code to look at the imaging data, containing
interfaces to deal with "bricks" and "catalogs".
This is the "highest level" interface to the
imaging data and makes use of several lower
level objects.

"""
from __future__ import print_function

import os
import os.path
import numpy
import glob
import re
from collections import namedtuple

from ..utils import fits

from . import brickindex
from . import imagerepo
from . import catalogue
from . import schema
from .sfdmap import SFDMap
__author__ = "Yu Feng and Martin White"
__version__ = "0.9"
__email__  = "yfeng1@berkeley.edu or mjwhite@lbl.gov"

import warnings


class Lazy(object):
    """ Lazy initialization of object attributes. """
    def __init__(self, calculate_function):
        self._calculate = calculate_function

    def __get__(self, obj, _=None):
        if obj is None:
            return self
        value = self._calculate(obj)
        # cache the computed value on the instance under the function's name,
        # so later attribute lookups bypass this descriptor.
        setattr(obj, self._calculate.__name__, value)
        return value

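# Illustrative sketch (not used below): ``Lazy`` behaves like a caching
# property.  The decorated method runs once per instance and its result is
# stored on the instance under the method's name.  ``expensive_integration``
# is a hypothetical helper, shown only for the example.
#
#     class Brick(object):
#         @Lazy
#         def area(self):
#             return expensive_integration(self)
#
#     b = Brick()
#     b.area   # computed and cached on first access
#     b.area   # cached value; expensive_integration is not called again
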
def contains(haystack, needle):
    """ Test if needle is in haystack.

        Parameters
        ----------
        haystack : array_like
            Sorted array
        needle : array_like
            items to look for

        Returns
        -------
        mask : array_like
            mask[i] is True only if needle[i] is in haystack.

        Examples
        --------
        >>> contains([1, 2, 3], [2])
        array([ True])

    """
    haystack = numpy.asarray(haystack)
    ind = haystack.searchsorted(needle)
    if len(haystack) == 0:
        # build a False array of the right shape
        return ind != ind
    ind.clip(0, len(haystack) - 1, ind)
    return haystack[ind] == needle

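# A minimal sketch of ``contains`` in use (the haystack must be sorted):
#
#     >>> contains([1, 2, 4, 8], [2, 3, 8, 9])
#     array([ True, False,  True, False])
#
# ``searchsorted`` returns the would-be insertion index of each needle; a
# needle is present exactly when the haystack element at that (clipped)
# index equals it.
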
class Footprint(object):
    """ Footprint of a data release.

        Use indexing to construct sub-regions of the footprint, for example

        Examples
        --------
        >>> print(datarelease.footprint[:100])
        Footprint(.....)

        Attributes
        ----------
        bricks : list of model.brick.Brick
            A list of Bricks that are covered by the footprint
        range  : tuple
            The range of RA and DEC of all bricks
            (ramin, ramax, decmin, decmax, area)
        area   : float
            Covered outline area in square degrees

    """
    def __init__(self, bricks, brickindex):
        self.bricks = bricks
        self.area = sum([b.area for b in bricks])
        self._covered_brickids = numpy.array(
            [b.index for b in bricks], dtype='i8')

        # range of ra dec of covered bricks
        FootPrintRange = namedtuple('FootPrintRange',
                ['ramin', 'ramax', 'decmin', 'decmax', 'area'])

        if len(bricks) == 0:
            self.range = FootPrintRange(
                ramin=0, ramax=0, decmin=0, decmax=0, area=0)
        else:
            ramin = min([brick.ra1 for brick in self.bricks])
            ramax = max([brick.ra2 for brick in self.bricks])
            decmin = min([brick.dec1 for brick in self.bricks])
            decmax = max([brick.dec2 for brick in self.bricks])
            deg = numpy.pi / 180.
            # solid angle of the bounding box, converted to square degrees
            self.range = FootPrintRange(
                ramin=ramin, ramax=ramax, decmin=decmin, decmax=decmax,
                area=(numpy.sin(decmax * deg)
                    - numpy.sin(decmin * deg)) * (ramax - ramin) * deg
                    * 129600 / numpy.pi / (4 * numpy.pi))

        self.brickindex = brickindex

    def __len__(self):
        return len(self.bricks)

    def __getitem__(self, index):
        return Footprint(self.bricks[index], self.brickindex)

    def __repr__(self):
        return "Footprint: len(bricks)=%d , area=%g degrees, range=%s" % (
            len(self.bricks), self.area, str(self.range))

    def intersect(self, other):
        """ Returns the intersection with another footprint. """
        bricks = list(set(self.bricks).intersection(set(other.bricks)))
        return Footprint(bricks, self.brickindex)

    def union(self, other):
        """ Returns the union with another footprint. """
        bricks = list(set(self.bricks + other.bricks))
        return Footprint(bricks, self.brickindex)

    def random_sample(self, Npoints, rng):
        """ Generate uniformly distributed points within the boundary
            that lie in the footprint.

            The random points are generated by first producing random
            points within the RA and DEC range of the footprint, then
            removing the points that are not in any brick.

            Parameters
            ----------
            Npoints : int
                number of random points to sample
            rng : :py:class:`numpy.random.RandomState`
                a random number generator

            Returns
            -------
            coord : array_like (2, Npoints)
                (RA, DEC) of the random points

            Notes
            -----
            Internally, the random points are generated in batches of
            1 million points. If the footprint is sparse in the bounding
            RA, DEC range, this algorithm becomes extremely inefficient.

        """
        coord = numpy.empty((2, Npoints))

        ramin, ramax, dcmin, dcmax, area = self.range
        Nmake = int(area / self.area * Npoints)
        start = 0
        while start != Npoints:
            u1, u2 = rng.uniform(size=(2, min([Nmake, 1024 * 1024])))
            #
            cmin = numpy.sin(dcmin * numpy.pi / 180)
            cmax = numpy.sin(dcmax * numpy.pi / 180)
            #
            # draw RA uniformly and DEC uniformly in sin(DEC), so the raw
            # points are uniform per unit solid angle.
            RA = ramin + u1 * (ramax - ramin)
            DEC = 90 - numpy.arccos(cmin + u2 * (cmax - cmin)) * 180. / numpy.pi
            # Filter out those not in any bricks: only very few points remain
            coord1 = self.filter((RA, DEC))
            # Are we full?
            coord1 = coord1[:, :min(len(coord1.T), Npoints - start)]
            sl = slice(start, start + len(coord1.T))
            coord[:, sl] = coord1
            start = start + len(coord1.T)
            Nmake = int(area / self.area * (Npoints - start) + 1)

        return coord

    def filter(self, coord):
        """ Remove coordinates that are not covered by the footprint.

            Parameters
            ----------
            coord : array_like
                must be compatible with (RA, DEC)

            Returns
            -------
            coord_in_footprint : array_like
                items in the input coord that are in the footprint

        """
        coord = numpy.array(coord)
        bid = self.brickindex.query_internal(coord)
        mask = contains(self._covered_brickids, bid)
        return coord[:, mask]

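# A usage sketch (hypothetical; assumes ``dr`` is an initialized DataRelease
# instance):
#
#     rng = numpy.random.RandomState(1234)
#     ra, dec = dr.footprint.random_sample(10000, rng)   # already filtered
#     inside = dr.footprint.filter((other_ra, other_dec))
#
# ``random_sample`` rejection-samples the bounding RA/DEC box and calls
# ``filter`` internally; ``filter`` keeps only points whose brick id is in
# the sorted list of covered bricks.
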
class DataRelease(object):
    """ The highest level interface into the data for a given imaging
        data release.

        Uses several "helper" classes and has methods for looking at
        pixelized data or catalogues arranged in bricks.

        Attributes
        ----------
        brickindex : :py:class:`~model.brickindex.BrickIndex`
            an index object of all of the bricks (covering the entire sky)
        bands      : dict
            a dictionary translating from band name to the integer used
            in the Tractor catalogue
        catalogue  : :py:class:`~model.catalogue.CachedCatalogue`
            the concatenated tractor catalogue, accessed by attributes.
        extinction : array_like
            an array storing the extinction coefficients. The array matches
            the definition of the `DECAM_FLUX` column of the catalogue:
            u, g, r, i, z, Y.
        images     : :py:class:`~model.imagerepo.ImageRepo`
            Image repositories. These image repositories are used by
            :py:meth:`readout`.
        footprint  : :py:class:`Footprint`
            the footprint of the data release.

        Examples
        --------
        >>> dr = DataRelease()
        >>> dr.images['depth']['r']   # r band depth images.
        >>> dr.images['image']['r']   # r band coadd images.
        >>> dr.images['model']['r']   # r band tractor model images.

    """
    def __init__(self, root, cache, version, dustdir):
        root = os.path.normpath(root)
        self.root = root
        self.cache = cache
        self.sfdmap = SFDMap(dustdir=dustdir)

        if not hasattr(schema, version):
            raise KeyError("Data Release of version %s is not supported" % version)
        self.version = version

        try:
            os.makedirs(self.cache)
        except OSError:
            # the cache directory probably exists already
            pass

        myschema = getattr(schema, self.version)

        self.bands = {'u':0, 'g':1, 'r':2, 'i':3, 'z':4, 'Y':5}

        brickdata = fits.read_table(os.path.join(self.root,
                myschema.BRICKS_FILENAME))
        self.brickindex = brickindex.BrickIndex(brickdata)

        # E(B-V) to ugrizY bands, SFD98; used in tractor
        self.extinction = numpy.array(
            [3.995, 3.214, 2.165, 1.592, 1.211, 1.064], dtype='f8') \
            .view(dtype=[(band, 'f8') for band in 'ugrizY'])[0]

        try:
            _covered_brickids = numpy.fromfile(
                os.path.join(self.cache, 'covered_brickids.i8'), dtype='i8')
        except IOError:
            # cache miss: scan the tractor directory for brick files
            _covered_brickids = []
            for roots, dirnames, filenames in \
                os.walk(os.path.join(self.root, 'tractor'), followlinks=True):
                for filename in filenames:
                    try:
                        _covered_brickids.append(
                            myschema.parse_filename(filename, self.brickindex))
                    except ValueError:
                        pass
            _covered_brickids = numpy.array(_covered_brickids, dtype='i8')
            _covered_brickids.tofile(
                os.path.join(self.cache, 'covered_brickids.i8'))

        # the list of covered bricks must be sorted.
        _covered_brickids.sort()
        self._covered_brickids = _covered_brickids

        bricks = [self.brickindex.get_brick(bid)
                for bid in _covered_brickids]
        self.footprint = Footprint(bricks, self.brickindex)
        # build the footprint property

        self.catalogue = catalogue.BigFileCatalogue(
            os.path.join(self.cache, 'catalogue'),
            aliases=myschema.CATALOGUE_ALIASES)

        self.init_from_state()
        self.schema = myschema

    def create_footprint(self, extent):
        """ Create a footprint based on the extent.

            Parameters
            ----------
            extent : tuple, or None
                (RA1, RA2, DEC1, DEC2). If None, the footprint of the
                full data release is returned.

        """
        bricks = self.brickindex.query_region(extent)
        return Footprint(bricks, self.brickindex)

    def create_catalogue(self, footprint):
        """ Create a catalogue based on the footprint.

            Parameters
            ----------
            footprint : Footprint
                created with :py:meth:`DataRelease.create_footprint`

        """
        myschema = getattr(schema, self.version)
        return catalogue.Catalogue(
            bricks=footprint.bricks,
            format_filename=lambda x: os.path.join(self.root,
                myschema.format_catalogue_filename(x)),
            aliases=myschema.CATALOGUE_ALIASES,
            columns=myschema.CATALOGUE_COLUMNS)

    def init_from_state(self):
        # rebuild the attributes that are not pickled: the footprint and
        # the image repositories (see __getstate__ / __setstate__).
        myschema = getattr(schema, self.version)

        bricks = [self.brickindex.get_brick(bid)
                for bid in self._covered_brickids]
        self.footprint = Footprint(bricks, self.brickindex)
        # build the footprint property

        self.images = {}
        image_filenames = myschema.format_image_filenames()
        for image in image_filenames:
            if isinstance(image_filenames[image], dict):
                self.images[image] = {}
                for band in image_filenames[image]:
                    self.images[image][band] = imagerepo.ImageRepo(
                        self.root, image_filenames[image][band],
                        image_hdu=myschema.IMAGE_HDU)
            else:
                self.images[image] = imagerepo.ImageRepo(
                    self.root, image_filenames[image],
                    image_hdu=myschema.IMAGE_HDU)

    def __getstate__(self):
        d = self.__dict__.copy()
        del d['footprint']
        del d['images']
        return d

    def __setstate__(self, state):
        self.__dict__.update(state)
        self.init_from_state()

    def readout(self, coord, repo, default=numpy.nan, ignore_missing=False):
        """ Readout pixels from an image.

            Parameters
            ----------
            coord : array_like
                coordinates of the pixels, (RA, DEC)
            repo  : ImageRepo
                the images to read from.
            default : scalar
                value to return if the pixel is not in the footprint.
            ignore_missing : boolean
                When ignore_missing is True, missing brick files are
                treated as not in the footprint.

            Returns
            -------
            pixels : array_like
                pixel values at the given coordinates; `default` where a
                coordinate is outside the footprint.

            Notes
            -----
            This is here, because we want to query multiple images at
            the same time. It is also convenient to have it here to make
            use of brickindex. (ImageRepo is then just a stub with no
            business logic.)

            Otherwise it makes more sense to have readout in ImageRepo.

        """
        coord = numpy.array(coord)
        if coord.ndim == 1:
            view = coord.reshape(2, 1)
        else:
            view = coord

        RA, DEC = view
        images = numpy.empty(RA.shape, dtype='f4')
        images[...] = default

        bid = self.brickindex.query_internal(view)
        mask = contains(self._covered_brickids, bid)

        ra = RA[mask]
        dec = DEC[mask]

        if len(ra) == 0:
            # do not try to work if no point is within the survey
            return images

        # group the coordinates by brick so each brick is read only once
        view, invarg = self.brickindex.optimize((ra, dec), return_inverse=True)
        bid = self.brickindex.query_internal(view)

        pixels = numpy.empty(len(bid), 'f8')
        pixels[:] = default

        ubid = numpy.unique(bid)

        for b in ubid:
            if b not in self._covered_brickids:
                continue
            brick = self.brickindex.get_brick(b)
            first = bid.searchsorted(b, side='left')
            last = bid.searchsorted(b, side='right')
            sl = slice(first, last)
            try:
                img = brick.readout(view[:, sl], repo, default=default)
                #print('readout', b, img)
                pixels[sl] = img
            except IOError as e:
                #print('readout', b, self.brickindex.get_brick(b), 'error', e, view)
                if not ignore_missing:
                    raise
                else:
                    #warnings.warn(str(e), stacklevel=2)
                    pass
        #
        images[mask] = pixels[invarg]

        return images.reshape(coord[0].shape)

    def read_depths(self, coord, bands=[]):
        """ Read the depth of the given bands, returned as an array.

            Returns
            -------
            array with dtype fields DECAM_DEPTH and DECAM_MW_TRANSMISSION.

            Notes
            -----
            Only the columns corresponding to the bands in the `bands`
            parameter are filled; the other columns are zeros.

        """
        dtype = numpy.dtype(
            [('DECAM_DEPTH', ('f4', 6)),
             ('DECAM_MW_TRANSMISSION', ('f4', 6))])

        coord = numpy.array(coord)

        output = numpy.zeros(coord[0].shape, dtype)

        ebv = self.sfdmap.ebv(coord[0], coord[1])

        for band in bands:
            ind = self.bands[band]
            output['DECAM_DEPTH'][..., ind] = \
                self.readout(coord, self.images['depth'][band],
                    default=+0.0, ignore_missing=True)
            output['DECAM_MW_TRANSMISSION'][..., ind] = \
                10 ** (- ebv * self.extinction[band] / 2.5)

        return output

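# End-to-end usage sketch (hypothetical paths and version string; the real
# values depend on the configured data release and schema):
#
#     dr = DataRelease(root='/path/to/dr', cache='/path/to/cache',
#                      version='DR2', dustdir='/path/to/sfd')
#     rng = numpy.random.RandomState(0)
#     ra, dec = dr.footprint.random_sample(1000, rng)
#     rdepth = dr.readout((ra, dec), dr.images['depth']['r'],
#                         default=0.0, ignore_missing=True)
#     depths = dr.read_depths((ra, dec), bands=['g', 'r', 'z'])
#
# ``readout`` groups the requested coordinates by brick (via brickindex) so
# each brick image is opened only once; ``read_depths`` additionally fills
# DECAM_MW_TRANSMISSION from the SFD98 E(B-V) map and the per-band
# extinction coefficients.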