Source code for litholog.bed

import operator
import numpy as np
import matplotlib as mpl
from scipy import interpolate

from striplog import Interval

from litholog import utils


[docs]class Bed(Interval):
    """
    Represents an individual bed or layer.

    Essentially a ``striplog.Interval`` with some additional restrictions and logic.

    Beds are required to have a ``top``, ``base``, and ``data`` (which can be an array or dict-like).
    """
    def __init__(self, top, base, data, keys=None, **kwargs):
        """
        Parameters
        ----------
        top : float
            Top depth or elevation of bed.
        base : float
            Base depth or elevation of bed.
        data : array or dict-like
            Object from which to populate ``Bed`` data. If an array, must be 1-D or 2-D.
            Supported dict-like types include ``dict`` subclasses, ``pd.Series`` and ``namedtuple`` instances.
        keys : list(str), optional
            If ``data`` is an array, ``keys`` can be a list of string keys corresponding to array columns.
            Columns with a single unique value are collapsed to that value, while multi-value columns are kept as arrays.
            If ``data`` is an array and ``keys`` is ``None``, whole array is added under ``'_values'`` key.
        **kwargs
            Any additional keyword args for ``striplog.Interval`` constructor. (``components``, etc.)
        """

        if hasattr(data, 'to_dict'):        # handle `pd.Series` (e.g., from `df.iterrows()`)
            data = data.to_dict()
        elif hasattr(data, '_asdict'):      # handle `namedtuple` (e.g., from `df.itertuples()`)
            data = data._asdict()

        # `data` must either be dict subclass or np.ndarray now
        if isinstance(data, dict):
            assert all(type(k) is str for k in data.keys()), 'Only string keys allowed for dict-like Bed `data`'
            self.data = data
            _ = self.values     # just to catch bad data

        elif isinstance(data, (np.ndarray, np.generic)):
            data = np.expand_dims(data, 0) if data.ndim == 1 else data
            assert data.ndim == 2, 'Array Bed `data` cannot have more than 2 dimensions'

            if keys:
                assert len(keys) == data.shape[-1], 'Number of `keys` must match columns in array `data`'
            else:
                keys = ['_values']

            self.data = {k : None for k in keys}
            self.values = data

        else:
            raise TypeError(f'Bed `data` type must be array or dict-like, not {type(data)}')

        Interval.__init__(self, top, base=base, data=self.data, **kwargs)


    @property
    def lithology(self):
        """Just shorthand."""
        try:
            return self.primary.lithology
        except AttributeError:
            raise AttributeError(f'{self} doesnt have a lithology component')

    @property
    def values(self):
        if '_values' in self.data.keys():
            return self.data['_values']
        else:
            lens = [utils.safelen(v) for v in self.data.values()]
            assert len(set(lens)) <= 2, f'Lengths of `.data` values must be [1,N] only, found: {set(lens)} {self.data}'
            # TODO: double check that this is safe and works right
            try:
                return np.vstack([utils.saferep(v, max(lens)) for v in self.data.values()]).T
            except AssertionError:
                raise AssertionError(f'Error on: {self.data}')

    @values.setter
    def values(self, new_values):
        assert new_values.ndim == 2, f'`values` can only be set with 2D array, not {new_values.ndim}D'
        if '_values' in self.data.keys():
            # only check shape if `values` already exist
            if self['_values']:
                assert new_values.shape[1] == self.nfeatures, '`values` shape must have `nfeatures` columns'
            self.data['_values'] = new_values
        else:
            assert new_values.shape[1] == len(self.data.keys()), 'Number of columns must match len(self.data.keys())'
            for i, k in enumerate(self.data.keys()):
                new_col = new_values[:, i]
                self.data[k] = new_col[0] if np.unique(new_col).size == 1 else new_col


[docs]    def get_values(self, exclude_keys=[]):
        """
        Getter for ``values`` that allows dropping ``exclude_keys`` (e.g., sample depths) from array
        """
        values = self.values
        if not exclude_keys:
            return values

        exclude_idxs = [i for i, k in enumerate(self.data.keys()) if k in exclude_keys]
        return np.delete(values, exclude_idxs, axis=1)


    @property
    def nsamples(self):
        """
        The number of sample rows in ``values``.
        """
        return self.values.shape[0]

    @property
    def nfeatures(self):
        """
        The number of columns in ``values``.
        """
        return self.values.shape[1]


    def __getitem__(self, key):
        """
        Make the Bed instance indexable by ``data`` key or ``values`` column index.
        TODO: support for slices?
        """
        if type(key) is int and '_values' in self.data.keys():
            return self.values[:, key]
        else:
            return self.data.get(key)


[docs]    def resample_data(self, depth_key, step, kind='linear'):
        """
        Resample data to approximately ``step``, but preserving at least top/base samples.

        Parameters
        ----------
        depth_key : str, int, or None
            Dict key or column index pointing to sample depths
        step : float
            Depth step at which to (approximately) resample data values
        kind : one of {'linear','slinear','quadratic','cubic',...}, optional
            Kind of interpolation to use, default='linear'. See ``scipy.intepolate.interp1d`` docs.
        """
        old_ds = self[depth_key]
        single_sample = True if utils.safelen(old_ds) == 1 else False

        new_ds = np.linspace(self.top.z, self.base.z, num=max(2, int((abs(self.top.z-self.base.z) // step))))

        if type(depth_key) is str:
            try:
                depth_idx = list(self.data.keys()).index(depth_key)
            except IndexError:
                raise ValueError(f'`depth_key` {depth_key} not in keys {self.data.keys()}')
        elif type(depth_key) is int:
            assert depth_key < self.nfeatures, f'`depth_key` {depth_key} too large for values shape {self.values.shape}'
            depth_idx = depth_key
        else:
            raise ValueError(f'`depth_key` must be `str` or `int`, not {type(depth_key)}')

        old_xs = np.delete(self.values, depth_idx, axis=1)

        if single_sample:
            old_ds = np.array([self.top.z, self.base.z])
            old_xs = np.repeat(old_xs, 2, axis=0)

        fill_value = (old_xs[0,:], old_xs[-1,:]) if self.order is 'depth' else (old_xs[-1,:], old_xs[0,:])
        interp_fn = interpolate.interp1d(old_ds, old_xs, kind=kind, axis=0, bounds_error=False, fill_value=fill_value)

        self.values = np.insert(interp_fn(new_ds), depth_idx, new_ds, axis=1)


[docs]    def max_field(self, key):
        """
        Return the maximum value of ``data[key]``, or ``None`` if it doesn't exist.
        """
        try:
            return max(self[key])
        except TypeError:
            return self[key]


[docs]    def min_field(self, key):
        """
        Return the minimum value of ``data[key]``, or ``None`` if it doesn't exist.
        """
        try:
            return min(self[key])
        except TypeError:
            return self[key]


[docs]    def spans(self, d, eps=1e-3):
        """
        Determines if position ``d`` is within this ``Bed``.
        Overridden from ``striplog.Interval`` to accomodate small tolerance ``eps``

        Parameters
        ----------
        d : float
            Position (depth or elevation) to evaluate.

        Returns
        -------
        in_bed : bool
            True if ``d`` is within the ``Bed``, False otherwise.
        """
        o = {'depth': operator.le, 'elevation': operator.ge}[self.order]
        adjusted_top = self.top.z - eps if self.order is 'depth' else self.top.z + eps
        adjusted_base = self.base.z + eps if self.order is 'depth' else self.base.z - eps
        return (o(d, adjusted_base) and o(adjusted_top, d))


[docs]    def compatible_with(self, other):
        """
        Check that ``self.data`` and ``other.data`` have compatible ``values`` shapes and matching ``data`` key order.

        NOTE: Should both have to be constructed from similar dtypes, or just have concatable `values`?
        """
        keys_match = all([sk == ok for sk, ok in zip(self.data.keys(), other.data.keys())])
        shapes_match = self.values.shape[1] == other.values.shape[1]

        return keys_match and shapes_match


[docs]    def as_patch(self, legend,
                width_field=None,
                depth_field=None,
                min_width=0.,
                max_width=1.5,
                **kwargs):
        """
        Representation of the ``Bed`` as a ``matplotlib.patches`` object [``Polygon`` or ``Rectangle``].

        Parameters
        ----------
        legend : ``striplog.Legend``
            Legend to get a matching ``striplog.Decor`` from.
        width_field : str or int, optional
            ``data`` key or ``values`` column index to use as width field.
        depth_field : str or int, optional
            Data key or ``values`` column index to use as positions of ``width_field`` samples.
            If not provided and ``width_field`` values are iterable, created from ``np.linspace(top, base)``.
            Ignored if ``width_field`` is a scalar. Sizes must match if both fields return iterables.

        Returns
        -------
        patch : instance from ``matplotlib.patches``
        """
        decor = legend.get_decor(self.primary)

        ws = self[width_field]
        if ws is None or np.isnan(ws).all():
            ws = decor.width or 1
        #ws = (ws - min_width) / (max_width - min_width)

        # if we don't have depths, assumed samples are evenly spaced b/t `top` and `base`
        ds = self[depth_field] if depth_field else np.linspace(self.top.z, self.base.z, num=utils.safelen(ws))
        #print(self.top.z, self.base.z, '\n', ws, ds)

        patch_kwargs = {
            'fc' : kwargs.get('fc') or decor.colour,
            'lw' : kwargs.get('lw', 1),
            'hatch' : decor.hatch,
            'ec' : kwargs.get('ec', 'k'),
            **kwargs
        }

        # if `ws` is iterable, then make and return a Polygon
        if hasattr(ws, '__iter__'):
            assert len(ws) == len(ds), f'Must have equal number of width and depth sample values {self.data}'

            if not all(self.spans(d) for d in ds):
                raise ValueError(f'Depth sample values {ds} must fall between Bed top {self.top.z} and base {self.base.z}')

            return self._as_polygon(np.array(ws), np.array(ds), min_width, **patch_kwargs)

        # if `ws` is scalar, then make and return a plain Rectangle
        else:
            return self._as_polygon(np.array([ws, ws]), np.array([self.top.z, self.base.z]), min_width, **patch_kwargs)


    def _as_polygon(self, ws, ds, min_width, **kwargs):
        """
        Return the instance as a multi-width Polygon with the RHS defined by ``ws`` and ``ds``.

        ``min_width`` should be a reference to the ``width_field`` value at the y-axis (LHS).
        """
        # make sure that `ws` and `ds` are sorted for drawing
        idxs = np.argsort(ds) if self.order=='depth' else np.argsort(ds)[::-1]
        ws, ds = ws[idxs], ds[idxs]

        # extend sample points to `top` and `base` if necessary
        atol = 1e-3 # hard-coded absolute tolerance
        if np.isclose(ds[0], self.top.z, atol=atol):
            ds = [self.top.z] + list(ds)
            ws = [ws[0]] + list(ws)
        if np.isclose(ds[-1], self.base.z, atol=atol):
            ds = list(ds) + [self.base.z]
            ws = list(ws) + [ws[-1]]

        # add the two points along the y-axis
        ds = np.array([self.top.z] + list(ds) + [self.base.z])
        ws = np.array([min_width] + list(ws) + [min_width])

        return mpl.patches.Polygon(np.vstack((ws, ds)).T, closed=True, **kwargs)