import typing as ty
import re
from enum import Enum
from arcana.core.utils import class_location, resolve_class
class DataSpace(Enum):
    """
    Base class for all "data space" enums. DataSpace enums specify
    the relationships between rows of a dataset.

    For example in imaging studies, scanning sessions are typically organised
    by analysis group (e.g. test & control), membership within the group (i.e.
    matched subjects) and time-points (for longitudinal studies). We can
    visualise the rows arranged in a 3-D grid along the `group`, `member`, and
    `timepoint` dimensions. Note that datasets that only contain one group or
    time-point can still be represented in the same space, and just be of
    depth=1 along those dimensions.

    All dimensions should be included as members of a DataSpace subclass
    enum with orthogonal binary vector values, e.g.

        member = 0b001
        group = 0b010
        timepoint = 0b100

    In this space, an imaging session row is uniquely defined by its member,
    group and timepoint ID. The most commonly present dimension should be given
    the least significant bit (e.g. imaging datasets will not always have
    different groups or time-points but will always have different members,
    equivalent to subjects when there is one group).

    In addition to the data items stored in the data rows for each session,
    some items only vary along a particular dimension of the grid. The
    "row_frequency" of these rows can be specified using the "basis" members
    (i.e. member, group, timepoint) in contrast to the `session` row_frequency,
    which is the combination of all three

        session = 0b111

    Additionally, some data is stored in aggregated rows that span a plane
    of the grid. These frequencies should also be added to the enum (all
    combinations of the basis frequencies must be included) and given intuitive
    names if possible, e.g.

        subject = 0b011 - uniquely identified subject within the dataset.
        batch = 0b110 - separate group+timepoint combinations
        matchedpoint = 0b101 - matched members and time-points aggregated
                               across groups

    Finally, for items that are singular across the whole dataset there should
    also be a dataset-wide member with value=0:

        dataset = 0b000
    """

    def __str__(self):
        return self.name

    @classmethod
    def leaf(cls):
        """Return the member with the largest value, i.e. the one that
        combines every axis when all combinations are defined."""
        return max(cls)

    @classmethod
    def axes(cls):
        """Return the basis members (single-bit values) spanned by the leaf
        member, ordered from most- to least-significant bit."""
        return cls.leaf().span()

    def span(self):
        """Returns the basis dimensions in the data tree that the given
        enum-member projects into.

        For example in `Clinical` data trees, the following frequencies can
        be decomposed into the following basis dims:

            dataset -> []
            group -> [group]
            member -> [member]
            timepoint -> [timepoint]
            subject -> [group, member]
            batch -> [timepoint, group]
            matchedpoint -> [timepoint, member]
            session -> [timepoint, group, member]
        """
        # Check which bits are '1', and convert each one to its basis member,
        # most-significant bit first
        cls = type(self)
        return [cls(b) for b in sorted(self.nonzero_bits(), reverse=True)]

    def nonzero_bits(self):
        """Return the integer value of each set bit in ``self.value``,
        ordered from least- to most-significant."""
        v = self.value
        nonzero = []
        while v:
            # v & (v - 1) clears the lowest set bit; XOR-ing with v isolates it
            w = v & (v - 1)
            nonzero.append(w ^ v)
            v = w
        return nonzero

    def __iter__(self):
        "Iterate over bit string"
        # Start from the highest bit used by the largest member and walk down
        # to the least-significant bit, yielding whether each bit is set
        bit = (max(type(self)).value + 1) >> 1
        while bit > 0:
            yield bool(self.value & bit)
            bit >>= 1

    def is_basis(self):
        """Return True if exactly one bit of the value is set, i.e. the
        member corresponds to a single axis of the space."""
        return len(self.nonzero_bits()) == 1

    def __eq__(self, other):
        # Compare by value; defer to the other operand (ultimately yielding
        # False) instead of raising when it has no ``value`` attribute
        try:
            return self.value == other.value
        except AttributeError:
            return NotImplemented

    def __lt__(self, other):
        return self.value < other.value

    def __le__(self, other):
        return self.value <= other.value

    def __xor__(self, other):
        return type(self)(self.value ^ other.value)

    def __and__(self, other):
        return type(self)(self.value & other.value)

    def __or__(self, other):
        return type(self)(self.value | other.value)

    def __invert__(self):
        # A bare ``~value`` is negative and never a valid member value, so
        # restrict the complement to the bits covered by the leaf member
        cls = type(self)
        return cls(~self.value & cls.leaf().value)

    def __hash__(self):
        return self.value

    def __bool__(self):
        return bool(self.value)

    def bin(self):
        """Return the value formatted as a binary string (e.g. '0b101')."""
        return bin(self.value)

    @classmethod
    def union(cls, freqs: ty.Sequence[Enum]):
        "Returns the union between data row_frequency values"
        union = cls(0)
        for f in freqs:
            union |= f
        return union

    @classmethod
    def default(cls):
        """Return the member with the largest value (same as ``leaf``)."""
        return max(cls)

    def is_parent(self, child, if_match=False):
        """Checks to see whether the current row_frequency is a "parent" of the
        other data row_frequency, i.e. all the base row_frequency of self appear in
        the "child".

        Parameters
        ----------
        child : DataSpace
            The data row_frequency to check parent/child relationship with
        if_match : bool
            Treat matching frequencies as "parents" of each other

        Returns
        -------
        bool
            True if self is parent of child
        """
        return ((self & child) == self) and (child != self or if_match)

    def tostr(self):
        """Serialise the member as ``<class-location>[<member-name>]``, using
        ``class_location`` to produce the location part."""
        return f"{class_location(self)}[{str(self)}]"

    @classmethod
    def fromstr(cls, s):
        """Parse a string of the form ``<class-location>[<member-name>]``.

        The location part is passed to ``resolve_class`` and the result is
        indexed by the member name. A string that does not match the pattern
        raises ``AttributeError``, since ``re.match`` returns ``None``.
        """
        class_loc, val = re.match(r"(.*)\[([^\]]+)\]", s).groups()
        return resolve_class(class_loc)[val]