class TimeSpan(rdflib.resource.Resource):
    """An rdflib resource for a CIDOC-CRM ``E52_Time-Span`` entity.

    Adds convenience properties for the CIDOC-CRM predicates used to
    describe a time span, and supports conversion to
    :class:`~undate.Undate`. Properties return ``None`` when the
    corresponding statement is not present in the graph.
    """

    @staticmethod
    def _identifier_or_none(resource):
        """Return the identifier of an rdflib resource, or None if the
        resource itself is None (i.e., the statement was not found)."""
        return None if resource is None else resource.identifier

    @property
    def identified_by(self):
        """Identifier/appellation resource (``P1_is_identified_by``)."""
        # by default, rdflib resource value method will return another Resource
        return self.value(CIDOC_CRM.P1_is_identified_by)

    @property
    def label(self):
        """Label of the identifier/appellation for this time span."""
        # for ISMI records, label is under the crm identifier/appellation
        # NOTE: other examples have it directly under the time span as
        # RDFS.label; that form is not handled here
        appellation = self.identified_by
        if appellation is None:
            return None
        return appellation.value(rdflib.RDFS.label)

    @property
    def calendar(self):
        """URI for the calendar type (``P2_has_type`` of the identifier)."""
        # for ISMI records, calendar type is associated with identifier
        appellation = self.identified_by
        if appellation is None:
            return None
        return self._identifier_or_none(appellation.value(CIDOC_CRM.P2_has_type))

    @property
    def type(self):
        """URI for the CIDOC-CRM type (``P2_has_type``) of this time span."""
        return self._identifier_or_none(self.value(CIDOC_CRM.P2_has_type))

    @property
    def at_some_time_within(self):
        """Value of ``P82_at_some_time_within``."""
        return self.value(CIDOC_CRM.P82_at_some_time_within)

    @property
    def begin_of_the_begin(self):
        """Value of ``P82a_begin_of_the_begin``."""
        return self.value(CIDOC_CRM.P82a_begin_of_the_begin)

    @property
    def end_of_the_end(self):
        """Value of ``P82b_end_of_the_end``."""
        return self.value(CIDOC_CRM.P82b_end_of_the_end)

    @property
    def note(self):
        """Value of ``P3_has_note``."""
        return self.value(CIDOC_CRM.P3_has_note)

    def to_undate(self):
        """Convert this time span to an :class:`~undate.Undate`, if possible.

        Currently only time spans with the ISMI day-precision date type
        are supported. Returns ``None`` for any other type, or when the
        time span has no ``P82_at_some_time_within`` value.
        """
        # day precision
        if self.type == ISMI_DATE_TYPE.day:
            within = self.at_some_time_within
            if within is None:
                return None
            # at_some_time_within is xsd:date; use toPython method
            # to convert to datetime.date and then convert to undate
            # TODO: should we set label before returning?
            return Undate.to_undate(within.toPython())

        # TODO: for ISMI dates, could we parse the label and preserve
        # calendar information?
        return None

    @classmethod
    def time_spans_from_graph(cls, graph):
        """Find all entities with CIDOC-CRM type E52 Time-Span
        within the rdflib graph and yield them as :class:`TimeSpan`
        resources."""
        for timespan_uri in graph.subjects(
            predicate=rdflib.RDF.type, object=CIDOC_CRM["E52_Time-Span"]
        ):
            yield cls(graph, timespan_uri)
def __eq__(self, other) -> bool:
    """Compare two intervals for equality.

    Returns ``NotImplemented`` when ``other`` is not an
    :class:`UndateInterval`, to indicate the comparison is unsupported.
    """
    if isinstance(other, UndateInterval):
        # intervals are considered equal when both bounds are equal
        same_earliest = self.earliest == other.earliest
        return same_earliest and self.latest == other.latest
    # comparison with any other type is not currently supported
    return NotImplemented
def __contains__(self, other: object) -> bool:
    """Determine if another interval or date falls within this interval.

    ``other`` may be an :class:`UndateInterval` or anything that
    :meth:`Undate.to_undate` can convert. Two intervals that are equal
    are not considered to contain each other.

    :raises TypeError: if ``other`` is not an interval and cannot be
        converted to an :class:`Undate`
    """
    if isinstance(other, UndateInterval):
        # if two intervals are strictly equal, don't consider
        # either one as containing the other
        if self == other:
            return False
        # otherwise compare based on earliest/latest bounds
        other_earliest, other_latest = other.earliest, other.latest
    else:
        # treat a single date as both bounds; to_undate raises TypeError
        # for unsupported types, which is deliberately left to propagate
        other_earliest = other_latest = Undate.to_undate(other)

    # if a bound of this interval is None, the interval is open on that
    # side and anything satisfies it; otherwise the other value must be
    # set and must fall inside the bound
    starts_within = self.earliest is None or (
        other_earliest is not None and other_earliest >= self.earliest
    )
    if not starts_within:
        return False
    return self.latest is None or (
        other_latest is not None and other_latest <= self.latest
    )


def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]:
    """Determine the intersection or overlap between two
    :class:`UndateInterval` objects and return a new interval.
    Returns None if there is no overlap.
    """

    def inner_bound(pick, mine, theirs):
        # when both values are defined, pick the inner one;
        # if not, return whichever is not None, or None
        if mine is not None and theirs is not None:
            return pick(mine, theirs)
        return mine if mine is not None else theirs

    try:
        earliest = inner_bound(max, self.earliest, other.earliest)
        latest = inner_bound(min, self.latest, other.latest)
        # if this results in an invalid interval, initialization
        # will throw an exception
        return UndateInterval(earliest, latest)
    except ValueError:
        return None
""" - - # support datetime.date by converting to undate - if isinstance(other, datetime.date): - other = Undate.from_datetime_date(other) - - # recommended to support comparison with arbitrary objects - if not isinstance(other, Undate): + # convert if possible; return NotImplemented if not + try: + return cls.to_undate(other) + except TypeError: + # recommended to support comparison with arbitrary objects return NotImplemented - return other - def __eq__(self, other: object) -> bool: # Note: assumes label differences don't matter for comparing dates @@ -268,6 +270,8 @@ def __eq__(self, other: object) -> bool: other = self._comparison_type(other) if other is NotImplemented: + # return NotImplemented to indicate comparison is not supported + # with this type return NotImplemented # if both dates are fully known, then earliest/latest check @@ -359,10 +363,23 @@ def __contains__(self, other: object) -> bool: ] ) - @staticmethod - def from_datetime_date(dt_date: datetime.date): - """Initialize an :class:`Undate` object from a :class:`datetime.date`""" - return Undate(dt_date.year, dt_date.month, dt_date.day) + @classmethod + def to_undate(cls, other: object) -> "Undate": + """Converted arbitrary object to Undate, if possible. Raises TypeError + if conversion is not possible. 
+ + Currently suppports: + - :class:`datetime.date` or :class:`datetime.datetime` + + """ + match other: + case Undate(): + return other + case datetime.date() | datetime.datetime(): + return Undate(other.year, other.month, other.day) + + case _: + raise TypeError(f"Conversion from {type(other)} is not supported") @property def known_year(self) -> bool: diff --git a/tests/test_converters/test_cidoc_crm.py b/tests/test_converters/test_cidoc_crm.py new file mode 100644 index 0000000..cbcb4d0 --- /dev/null +++ b/tests/test_converters/test_cidoc_crm.py @@ -0,0 +1,73 @@ +import pathlib +import types + +import pytest +import rdflib + +from undate import Undate, DatePrecision +from undate.converters import cidoc_crm + + +# TODO: move or copy example ismi data to test for use as a fixture +ISMI_DATA_PATH = ( + pathlib.Path(__file__) + / ".." + / ".." + / ".." + / "examples" + / "use-cases" + / "ismi" + / "data" + / "ismi-crm-date-samples.ttl" +) + +DATE1_URI = rdflib.URIRef("http://content.mpiwg-berlin.mpg.de/ns/ismi/date1") + + +@pytest.fixture +def ismi_data(): + g = rdflib.Graph() + g.parse(ISMI_DATA_PATH) + return g + + +class TestTimeSpan: + def test_properties(self, ismi_data): + # initialize a time span rdflib.resource for date1 in the sample data + # TODO: convert to a fixture + # g = rdflib.Graph() + # g.parse(ISMI_DATA_PATH) + # g.parse(data=sample_data) + + time_span = cidoc_crm.TimeSpan(ismi_data, DATE1_URI) + assert time_span.type == cidoc_crm.ISMI_DATE_TYPE.day + assert time_span.label == rdflib.term.Literal("901 Rabīʿ I 14 (islamic)") + assert time_span.calendar == cidoc_crm.ISMI_CALENDAR_TYPE.islamic + assert time_span.at_some_time_within == rdflib.term.Literal( + "1495-12-11", datatype=rdflib.XSD.date + ) + assert time_span.note == rdflib.term.Literal( + "day-precision date in islamic calendar" + ) + + def test_time_spans_from_graph(self, ismi_data): + time_spans = cidoc_crm.TimeSpan.time_spans_from_graph(ismi_data) + assert isinstance(time_spans, 
def test_eq_type_check(self):
    """Equality with an unsupported type reports NotImplemented."""
    # doesn't currently support comparison with anything else
    interval = UndateInterval(Undate(900))
    # returns NotImplemented if comparison with this type is not supported;
    # NotImplemented is a singleton, so check identity rather than equality
    assert interval.__eq__("foo") is NotImplemented
an interval doesn't contain itself + for interval in [century11th, century20th, decade1990s]: + assert interval not in interval + + # checking if an interval is within another interval + assert decade1990s in century20th + assert decade1990s not in century11th + assert century11th not in decade1990s + assert century20th not in decade1990s + # a specific date can be contained by an interval + y2k = Undate(2000) + assert y2k in century20th + assert y2k not in century11th + # partially known date should work too + april_someyear = Undate("198X", 4) + assert april_someyear in century20th + assert april_someyear not in century11th + # conversion from datetime.date also works + assert datetime.date(1922, 5, 1) in century20th + # unsupported types result in a type error + with pytest.raises(TypeError): + assert "nineteen-eighty-four" in century20th + + # contains check with half-open intervals + after_c11th = UndateInterval(Undate(1001), None) + before_20th = UndateInterval(None, Undate(1901)) + # neither of them contains the other + assert after_c11th not in before_20th + assert before_20th not in after_c11th + # nor are they contained by a smaller range + assert after_c11th not in decade1990s + assert before_20th not in decade1990s + + # all of our previous test dates are in the 1900s, + # so they are after the 11th century and not before the 20th + for period in [decade1990s, y2k, april_someyear]: + assert period in after_c11th + assert period not in before_20th + + # fully open interval - is this even meaningful? 
def test_to_undate(self):
    """Undate.to_undate converts date and datetime; rejects other types."""
    current = datetime.now()
    # pairs of source object and the expected (year, month, day)
    conversions = [
        (date(2001, 3, 5), (2001, 3, 5)),
        (current, (current.year, current.month, current.day)),
    ]
    for source, year_month_day in conversions:
        converted = Undate.to_undate(source)
        assert isinstance(converted, Undate)
        assert converted == Undate(*year_month_day)

    # unsupported type
    with pytest.raises(TypeError):
        Undate.to_undate("foo")