Skip to content

Commit e91b7ba

Browse files
committed
Allow any Hijri year (drop 3+ digit year constraint and year-month-day)
1 parent b6b6376 commit e91b7ba

File tree

3 files changed

+14
-12
lines changed

3 files changed

+14
-12
lines changed

src/undate/converters/calendars/hijri/hijri.lark

+4-6
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
11
%import common.WS
22
%ignore WS
33

4-
hijri_date: year | month year | day month year | year month | year month day
5-
// NOTE: ISMI sample dates are year month day
6-
// if we can assume years are 3 digits minimum, we can support year month day AND we can use faster LALR parser
4+
// only support day-month-year order for now (day, or day and month, may be omitted)
5+
// parser requires numeric day and year to be distinguished based on order
6+
hijri_date: day month year | month year | year
77

88
// TODO: handle date ranges?
99

1010
// TODO: add support for qualifiers?
1111
// PGP dates use qualifiers like "first decade of" (for beginning of month)
1212
// "first third of", seasons (can look for more examples)
1313

14-
// TODO: is there a minimum year we need to support?
15-
// if we can assume 3+ digit years we can distinguish between days and years,
16-
year: /\d{3,}/
14+
year: /\d+/
1715

1816
// months
1917
month: month_1

src/undate/converters/calendars/hijri/parser.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@
55
grammar_path = pathlib.Path(__file__).parent / "hijri.lark"
66

77
with open(grammar_path) as grammar:
8-
# NOTE: LALR parser is faster but requires assumption of 3+ digit years
9-
hijri_parser = Lark(grammar.read(), start="hijri_date", strict=True, parser="lalr")
8+
# NOTE: LALR parser is faster but can't be used here due to the ambiguity between numeric years and days
9+
hijri_parser = Lark(grammar.read(), start="hijri_date", strict=True)

tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,11 @@
3939
"7 Jumādā I 1243",
4040
"29 Muḥarram 1243",
4141
"30 Muḥarram 1243",
42-
# year month, if we can assume 3+ digit years
43-
"901 Rabīʿ I",
44-
# year month day
45-
"901 Rabīʿ I 12",
42+
"Rabīʿ I 901",
43+
"12 Rabīʿ I 901",
44+
# two-digit and one-digit years
45+
"12 Rabīʿ I 90",
46+
"12 Rabīʿ I 9",
4647
]
4748

4849

@@ -63,6 +64,9 @@ def test_should_parse(date_string):
6364
"Foo 383",
6465
# wrong format
6566
"2024-10-02",
67+
# year-first orders (year month, year month day) are not supported
68+
"901 Rabīʿ I",
69+
"901 Rabīʿ I 12",
6670
]
6771

6872

0 commit comments

Comments
 (0)