From 335c1f9218f7bc47ce48b39b7a5bd26a8f760d72 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 15:40:47 +1100 Subject: [PATCH 01/62] added some actions --- .github/workflows/lint.yaml | 29 +++++++++++++++++++++++++++++ .github/workflows/test.yaml | 28 ++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 .github/workflows/lint.yaml create mode 100644 .github/workflows/test.yaml diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 0000000..fcf128a --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,29 @@ +name: golangci-lint +on: + push: + tags: + - v* + branches: + - master + - main + pull_request: +jobs: + golangci: + name: lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Cache-Go + uses: actions/cache@v1 + with: + path: | + ~/go/pkg/mod # Module download cache + ~/.cache/go-build # Build cache (Linux) + ~/Library/Caches/go-build # Build cache (Mac) + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + - name: golangci-lint + uses: golangci/golangci-lint-action@v2 + with: + version: latest diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..a882731 --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,28 @@ +on: [push, pull_request] +name: Test +jobs: + test: + strategy: + matrix: + go-version: [1.19.x] + os: [ubuntu-latest] + runs-on: ${{ matrix.os }} + steps: + - name: Install Go + uses: actions/setup-go@v2 + with: + go-version: ${{ matrix.go-version }} + - name: Checkout code + uses: actions/checkout@v2 + - name: Cache-Go + uses: actions/cache@v1 + with: + path: | + ~/go/pkg/mod # Module download cache + ~/.cache/go-build # Build cache (Linux) + ~/Library/Caches/go-build # Build cache (Mac) + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go-${{ 
hashFiles('**/go.sum') }} + - name: Test + run: go test ./... From 26d95ba3e65c512b6f7ea23f7ffaedc7189d6e28 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 15:46:27 +1100 Subject: [PATCH 02/62] My issue --- parseany_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/parseany_test.go b/parseany_test.go index 7fea1e6..2759fc8 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -21,6 +21,7 @@ type dateTest struct { } var testInputs = []dateTest{ + {in: "Wed, 8 Feb 2023 19:00:46 +1100 (AEDT)", out: "2023-02-08 19:00:46 +1100 AEDT"}, {in: "oct 7, 1970", out: "1970-10-07 00:00:00 +0000 UTC"}, {in: "oct 7, '70", out: "1970-10-07 00:00:00 +0000 UTC"}, {in: "Oct 7, '70", out: "1970-10-07 00:00:00 +0000 UTC"}, From 5335e6fe23159b2ccc2ca2b60d74e0482ef964ca Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 15:49:18 +1100 Subject: [PATCH 03/62] Error return value is not checked (errcheck) --- bench_test.go | 4 ++-- go.mod | 2 +- parseany.go | 11 ++++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/bench_test.go b/bench_test.go index 0c6739a..575d795 100644 --- a/bench_test.go +++ b/bench_test.go @@ -28,7 +28,7 @@ func BenchmarkShotgunParse(b *testing.B) { for i := 0; i < b.N; i++ { for _, dateStr := range testDates { // This is the non dateparse traditional approach - parseShotgunStyle(dateStr) + _ = parseShotgunStyle(dateStr) } } } @@ -37,7 +37,7 @@ func BenchmarkParseAny(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { for _, dateStr := range testDates { - ParseAny(dateStr) + _ = ParseAny(dateStr) } } } diff --git a/go.mod b/go.mod index 071cd5e..3376d23 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/araddon/dateparse -go 1.12 +go 1.19 require ( github.com/mattn/go-runewidth v0.0.10 // indirect diff --git a/parseany.go b/parseany.go index b9668b2..3214143 100644 --- a/parseany.go +++ b/parseany.go @@ -234,7 +234,10 @@ func ParseStrict(datestr string, opts ...ParserOption) 
(time.Time, error) { func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *parser, err error) { - p = newParser(datestr, loc, opts...) + p, err = newParser(datestr, loc, opts...) + if err != nil { + return + } if p.retryAmbiguousDateWithSwap { // month out of range signifies that a day/month swap is the correct solution to an ambiguous date // this is because it means that a day is being interpreted as a month and overflowing the valid value for that @@ -2008,7 +2011,7 @@ func RetryAmbiguousDateWithSwap(retryAmbiguousDateWithSwap bool) ParserOption { } } -func newParser(dateStr string, loc *time.Location, opts ...ParserOption) *parser { +func newParser(dateStr string, loc *time.Location, opts ...ParserOption) (*parser, error) { p := &parser{ stateDate: dateStart, stateTime: timeIgnore, @@ -2021,7 +2024,9 @@ func newParser(dateStr string, loc *time.Location, opts ...ParserOption) *parser // allow the options to mutate the parser fields from their defaults for _, option := range opts { - option(p) + if err := option(p); err != nil { + return nil, fmt.Sprintf("option error: %w", err) + } } return p } From 14cb70eacb3a918a0871e4696b2f32c2d15ff210 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 15:50:03 +1100 Subject: [PATCH 04/62] field `offsetlen` is unused (unused) --- parseany.go | 1 - 1 file changed, 1 deletion(-) diff --git a/parseany.go b/parseany.go index 3214143..f20e8fd 100644 --- a/parseany.go +++ b/parseany.go @@ -1985,7 +1985,6 @@ type parser struct { msi int mslen int offseti int - offsetlen int tzi int tzlen int t *time.Time From 2fb4c46691d362a2a2ef7715f6da39054ee72f28 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 15:50:42 +1100 Subject: [PATCH 05/62] S1021: should merge variable declaration with assignment on next line (gosimple) --- parseany_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parseany_test.go b/parseany_test.go index 2759fc8..5760e37 100644 --- 
a/parseany_test.go +++ b/parseany_test.go @@ -10,8 +10,7 @@ import ( func TestOne(t *testing.T) { time.Local = time.UTC - var ts time.Time - ts = MustParse("2020-07-20+08:00") + var ts time.Time = MustParse("2020-07-20+08:00") assert.Equal(t, "2020-07-19 16:00:00 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } From 5143d47e3e263c4fee712bf3cb87cd54edbe0298 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 15:51:14 +1100 Subject: [PATCH 06/62] S1023: redundant break statement (gosimple) --- parseany.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/parseany.go b/parseany.go index f20e8fd..a37bec5 100644 --- a/parseany.go +++ b/parseany.go @@ -1442,7 +1442,6 @@ iterRunes: if datestr[i-1] == 'm' { p.extra = i - 2 p.trimExtra() - break } case '+', '-', '(': // This really doesn't seem valid, but for some reason when round-tripping a go date @@ -1452,7 +1451,6 @@ iterRunes: p.extra = i - 1 p.stateTime = timeWsOffset p.trimExtra() - break default: switch { case unicode.IsDigit(r): From 57a1767ebd6d936dea7700633b1a9f977e17b984 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 15:51:48 +1100 Subject: [PATCH 07/62] SA4006: this value of `err` is never used (staticcheck) --- parseany.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parseany.go b/parseany.go index a37bec5..02074e3 100644 --- a/parseany.go +++ b/parseany.go @@ -253,7 +253,7 @@ func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *par // turn off the retry to avoid endless recursion retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false) modifiedOpts := append(opts, preferMonthFirst, retryAmbiguousDateWithSwap) - p, err = parseTime(datestr, time.Local, modifiedOpts...) + p, _ = parseTime(datestr, time.Local, modifiedOpts...) } } From ad0ab84f6bfab649b0039583ce81c8c06d4ba58f Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 15:53:01 +1100 Subject: [PATCH 08/62] Lint action out of date. 
--- .github/workflows/lint.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index fcf128a..1a998e5 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -24,6 +24,6 @@ jobs: restore-keys: | ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} - name: golangci-lint - uses: golangci/golangci-lint-action@v2 + uses: golangci/golangci-lint-action@v3 with: version: latest From a8e238d5d1b0afd42e78b3ebb5dd7facd7a071a3 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 15:55:15 +1100 Subject: [PATCH 09/62] Go mod tidy --- go.mod | 9 ++++++++- go.sum | 1 - 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 3376d23..baa649f 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,14 @@ module github.com/araddon/dateparse go 1.19 require ( - github.com/mattn/go-runewidth v0.0.10 // indirect github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4 github.com/stretchr/testify v1.7.0 ) + +require ( + github.com/davecgh/go-spew v1.1.0 // indirect + github.com/mattn/go-runewidth v0.0.10 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/rivo/uniseg v0.1.0 // indirect + gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect +) diff --git a/go.sum b/go.sum index 40bf744..370a49d 100644 --- a/go.sum +++ b/go.sum @@ -8,7 +8,6 @@ github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY= github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4 h1:8qmTC5ByIXO3GP/IzBkxcZ/99VITvnIETDhdFz/om7A= github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4/go.mod h1:C1a7PQSMz9NShzorzCiG2fk9+xuCgLkPeCvMHYR2OWg= -github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= From e654ac7b35439e0f54497a819b012a4b3f855322 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 15:56:17 +1100 Subject: [PATCH 10/62] Bug fixes. --- parseany.go | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/parseany.go b/parseany.go index 02074e3..79cb927 100644 --- a/parseany.go +++ b/parseany.go @@ -170,15 +170,14 @@ func ParseIn(datestr string, loc *time.Location, opts ...ParserOption) (time.Tim // Set Location to time.Local. Same as ParseIn Location but lazily uses // the global time.Local variable for Location argument. // -// denverLoc, _ := time.LoadLocation("America/Denver") -// time.Local = denverLoc +// denverLoc, _ := time.LoadLocation("America/Denver") +// time.Local = denverLoc // -// t, err := dateparse.ParseLocal("3/1/2014") +// t, err := dateparse.ParseLocal("3/1/2014") // // Equivalent to: // -// t, err := dateparse.ParseIn("3/1/2014", denverLoc) -// +// t, err := dateparse.ParseIn("3/1/2014", denverLoc) func ParseLocal(datestr string, opts ...ParserOption) (time.Time, error) { p, err := parseTime(datestr, time.Local, opts...) if err != nil { @@ -204,9 +203,8 @@ func MustParse(datestr string, opts ...ParserOption) time.Time { // ParseFormat parse's an unknown date-time string and returns a layout // string that can parse this (and exact same format) other date-time strings. // -// layout, err := dateparse.ParseFormat("2013-02-01 00:00:00") -// // layout = "2006-01-02 15:04:05" -// +// layout, err := dateparse.ParseFormat("2013-02-01 00:00:00") +// // layout = "2006-01-02 15:04:05" func ParseFormat(datestr string, opts ...ParserOption) (string, error) { p, err := parseTime(datestr, nil, opts...) 
if err != nil { @@ -2022,10 +2020,10 @@ func newParser(dateStr string, loc *time.Location, opts ...ParserOption) (*parse // allow the options to mutate the parser fields from their defaults for _, option := range opts { if err := option(p); err != nil { - return nil, fmt.Sprintf("option error: %w", err) + return nil, fmt.Errorf("option error: %w", err) } } - return p + return p, nil } func (p *parser) nextIs(i int, b byte) bool { From cefe5b3dbe7d5586690a7a6850bf21313cd3e631 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 15:56:43 +1100 Subject: [PATCH 11/62] More typo changes --- bench_test.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bench_test.go b/bench_test.go index 575d795..4322b86 100644 --- a/bench_test.go +++ b/bench_test.go @@ -7,7 +7,6 @@ import ( ) /* - go test -bench Parse BenchmarkShotgunParse 50000 37588 ns/op 13258 B/op 167 allocs/op @@ -21,14 +20,13 @@ BenchmarkParseAny-4 200000 8627 ns/op 144 B/op 3 allo BenchmarkShotgunParse-8 50000 33940 ns/op 13136 B/op 169 allocs/op BenchmarkParseAny-8 200000 10146 ns/op 912 B/op 29 allocs/op BenchmarkParseDateString-8 10000 123077 ns/op 208 B/op 13 allocs/op - */ func BenchmarkShotgunParse(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { for _, dateStr := range testDates { // This is the non dateparse traditional approach - _ = parseShotgunStyle(dateStr) + _, _ = parseShotgunStyle(dateStr) } } } @@ -37,7 +35,7 @@ func BenchmarkParseAny(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { for _, dateStr := range testDates { - _ = ParseAny(dateStr) + _, _ = ParseAny(dateStr) } } } From 4345a38e91b61174f1140f5313b8e55f3c9f3302 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 15:57:15 +1100 Subject: [PATCH 12/62] Another error --- parseany_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/parseany_test.go b/parseany_test.go index 5760e37..e4f9de9 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ 
-488,7 +488,10 @@ func TestPStruct(t *testing.T) { denverLoc, err := time.LoadLocation("America/Denver") assert.Equal(t, nil, err) - p := newParser("08.21.71", denverLoc) + p, err := newParser("08.21.71", denverLoc) + if err != nil { + t.Fatalf("Parser build error: %s", err) + } p.setMonth() assert.Equal(t, 0, p.moi) From 515cd8176782b7e9ba7d31866085bbd56294d2e9 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 16:02:13 +1100 Subject: [PATCH 13/62] S1023: redundant break statement (gosimple) --- parseany.go | 1 - 1 file changed, 1 deletion(-) diff --git a/parseany.go b/parseany.go index 79cb927..c92ceec 100644 --- a/parseany.go +++ b/parseany.go @@ -1595,7 +1595,6 @@ iterRunes: // 00:00:00.000 +0300 +0300 p.extra = i - 1 p.trimExtra() - break default: if unicode.IsLetter(r) { // 00:07:31.945167 +0000 UTC From eabb56b497f00476613f1579f7dde062ae4375dd Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 16:04:58 +1100 Subject: [PATCH 14/62] Text should be lowercase --- parseany.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parseany.go b/parseany.go index c92ceec..c5d0716 100644 --- a/parseany.go +++ b/parseany.go @@ -133,7 +133,7 @@ const ( var ( // ErrAmbiguousMMDD for date formats such as 04/02/2014 the mm/dd vs dd/mm are // ambiguous, so it is an error for strict parse rules. 
- ErrAmbiguousMMDD = fmt.Errorf("This date has ambiguous mm/dd vs dd/mm type format") + ErrAmbiguousMMDD = fmt.Errorf("this date has ambiguous mm/dd vs dd/mm type format") ) func unknownErr(datestr string) error { From c5b562ac1ae744092d1a6ca7a2588526ee3d7c93 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 16:06:01 +1100 Subject: [PATCH 15/62] Added go releaser --- .github/workflows/releaser.yml | 39 ++++++++++++++++++++++++++++++++++ .goreleaser.yml | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 .github/workflows/releaser.yml create mode 100644 .goreleaser.yml diff --git a/.github/workflows/releaser.yml b/.github/workflows/releaser.yml new file mode 100644 index 0000000..cc06e22 --- /dev/null +++ b/.github/workflows/releaser.yml @@ -0,0 +1,39 @@ +name: releaser + +on: + push: + tags: + - 'v*.*.*' + +jobs: + goreleaser: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Set up Go + uses: actions/setup-go@v2 + with: + go-version: 1.19 + - name: Cache-Go + uses: actions/cache@v1 + with: + path: | + ~/go/pkg/mod # Module download cache + ~/.cache/go-build # Build cache (Linux) + ~/Library/Caches/go-build # Build cache (Mac) + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + - name: Test + run: go test ./... 
+ - name: Run GoReleaser + uses: goreleaser/goreleaser-action@v2 + with: + distribution: goreleaser + version: latest + args: release --rm-dist + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.goreleaser.yml b/.goreleaser.yml new file mode 100644 index 0000000..dab3f5c --- /dev/null +++ b/.goreleaser.yml @@ -0,0 +1,38 @@ +project_name: dateparse +builds: + - + id: "dateparse" + binary: "dateparse" + dir: dateparse + - + id: "example" + binary: "example" + dir: example +archives: + - + format_overrides: + - goos: windows + format: zip +checksum: + name_template: 'checksums.txt' +snapshot: + name_template: "{{ .Tag }}-next" +changelog: + sort: asc + filters: + exclude: + - '^docs:' + - '^test:' +nfpms: + - + vendor: dateparse + homepage: https://github.com/araddon/dateparse + maintainer: n/a + description: NA + formats: + - apk + - deb + - rpm + release: 1 + section: default + priority: extra From 094aad3f21a5f6f2c2c21bbd109fbf0c32800e24 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 16:09:32 +1100 Subject: [PATCH 16/62] Commented code --- parseany.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/parseany.go b/parseany.go index c5d0716..042c2fb 100644 --- a/parseany.go +++ b/parseany.go @@ -468,8 +468,6 @@ iterRunes: switch r { case ':': p.set(p.offseti, "-07:00") - // case ' ': - // return nil, unknownErr(datestr) } case dateYearDashAlphaDash: From 53a8cbdf09066b72c98a5d3b61d478a2aa6337e5 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 16:10:45 +1100 Subject: [PATCH 17/62] Unnecessary bracket --- parseany.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parseany.go b/parseany.go index 042c2fb..73b0585 100644 --- a/parseany.go +++ b/parseany.go @@ -269,7 +269,7 @@ iterRunes: //r := rune(datestr[i]) r, bytesConsumed := utf8.DecodeRuneInString(datestr[i:]) if bytesConsumed > 1 { - i += (bytesConsumed - 1) + i += bytesConsumed - 1 } // gou.Debugf("i=%d r=%s state=%d %s", i, string(r), 
p.stateDate, datestr) From 544b5426f4e03bcc4bc30e85a876848e6cf8871c Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 16:20:46 +1100 Subject: [PATCH 18/62] Test improvements.. I think --- parseany_test.go | 51 +++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/parseany_test.go b/parseany_test.go index e4f9de9..7582465 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -10,7 +10,7 @@ import ( func TestOne(t *testing.T) { time.Local = time.UTC - var ts time.Time = MustParse("2020-07-20+08:00") + var ts = MustParse("2020-07-20+08:00") assert.Equal(t, "2020-07-19 16:00:00 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } @@ -435,28 +435,35 @@ func TestParse(t *testing.T) { assert.NotEqual(t, nil, err) for _, th := range testInputs { - if len(th.loc) > 0 { - loc, err := time.LoadLocation(th.loc) - if err != nil { - t.Fatalf("Expected to load location %q but got %v", th.loc, err) + t.Run(th.in, func(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Fatalf("error: %s", r) + } + }() + if len(th.loc) > 0 { + loc, err := time.LoadLocation(th.loc) + if err != nil { + t.Fatalf("Expected to load location %q but got %v", th.loc, err) + } + ts, err = ParseIn(th.in, loc) + if err != nil { + t.Fatalf("expected to parse %q but got %v", th.in, err) + } + got := fmt.Sprintf("%v", ts.In(time.UTC)) + assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) + if th.out != got { + t.Fatalf("whoops, got %s, expected %s", got, th.out) + } + } else { + ts = MustParse(th.in) + got := fmt.Sprintf("%v", ts.In(time.UTC)) + assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) + if th.out != got { + t.Fatalf("whoops, got %s, expected %s", got, th.out) + } } - ts, err = ParseIn(th.in, loc) - if err != nil { - t.Fatalf("expected to parse %q but got %v", th.in, err) - } - got := fmt.Sprintf("%v", ts.In(time.UTC)) - assert.Equal(t, th.out, got, 
"Expected %q but got %q from %q", th.out, got, th.in) - if th.out != got { - panic("whoops") - } - } else { - ts = MustParse(th.in) - got := fmt.Sprintf("%v", ts.In(time.UTC)) - assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) - if th.out != got { - panic("whoops") - } - } + }) } // some errors From c5a1edc7107fb3e49b4174e15c4a68e080e04694 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 16:24:05 +1100 Subject: [PATCH 19/62] My addition last --- parseany_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/parseany_test.go b/parseany_test.go index 7582465..5d86f31 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -20,7 +20,6 @@ type dateTest struct { } var testInputs = []dateTest{ - {in: "Wed, 8 Feb 2023 19:00:46 +1100 (AEDT)", out: "2023-02-08 19:00:46 +1100 AEDT"}, {in: "oct 7, 1970", out: "1970-10-07 00:00:00 +0000 UTC"}, {in: "oct 7, '70", out: "1970-10-07 00:00:00 +0000 UTC"}, {in: "Oct 7, '70", out: "1970-10-07 00:00:00 +0000 UTC"}, @@ -417,6 +416,8 @@ var testInputs = []dateTest{ {in: "1384216367111", out: "2013-11-12 00:32:47.111 +0000 UTC"}, {in: "1384216367111222", out: "2013-11-12 00:32:47.111222 +0000 UTC"}, {in: "1384216367111222333", out: "2013-11-12 00:32:47.111222333 +0000 UTC"}, + + {in: "Wed, 8 Feb 2023 19:00:46 +1100 (AEDT)", out: "2023-02-08 08:00:46 +0000 UTC"}, } func TestParse(t *testing.T) { From 268a69008173d53ed659b40a278c29c454b850ff Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 16:26:18 +1100 Subject: [PATCH 20/62] So people don't have to check the string they can use the new errors.Is function --- parseany.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/parseany.go b/parseany.go index 73b0585..dd0ea85 100644 --- a/parseany.go +++ b/parseany.go @@ -133,11 +133,12 @@ const ( var ( // ErrAmbiguousMMDD for date formats such as 04/02/2014 the mm/dd vs dd/mm are // ambiguous, so it is an error for strict parse rules. 
- ErrAmbiguousMMDD = fmt.Errorf("this date has ambiguous mm/dd vs dd/mm type format") + ErrAmbiguousMMDD = fmt.Errorf("this date has ambiguous mm/dd vs dd/mm type format") + ErrCouldntFindFormat = fmt.Errorf("could not find format for") ) func unknownErr(datestr string) error { - return fmt.Errorf("Could not find format for %q", datestr) + return fmt.Errorf("%w %q", ErrCouldntFindFormat, datestr) } // ParseAny parse an unknown date format, detect the layout. From bf3a5b3040cd0f3aa6c17112d3f2d7b0a740985b Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 16:27:43 +1100 Subject: [PATCH 21/62] Skip white space - to delete strategically --- parseany.go | 116 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 109 insertions(+), 7 deletions(-) diff --git a/parseany.go b/parseany.go index dd0ea85..915051c 100644 --- a/parseany.go +++ b/parseany.go @@ -383,6 +383,9 @@ iterRunes: } case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } // 18 January 2018 // 8 January 2018 // 8 jan 2018 @@ -446,6 +449,9 @@ iterRunes: p.stateDate = dateYearDashDashOffset p.setDay() case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.daylen = i - p.dayi p.stateDate = dateYearDashDashWs p.stateTime = timeStart @@ -506,6 +512,9 @@ iterRunes: // 29-Jun-2016 dd-month(alpha)-yyyy switch r { case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } // we need to find if this was 4 digits, aka year // or 2 digits which makes it ambiguous year/day length := i - (p.moi + p.molen + 1) @@ -538,7 +547,12 @@ iterRunes: // I honestly don't know if this format ever shows up as yyyy/ switch r { - case ' ', ':': + case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } + fallthrough + case ':': p.stateTime = timeStart if p.daylen == 0 { p.daylen = i - p.dayi @@ -566,7 +580,12 @@ iterRunes: } // We aren't breaking because we are going to re-use this case // to find where the date starts, and possible time begins - case 
' ', ':': + case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } + fallthrough + case ':': p.stateTime = timeStart if p.yearlen == 0 { p.yearlen = i - p.yeari @@ -604,6 +623,9 @@ iterRunes: // Note no break, we are going to pass by and re-enter this dateDigitSlash // and look for ending (space) or not (just date) case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.stateTime = timeStart if p.yearlen == 0 { p.yearlen = i - p.yeari @@ -625,6 +647,9 @@ iterRunes: switch r { case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.stateTime = timeStart if p.yearlen == 0 { p.yearlen = i - p.yeari @@ -662,6 +687,9 @@ iterRunes: // 12 Feb 2006, 19:17:22 switch r { case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.yeari = i + 1 //p.yearlen = 4 p.dayi = 0 @@ -696,6 +724,9 @@ iterRunes: i++ break iterRunes case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.yearlen = i - p.yeari p.setYear() break iterRunes @@ -710,6 +741,9 @@ iterRunes: // weekday %Y年%m月%e日 %A %I:%M %p // 2013年07月18日 星期四 10:27 上午 if r == ' ' { + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.stateDate = dateDigitChineseYearWs break } @@ -770,6 +804,9 @@ iterRunes: case r == ' ': // X // April 8, 2009 + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } if i > 3 { // Check to see if the alpha is name of month? or Day? 
month := strings.ToLower(datestr[0:i]) @@ -877,6 +914,9 @@ iterRunes: p.setDay() p.stateDate = dateAlphaWsDigitMore } else if r == ' ' { + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.daylen = i - p.dayi p.setDay() p.yeari = i + 1 @@ -900,6 +940,9 @@ iterRunes: p.yeari = 0 break iterRunes } else if r == ' ' { + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } // must be year format, not 15:04 p.yearlen = i - p.yeari p.setYear() @@ -913,6 +956,9 @@ iterRunes: // oct 1, 1970 // oct 7, '70 if r == ' ' { + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.yeari = i + 1 p.stateDate = dateAlphaWsDigitMoreWs } @@ -925,7 +971,12 @@ iterRunes: switch r { case '\'': p.yeari = i + 1 - case ' ', ',': + case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } + fallthrough + case ',': // x // May 8, 2009 5:57:51 PM // x @@ -941,7 +992,12 @@ iterRunes: // April 8, 2009 // April 8 2009 switch r { - case ' ', ',': + case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } + fallthrough + case ',': // x // June 8, 2009 // x @@ -973,6 +1029,9 @@ iterRunes: i++ break iterRunes case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.yearlen = i - p.yeari p.setYear() p.stateTime = timeStart @@ -1027,6 +1086,9 @@ iterRunes: } case r == ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } // x // January 02 2006, 15:04:05 p.daylen = i - p.dayi @@ -1050,6 +1112,9 @@ iterRunes: // oct. 
7, '70 switch { case r == ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } // continue case unicode.IsDigit(r): p.stateDate = dateAlphaWsDigit @@ -1066,7 +1131,12 @@ iterRunes: p.dayi = i } switch r { - case ' ', '-': + case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } + fallthrough + case '-': if p.moi == 0 { p.moi = i + 1 p.daylen = i - p.dayi @@ -1088,7 +1158,12 @@ iterRunes: // Tue, 11 Jul 2017 16:28:13 +0200 (CEST) // Mon, 02-Jan-06 15:04:05 MST switch r { - case ' ', '-': + case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } + fallthrough + case '-': if p.dayi == 0 { p.dayi = i + 1 } else if p.moi == 0 { @@ -1215,7 +1290,10 @@ iterRunes: if p.nextIs(i, ' ') { // x // September 17, 2012 at 5:00pm UTC-05 - i++ // skip ' + i++ // skip ' + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.houri = 0 // reset hour } } else { @@ -1240,6 +1318,9 @@ iterRunes: p.set(i, "PM") } case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.coalesceTime(i) p.stateTime = timeWs case ':': @@ -1337,6 +1418,9 @@ iterRunes: p.stateTime = timeWsAlphaZoneOffset p.offseti = i case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } // 17:57:51 MST 2009 // 17:57:51 MST p.tzlen = i - p.tzi @@ -1361,6 +1445,9 @@ iterRunes: // 15:44:11 UTC+0100 2015 switch r { case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.set(p.offseti, "-0700") if p.yeari == 0 { p.yeari = i + 1 @@ -1423,6 +1510,9 @@ iterRunes: case ':': p.stateTime = timeWsOffsetColon case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.set(p.offseti, "-0700") p.yeari = i + 1 p.stateTime = timeWsOffsetWs @@ -1503,6 +1593,9 @@ iterRunes: // 06:20:00.000 UTC switch r { case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.mslen = i - p.msi p.stateTime = timePeriodWs case '+', '-': @@ -1533,6 +1626,9 @@ iterRunes: // 13:31:51.999 -07:00 MST switch r { case ' ': + for i+1 < len(datestr) 
&& datestr[i+1] == ' ' { + i++ + } p.set(p.offseti, "-07:00") p.stateTime = timePeriodOffsetColonWs p.tzi = i + 1 @@ -1586,6 +1682,9 @@ iterRunes: case ':': p.stateTime = timePeriodWsOffsetColon case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.set(p.offseti, "-0700") case '+', '-': // This really doesn't seem valid, but for some reason when round-tripping a go date @@ -1615,6 +1714,9 @@ iterRunes: // 13:31:51.999 -07:00 MST switch r { case ' ': + for i+1 < len(datestr) && datestr[i+1] == ' ' { + i++ + } p.set(p.offseti, "-07:00") default: if unicode.IsLetter(r) { From 3a32cbb3d2ddb32f6fcd4d7ea241cfdce3c27d24 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 16:28:56 +1100 Subject: [PATCH 22/62] All of these did nothing --- parseany.go | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/parseany.go b/parseany.go index 915051c..078fc3e 100644 --- a/parseany.go +++ b/parseany.go @@ -383,9 +383,6 @@ iterRunes: } case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } // 18 January 2018 // 8 January 2018 // 8 jan 2018 @@ -449,9 +446,6 @@ iterRunes: p.stateDate = dateYearDashDashOffset p.setDay() case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.daylen = i - p.dayi p.stateDate = dateYearDashDashWs p.stateTime = timeStart @@ -512,9 +506,6 @@ iterRunes: // 29-Jun-2016 dd-month(alpha)-yyyy switch r { case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } // we need to find if this was 4 digits, aka year // or 2 digits which makes it ambiguous year/day length := i - (p.moi + p.molen + 1) @@ -548,9 +539,6 @@ iterRunes: switch r { case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } fallthrough case ':': p.stateTime = timeStart @@ -581,9 +569,6 @@ iterRunes: // We aren't breaking because we are going to re-use this case // to find where the date starts, and possible time begins case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - 
i++ - } fallthrough case ':': p.stateTime = timeStart @@ -623,9 +608,6 @@ iterRunes: // Note no break, we are going to pass by and re-enter this dateDigitSlash // and look for ending (space) or not (just date) case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.stateTime = timeStart if p.yearlen == 0 { p.yearlen = i - p.yeari @@ -647,9 +629,6 @@ iterRunes: switch r { case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.stateTime = timeStart if p.yearlen == 0 { p.yearlen = i - p.yeari @@ -687,9 +666,6 @@ iterRunes: // 12 Feb 2006, 19:17:22 switch r { case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.yeari = i + 1 //p.yearlen = 4 p.dayi = 0 @@ -724,9 +700,6 @@ iterRunes: i++ break iterRunes case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.yearlen = i - p.yeari p.setYear() break iterRunes @@ -741,9 +714,6 @@ iterRunes: // weekday %Y年%m月%e日 %A %I:%M %p // 2013年07月18日 星期四 10:27 上午 if r == ' ' { - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.stateDate = dateDigitChineseYearWs break } @@ -804,9 +774,6 @@ iterRunes: case r == ' ': // X // April 8, 2009 - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } if i > 3 { // Check to see if the alpha is name of month? or Day? month := strings.ToLower(datestr[0:i]) From b1fd89e43f72b58e0aa987c45f818a8cf6b24bb6 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 16:31:37 +1100 Subject: [PATCH 23/62] The only required one. 
--- parseany.go | 56 +---------------------------------------------------- 1 file changed, 1 insertion(+), 55 deletions(-) diff --git a/parseany.go b/parseany.go index 078fc3e..8933b64 100644 --- a/parseany.go +++ b/parseany.go @@ -881,9 +881,6 @@ iterRunes: p.setDay() p.stateDate = dateAlphaWsDigitMore } else if r == ' ' { - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.daylen = i - p.dayi p.setDay() p.yeari = i + 1 @@ -907,9 +904,6 @@ iterRunes: p.yeari = 0 break iterRunes } else if r == ' ' { - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } // must be year format, not 15:04 p.yearlen = i - p.yeari p.setYear() @@ -923,9 +917,6 @@ iterRunes: // oct 1, 1970 // oct 7, '70 if r == ' ' { - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.yeari = i + 1 p.stateDate = dateAlphaWsDigitMoreWs } @@ -939,9 +930,6 @@ iterRunes: case '\'': p.yeari = i + 1 case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } fallthrough case ',': // x @@ -960,9 +948,6 @@ iterRunes: // April 8 2009 switch r { case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } fallthrough case ',': // x @@ -996,9 +981,6 @@ iterRunes: i++ break iterRunes case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.yearlen = i - p.yeari p.setYear() p.stateTime = timeStart @@ -1053,9 +1035,6 @@ iterRunes: } case r == ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } // x // January 02 2006, 15:04:05 p.daylen = i - p.dayi @@ -1079,9 +1058,6 @@ iterRunes: // oct. 
7, '70 switch { case r == ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } // continue case unicode.IsDigit(r): p.stateDate = dateAlphaWsDigit @@ -1099,9 +1075,6 @@ iterRunes: } switch r { case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } fallthrough case '-': if p.moi == 0 { @@ -1257,10 +1230,7 @@ iterRunes: if p.nextIs(i, ' ') { // x // September 17, 2012 at 5:00pm UTC-05 - i++ // skip ' - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } + i++ // skip ' p.houri = 0 // reset hour } } else { @@ -1285,9 +1255,6 @@ iterRunes: p.set(i, "PM") } case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.coalesceTime(i) p.stateTime = timeWs case ':': @@ -1385,9 +1352,6 @@ iterRunes: p.stateTime = timeWsAlphaZoneOffset p.offseti = i case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } // 17:57:51 MST 2009 // 17:57:51 MST p.tzlen = i - p.tzi @@ -1412,9 +1376,6 @@ iterRunes: // 15:44:11 UTC+0100 2015 switch r { case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.set(p.offseti, "-0700") if p.yeari == 0 { p.yeari = i + 1 @@ -1477,9 +1438,6 @@ iterRunes: case ':': p.stateTime = timeWsOffsetColon case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.set(p.offseti, "-0700") p.yeari = i + 1 p.stateTime = timeWsOffsetWs @@ -1560,9 +1518,6 @@ iterRunes: // 06:20:00.000 UTC switch r { case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.mslen = i - p.msi p.stateTime = timePeriodWs case '+', '-': @@ -1593,9 +1548,6 @@ iterRunes: // 13:31:51.999 -07:00 MST switch r { case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.set(p.offseti, "-07:00") p.stateTime = timePeriodOffsetColonWs p.tzi = i + 1 @@ -1649,9 +1601,6 @@ iterRunes: case ':': p.stateTime = timePeriodWsOffsetColon case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.set(p.offseti, "-0700") case '+', '-': // This really doesn't seem valid, but for some reason 
when round-tripping a go date @@ -1681,9 +1630,6 @@ iterRunes: // 13:31:51.999 -07:00 MST switch r { case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { - i++ - } p.set(p.offseti, "-07:00") default: if unicode.IsLetter(r) { From 19ef6a25eb4b3a152f7a7610e5292212e142e393 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 17:34:02 +1100 Subject: [PATCH 24/62] New failure - still white space --- parseany_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/parseany_test.go b/parseany_test.go index 5d86f31..b5345a5 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -418,6 +418,7 @@ var testInputs = []dateTest{ {in: "1384216367111222333", out: "2013-11-12 00:32:47.111222333 +0000 UTC"}, {in: "Wed, 8 Feb 2023 19:00:46 +1100 (AEDT)", out: "2023-02-08 08:00:46 +0000 UTC"}, + {in: "FRI, 16 AUG 2013 9:39:51 +1000", out: "2013-08-15 23:39:51 +0000 UTC"}, } func TestParse(t *testing.T) { From 8b765a53023b55d55b01e751341b0ad02b9defe0 Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 23:37:23 +1100 Subject: [PATCH 25/62] Skip white space --- parseany.go | 6 ++++-- parseany_test.go | 39 +++++++++++++++++++++++++-------------- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/parseany.go b/parseany.go index 8933b64..4f3b72d 100644 --- a/parseany.go +++ b/parseany.go @@ -1097,10 +1097,12 @@ iterRunes: // Thu, 4 Jan 2018 17:53:36 +0000 // Tue, 11 Jul 2017 16:28:13 +0200 (CEST) // Mon, 02-Jan-06 15:04:05 MST + var offset int switch r { case ' ': for i+1 < len(datestr) && datestr[i+1] == ' ' { i++ + offset++ } fallthrough case '-': @@ -1111,11 +1113,11 @@ iterRunes: p.setDay() p.moi = i + 1 } else if p.yeari == 0 { - p.molen = i - p.moi + p.molen = i - p.moi - offset p.set(p.moi, "Jan") p.yeari = i + 1 } else { - p.yearlen = i - p.yeari + p.yearlen = i - p.yeari - offset p.setYear() p.stateTime = timeStart break iterRunes diff --git a/parseany_test.go b/parseany_test.go index b5345a5..aeb201f 100644 --- a/parseany_test.go 
+++ b/parseany_test.go @@ -427,17 +427,20 @@ func TestParse(t *testing.T) { time.Local = time.UTC zeroTime := time.Time{}.Unix() - ts, err := ParseAny("INVALID") - assert.Equal(t, zeroTime, ts.Unix()) - assert.NotEqual(t, nil, err) + t.Run("Invalid", func(t *testing.T) { + ts, err := ParseAny("INVALID") + assert.Equal(t, zeroTime, ts.Unix()) + assert.NotEqual(t, nil, err) - assert.Equal(t, true, testDidPanic("NOT GONNA HAPPEN")) - // https://github.com/golang/go/issues/5294 - _, err = ParseAny(time.RFC3339) - assert.NotEqual(t, nil, err) + assert.Equal(t, true, testDidPanic("NOT GONNA HAPPEN")) + // https://github.com/golang/go/issues/5294 + _, err = ParseAny(time.RFC3339) + assert.NotEqual(t, nil, err) + }) for _, th := range testInputs { t.Run(th.in, func(t *testing.T) { + var ts time.Time defer func() { if r := recover(); r != nil { t.Fatalf("error: %s", r) @@ -470,16 +473,24 @@ func TestParse(t *testing.T) { // some errors - assert.Equal(t, true, testDidPanic(`{"ts":"now"}`)) + t.Run("", func(t *testing.T) { + assert.Equal(t, true, testDidPanic(`{"ts":"now"}`)) + }) - _, err = ParseAny("138421636711122233311111") // too many digits - assert.NotEqual(t, nil, err) + t.Run("too many digits", func(t *testing.T) { + _, err := ParseAny("138421636711122233311111") // too many digits + assert.NotEqual(t, nil, err) + }) - _, err = ParseAny("-1314") - assert.NotEqual(t, nil, err) + t.Run("negative number", func(t *testing.T) { + _, err := ParseAny("-1314") + assert.NotEqual(t, nil, err) + }) - _, err = ParseAny("2014-13-13 08:20:13,787") // month 13 doesn't exist so error - assert.NotEqual(t, nil, err) + t.Run("month doesn't exist", func(t *testing.T) { + _, err := ParseAny("2014-13-13 08:20:13,787") // month 13 doesn't exist so error + assert.NotEqual(t, nil, err) + }) } func testDidPanic(datestr string) (paniced bool) { From b0b5409675c652d98f67960dfed5ae31c8d9851b Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Wed, 15 Feb 2023 23:37:27 +1100 Subject: [PATCH 26/62] 
Unused code --- parseany.go | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/parseany.go b/parseany.go index 4f3b72d..0407b79 100644 --- a/parseany.go +++ b/parseany.go @@ -2156,17 +2156,6 @@ func (p *parser) trimExtra() { } } -// func (p *parser) remove(i, length int) { -// if len(p.format) > i+length { -// //append(a[:i], a[j:]...) -// p.format = append(p.format[0:i], p.format[i+length:]...) -// } -// if len(p.datestr) > i+length { -// //append(a[:i], a[j:]...) -// p.datestr = fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+length:]) -// } -// } - func (p *parser) parse() (time.Time, error) { if p.t != nil { return *p.t, nil From 01b692d1ce4d329cac9290001673b0446acf599d Mon Sep 17 00:00:00 2001 From: Arran Ubels Date: Thu, 16 Feb 2023 09:39:34 +1100 Subject: [PATCH 27/62] Another case. --- parseany.go | 14 +++++++++++--- parseany_test.go | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/parseany.go b/parseany.go index 0407b79..b427d68 100644 --- a/parseany.go +++ b/parseany.go @@ -1345,7 +1345,12 @@ iterRunes: // 15:44:11 UTC+0100 2015 switch r { case '+', '-': - p.tzlen = i - p.tzi + if datestr[p.tzi:i] == "GMT" { + p.tzi = 0 + p.tzlen = 0 + } else { + p.tzlen = i - p.tzi + } if p.tzlen == 4 { p.set(p.tzi, " MST") } else if p.tzlen == 3 { @@ -1679,10 +1684,13 @@ iterRunes: p.trimExtra() case timeWsAlphaZoneOffset: // 06:20:00 UTC-05 - if i-p.offseti < 4 { + switch i - p.offseti { + case 2, 3, 4: p.set(p.offseti, "-07") - } else { + case 5: p.set(p.offseti, "-0700") + case 6: + p.set(p.offseti, "-07:00") } case timePeriod: diff --git a/parseany_test.go b/parseany_test.go index aeb201f..0193b52 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -419,6 +419,7 @@ var testInputs = []dateTest{ {in: "Wed, 8 Feb 2023 19:00:46 +1100 (AEDT)", out: "2023-02-08 08:00:46 +0000 UTC"}, {in: "FRI, 16 AUG 2013 9:39:51 +1000", out: "2013-08-15 23:39:51 +0000 UTC"}, + {in: "Mon, 1 Dec 2008 14:48:22 GMT-07:00", out: "2008-12-01 21:48:22 +0000 
UTC"}, } func TestParse(t *testing.T) { From 465140d619e8f46013682d15d4076365de586853 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Fri, 8 Dec 2023 18:31:28 -0700 Subject: [PATCH 28/62] Fix ineffective break statements --- bench_test.go | 2 +- parseany.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bench_test.go b/bench_test.go index 4322b86..1973308 100644 --- a/bench_test.go +++ b/bench_test.go @@ -68,7 +68,7 @@ var ( "2014-04-26", } - ErrDateFormat = fmt.Errorf("Invalid Date Format") + ErrDateFormat = fmt.Errorf("invalid date format") timeFormats = []string{ // ISO 8601ish formats diff --git a/parseany.go b/parseany.go index b427d68..ab817fe 100644 --- a/parseany.go +++ b/parseany.go @@ -715,7 +715,7 @@ iterRunes: // 2013年07月18日 星期四 10:27 上午 if r == ' ' { p.stateDate = dateDigitChineseYearWs - break + break iterRunes } case dateDigitDot: // This is the 2nd period @@ -1630,7 +1630,7 @@ iterRunes: if r == '=' && datestr[i-1] == 'm' { p.extra = i - 2 p.trimExtra() - break + break iterTimeRunes } case timePeriodWsOffsetColon: From 1b1e0b3d332c6d731644f191735fce318fef4cb1 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Mon, 11 Dec 2023 23:45:58 -0700 Subject: [PATCH 29/62] Add extensive format validation, bugfixes * Don't just assume we were given one of the valid formats. * Also consolidate the parsing states that occur after timePeriod. * Add subtests to make it easier to see what fails. * Additional tests for 4-char timezone names. 
* Fix https://github.com/araddon/dateparse/issues/117 * Fix https://github.com/araddon/dateparse/issues/150 * Fix https://github.com/araddon/dateparse/issues/157 * Fix https://github.com/araddon/dateparse/issues/145 * Fix https://github.com/araddon/dateparse/issues/108 * Fix https://github.com/araddon/dateparse/issues/137 * Fix https://github.com/araddon/dateparse/issues/130 * Fix https://github.com/araddon/dateparse/issues/123 * Fix https://github.com/araddon/dateparse/issues/109 * Fix https://github.com/araddon/dateparse/issues/98 * Addresses bug in https://github.com/araddon/dateparse/issues/100#issuecomment-1118868154 Adds test cases to verify the following are already fixed: * https://github.com/araddon/dateparse/issues/94 --- parseany.go | 869 +++++++++++++++++++++++++++-------------------- parseany_test.go | 364 ++++++++++++++------ 2 files changed, 767 insertions(+), 466 deletions(-) diff --git a/parseany.go b/parseany.go index ab817fe..700ffdf 100644 --- a/parseany.go +++ b/parseany.go @@ -75,11 +75,10 @@ const ( dateDigitChineseYearWs dateDigitWs dateDigitWsMoYear // 20 - dateDigitWsMolong dateAlpha dateAlphaWs dateAlphaWsDigit - dateAlphaWsDigitMore // 25 + dateAlphaWsDigitMore // 24 dateAlphaWsDigitMoreWs dateAlphaWsDigitMoreWsYear dateAlphaWsMonth @@ -89,7 +88,7 @@ const ( dateAlphaWsMore dateAlphaWsAtTime dateAlphaWsAlpha - dateAlphaWsAlphaYearmaybe // 35 + dateAlphaWsAlphaYearmaybe // 34 dateAlphaPeriodWsDigit dateWeekdayComma dateWeekdayAbbrevComma @@ -114,20 +113,11 @@ const ( timeWsYear // 15 timeOffset timeOffsetColon + timeOffsetColonAlpha timeAlpha timePeriod - timePeriodOffset // 20 - timePeriodOffsetColon - timePeriodOffsetColonWs - timePeriodWs - timePeriodWsAlpha - timePeriodWsOffset // 25 - timePeriodWsOffsetWs - timePeriodWsOffsetWsAlpha - timePeriodWsOffsetColon - timePeriodWsOffsetColonAlpha + timePeriodAMPM timeZ - timeZDigit ) var ( @@ -135,12 +125,17 @@ var ( // ambiguous, so it is an error for strict parse rules. 
ErrAmbiguousMMDD = fmt.Errorf("this date has ambiguous mm/dd vs dd/mm type format") ErrCouldntFindFormat = fmt.Errorf("could not find format for") + ErrUnexpectedTail = fmt.Errorf("unexpected content after date/time: ") ) func unknownErr(datestr string) error { return fmt.Errorf("%w %q", ErrCouldntFindFormat, datestr) } +func unexpectedTail(tail string) error { + return fmt.Errorf("%w %q", ErrUnexpectedTail, tail) +} + // ParseAny parse an unknown date format, detect the layout. // Normal parse. Equivalent Timezone rules as time.Parse(). // NOTE: please see readme on mmdd vs ddmm ambiguous dates. @@ -237,6 +232,13 @@ func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *par if err != nil { return } + + // if this string is impossibly long, don't even try. longest date might be something like: + // 'Wednesday, 8 February 2023 19:00:46.999999999 +11:00 (AEDT) m=+0.000000001' + if len(datestr) > 75 { + return p, unknownErr(datestr) + } + if p.retryAmbiguousDateWithSwap { // month out of range signifies that a day/month swap is the correct solution to an ambiguous date // this is because it means that a day is being interpreted as a month and overflowing the valid value for that @@ -259,6 +261,11 @@ func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *par }() } + // IMPORTANT: we may need to modify the datestr while we are parsing (e.g., to + // remove pieces of the string that should be ignored during golang parsing). + // We will iterate over the modified datestr, and whenever we update datestr, + // we need to make sure that i is adjusted accordingly to resume parsing in + // the correct place. In error messages though we'll use the original datestr. 
i := 0 // General strategy is to read rune by rune through the date looking for @@ -266,14 +273,13 @@ func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *par // Hopefully we only need to read about 5 or 6 bytes before // we figure it out and then attempt a parse iterRunes: - for ; i < len(datestr); i++ { - //r := rune(datestr[i]) - r, bytesConsumed := utf8.DecodeRuneInString(datestr[i:]) + for ; i < len(p.datestr); i++ { + r, bytesConsumed := utf8.DecodeRuneInString(p.datestr[i:]) if bytesConsumed > 1 { i += bytesConsumed - 1 } - // gou.Debugf("i=%d r=%s state=%d %s", i, string(r), p.stateDate, datestr) + // gou.Debugf("i=%d r=%s state=%d %s", i, string(r), p.stateDate, p.datestr) switch p.stateDate { case dateStart: if unicode.IsDigit(r) { @@ -281,7 +287,7 @@ iterRunes: } else if unicode.IsLetter(r) { p.stateDate = dateAlpha } else { - return nil, unknownErr(datestr) + return p, unknownErr(datestr) } case dateDigit: @@ -309,20 +315,24 @@ iterRunes: // 2014/02/24 - Year first / p.yearlen = i // since it was start of datestr, i=len p.moi = i + 1 - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } p.stateDate = dateDigitYearSlash } else { // Either Ambiguous dd/mm vs mm/dd OR dd/month/yy // 08/May/2005 // 03/31/2005 // 31/03/2005 - if i+2 < len(p.datestr) && unicode.IsLetter(rune(datestr[i+1])) { + if i+2 < len(p.datestr) && unicode.IsLetter(rune(p.datestr[i+1])) { // 08/May/2005 p.stateDate = dateDigitSlashAlpha p.moi = i + 1 p.daylen = 2 p.dayi = 0 - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } continue } // Ambiguous dd/mm vs mm/dd the bane of date-parsing @@ -333,13 +343,17 @@ iterRunes: if p.molen == 0 { // 03/31/2005 p.molen = i - p.setMonth() + if !p.setMonth() { + return p, unknownErr(datestr) + } p.dayi = i + 1 } } else { if p.daylen == 0 { p.daylen = i - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } p.moi = i + 1 } } @@ -353,13 +367,17 @@ iterRunes: if i == 4 { p.yearlen = i 
p.moi = i + 1 - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } } else { p.ambiguousMD = true if p.preferMonthFirst { if p.molen == 0 { p.molen = i - p.setMonth() + if !p.setMonth() { + return p, unknownErr(datestr) + } p.dayi = i + 1 } } @@ -373,12 +391,16 @@ iterRunes: if i == 4 { p.yearlen = i p.moi = i + 1 - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } } else { p.ambiguousMD = true p.moi = 0 p.molen = i - p.setMonth() + if !p.setMonth() { + return p, unknownErr(datestr) + } p.dayi = i + 1 } @@ -401,7 +423,7 @@ iterRunes: // Chinese Year p.stateDate = dateDigitChineseYear case ',': - return nil, unknownErr(datestr) + return p, unknownErr(datestr) default: continue } @@ -425,7 +447,9 @@ iterRunes: p.molen = i - p.moi p.dayi = i + 1 p.stateDate = dateYearDashDash - p.setMonth() + if !p.setMonth() { + return p, unknownErr(datestr) + } default: if unicode.IsLetter(r) { p.stateDate = dateYearDashAlphaDash @@ -444,18 +468,24 @@ iterRunes: p.offseti = i p.daylen = i - p.dayi p.stateDate = dateYearDashDashOffset - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } case ' ': p.daylen = i - p.dayi p.stateDate = dateYearDashDashWs p.stateTime = timeStart - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } break iterRunes case 'T': p.daylen = i - p.dayi p.stateDate = dateYearDashDashT p.stateTime = timeStart - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } break iterRunes } @@ -486,7 +516,7 @@ iterRunes: p.stateDate = dateDigitDashAlpha p.moi = i } else { - return nil, unknownErr(datestr) + return p, unknownErr(datestr) } case dateDigitDashAlpha: // 13-Feb-03 @@ -515,7 +545,9 @@ iterRunes: // We now also know that part1 was the day p.dayi = 0 p.daylen = p.part1Len - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } } else if length == 2 { // We have no idea if this is // yy-mon-dd OR dd-mon-yy @@ -527,7 +559,9 @@ iterRunes: // We now also know that part1 was the day 
p.dayi = 0 p.daylen = p.part1Len - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } } p.stateTime = timeStart break iterRunes @@ -544,13 +578,17 @@ iterRunes: p.stateTime = timeStart if p.daylen == 0 { p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } } break iterRunes case '/': if p.molen == 0 { p.molen = i - p.moi - p.setMonth() + if !p.setMonth() { + return p, unknownErr(datestr) + } p.dayi = i + 1 } } @@ -574,7 +612,9 @@ iterRunes: p.stateTime = timeStart if p.yearlen == 0 { p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } } break iterRunes } @@ -595,13 +635,17 @@ iterRunes: if p.preferMonthFirst { if p.daylen == 0 { p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } p.yeari = i + 1 } } else { if p.molen == 0 { p.molen = i - p.moi - p.setMonth() + if !p.setMonth() { + return p, unknownErr(datestr) + } p.yeari = i + 1 } } @@ -611,7 +655,9 @@ iterRunes: p.stateTime = timeStart if p.yearlen == 0 { p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } } break iterRunes } @@ -632,10 +678,14 @@ iterRunes: p.stateTime = timeStart if p.yearlen == 0 { p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } } else if p.daylen == 0 { p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } } break iterRunes case ':': @@ -643,13 +693,17 @@ iterRunes: // 2014:07:10 06:55:38.156283 if p.molen == 0 { p.molen = i - p.moi - p.setMonth() + if !p.setMonth() { + return p, unknownErr(datestr) + } p.dayi = i + 1 } } else if p.preferMonthFirst { if p.daylen == 0 { p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } p.yeari = i + 1 } } @@ -670,17 +724,30 @@ iterRunes: //p.yearlen = 4 p.dayi = 0 p.daylen = p.part1Len - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } p.stateTime = 
timeStart if i > p.daylen+len(" Sep") { // November etc - // If len greather than space + 3 it must be full month - p.stateDate = dateDigitWsMolong + // If this is a legit full month, then change the string we're parsing + // to compensate for the longest month, and do the same with the format string. We + // must maintain a corresponding length/content and this is the easiest + // way to do this. + possibleFullMonth := strings.ToLower(p.datestr[(p.dayi + (p.daylen + 1)):i]) + if isMonthFull(possibleFullMonth) { + p.moi = p.dayi + p.daylen + 1 + p.molen = i - p.moi + p.fullMonth = possibleFullMonth + p.stateDate = dateDigitWsMoYear + } else { + return p, unknownErr(datestr) + } } else { // If len=3, the might be Feb or May? Ie ambigous abbreviated but // we can parse may with either. BUT, that means the // format may not be correct? - // mo := strings.ToLower(datestr[p.daylen+1 : i]) - p.moi = p.daylen + 1 + // mo := strings.ToLower(p.datestr[p.daylen+1 : i]) + p.moi = p.dayi + p.daylen + 1 p.molen = i - p.moi p.set(p.moi, "Jan") p.stateDate = dateDigitWsMoYear @@ -696,18 +763,18 @@ iterRunes: switch r { case ',': p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } i++ break iterRunes case ' ': p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } break iterRunes } - case dateDigitWsMolong: - // 18 January 2018 - // 8 January 2018 - case dateDigitChineseYear: // dateDigitChineseYear // 2014年04月08日 @@ -728,14 +795,18 @@ iterRunes: // 3.31.2014 p.daylen = i - p.dayi p.yeari = i + 1 - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } p.stateDate = dateDigitDotDot } else { // 2018.09.30 //p.molen = 2 p.molen = i - p.moi p.dayi = i + 1 - p.setMonth() + if !p.setMonth() { + return p, unknownErr(datestr) + } p.stateDate = dateDigitDotDot } } @@ -776,11 +847,13 @@ iterRunes: // April 8, 2009 if i > 3 { // Check to see if the alpha is name of month? or Day? 
- month := strings.ToLower(datestr[0:i]) + month := strings.ToLower(p.datestr[0:i]) if isMonthFull(month) { + p.moi = 0 + p.molen = i p.fullMonth = month // len(" 31, 2018") = 9 - if len(datestr[i:]) < 10 { + if len(p.datestr[i:]) < 10 { // April 8, 2009 p.stateDate = dateAlphaWsMonth } else { @@ -803,10 +876,10 @@ iterRunes: // Tue 05 May 2020, 05:05:05 // Mon Jan 2 15:04:05 2006 - maybeDay := strings.ToLower(datestr[0:i]) + maybeDay := strings.ToLower(p.datestr[0:i]) if isDay(maybeDay) { // using skip throws off indices used by other code; saner to restart - return parseTime(datestr[i+1:], loc) + return parseTime(p.datestr[i+1:], loc) } p.stateDate = dateAlphaWs } @@ -834,10 +907,10 @@ iterRunes: p.set(0, "Jan") } else if i == 4 { // gross - datestr = datestr[0:i-1] + datestr[i:] - return parseTime(datestr, loc, opts...) + newDatestr := p.datestr[0:i-1] + p.datestr[i:] + return parseTime(newDatestr, loc, opts...) } else { - return nil, unknownErr(datestr) + return p, unknownErr(datestr) } } @@ -878,11 +951,15 @@ iterRunes: // May 08 17:57:51 2009 if r == ',' { p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } p.stateDate = dateAlphaWsDigitMore } else if r == ' ' { p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } p.yeari = i + 1 p.stateDate = dateAlphaWsDigitYearmaybe p.stateTime = timeStart @@ -906,7 +983,9 @@ iterRunes: } else if r == ' ' { // must be year format, not 15:04 p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } break iterRunes } case dateAlphaWsDigitMore: @@ -938,7 +1017,9 @@ iterRunes: // May 8, 2009, 5:57:51 PM p.stateDate = dateAlphaWsDigitMoreWsYear p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } p.stateTime = timeStart break iterRunes } @@ -956,7 +1037,9 @@ iterRunes: // June 8 2009 if p.daylen == 0 { p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, 
unknownErr(datestr) + } } case 's', 'S', 'r', 'R', 't', 'T', 'n', 'N': // st, rd, nd, st @@ -976,13 +1059,17 @@ iterRunes: switch r { case ',': p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } p.stateTime = timeStart i++ break iterRunes case ' ': p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } p.stateTime = timeStart break iterRunes } @@ -993,25 +1080,25 @@ iterRunes: switch r { case 't', 'T': if p.nextIs(i, 'h') || p.nextIs(i, 'H') { - if len(datestr) > i+2 { + if len(p.datestr) > i+2 { return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...) } } case 'n', 'N': if p.nextIs(i, 'd') || p.nextIs(i, 'D') { - if len(datestr) > i+2 { + if len(p.datestr) > i+2 { return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...) } } case 's', 'S': if p.nextIs(i, 't') || p.nextIs(i, 'T') { - if len(datestr) > i+2 { + if len(p.datestr) > i+2 { return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...) } } case 'r', 'R': if p.nextIs(i, 'd') || p.nextIs(i, 'D') { - if len(datestr) > i+2 { + if len(p.datestr) > i+2 { return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...) 
} } @@ -1028,7 +1115,9 @@ iterRunes: // January 02, 2006, 15:04:05 if p.nextIs(i, ' ') { p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } p.yeari = i + 2 p.stateDate = dateAlphaWsMonthMore i++ @@ -1038,7 +1127,9 @@ iterRunes: // x // January 02 2006, 15:04:05 p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } p.yeari = i + 1 p.stateDate = dateAlphaWsMonthMore case unicode.IsDigit(r): @@ -1049,7 +1140,9 @@ iterRunes: // X // January 2nd, 2006, 15:04:05 p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } p.stateDate = dateAlphaWsMonthSuffix i-- } @@ -1080,7 +1173,9 @@ iterRunes: if p.moi == 0 { p.moi = i + 1 p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } } else if p.yeari == 0 { p.yeari = i + 1 p.molen = i - p.moi @@ -1100,7 +1195,7 @@ iterRunes: var offset int switch r { case ' ': - for i+1 < len(datestr) && datestr[i+1] == ' ' { + for i+1 < len(p.datestr) && p.datestr[i+1] == ' ' { i++ offset++ } @@ -1110,7 +1205,9 @@ iterRunes: p.dayi = i + 1 } else if p.moi == 0 { p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } p.moi = i + 1 } else if p.yeari == 0 { p.molen = i - p.moi - offset @@ -1118,7 +1215,9 @@ iterRunes: p.yeari = i + 1 } else { p.yearlen = i - p.yeari - offset - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } p.stateTime = timeStart break iterRunes } @@ -1128,23 +1227,25 @@ iterRunes: break iterRunes } } - p.coalesceDate(i) + if !p.coalesceDate(i) { + return p, unknownErr(datestr) + } if p.stateTime == timeStart { // increment first one, since the i++ occurs at end of loop if i < len(p.datestr) { i++ } // ensure we skip any whitespace prefix - for ; i < len(datestr); i++ { - r := rune(datestr[i]) + for ; i < len(p.datestr); i++ { + r := rune(p.datestr[i]) if r != ' ' { break } } iterTimeRunes: - for ; i < len(datestr); i++ { - r := 
rune(datestr[i]) + for ; i < len(p.datestr); i++ { + r := rune(p.datestr[i]) // gou.Debugf("i=%d r=%s state=%d iterTimeRunes %s %s", i, string(r), p.stateTime, p.ds(), p.ts()) @@ -1171,20 +1272,7 @@ iterRunes: // 00:07:31.945167 // 18:31:59.257000000 // 00:00:00.000 - // timePeriodOffset - // 19:55:00.799+0100 - // timePeriodOffsetColon - // 15:04:05.999-07:00 - // timePeriodWs - // timePeriodWsOffset - // 00:07:31.945167 +0000 - // 00:00:00.000 +0000 - // timePeriodWsOffsetAlpha - // 00:07:31.945167 +0000 UTC - // 22:18:00.001 +0000 UTC m=+0.000000001 - // 00:00:00.000 +0000 UTC - // timePeriodWsAlpha - // 06:20:00.000 UTC + // (and all variants that can follow the seconds portion of a time format, same as above) if p.houri == 0 { p.houri = i } @@ -1224,38 +1312,37 @@ iterRunes: } // (Z)ulu time p.loc = time.UTC - case 'a', 'A': - if p.nextIs(i, 't') || p.nextIs(i, 'T') { + endPos := i + 1 + if endPos > p.formatSetLen { + p.formatSetLen = endPos + } + case 'a', 'A', 'p', 'P': + if (r == 'a' || r == 'A') && (p.nextIs(i, 't') || p.nextIs(i, 'T')) { // x // September 17, 2012 at 5:00pm UTC-05 - i++ // skip t + i++ // skip 't' if p.nextIs(i, ' ') { // x // September 17, 2012 at 5:00pm UTC-05 - i++ // skip ' + i++ // skip ' ' p.houri = 0 // reset hour } } else { + // Could be AM/PM + isLower := r == 'a' || r == 'p' switch { - case r == 'a' && p.nextIs(i, 'm'): + case isLower && p.nextIs(i, 'm'): p.coalesceTime(i) - p.set(i, "am") - case r == 'A' && p.nextIs(i, 'M'): + p.set(i, "pm") + // skip 'm' + i++ + case !isLower && p.nextIs(i, 'M'): p.coalesceTime(i) p.set(i, "PM") + // skip 'M' + i++ } } - - case 'p', 'P': - // Could be AM/PM - switch { - case r == 'p' && p.nextIs(i, 'm'): - p.coalesceTime(i) - p.set(i, "pm") - case r == 'P' && p.nextIs(i, 'M'): - p.coalesceTime(i) - p.set(i, "PM") - } case ' ': p.coalesceTime(i) p.stateTime = timeWs @@ -1277,8 +1364,9 @@ iterRunes: // Could not get the parsing to work using golang time.Parse() without // replacing that colon 
with period. p.set(i, ".") - datestr = datestr[0:i] + "." + datestr[i+1:] - p.datestr = datestr + newDatestr := p.datestr[0:i] + "." + p.datestr[i+1:] + p.datestr = newDatestr + p.stateTime = timePeriod } } case timeOffset: @@ -1311,7 +1399,7 @@ iterRunes: // timeZ // 15:04:05.99Z switch r { - case 'A', 'P': + case 'a', 'p', 'A', 'P': // Could be AM/PM or could be PST or similar p.tzi = i p.stateTime = timeWsAMPMMaybe @@ -1345,7 +1433,7 @@ iterRunes: // 15:44:11 UTC+0100 2015 switch r { case '+', '-': - if datestr[p.tzi:i] == "GMT" { + if p.datestr[p.tzi:i] == "GMT" { p.tzi = 0 p.tzlen = 0 } else { @@ -1406,7 +1494,9 @@ iterRunes: if unicode.IsDigit(r) { p.yearlen = i - p.yeari + 1 if p.yearlen == 4 { - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } } } case timeWsAMPMMaybe: @@ -1416,10 +1506,16 @@ iterRunes: // timeWsAlpha // 00:12:00 PST // 15:44:11 UTC+0100 2015 - if r == 'M' { - //return parse("2006-01-02 03:04:05 PM", datestr, loc) + if r == 'm' || r == 'M' { + //return parse("2006-01-02 03:04:05 PM", p.datestr, loc) + // This isn't a time zone after all... 
+ p.tzi = 0 p.stateTime = timeWsAMPM - p.set(i-1, "PM") + if r == 'm' { + p.set(i-1, "pm") + } else { + p.set(i-1, "PM") + } if p.hourlen == 2 { p.set(p.houri, "03") } else if p.hourlen == 1 { @@ -1429,6 +1525,15 @@ iterRunes: p.stateTime = timeWsAlpha } + case timeWsAMPM: + // If we have a continuation after AM/PM indicator, reset parse state back to ws + if r == ' ' { + p.stateTime = timeWs + } else { + // unexpected garbage after AM/PM indicator, fail + return p, unexpectedTail(p.datestr[i:]) + } + case timeWsOffset: // timeWsOffset // 15:04:05 -0700 @@ -1458,9 +1563,9 @@ iterRunes: switch r { case '=': // eff you golang - if datestr[i-1] == 'm' { + if p.datestr[i-1] == 'm' { p.extra = i - 2 - p.trimExtra() + p.trimExtra(false) } case '+', '-', '(': // This really doesn't seem valid, but for some reason when round-tripping a go date @@ -1469,13 +1574,15 @@ iterRunes: // 00:00:00 +0300 +0300 p.extra = i - 1 p.stateTime = timeWsOffset - p.trimExtra() + p.trimExtra(false) default: switch { case unicode.IsDigit(r): p.yearlen = i - p.yeari + 1 if p.yearlen == 4 { - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } } case unicode.IsLetter(r): // 15:04:05 -0700 MST @@ -1485,203 +1592,148 @@ iterRunes: } } - case timeWsOffsetColon: + case timeOffsetColon, timeWsOffsetColon: + // timeOffsetColon + // 15:04:05-07:00 + // timeOffsetColonAlpha + // 2015-02-18 00:12:00+00:00 UTC // timeWsOffsetColon // 15:04:05 -07:00 // timeWsOffsetColonAlpha // 2015-02-18 00:12:00 +00:00 UTC if unicode.IsLetter(r) { + // TODO: do we need to handle the m=+0.000000001 case? 
// 2015-02-18 00:12:00 +00:00 UTC - p.stateTime = timeWsOffsetColonAlpha + if p.stateTime == timeWsOffsetColon { + p.stateTime = timeWsOffsetColonAlpha + } else { + p.stateTime = timeOffsetColonAlpha + } + p.tzi = i break iterTimeRunes } case timePeriod: - // 15:04:05.999999999+07:00 - // 15:04:05.999999999-07:00 - // 15:04:05.999999+07:00 - // 15:04:05.999999-07:00 - // 15:04:05.999+07:00 - // 15:04:05.999-07:00 + // 15:04:05.999999999 + // 15:04:05.999999999 + // 15:04:05.999999 + // 15:04:05.999999 + // 15:04:05.999 + // 15:04:05.999 // timePeriod // 17:24:37.3186369 // 00:07:31.945167 // 18:31:59.257000000 // 00:00:00.000 - // timePeriodOffset - // 19:55:00.799+0100 - // timePeriodOffsetColon - // 15:04:05.999-07:00 - // timePeriodWs - // timePeriodWsOffset - // 00:07:31.945167 +0000 - // 00:00:00.000 +0000 - // With Extra - // 00:00:00.000 +0300 +03 - // timePeriodWsOffsetAlpha - // 00:07:31.945167 +0000 UTC - // 00:00:00.000 +0000 UTC - // 22:18:00.001 +0000 UTC m=+0.000000001 - // timePeriodWsAlpha - // 06:20:00.000 UTC + // (note: if we have an offset (+/-) or whitespace (Ws) after this state, re-enter the timeWs or timeOffset + // state above so that we do not have to duplicate all of the logic again for this parsing just because we + // have parsed a fractional second...) 
switch r { case ' ': p.mslen = i - p.msi - p.stateTime = timePeriodWs + p.coalesceTime(i) + p.stateTime = timeWs case '+', '-': - // This really shouldn't happen p.mslen = i - p.msi p.offseti = i - p.stateTime = timePeriodOffset - default: - if unicode.IsLetter(r) { - // 06:20:00.000 UTC + p.stateTime = timeOffset + case 'Z': + p.stateTime = timeZ + p.mslen = i - p.msi + // (Z)ulu time + p.loc = time.UTC + endPos := i + 1 + if endPos > p.formatSetLen { + p.formatSetLen = endPos + } + case 'a', 'A', 'p', 'P': + // Could be AM/PM + isLower := r == 'a' || r == 'p' + switch { + case isLower && p.nextIs(i, 'm'): + p.mslen = i - p.msi + p.coalesceTime(i) + p.set(i, "pm") + // skip 'm' + i++ + p.stateTime = timePeriodAMPM + case !isLower && p.nextIs(i, 'M'): p.mslen = i - p.msi - p.stateTime = timePeriodWsAlpha + p.coalesceTime(i) + p.set(i, "PM") + // skip 'M' + i++ + p.stateTime = timePeriodAMPM } - } - case timePeriodOffset: - // timePeriodOffset - // 19:55:00.799+0100 - // timePeriodOffsetColon - // 15:04:05.999-07:00 - // 13:31:51.999-07:00 MST - if r == ':' { - p.stateTime = timePeriodOffsetColon - } - case timePeriodOffsetColon: - // timePeriodOffset - // timePeriodOffsetColon - // 15:04:05.999-07:00 - // 13:31:51.999 -07:00 MST - switch r { - case ' ': - p.set(p.offseti, "-07:00") - p.stateTime = timePeriodOffsetColonWs - p.tzi = i + 1 - } - case timePeriodOffsetColonWs: - // continue - case timePeriodWs: - // timePeriodWs - // timePeriodWsOffset - // 00:07:31.945167 +0000 - // 00:00:00.000 +0000 - // timePeriodWsOffsetAlpha - // 00:07:31.945167 +0000 UTC - // 00:00:00.000 +0000 UTC - // timePeriodWsOffsetColon - // 13:31:51.999 -07:00 MST - // timePeriodWsAlpha - // 06:20:00.000 UTC - if p.offseti == 0 { - p.offseti = i - } - switch r { - case '+', '-': - p.mslen = i - p.msi - 1 - p.stateTime = timePeriodWsOffset default: - if unicode.IsLetter(r) { - // 00:07:31.945167 +0000 UTC - // 00:00:00.000 +0000 UTC - p.stateTime = timePeriodWsOffsetWsAlpha - break 
iterTimeRunes + if !unicode.IsDigit(r) { + return p, unexpectedTail(p.datestr[i:]) } } - - case timePeriodWsOffset: - // timePeriodWs - // timePeriodWsOffset - // 00:07:31.945167 +0000 - // 00:00:00.000 +0000 - // With Extra - // 00:00:00.000 +0300 +03 - // timePeriodWsOffsetAlpha - // 00:07:31.945167 +0000 UTC - // 00:00:00.000 +0000 UTC - // 03:02:00.001 +0300 MSK m=+0.000000001 - // timePeriodWsOffsetColon - // 13:31:51.999 -07:00 MST - // timePeriodWsAlpha - // 06:20:00.000 UTC + case timePeriodAMPM: switch r { - case ':': - p.stateTime = timePeriodWsOffsetColon case ' ': - p.set(p.offseti, "-0700") + p.stateTime = timeWs case '+', '-': - // This really doesn't seem valid, but for some reason when round-tripping a go date - // their is an extra +03 printed out. seems like go bug to me, but, parsing anyway. - // 00:00:00.000 +0300 +03 - // 00:00:00.000 +0300 +0300 - p.extra = i - 1 - p.trimExtra() - default: - if unicode.IsLetter(r) { - // 00:07:31.945167 +0000 UTC - // 00:00:00.000 +0000 UTC - // 03:02:00.001 +0300 MSK m=+0.000000001 - p.stateTime = timePeriodWsOffsetWsAlpha - } - } - case timePeriodWsOffsetWsAlpha: - // 03:02:00.001 +0300 MSK m=+0.000000001 - // eff you golang - if r == '=' && datestr[i-1] == 'm' { - p.extra = i - 2 - p.trimExtra() - break iterTimeRunes - } - - case timePeriodWsOffsetColon: - // 13:31:51.999 -07:00 MST - switch r { - case ' ': - p.set(p.offseti, "-07:00") + p.offseti = i + p.stateTime = timeOffset default: - if unicode.IsLetter(r) { - // 13:31:51.999 -07:00 MST - p.tzi = i - p.stateTime = timePeriodWsOffsetColonAlpha - } + return p, unexpectedTail(p.datestr[i:]) } - case timePeriodWsOffsetColonAlpha: - // continue case timeZ: - // timeZ - // 15:04:05.99Z - // With a time-zone at end after Z - // 2006-01-02T15:04:05.999999999Z07:00 - // 2006-01-02T15:04:05Z07:00 - // RFC3339 = "2006-01-02T15:04:05Z07:00" - // RFC3339Nano = "2006-01-02T15:04:05.999999999Z07:00" - if unicode.IsDigit(r) { - p.stateTime = timeZDigit - } - + // 
nothing expected can come after Z + return p, unexpectedTail(p.datestr[i:]) } } switch p.stateTime { - case timeWsAlpha: + case timeOffsetColonAlpha, timeWsOffsetColonAlpha: + // process offset + offsetLen := i - p.offseti + switch offsetLen { + case 6, 7: + // may or may not have a space on the end + if offsetLen == 7 { + if p.datestr[p.offseti+6] != ' ' { + return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, string(p.datestr[p.offseti:p.offseti+offsetLen])) + } + } + p.set(p.offseti, "-07:00") + default: + return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, string(p.datestr[p.offseti:p.offseti+offsetLen])) + } + // process timezone switch len(p.datestr) - p.tzi { case 3: // 13:31:51.999 +01:00 CET p.set(p.tzi, "MST") case 4: + p.set(p.tzi, "MST ") + default: + return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, string(p.datestr[p.tzi:])) + } + case timeWsAlpha: + switch len(p.datestr) - p.tzi { + case 3: + // 13:31:51.999 +01:00 CET p.set(p.tzi, "MST") - p.extra = len(p.datestr) - 1 - p.trimExtra() + case 4: + p.set(p.tzi, "MST ") + default: + return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, string(p.datestr[p.tzi:])) } case timeWsAlphaWs: p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } case timeWsYear: p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } case timeWsAlphaZoneOffsetWsExtra: - p.trimExtra() + p.trimExtra(false) case timeWsAlphaZoneOffset: // 06:20:00 UTC-05 switch i - p.offseti { @@ -1691,25 +1743,27 @@ iterRunes: p.set(p.offseti, "-0700") case 6: p.set(p.offseti, "-07:00") + default: + return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, string(p.datestr[p.offseti:i])) } case timePeriod: p.mslen = i - p.msi - case timeOffset: 
- + if p.mslen >= 10 { + return p, fmt.Errorf("fractional seconds in %q too long near %q", datestr, string(p.datestr[p.msi:p.mslen])) + } + case timeOffset, timeWsOffset: switch len(p.datestr) - p.offseti { - case 0, 1, 2, 4: - return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, string(datestr[p.offseti:])) case 3: - // 19:55:00+01 + // 19:55:00+01 (or 19:55:00 +01) p.set(p.offseti, "-07") case 5: - // 19:55:00+0100 + // 19:55:00+0100 (or 19:55:00 +0100) p.set(p.offseti, "-0700") + default: + return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, string(p.datestr[p.offseti:])) } - case timeWsOffset: - p.set(p.offseti, "-0700") case timeWsOffsetWs: // 17:57:51 -0700 2009 // 00:12:00 +0000 UTC @@ -1721,30 +1775,19 @@ iterRunes: case 4: // 13:31:51.999 +01:00 CEST p.set(p.tzi, "MST ") + default: + return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, string(p.datestr[p.tzi:])) } - } - case timeWsOffsetColon: - // 17:57:51 -07:00 - p.set(p.offseti, "-07:00") - case timeOffsetColon: - // 15:04:05+07:00 - p.set(p.offseti, "-07:00") - case timePeriodOffset: - // 19:55:00.799+0100 - p.set(p.offseti, "-0700") - case timePeriodOffsetColon: - p.set(p.offseti, "-07:00") - case timePeriodWsOffsetColonAlpha: - p.tzlen = i - p.tzi - switch p.tzlen { - case 3: - p.set(p.tzi, "MST") - case 4: - p.set(p.tzi, "MST ") + case timeOffsetColon, timeWsOffsetColon: + // 17:57:51 -07:00 (or 19:55:00.799 +01:00) + // 15:04:05+07:00 (or 19:55:00.799+01:00) + switch len(p.datestr) - p.offseti { + case 6: + p.set(p.offseti, "-07:00") + default: + return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, string(p.datestr[p.offseti:])) } - case timePeriodWsOffset: - p.set(p.offseti, "-0700") } p.coalesceTime(i) } @@ -1761,36 +1804,36 @@ iterRunes: // 20140601 8 yyyymmdd // 2014 4 yyyy t := time.Time{} - if 
len(datestr) == len("1499979655583057426") { // 19 + if len(p.datestr) == len("1499979655583057426") { // 19 // nano-seconds - if nanoSecs, err := strconv.ParseInt(datestr, 10, 64); err == nil { + if nanoSecs, err := strconv.ParseInt(p.datestr, 10, 64); err == nil { t = time.Unix(0, nanoSecs) } - } else if len(datestr) == len("1499979795437000") { // 16 + } else if len(p.datestr) == len("1499979795437000") { // 16 // micro-seconds - if microSecs, err := strconv.ParseInt(datestr, 10, 64); err == nil { + if microSecs, err := strconv.ParseInt(p.datestr, 10, 64); err == nil { t = time.Unix(0, microSecs*1000) } - } else if len(datestr) == len("yyyyMMddhhmmss") { // 14 + } else if len(p.datestr) == len("yyyyMMddhhmmss") { // 14 // yyyyMMddhhmmss - p.format = []byte("20060102150405") + p.setEntireFormat([]byte("20060102150405")) return p, nil - } else if len(datestr) == len("1332151919000") { // 13 - if miliSecs, err := strconv.ParseInt(datestr, 10, 64); err == nil { + } else if len(p.datestr) == len("1332151919000") { // 13 + if miliSecs, err := strconv.ParseInt(p.datestr, 10, 64); err == nil { t = time.Unix(0, miliSecs*1000*1000) } - } else if len(datestr) == len("1332151919") { //10 - if secs, err := strconv.ParseInt(datestr, 10, 64); err == nil { + } else if len(p.datestr) == len("1332151919") { //10 + if secs, err := strconv.ParseInt(p.datestr, 10, 64); err == nil { t = time.Unix(secs, 0) } - } else if len(datestr) == len("20140601") { - p.format = []byte("20060102") + } else if len(p.datestr) == len("20140601") { + p.setEntireFormat([]byte("20060102")) return p, nil - } else if len(datestr) == len("2014") { - p.format = []byte("2006") + } else if len(p.datestr) == len("2014") { + p.setEntireFormat([]byte("2006")) return p, nil - } else if len(datestr) < 4 { - return nil, fmt.Errorf("unrecognized format, too short %v", datestr) + } else if len(p.datestr) < 4 { + return p, fmt.Errorf("unrecognized format, too short %v", datestr) } if !t.IsZero() { if loc == nil { @@ 
-1830,7 +1873,9 @@ iterRunes: // 2013-Feb-03 // 2013-Feb-3 p.daylen = i - p.dayi - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } return p, nil case dateYearDashDashWs: @@ -1844,14 +1889,16 @@ iterRunes: // 13-Feb-03 ambiguous // 28-Feb-03 ambiguous // 29-Jun-2016 - length := len(datestr) - (p.moi + p.molen + 1) + length := len(p.datestr) - (p.moi + p.molen + 1) if length == 4 { p.yearlen = 4 p.set(p.yeari, "2006") // We now also know that part1 was the day p.dayi = 0 p.daylen = p.part1Len - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } } else if length == 2 { // We have no idea if this is // yy-mon-dd OR dd-mon-yy @@ -1863,7 +1910,9 @@ iterRunes: // We now also know that part1 was the day p.dayi = 0 p.daylen = p.part1Len - p.setDay() + if !p.setDay() { + return p, unknownErr(datestr) + } } return p, nil @@ -1871,7 +1920,9 @@ iterRunes: case dateDigitDot: // 2014.05 p.molen = i - p.moi - p.setMonth() + if !p.setMonth() { + return p, unknownErr(datestr) + } return p, nil case dateDigitDotDot: @@ -1891,19 +1942,11 @@ iterRunes: // 12 Feb 2006, 19:17 return p, nil - case dateDigitWsMolong: - // 18 January 2018 - // 8 January 2018 - if p.daylen == 2 { - p.format = []byte("02 January 2006") - return p, nil - } - p.format = []byte("2 January 2006") - return p, nil // parse("2 January 2006", datestr, loc) - case dateAlphaWsMonth: p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } return p, nil case dateAlphaWsMonthMore: @@ -1912,7 +1955,9 @@ iterRunes: case dateAlphaWsDigitMoreWs: // oct 1, 1970 p.yearlen = i - p.yeari - p.setYear() + if !p.setYear() { + return p, unknownErr(datestr) + } return p, nil case dateAlphaWsDigitMoreWsYear: @@ -1953,11 +1998,11 @@ iterRunes: case dateDigitChineseYear: // dateDigitChineseYear // 2014年04月08日 - p.format = []byte("2006年01月02日") + p.setEntireFormat([]byte("2006年01月02日")) return p, nil case dateDigitChineseYearWs: - p.format = []byte("2006年01月02日 
15:04:05") + p.setEntireFormat([]byte("2006年01月02日 15:04:05")) return p, nil case dateWeekdayComma: @@ -1973,7 +2018,7 @@ iterRunes: } - return nil, unknownErr(datestr) + return p, unknownErr(datestr) } type parser struct { @@ -1981,9 +2026,11 @@ type parser struct { preferMonthFirst bool retryAmbiguousDateWithSwap bool ambiguousMD bool + allowPartialStringMatch bool stateDate dateState stateTime timeState format []byte + formatSetLen int datestr string fullMonth string skip int @@ -2029,6 +2076,16 @@ func RetryAmbiguousDateWithSwap(retryAmbiguousDateWithSwap bool) ParserOption { } } +// AllowPartialStringMatch is an option that allows allowPartialStringMatch to be changed from its default. +// If true, then strings can be attempted to be parsed / matched even if the end of the string might contain +// more than a date/time. This defaults to false. +func AllowPartialStringMatch(allowPartialStringMatch bool) ParserOption { + return func(p *parser) error { + p.allowPartialStringMatch = allowPartialStringMatch + return nil + } +} + func newParser(dateStr string, loc *time.Location, opts ...ParserOption) (*parser, error) { p := &parser{ stateDate: dateStart, @@ -2039,6 +2096,8 @@ func newParser(dateStr string, loc *time.Location, opts ...ParserOption) (*parse retryAmbiguousDateWithSwap: false, } p.format = []byte(dateStr) + // this tracks how much of the format string has been set, to make sure all of it is set + p.formatSetLen = 0 // allow the options to mutate the parser fields from their defaults for _, option := range opts { @@ -2056,6 +2115,11 @@ func (p *parser) nextIs(i int, b byte) bool { return false } +func (p *parser) setEntireFormat(format []byte) { + p.format = format + p.formatSetLen = len(format) +} + func (p *parser) set(start int, val string) { if start < 0 { return @@ -2066,44 +2130,91 @@ func (p *parser) set(start int, val string) { for i, r := range val { p.format[start+i] = byte(r) } + endingPos := start + len(val) + if endingPos > p.formatSetLen { 
+ p.formatSetLen = endingPos + } } -func (p *parser) setMonth() { +func (p *parser) setMonth() bool { if p.molen == 2 { p.set(p.moi, "01") + return true } else if p.molen == 1 { p.set(p.moi, "1") + return true + } else { + return false } } -func (p *parser) setDay() { +func (p *parser) setDay() bool { if p.daylen == 2 { p.set(p.dayi, "02") + return true } else if p.daylen == 1 { p.set(p.dayi, "2") + return true + } else { + return false } } -func (p *parser) setYear() { +func (p *parser) setYear() bool { if p.yearlen == 2 { p.set(p.yeari, "06") + return true } else if p.yearlen == 4 { p.set(p.yeari, "2006") + return true + } else { + return false + } +} + +// Find the proper end of the current component (scanning chars starting from start and going +// up until the end, and either returning at end or returning the first character that is +// not allowed, as determined by allowNumeric, allowAlpha, and allowOther) +func findProperEnd(s string, start, end int, allowNumeric bool, allowAlpha bool, allowOther bool) int { + for i := start; i < end; i++ { + c := s[i] + if c >= '0' && c <= '9' { + if !allowNumeric { + return i + } + } else if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') { + if !allowAlpha { + return i + } + } else { + if !allowOther { + return i + } + } } + return end } -func (p *parser) coalesceDate(end int) { + +func (p *parser) coalesceDate(end int) bool { if p.yeari > 0 { if p.yearlen == 0 { - p.yearlen = end - p.yeari + p.yearlen = findProperEnd(p.datestr, p.yeari, end, true, false, false) - p.yeari + } + if !p.setYear() { + return false } - p.setYear() } if p.moi > 0 && p.molen == 0 { - p.molen = end - p.moi + p.molen = findProperEnd(p.datestr, p.moi, end, true, true, false) - p.moi + // The month may be the name of the month, so don't treat as invalid in this case. + // We can ignore the return value here. 
p.setMonth() } if p.dayi > 0 && p.daylen == 0 { - p.daylen = end - p.dayi - p.setDay() + p.daylen = findProperEnd(p.datestr, p.dayi, end, true, false, false) - p.dayi + if !p.setDay() { + return false + } } + return true } func (p *parser) ts() string { return fmt.Sprintf("h:(%d:%d) m:(%d:%d) s:(%d:%d)", p.houri, p.hourlen, p.mini, p.minlen, p.seci, p.seclen) @@ -2149,18 +2260,41 @@ func (p *parser) coalesceTime(end int) { for i := 0; i < p.mslen; i++ { p.format[p.msi+i] = '0' } + endPos := p.msi + p.mslen + if endPos > p.formatSetLen { + p.formatSetLen = endPos + } } } func (p *parser) setFullMonth(month string) { - if p.moi == 0 { - p.format = []byte(fmt.Sprintf("%s%s", "January", p.format[len(month):])) + oldLen := len(p.format) + const fullMonth = "January" + p.format = []byte(fmt.Sprintf("%s%s%s", p.format[0:p.moi], fullMonth, p.format[p.moi+len(month):])) + newLen := len(p.format) + if newLen > oldLen && p.formatSetLen >= p.moi { + p.formatSetLen += newLen - oldLen + } else if newLen < oldLen && p.formatSetLen >= p.moi { + p.formatSetLen -= oldLen - newLen + } + + if p.formatSetLen > len(p.format) { + p.formatSetLen = len(p.format) + } else if p.formatSetLen < len(fullMonth) { + p.formatSetLen = len(fullMonth) + } else if p.formatSetLen < 0 { + p.formatSetLen = 0 } } -func (p *parser) trimExtra() { +func (p *parser) trimExtra(onlyTrimFormat bool) { if p.extra > 0 && len(p.format) > p.extra { p.format = p.format[0:p.extra] - p.datestr = p.datestr[0:p.extra] + if p.formatSetLen > len(p.format) { + p.formatSetLen = len(p.format) + } + if !onlyTrimFormat { + p.datestr = p.datestr[0:p.extra] + } } } @@ -2171,8 +2305,23 @@ func (p *parser) parse() (time.Time, error) { if len(p.fullMonth) > 0 { p.setFullMonth(p.fullMonth) } + + // Make sure that the entire string matched to a known format that was detected + if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) { + // We can always ignore punctuation at the end of a date/time, but do not allow + // any 
numbers or letters in the format string. + validFormatTo := findProperEnd(string(p.format), p.formatSetLen, len(p.format), false, false, true) + if validFormatTo < len(p.format) { + return time.Time{}, unexpectedTail(string(p.format[p.formatSetLen:])) + } + } + if p.skip > 0 && len(p.format) > p.skip { p.format = p.format[p.skip:] + p.formatSetLen -= p.skip + if p.formatSetLen < 0 { + p.formatSetLen = 0 + } p.datestr = p.datestr[p.skip:] } diff --git a/parseany_test.go b/parseany_test.go index 0193b52..4989161 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -15,8 +15,8 @@ func TestOne(t *testing.T) { } type dateTest struct { - in, out, loc string - err bool + in, out, loc, zname string + err bool } var testInputs = []dateTest{ @@ -44,47 +44,72 @@ var testInputs = []dateTest{ {in: "Thu May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, {in: "Thu May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, // ANSIC_GLIBC = "Mon 02 Jan 2006 03:04:05 PM UTC" - {in: "Mon 02 Jan 2006 03:04:05 PM UTC", out: "2006-01-02 15:04:05 +0000 UTC"}, - {in: "Mon 30 Sep 2018 09:09:09 PM UTC", out: "2018-09-30 21:09:09 +0000 UTC"}, + {in: "Mon 02 Jan 2006 03:04:05 PM UTC", out: "2006-01-02 15:04:05 +0000 UTC", zname: "UTC"}, + {in: "Mon 02 Jan 2006 03:04:05 PM CEST", out: "2006-01-02 15:04:05 +0000 UTC", zname: "CEST"}, + {in: "Mon 30 Sep 2018 09:09:09 PM UTC", out: "2018-09-30 21:09:09 +0000 UTC", zname: "UTC"}, + {in: "Mon 30 Sep 2018 09:09:09 PM CEST", out: "2018-09-30 21:09:09 +0000 UTC", zname: "CEST"}, // RubyDate = "Mon Jan 02 15:04:05 -0700 2006" {in: "Mon Jan 02 15:04:05 -0700 2006", out: "2006-01-02 22:04:05 +0000 UTC"}, {in: "Thu May 08 11:57:51 -0700 2009", out: "2009-05-08 18:57:51 +0000 UTC"}, // UnixDate = "Mon Jan _2 15:04:05 MST 2006" - {in: "Mon Jan 2 15:04:05 MST 2006", out: "2006-01-02 15:04:05 +0000 UTC"}, - {in: "Thu May 8 17:57:51 MST 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, - {in: "Thu May 8 17:57:51 PST 2009", out: "2009-05-08 17:57:51 
+0000 UTC"}, - {in: "Thu May 08 17:57:51 PST 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, - {in: "Thu May 08 17:57:51 CEST 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, - {in: "Thu May 08 05:05:07 PST 2009", out: "2009-05-08 05:05:07 +0000 UTC"}, - {in: "Thu May 08 5:5:7 PST 2009", out: "2009-05-08 05:05:07 +0000 UTC"}, + {in: "Mon Jan 2 15:04:05 MST 2006", out: "2006-01-02 15:04:05 +0000 UTC", zname: "MST"}, + {in: "Thu May 8 17:57:51 MST 2009", out: "2009-05-08 17:57:51 +0000 UTC", zname: "MST"}, + {in: "Thu May 8 17:57:51 PST 2009", out: "2009-05-08 17:57:51 +0000 UTC", zname: "PST"}, + {in: "Thu May 08 17:57:51 PST 2009", out: "2009-05-08 17:57:51 +0000 UTC", zname: "PST"}, + {in: "Thu May 08 17:57:51 CEST 2009", out: "2009-05-08 17:57:51 +0000 UTC", zname: "CEST"}, + {in: "Thu May 08 17:57:51 CEST 2009", out: "2009-05-08 15:57:51 +0000 UTC", loc: "Europe/Berlin"}, + {in: "Thu May 08 05:05:07 PST 2009", out: "2009-05-08 05:05:07 +0000 UTC", zname: "PST"}, + {in: "Thu May 08 5:5:7 PST 2009", out: "2009-05-08 05:05:07 +0000 UTC", zname: "PST"}, // Day Month dd time - {in: "Mon Aug 10 15:44:11 UTC+0000 2015", out: "2015-08-10 15:44:11 +0000 UTC"}, - {in: "Mon Aug 10 15:44:11 PST-0700 2015", out: "2015-08-10 22:44:11 +0000 UTC"}, - {in: "Mon Aug 10 15:44:11 CEST+0200 2015", out: "2015-08-10 13:44:11 +0000 UTC"}, - {in: "Mon Aug 1 15:44:11 CEST+0200 2015", out: "2015-08-01 13:44:11 +0000 UTC"}, - {in: "Mon Aug 1 5:44:11 CEST+0200 2015", out: "2015-08-01 03:44:11 +0000 UTC"}, + {in: "Mon Aug 10 15:44:11 UTC+0000 2015", out: "2015-08-10 15:44:11 +0000 UTC", zname: "UTC"}, + {in: "Mon Aug 10 15:44:11 PST-0700 2015", out: "2015-08-10 22:44:11 +0000 UTC", zname: "PST"}, + {in: "Mon Aug 10 15:44:11 CEST+0200 2015", out: "2015-08-10 13:44:11 +0000 UTC", zname: "CEST"}, + {in: "Mon Aug 1 15:44:11 CEST+0200 2015", out: "2015-08-01 13:44:11 +0000 UTC", zname: "CEST"}, + {in: "Mon Aug 1 5:44:11 CEST+0200 2015", out: "2015-08-01 03:44:11 +0000 UTC", zname: "CEST"}, // ?? 
{in: "Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)", out: "2015-07-03 17:04:07 +0000 UTC"}, {in: "Fri Jul 3 2015 06:04:07 GMT+0100 (GMT Daylight Time)", out: "2015-07-03 05:04:07 +0000 UTC"}, - {in: "Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time)", out: "2015-07-03 13:04:07 +0000 UTC"}, + {in: "Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time)", out: "2015-07-03 13:04:07 +0000 UTC", zname: "PST"}, + {in: "Fri Jul 3 2015 06:04:07 CEST-0700 (Central European Summer Time)", out: "2015-07-03 13:04:07 +0000 UTC", zname: "CEST"}, // Month dd, yyyy at time - {in: "September 17, 2012 at 5:00pm UTC-05", out: "2012-09-17 17:00:00 +0000 UTC"}, - {in: "September 17, 2012 at 10:09am PST-08", out: "2012-09-17 18:09:00 +0000 UTC"}, + {in: "January 17, 2012 at 18:17:16", out: "2012-01-17 18:17:16 +0000 UTC"}, + {in: "February 17, 2012 at 18:17:16", out: "2012-02-17 18:17:16 +0000 UTC"}, + {in: "march 17, 2012 at 18:17:16", out: "2012-03-17 18:17:16 +0000 UTC"}, + {in: "APRIL 17, 2012 at 18:17:16", out: "2012-04-17 18:17:16 +0000 UTC"}, + {in: "May 17, 2012 at 18:17:16", out: "2012-05-17 18:17:16 +0000 UTC"}, + {in: "June 17, 2012 at 18:17:16", out: "2012-06-17 18:17:16 +0000 UTC"}, + {in: "July 17, 2012 at 18:17:16", out: "2012-07-17 18:17:16 +0000 UTC"}, + {in: "august 17, 2012 at 18:17:16", out: "2012-08-17 18:17:16 +0000 UTC"}, + {in: "September 17, 2012 at 18:17:16", out: "2012-09-17 18:17:16 +0000 UTC"}, + {in: "OCTober 17, 2012 at 18:17:16", out: "2012-10-17 18:17:16 +0000 UTC"}, + {in: "noVEMBER 17, 2012 at 18:17:16", out: "2012-11-17 18:17:16 +0000 UTC"}, + {in: "December 17, 2012 at 18:17:16", out: "2012-12-17 18:17:16 +0000 UTC"}, + {in: "September 17, 2012 at 5:00pm UTC-05", out: "2012-09-17 17:00:00 +0000 UTC", zname: "UTC"}, + {in: "September 17, 2012 at 10:09am PST-08", out: "2012-09-17 18:09:00 +0000 UTC", zname: "PST"}, + {in: "September 17, 2012 at 10:09am CEST+02", out: "2012-09-17 08:09:00 +0000 UTC", zname: "CEST"}, {in: "September 17, 
2012, 10:10:09", out: "2012-09-17 10:10:09 +0000 UTC"}, - {in: "May 17, 2012 at 10:09am PST-08", out: "2012-05-17 18:09:00 +0000 UTC"}, - {in: "May 17, 2012 AT 10:09am PST-08", out: "2012-05-17 18:09:00 +0000 UTC"}, + {in: "May 17, 2012 at 10:09am PST-08", out: "2012-05-17 18:09:00 +0000 UTC", zname: "PST"}, + {in: "May 17, 2012 AT 10:09am PST-08", out: "2012-05-17 18:09:00 +0000 UTC", zname: "PST"}, + {in: "May 17, 2012 AT 10:09am CEST+02", out: "2012-05-17 08:09:00 +0000 UTC", zname: "CEST"}, // Month dd, yyyy time - {in: "September 17, 2012 5:00pm UTC-05", out: "2012-09-17 17:00:00 +0000 UTC"}, - {in: "September 17, 2012 10:09am PST-08", out: "2012-09-17 18:09:00 +0000 UTC"}, + {in: "September 17, 2012 5:00pm UTC-05", out: "2012-09-17 17:00:00 +0000 UTC", zname: "UTC"}, + {in: "September 17, 2012 10:09am PST-08", out: "2012-09-17 18:09:00 +0000 UTC", zname: "PST"}, + {in: "September 17, 2012 10:09am CEST+02", out: "2012-09-17 08:09:00 +0000 UTC", zname: "CEST"}, {in: "September 17, 2012 09:01:00", out: "2012-09-17 09:01:00 +0000 UTC"}, // Month dd yyyy time - {in: "September 17 2012 5:00pm UTC-05", out: "2012-09-17 17:00:00 +0000 UTC"}, - {in: "September 17 2012 5:00pm UTC-0500", out: "2012-09-17 17:00:00 +0000 UTC"}, - {in: "September 17 2012 10:09am PST-08", out: "2012-09-17 18:09:00 +0000 UTC"}, - {in: "September 17 2012 5:00PM UTC-05", out: "2012-09-17 17:00:00 +0000 UTC"}, - {in: "September 17 2012 10:09AM PST-08", out: "2012-09-17 18:09:00 +0000 UTC"}, + {in: "September 17 2012 5:00pm UTC-05", out: "2012-09-17 17:00:00 +0000 UTC", zname: "UTC"}, + {in: "September 17 2012 5:00pm UTC-0500", out: "2012-09-17 17:00:00 +0000 UTC", zname: "UTC"}, + {in: "September 17 2012 10:09am PST-08", out: "2012-09-17 18:09:00 +0000 UTC", zname: "PST"}, + {in: "September 17 2012 10:09am CEST+02", out: "2012-09-17 08:09:00 +0000 UTC", zname: "CEST"}, + {in: "September 17 2012 5:00PM UTC-05", out: "2012-09-17 17:00:00 +0000 UTC", zname: "UTC"}, + {in: "September 17 2012 
10:09AM PST-08", out: "2012-09-17 18:09:00 +0000 UTC", zname: "PST"}, + {in: "September 17 2012 10:09AM CEST+02", out: "2012-09-17 08:09:00 +0000 UTC", zname: "CEST"}, {in: "September 17 2012 09:01:00", out: "2012-09-17 09:01:00 +0000 UTC"}, {in: "May 17, 2012 10:10:09", out: "2012-05-17 10:10:09 +0000 UTC"}, + {in: "July 30 2022 08:33:53 AM PST", out: "2022-07-30 08:33:53 +0000 UTC", zname: "PST"}, + {in: "July 30 2022 08:33:53 AM CEST", out: "2022-07-30 08:33:53 +0000 UTC", zname: "CEST"}, + {in: "July 30 2022 08:33:53 PM PST", out: "2022-07-30 20:33:53 +0000 UTC", zname: "PST"}, + {in: "July 30 2022 08:33:53 PM CEST", out: "2022-07-30 20:33:53 +0000 UTC", zname: "CEST"}, // Month dd, yyyy {in: "September 17, 2012", out: "2012-09-17 00:00:00 +0000 UTC"}, {in: "May 7, 2012", out: "2012-05-07 00:00:00 +0000 UTC"}, @@ -107,13 +132,17 @@ var testInputs = []dateTest{ {in: "June 22nd, 2012", out: "2012-06-22 00:00:00 +0000 UTC"}, {in: "June 22nd 2012", out: "2012-06-22 00:00:00 +0000 UTC"}, // RFC1123 = "Mon, 02 Jan 2006 15:04:05 MST" - {in: "Fri, 03 Jul 2015 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC"}, - //{in: "Fri, 03 Jul 2015 08:08:08 CET", out: "2015-07-03 08:08:08 +0000 UTC"}, + {in: "Fri, 03 Jul 2015 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, + {in: "Fri, 03 Jul 2015 08:08:08 CET", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CET"}, {in: "Fri, 03 Jul 2015 08:08:08 PST", out: "2015-07-03 16:08:08 +0000 UTC", loc: "America/Los_Angeles"}, - {in: "Fri, 03 Jul 2015 08:08:08 PST", out: "2015-07-03 08:08:08 +0000 UTC"}, - {in: "Fri, 3 Jul 2015 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC"}, - {in: "Fri, 03 Jul 2015 8:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC"}, - {in: "Fri, 03 Jul 2015 8:8:8 MST", out: "2015-07-03 08:08:08 +0000 UTC"}, + {in: "Fri, 03 Jul 2015 08:08:08 PST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "PST"}, + {in: "Fri, 03 Jul 2015 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, + 
{in: "Fri, 3 Jul 2015 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, + {in: "Fri, 3 Jul 2015 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, + {in: "Fri, 03 Jul 2015 8:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, + {in: "Fri, 03 Jul 2015 8:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, + {in: "Fri, 03 Jul 2015 8:8:8 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, + {in: "Fri, 03 Jul 2015 8:8:8 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, // ? {in: "Thu, 03 Jul 2017 08:08:04 +0100", out: "2017-07-03 07:08:04 +0000 UTC"}, {in: "Thu, 03 Jul 2017 08:08:04 -0100", out: "2017-07-03 09:08:04 +0000 UTC"}, @@ -122,21 +151,28 @@ var testInputs = []dateTest{ {in: "Thu, 03 Jul 2017 8:8:4 +0100", out: "2017-07-03 07:08:04 +0000 UTC"}, // {in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC"}, - {in: "Tue, 5 Jul 2017 04:08:03 -0700 (CEST)", out: "2017-07-05 11:08:03 +0000 UTC"}, + {in: "Tue, 5 Jul 2017 04:08:03 -0700 (MST)", out: "2017-07-05 11:08:03 +0000 UTC"}, {in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC", loc: "Europe/Berlin"}, // day, dd-Mon-yy hh:mm:zz TZ - {in: "Fri, 03-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC"}, + {in: "Fri, 03-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, + {in: "Fri, 03-Jul-15 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, {in: "Fri, 03-Jul-15 08:08:08 PST", out: "2015-07-03 16:08:08 +0000 UTC", loc: "America/Los_Angeles"}, - {in: "Fri, 03-Jul 2015 08:08:08 PST", out: "2015-07-03 08:08:08 +0000 UTC"}, - {in: "Fri, 3-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC"}, - {in: "Fri, 03-Jul-15 8:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC"}, - {in: "Fri, 03-Jul-15 8:8:8 MST", out: "2015-07-03 08:08:08 +0000 UTC"}, + {in: "Fri, 03-Jul 2015 08:08:08 PST", out: "2015-07-03 08:08:08 +0000 UTC", zname: 
"PST"}, + {in: "Fri, 03-Jul 2015 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, + {in: "Fri, 3-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, + {in: "Fri, 3-Jul-15 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, + {in: "Fri, 03-Jul-15 8:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, + {in: "Fri, 03-Jul-15 8:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, + {in: "Fri, 03-Jul-15 8:8:8 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, + {in: "Fri, 03-Jul-15 8:8:8 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, // day, dd-Mon-yy hh:mm:zz TZ (text) https://github.com/araddon/dateparse/issues/116 {in: "Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00)", out: "2021-01-02 16:12:23 +0000 UTC"}, // RFC850 = "Monday, 02-Jan-06 15:04:05 MST" - {in: "Wednesday, 07-May-09 08:00:43 MST", out: "2009-05-07 08:00:43 +0000 UTC"}, - {in: "Wednesday, 28-Feb-18 09:01:00 MST", out: "2018-02-28 09:01:00 +0000 UTC"}, + {in: "Wednesday, 07-May-09 08:00:43 MST", out: "2009-05-07 08:00:43 +0000 UTC", zname: "MST"}, + {in: "Wednesday, 07-May-09 08:00:43 CEST", out: "2009-05-07 08:00:43 +0000 UTC", zname: "CEST"}, + {in: "Wednesday, 28-Feb-18 09:01:00 MST", out: "2018-02-28 09:01:00 +0000 UTC", zname: "MST"}, {in: "Wednesday, 28-Feb-18 09:01:00 MST", out: "2018-02-28 16:01:00 +0000 UTC", loc: "America/Denver"}, + {in: "Wednesday, 28-Feb-18 09:01:00 CEST", out: "2018-02-28 09:01:00 +0000 UTC", zname: "CEST"}, // with offset then with variations on non-zero filled stuff {in: "Monday, 02 Jan 2006 15:04:05 +0100", out: "2006-01-02 14:04:05 +0000 UTC"}, {in: "Wednesday, 28 Feb 2018 09:01:00 -0300", out: "2018-02-28 12:01:00 +0000 UTC"}, @@ -154,7 +190,8 @@ var testInputs = []dateTest{ {in: "7 Feb 2004 9:7:8", out: "2004-02-07 09:07:08 +0000 UTC"}, {in: "07 Feb 2004 09:07:08.123", out: "2004-02-07 09:07:08.123 +0000 UTC"}, // dd-mon-yyyy 12 Feb 2006, 19:17:08 GMT - {in: "07 Feb 
2004, 09:07:07 GMT", out: "2004-02-07 09:07:07 +0000 UTC"}, + {in: "07 Feb 2004, 09:07:07 GMT", out: "2004-02-07 09:07:07 +0000 UTC", zname: "GMT"}, + {in: "07 Feb 2004, 09:07:07 CEST", out: "2004-02-07 09:07:07 +0000 UTC", zname: "CEST"}, // dd-mon-yyyy 12 Feb 2006, 19:17:08 +0100 {in: "07 Feb 2004, 09:07:07 +0100", out: "2004-02-07 08:07:07 +0000 UTC"}, // dd-mon-yyyy 12-Feb-2006 19:17:08 @@ -197,13 +234,29 @@ var testInputs = []dateTest{ {in: "04:02:2014 04:08:09.12312", out: "2014-04-02 04:08:09.12312 +0000 UTC"}, {in: "04:02:2014 04:08:09.123123", out: "2014-04-02 04:08:09.123123 +0000 UTC"}, // mm/dd/yy hh:mm:ss AM + {in: "04/02/2014 04:08:09am", out: "2014-04-02 04:08:09 +0000 UTC"}, {in: "04/02/2014 04:08:09 AM", out: "2014-04-02 04:08:09 +0000 UTC"}, + {in: "04/02/2014 04:08:09AM PST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "PST"}, + {in: "04/02/2014 04:08:09 AM PST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "PST"}, + {in: "04/02/2014 04:08:09AM CEST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"}, + {in: "04/02/2014 04:08:09 AM CEST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"}, + {in: "04/02/2014 04:08:09pm", out: "2014-04-02 16:08:09 +0000 UTC"}, {in: "04/02/2014 04:08:09 PM", out: "2014-04-02 16:08:09 +0000 UTC"}, + {in: "04/02/2014 04:08:09PM PST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "PST"}, + {in: "04/02/2014 04:08:09 PM PST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "PST"}, + {in: "04/02/2014 04:08:09pm CEST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "CEST"}, + {in: "04/02/2014 04:08:09 PM CEST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "CEST"}, + {in: "04/02/2014 04:08am", out: "2014-04-02 04:08:00 +0000 UTC"}, {in: "04/02/2014 04:08 AM", out: "2014-04-02 04:08:00 +0000 UTC"}, + {in: "04/02/2014 04:08pm", out: "2014-04-02 16:08:00 +0000 UTC"}, {in: "04/02/2014 04:08 PM", out: "2014-04-02 16:08:00 +0000 UTC"}, + {in: "04/02/2014 4:8AM", out: "2014-04-02 04:08:00 +0000 UTC"}, {in: "04/02/2014 4:8 AM", 
out: "2014-04-02 04:08:00 +0000 UTC"}, + {in: "04/02/2014 4:8pm", out: "2014-04-02 16:08:00 +0000 UTC"}, {in: "04/02/2014 4:8 PM", out: "2014-04-02 16:08:00 +0000 UTC"}, + {in: "04/02/2014 04:08:09.123am", out: "2014-04-02 04:08:09.123 +0000 UTC"}, {in: "04/02/2014 04:08:09.123 AM", out: "2014-04-02 04:08:09.123 +0000 UTC"}, + {in: "04/02/2014 04:08:09.123PM", out: "2014-04-02 16:08:09.123 +0000 UTC"}, {in: "04/02/2014 04:08:09.123 PM", out: "2014-04-02 16:08:09.123 +0000 UTC"}, // yyyy/mm/dd {in: "2014/04/02", out: "2014-04-02 00:00:00 +0000 UTC"}, @@ -219,11 +272,24 @@ var testInputs = []dateTest{ {in: "2014/4/2 04:08:09", out: "2014-04-02 04:08:09 +0000 UTC"}, {in: "2014/04/02 04:08:09.123", out: "2014-04-02 04:08:09.123 +0000 UTC"}, {in: "2014/04/02 04:08:09.123123", out: "2014-04-02 04:08:09.123123 +0000 UTC"}, + {in: "2014/04/02 04:08:09am", out: "2014-04-02 04:08:09 +0000 UTC"}, {in: "2014/04/02 04:08:09 AM", out: "2014-04-02 04:08:09 +0000 UTC"}, + {in: "2014/03/31 04:08:09am", out: "2014-03-31 04:08:09 +0000 UTC"}, {in: "2014/03/31 04:08:09 AM", out: "2014-03-31 04:08:09 +0000 UTC"}, + {in: "2014/4/2 04:08:09AM", out: "2014-04-02 04:08:09 +0000 UTC"}, {in: "2014/4/2 04:08:09 AM", out: "2014-04-02 04:08:09 +0000 UTC"}, + {in: "2014/04/02 04:08:09.123am", out: "2014-04-02 04:08:09.123 +0000 UTC"}, {in: "2014/04/02 04:08:09.123 AM", out: "2014-04-02 04:08:09.123 +0000 UTC"}, + {in: "2014/04/02 04:08:09.123am PST", out: "2014-04-02 04:08:09.123 +0000 UTC", zname: "PST"}, + {in: "2014/04/02 04:08:09.123 AM PST", out: "2014-04-02 04:08:09.123 +0000 UTC", zname: "PST"}, + {in: "2014/04/02 04:08:09.123AM CEST", out: "2014-04-02 04:08:09.123 +0000 UTC", zname: "CEST"}, + {in: "2014/04/02 04:08:09.123 AM CEST", out: "2014-04-02 04:08:09.123 +0000 UTC", zname: "CEST"}, + {in: "2014/04/02 04:08:09.123pm", out: "2014-04-02 16:08:09.123 +0000 UTC"}, {in: "2014/04/02 04:08:09.123 PM", out: "2014-04-02 16:08:09.123 +0000 UTC"}, + {in: "2014/04/02 04:08:09.123PM PST", out: 
"2014-04-02 16:08:09.123 +0000 UTC", zname: "PST"}, + {in: "2014/04/02 04:08:09.123 PM PST", out: "2014-04-02 16:08:09.123 +0000 UTC", zname: "PST"}, + {in: "2014/04/02 04:08:09.123PM CEST", out: "2014-04-02 16:08:09.123 +0000 UTC", zname: "CEST"}, + {in: "2014/04/02 04:08:09.123 PM CEST", out: "2014-04-02 16:08:09.123 +0000 UTC", zname: "CEST"}, // dd/mon/yyyy:hh:mm:ss tz nginx-log? https://github.com/araddon/dateparse/issues/118 // 112.195.209.90 - - [20/Feb/2018:12:12:14 +0800] "GET / HTTP/1.1" 200 190 "-" "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Mobile Safari/537.36" "-" {in: "06/May/2008:08:11:17 -0700", out: "2008-05-06 15:11:17 +0000 UTC"}, @@ -256,11 +322,55 @@ var testInputs = []dateTest{ {in: "2014-04-02 04:08:09.123123", out: "2014-04-02 04:08:09.123123 +0000 UTC"}, {in: "2014-04-02 04:08:09.12312312", out: "2014-04-02 04:08:09.12312312 +0000 UTC"}, {in: "2014-04-02 04:08:09 AM", out: "2014-04-02 04:08:09 +0000 UTC"}, + {in: "2014-04-02 04:08:09 AM PST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "PST"}, + {in: "2014-04-02 04:08:09 AM CEST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"}, {in: "2014-03-31 04:08:09 AM", out: "2014-03-31 04:08:09 +0000 UTC"}, + {in: "2014-03-31 04:08:09 AM PST", out: "2014-03-31 04:08:09 +0000 UTC", zname: "PST"}, + {in: "2014-03-31 04:08:09 AM CEST", out: "2014-03-31 04:08:09 +0000 UTC", zname: "CEST"}, {in: "2014-04-26 05:24:37 PM", out: "2014-04-26 17:24:37 +0000 UTC"}, + {in: "2014-04-26 05:24:37 PM PST", out: "2014-04-26 17:24:37 +0000 UTC", zname: "PST"}, + {in: "2014-04-26 05:24:37 PM CEST", out: "2014-04-26 17:24:37 +0000 UTC", zname: "CEST"}, {in: "2014-4-2 04:08:09 AM", out: "2014-04-02 04:08:09 +0000 UTC"}, + {in: "2014-4-2 04:08:09 AM PST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "PST"}, + {in: "2014-4-2 04:08:09 AM CEST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"}, {in: "2014-04-02 04:08:09.123 AM", out: 
"2014-04-02 04:08:09.123 +0000 UTC"}, + {in: "2014-04-02 04:08:09.123 AM PST", out: "2014-04-02 04:08:09.123 +0000 UTC", zname: "PST"}, + {in: "2014-04-02 04:08:09.123 AM CEST", out: "2014-04-02 04:08:09.123 +0000 UTC", zname: "CEST"}, {in: "2014-04-02 04:08:09.123 PM", out: "2014-04-02 16:08:09.123 +0000 UTC"}, + {in: "2014-04-02 04:08:09.123 PM PST", out: "2014-04-02 16:08:09.123 +0000 UTC", zname: "PST"}, + {in: "2014-04-02 04:08:09.123 PM CEST", out: "2014-04-02 16:08:09.123 +0000 UTC", zname: "CEST"}, + // https://github.com/araddon/dateparse/issues/150 + {in: "2023-01-04 12:01am", out: "2023-01-04 00:01:00 +0000 UTC"}, + {in: "2023-01-04 12:01 AM", out: "2023-01-04 00:01:00 +0000 UTC"}, + {in: "2023-01-04 12:01:59 AM", out: "2023-01-04 00:01:59 +0000 UTC"}, + {in: "2023-01-04 12:01:59.765 AM", out: "2023-01-04 00:01:59.765 +0000 UTC"}, + // https://github.com/araddon/dateparse/issues/157 + {in: "Thu Jan 28 2021 15:28:21 GMT+0000 (Coordinated Universal Time)", out: "2021-01-28 15:28:21 +0000 UTC"}, + {in: "Thu Jan 28 2021 15:28:21 GMT+0100 (Coordinated Universal Time)", out: "2021-01-28 14:28:21 +0000 UTC"}, + // https://github.com/araddon/dateparse/issues/130 + {in: "1985-04-12T23:20:50Z", out: "1985-04-12 23:20:50 +0000 UTC"}, + {in: "1985-04-12T23:20:50.52Z", out: "1985-04-12 23:20:50.52 +0000 UTC"}, + // https://github.com/araddon/dateparse/issues/123 + {in: "2017-04-03 22:32:14.322 CET", out: "2017-04-03 22:32:14.322 +0000 UTC", zname: "CET"}, + {in: "2017-04-03 22:32:14 CET", out: "2017-04-03 22:32:14 +0000 UTC", zname: "CET"}, + {in: "Mon Dec 26 16:22:08 2016", out: "2016-12-26 16:22:08 +0000 UTC"}, + {in: "Mon Dec 26 16:15:55.103786 2016", out: "2016-12-26 16:15:55.103786 +0000 UTC"}, + // https://github.com/araddon/dateparse/issues/109 + {in: "Sun, 07 Jun 2020 00:00:00 +0100", out: "2020-06-06 23:00:00 +0000 UTC"}, + // https://github.com/araddon/dateparse/issues/100#issuecomment-1118868154 + {in: "1 Apr 2022 23:59", out: "2022-04-01 23:59:00 +0000 
UTC"}, + {in: "1 JANuary 2022 23:59", out: "2022-01-01 23:59:00 +0000 UTC"}, + {in: "1 february 2022 23:59", out: "2022-02-01 23:59:00 +0000 UTC"}, + {in: "1 marCH 2022 23:59", out: "2022-03-01 23:59:00 +0000 UTC"}, + {in: "1 April 2022 23:59", out: "2022-04-01 23:59:00 +0000 UTC"}, + {in: "1 May 2022 23:59", out: "2022-05-01 23:59:00 +0000 UTC"}, + {in: "1 JuNe 2022 23:59", out: "2022-06-01 23:59:00 +0000 UTC"}, + {in: "1 JULY 2022 23:59", out: "2022-07-01 23:59:00 +0000 UTC"}, + {in: "1 august 2022 23:59", out: "2022-08-01 23:59:00 +0000 UTC"}, + {in: "1 September 2022 23:59", out: "2022-09-01 23:59:00 +0000 UTC"}, + {in: "1 October 2022 23:59", out: "2022-10-01 23:59:00 +0000 UTC"}, + {in: "1 November 2022 23:59", out: "2022-11-01 23:59:00 +0000 UTC"}, + {in: "1 December 2022 23:59", out: "2022-12-01 23:59:00 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss,000 {in: "2014-05-11 08:20:13,787", out: "2014-05-11 08:20:13.787 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss +0000 @@ -297,38 +407,44 @@ var testInputs = []dateTest{ {in: "2014-04-26 17:24:37.1 +00:00", out: "2014-04-26 17:24:37.1 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss +0000 TZ // Golang Native Format - {in: "2012-08-03 18:31:59 +0000 UTC", out: "2012-08-03 18:31:59 +0000 UTC"}, + {in: "2012-08-03 18:31:59 +0000 UTC", out: "2012-08-03 18:31:59 +0000 UTC", zname: "UTC"}, {in: "2012-08-03 13:31:59 -0600 MST", out: "2012-08-03 19:31:59 +0000 UTC", loc: "America/Denver"}, - {in: "2015-02-18 00:12:00 +0000 UTC", out: "2015-02-18 00:12:00 +0000 UTC"}, - {in: "2015-02-18 00:12:00 +0000 GMT", out: "2015-02-18 00:12:00 +0000 UTC"}, + {in: "2015-02-18 00:12:00 +0000 UTC", out: "2015-02-18 00:12:00 +0000 UTC", zname: "UTC"}, + {in: "2015-02-18 00:12:00 +0000 GMT", out: "2015-02-18 00:12:00 +0000 UTC", zname: "GMT"}, {in: "2015-02-08 03:02:00 +0200 CEST", out: "2015-02-08 01:02:00 +0000 UTC", loc: "Europe/Berlin"}, - {in: "2015-02-08 03:02:00 +0300 MSK", out: "2015-02-08 00:02:00 +0000 UTC"}, - {in: "2015-2-08 03:02:00 +0300 MSK", out: 
"2015-02-08 00:02:00 +0000 UTC"}, - {in: "2015-02-8 03:02:00 +0300 MSK", out: "2015-02-08 00:02:00 +0000 UTC"}, - {in: "2015-2-8 03:02:00 +0300 MSK", out: "2015-02-08 00:02:00 +0000 UTC"}, - {in: "2012-08-03 18:31:59.257000000 +0000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC"}, - {in: "2012-08-03 8:1:59.257000000 +0000 UTC", out: "2012-08-03 08:01:59.257 +0000 UTC"}, - {in: "2012-8-03 18:31:59.257000000 +0000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC"}, - {in: "2012-8-3 18:31:59.257000000 +0000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC"}, - {in: "2014-04-26 17:24:37.123456 +0000 UTC", out: "2014-04-26 17:24:37.123456 +0000 UTC"}, - {in: "2014-04-26 17:24:37.12 +0000 UTC", out: "2014-04-26 17:24:37.12 +0000 UTC"}, - {in: "2014-04-26 17:24:37.1 +0000 UTC", out: "2014-04-26 17:24:37.1 +0000 UTC"}, + {in: "2015-02-08 03:02:00 +0300 MSK", out: "2015-02-08 00:02:00 +0000 UTC", zname: "MSK"}, + {in: "2015-2-08 03:02:00 +0300 MSK", out: "2015-02-08 00:02:00 +0000 UTC", zname: "MSK"}, + {in: "2015-02-8 03:02:00 +0300 MSK", out: "2015-02-08 00:02:00 +0000 UTC", zname: "MSK"}, + {in: "2015-2-8 03:02:00 +0300 MSK", out: "2015-02-08 00:02:00 +0000 UTC", zname: "MSK"}, + {in: "2012-08-03 18:31:59.257000000 +0000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, + {in: "2012-08-03 8:1:59.257000000 +0000 UTC", out: "2012-08-03 08:01:59.257 +0000 UTC", zname: "UTC"}, + {in: "2012-8-03 18:31:59.257000000 +0000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, + {in: "2012-8-3 18:31:59.257000000 +0000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, + {in: "2014-04-26 17:24:37.123456 +0000 UTC", out: "2014-04-26 17:24:37.123456 +0000 UTC", zname: "UTC"}, + {in: "2014-04-26 17:24:37.12 +0000 UTC", out: "2014-04-26 17:24:37.12 +0000 UTC", zname: "UTC"}, + {in: "2014-04-26 17:24:37.1 +0000 UTC", out: "2014-04-26 17:24:37.1 +0000 UTC", zname: "UTC"}, {in: "2015-02-08 03:02:00 +0200 CEST m=+0.000000001", out: "2015-02-08 01:02:00 +0000 
UTC", loc: "Europe/Berlin"}, - {in: "2015-02-08 03:02:00 +0300 MSK m=+0.000000001", out: "2015-02-08 00:02:00 +0000 UTC"}, - {in: "2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001", out: "2015-02-08 00:02:00.001 +0000 UTC"}, + {in: "2015-02-08 03:02:00 +0300 MSK m=+0.000000001", out: "2015-02-08 00:02:00 +0000 UTC", zname: "MSK"}, + {in: "2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001", out: "2015-02-08 00:02:00.001 +0000 UTC", zname: "MSK"}, // yyyy-mm-dd hh:mm:ss TZ - {in: "2012-08-03 18:31:59 UTC", out: "2012-08-03 18:31:59 +0000 UTC"}, - {in: "2014-12-16 06:20:00 GMT", out: "2014-12-16 06:20:00 +0000 UTC"}, + {in: "2012-08-03 18:31:59 UTC", out: "2012-08-03 18:31:59 +0000 UTC", zname: "UTC"}, + {in: "2012-08-03 18:31:59 CEST", out: "2012-08-03 18:31:59 +0000 UTC", zname: "CEST"}, + {in: "2014-12-16 06:20:00 GMT", out: "2014-12-16 06:20:00 +0000 UTC", zname: "GMT"}, {in: "2012-08-03 13:31:59 MST", out: "2012-08-03 20:31:59 +0000 UTC", loc: "America/Denver"}, - {in: "2012-08-03 18:31:59.257000000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC"}, - {in: "2012-08-03 8:1:59.257000000 UTC", out: "2012-08-03 08:01:59.257 +0000 UTC"}, - {in: "2012-8-03 18:31:59.257000000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC"}, - {in: "2012-8-3 18:31:59.257000000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC"}, - {in: "2014-04-26 17:24:37.123456 UTC", out: "2014-04-26 17:24:37.123456 +0000 UTC"}, - {in: "2014-04-26 17:24:37.12 UTC", out: "2014-04-26 17:24:37.12 +0000 UTC"}, - {in: "2014-04-26 17:24:37.1 UTC", out: "2014-04-26 17:24:37.1 +0000 UTC"}, + {in: "2012-08-03 18:31:59.257000000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, + {in: "2012-08-03 8:1:59.257000000 UTC", out: "2012-08-03 08:01:59.257 +0000 UTC", zname: "UTC"}, + {in: "2012-8-03 18:31:59.257000000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, + {in: "2012-8-3 18:31:59.257000000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, + {in: "2012-8-3 18:31:59.257000000 
CEST", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "CEST"}, + {in: "2014-04-26 17:24:37.123456 UTC", out: "2014-04-26 17:24:37.123456 +0000 UTC", zname: "UTC"}, + {in: "2014-04-26 17:24:37.123456 CEST", out: "2014-04-26 17:24:37.123456 +0000 UTC", zname: "CEST"}, + {in: "2014-04-26 17:24:37.123456Z", out: "2014-04-26 17:24:37.123456 +0000 UTC"}, + {in: "2014-04-26 17:24:37.12 UTC", out: "2014-04-26 17:24:37.12 +0000 UTC", zname: "UTC"}, + {in: "2014-04-26 17:24:37.12 CEST", out: "2014-04-26 17:24:37.12 +0000 UTC", zname: "CEST"}, + {in: "2014-04-26 17:24:37.1 UTC", out: "2014-04-26 17:24:37.1 +0000 UTC", zname: "UTC"}, + {in: "2014-04-26 17:24:37.1 CEST", out: "2014-04-26 17:24:37.1 +0000 UTC", zname: "CEST"}, // This one is pretty special, it is TIMEZONE based but starts with P to emulate collions with PM - {in: "2014-04-26 05:24:37 PST", out: "2014-04-26 05:24:37 +0000 UTC"}, + {in: "2014-04-26 05:24:37 PST", out: "2014-04-26 05:24:37 +0000 UTC", zname: "PST"}, {in: "2014-04-26 05:24:37 PST", out: "2014-04-26 13:24:37 +0000 UTC", loc: "America/Los_Angeles"}, // yyyy-mm-dd hh:mm:ss+00:00 {in: "2012-08-03 18:31:59+00:00", out: "2012-08-03 18:31:59 +0000 UTC"}, @@ -339,18 +455,26 @@ var testInputs = []dateTest{ {in: "08:03:2012 18:31:59+00:00", out: "2012-08-03 18:31:59 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss.000+00:00 PST {in: "2012-08-03 18:31:59.000+00:00 PST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "America/Los_Angeles"}, + {in: "2012-08-03 18:31:59.000+00:00 CEST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "Europe/Berlin"}, // yyyy-mm-dd hh:mm:ss +00:00 TZ - {in: "2012-08-03 18:31:59 +00:00 UTC", out: "2012-08-03 18:31:59 +0000 UTC"}, + {in: "2012-08-03 18:31:59 +00:00 UTC", out: "2012-08-03 18:31:59 +0000 UTC", zname: "UTC"}, {in: "2012-08-03 13:31:51 -07:00 MST", out: "2012-08-03 20:31:51 +0000 UTC", loc: "America/Denver"}, - {in: "2012-08-03 18:31:59.257000000 +00:00 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC"}, + {in: "2012-08-03 13:31:51 +02:00 
CEST", out: "2012-08-03 11:31:51 +0000 UTC", loc: "Europe/Berlin"}, + {in: "2012-08-03 18:31:59.257000000 +00:00 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, {in: "2012-08-03 13:31:51.123 -08:00 PST", out: "2012-08-03 21:31:51.123 +0000 UTC", loc: "America/Los_Angeles"}, {in: "2012-08-03 13:31:51.123 +02:00 CEST", out: "2012-08-03 11:31:51.123 +0000 UTC", loc: "Europe/Berlin"}, - {in: "2012-08-03 8:1:59.257000000 +00:00 UTC", out: "2012-08-03 08:01:59.257 +0000 UTC"}, - {in: "2012-8-03 18:31:59.257000000 +00:00 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC"}, - {in: "2012-8-3 18:31:59.257000000 +00:00 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC"}, - {in: "2014-04-26 17:24:37.123456 +00:00 UTC", out: "2014-04-26 17:24:37.123456 +0000 UTC"}, - {in: "2014-04-26 17:24:37.12 +00:00 UTC", out: "2014-04-26 17:24:37.12 +0000 UTC"}, - {in: "2014-04-26 17:24:37.1 +00:00 UTC", out: "2014-04-26 17:24:37.1 +0000 UTC"}, + {in: "2012-08-03 8:1:59.257000000 +00:00 UTC", out: "2012-08-03 08:01:59.257 +0000 UTC", zname: "UTC"}, + {in: "2012-08-03 8:1:59.257000000 +00:00 CEST", out: "2012-08-03 08:01:59.257 +0000 UTC", zname: "CEST"}, + {in: "2012-8-03 18:31:59.257000000 +00:00 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, + {in: "2012-8-03 18:31:59.257000000 +00:00 CEST", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "CEST"}, + {in: "2012-8-3 18:31:59.257000000 +00:00 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, + {in: "2012-8-3 18:31:59.257000000 +00:00 CEST", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "CEST"}, + {in: "2014-04-26 17:24:37.123456 +00:00 UTC", out: "2014-04-26 17:24:37.123456 +0000 UTC", zname: "UTC"}, + {in: "2014-04-26 17:24:37.123456 +00:00 CEST", out: "2014-04-26 17:24:37.123456 +0000 UTC", zname: "CEST"}, + {in: "2014-04-26 17:24:37.12 +00:00 UTC", out: "2014-04-26 17:24:37.12 +0000 UTC", zname: "UTC"}, + {in: "2014-04-26 17:24:37.12 +00:00 CEST", out: "2014-04-26 17:24:37.12 +0000 UTC", zname: 
"CEST"}, + {in: "2014-04-26 17:24:37.1 +00:00 UTC", out: "2014-04-26 17:24:37.1 +0000 UTC", zname: "UTC"}, + {in: "2014-04-26 17:24:37.1 +00:00 CEST", out: "2014-04-26 17:24:37.1 +0000 UTC", zname: "CEST"}, // yyyy-mm-ddThh:mm:ss {in: "2009-08-12T22:15:09", out: "2009-08-12 22:15:09 +0000 UTC"}, {in: "2009-08-08T02:08:08", out: "2009-08-08 02:08:08 +0000 UTC"}, @@ -382,8 +506,8 @@ var testInputs = []dateTest{ {in: "2016-06-21T19:55+0100", out: "2016-06-21 18:55:00 +0000 UTC"}, {in: "2016-06-21T19:55+0130", out: "2016-06-21 18:25:00 +0000 UTC"}, // yyyy-mm-ddThh:mm:ss:000+0000 - weird format with additional colon in front of milliseconds + {in: "2012-08-17T18:31:59:257", out: "2012-08-17 18:31:59.257 +0000 UTC"}, // https://github.com/araddon/dateparse/issues/137 {in: "2012-08-17T18:31:59:257+0100", out: "2012-08-17 17:31:59.257 +0000 UTC"}, // https://github.com/araddon/dateparse/issues/117 - // yyyy-mm-ddThh:mm:ssZ {in: "2009-08-12T22:15Z", out: "2009-08-12 22:15:00 +0000 UTC"}, {in: "2009-08-12T22:15:09Z", out: "2009-08-12 22:15:09 +0000 UTC"}, @@ -409,6 +533,8 @@ var testInputs = []dateTest{ // 080313 05:21:55 mysqld started // 080313 5:21:55 InnoDB: Started; log sequence number 0 43655 {in: "171113 14:14:20", out: "2017-11-13 14:14:20 +0000 UTC"}, + // https://github.com/araddon/dateparse/issues/94 + {in: "190910 11:51:49", out: "2019-09-10 11:51:49 +0000 UTC"}, // all digits: unix secs, ms etc {in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC"}, @@ -461,6 +587,10 @@ func TestParse(t *testing.T) { if th.out != got { t.Fatalf("whoops, got %s, expected %s", got, th.out) } + if len(th.zname) > 0 { + gotZone, _ := ts.Zone() + assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) + } } else { ts = MustParse(th.in) got := fmt.Sprintf("%v", ts.In(time.UTC)) @@ -468,6 +598,10 @@ func TestParse(t *testing.T) { if th.out != got { t.Fatalf("whoops, got %s, expected %s", got, th.out) } + if len(th.zname) > 0 { + gotZone, 
_ := ts.Zone() + assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) + } } }) } @@ -538,15 +672,29 @@ var testParseErrors = []dateTest{ {in: "29-06-2016", err: true}, // this is just testing the empty space up front {in: " 2018-01-02 17:08:09 -07:00", err: true}, + // a semantic version number should not be interpreted as a date + {in: "1.22.3-78888", err: true}, + // a semantic version number that starts with a date should not be interpreted as a date + {in: "1.22.2023-78888", err: true}, + // https://github.com/araddon/dateparse/issues/145 + {in: "dataddo, faces, bug", err: true}, + // https://github.com/araddon/dateparse/issues/108 + {in: "1.jpg", err: true}, + // https://github.com/araddon/dateparse/issues/98 + {in: "Wayne, Bruce", err: true}, + {in: "Miami, Florida", err: true}, + {in: "Doe, John", err: true}, } func TestParseErrors(t *testing.T) { for _, th := range testParseErrors { - v, err := ParseAny(th.in) - assert.NotEqual(t, nil, err, "%v for %v", v, th.in) + t.Run(th.in, func(t *testing.T) { + v, err := ParseAny(th.in) + assert.NotEqual(t, nil, err, "%v for %v", v, th.in) - v, err = ParseAny(th.in, RetryAmbiguousDateWithSwap(true)) - assert.NotEqual(t, nil, err, "%v for %v", v, th.in) + v, err = ParseAny(th.in, RetryAmbiguousDateWithSwap(true)) + assert.NotEqual(t, nil, err, "%v for %v", v, th.in) + }) } } @@ -583,7 +731,7 @@ func TestParseLayout(t *testing.T) { {in: "2012-08-03 18:31:59 +0000 UTC", out: "2006-01-02 15:04:05 -0700 MST"}, // yyyy-mm-dd hh:mm:ss TZ {in: "2012-08-03 18:31:59 UTC", out: "2006-01-02 15:04:05 MST"}, - {in: "2012-08-03 18:31:59 CEST", out: "2006-01-02 15:04:05 MST"}, + {in: "2012-08-03 18:31:59 CEST", out: "2006-01-02 15:04:05 MST "}, // yyyy-mm-ddThh:mm:ss-07:00 {in: "2009-08-12T22:15:09-07:00", out: "2006-01-02T15:04:05-07:00"}, // yyyy-mm-ddThh:mm:ss-0700 @@ -593,45 +741,49 @@ func TestParseLayout(t *testing.T) { } for _, th := range testParseFormat { - l, err := 
ParseFormat(th.in) - if th.err { - assert.NotEqual(t, nil, err) - } else { - assert.Equal(t, nil, err) - assert.Equal(t, th.out, l, "for in=%v", th.in) - } + t.Run(th.in, func(t *testing.T) { + l, err := ParseFormat(th.in) + if th.err { + assert.NotEqual(t, nil, err) + } else { + assert.Equal(t, nil, err) + assert.Equal(t, th.out, l, "for in=%v", th.in) + } + }) } } var testParseStrict = []dateTest{ // dd-mon-yy 13-Feb-03 - {in: "03-03-14"}, + {in: "03-03-14", err: true}, // mm.dd.yyyy - {in: "3.3.2014"}, + {in: "3.3.2014", err: true}, // mm.dd.yy - {in: "08.09.71"}, + {in: "08.09.71", err: true}, // mm/dd/yyyy - {in: "3/5/2014"}, + {in: "3/5/2014", err: true}, // mm/dd/yy - {in: "08/08/71"}, - {in: "8/8/71"}, + {in: "08/08/71", err: true}, + {in: "8/8/71", err: true}, // mm/dd/yy hh:mm:ss - {in: "04/02/2014 04:08:09"}, - {in: "4/2/2014 04:08:09"}, + {in: "04/02/2014 04:08:09", err: true}, + {in: "4/2/2014 04:08:09", err: true}, + {in: `{"hello"}`, err: true}, + {in: "2009-08-12T22:15Z"}, } func TestParseStrict(t *testing.T) { for _, th := range testParseStrict { - _, err := ParseStrict(th.in) - assert.NotEqual(t, nil, err) + t.Run(th.in, func(t *testing.T) { + _, err := ParseStrict(th.in) + if th.err { + assert.NotEqual(t, nil, err) + } else { + assert.Equal(t, nil, err) + } + }) } - - _, err := ParseStrict(`{"hello"}`) - assert.NotEqual(t, nil, err) - - _, err = ParseStrict("2009-08-12T22:15Z") - assert.Equal(t, nil, err) } // Lets test to see how this performs using different Timezones/Locations From 3ebc8bc635b94e4fae022e27b7e349b70cf8a659 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Mon, 11 Dec 2023 23:46:44 -0700 Subject: [PATCH 30/62] Incorporate fix for dd.mm.yyyy format Incorporates PR https://github.com/araddon/dateparse/pull/133 from https://github.com/mehanizm to fix https://github.com/araddon/dateparse/issues/129 Adds test cases to verify the following are already fixed: * https://github.com/araddon/dateparse/issues/105 --- parseany.go | 50 
+++++++++++++++++++++++++++++++++--------------- parseany_test.go | 33 ++++++++++++++++++++++++++++++-- 2 files changed, 66 insertions(+), 17 deletions(-) diff --git a/parseany.go b/parseany.go index 700ffdf..52f5e1a 100644 --- a/parseany.go +++ b/parseany.go @@ -396,12 +396,24 @@ iterRunes: } } else { p.ambiguousMD = true - p.moi = 0 - p.molen = i - if !p.setMonth() { - return p, unknownErr(datestr) + if p.preferMonthFirst { + if p.molen == 0 { + // 03.31.2005 + p.molen = i + if !p.setMonth() { + return p, unknownErr(datestr) + } + p.dayi = i + 1 + } + } else { + if p.daylen == 0 { + p.daylen = i + if !p.setDay() { + return p, unknownErr(datestr) + } + p.moi = i + 1 + } } - p.dayi = i + 1 } case ' ': @@ -799,9 +811,17 @@ iterRunes: return p, unknownErr(datestr) } p.stateDate = dateDigitDotDot + } else if p.dayi == 0 && p.yearlen == 0 { + // 23.07.2002 + p.molen = i - p.moi + p.yeari = i + 1 + if !p.setMonth() { + return p, unknownErr(datestr) + } + p.stateDate = dateDigitDotDot } else { // 2018.09.30 - //p.molen = 2 + // p.molen = 2 p.molen = i - p.moi p.dayi = i + 1 if !p.setMonth() { @@ -2267,20 +2287,20 @@ func (p *parser) coalesceTime(end int) { } } func (p *parser) setFullMonth(month string) { - oldLen := len(p.format) - const fullMonth = "January" + oldLen := len(p.format) + const fullMonth = "January" p.format = []byte(fmt.Sprintf("%s%s%s", p.format[0:p.moi], fullMonth, p.format[p.moi+len(month):])) - newLen := len(p.format) + newLen := len(p.format) if newLen > oldLen && p.formatSetLen >= p.moi { - p.formatSetLen += newLen - oldLen + p.formatSetLen += newLen - oldLen } else if newLen < oldLen && p.formatSetLen >= p.moi { p.formatSetLen -= oldLen - newLen - } + } - if p.formatSetLen > len(p.format) { - p.formatSetLen = len(p.format) - } else if p.formatSetLen < len(fullMonth) { - p.formatSetLen = len(fullMonth) + if p.formatSetLen > len(p.format) { + p.formatSetLen = len(p.format) + } else if p.formatSetLen < len(fullMonth) { + p.formatSetLen = 
len(fullMonth) } else if p.formatSetLen < 0 { p.formatSetLen = 0 } diff --git a/parseany_test.go b/parseany_test.go index 4989161..c45a5ee 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -17,6 +17,8 @@ func TestOne(t *testing.T) { type dateTest struct { in, out, loc, zname string err bool + preferDayFirst bool + retryAmbiguous bool } var testInputs = []dateTest{ @@ -525,6 +527,32 @@ var testInputs = []dateTest{ {in: "03.31.2014", out: "2014-03-31 00:00:00 +0000 UTC"}, // mm.dd.yy {in: "08.21.71", out: "1971-08-21 00:00:00 +0000 UTC"}, + // dd.mm.yyyy (see https://github.com/araddon/dateparse/issues/129 and https://github.com/araddon/dateparse/issues/28 and https://github.com/araddon/dateparse/pull/133) + {in: "23.07.1938", out: "1938-07-23 00:00:00 +0000 UTC", retryAmbiguous: true}, + {in: "23.07.1938", out: "1938-07-23 00:00:00 +0000 UTC", preferDayFirst: true}, + {in: "23/07/1938", out: "1938-07-23 00:00:00 +0000 UTC", retryAmbiguous: true}, + {in: "23/07/1938", out: "1938-07-23 00:00:00 +0000 UTC", preferDayFirst: true}, + {in: "31/3/2014", out: "2014-03-31 00:00:00 +0000 UTC", retryAmbiguous: true}, + {in: "31/3/2014", out: "2014-03-31 00:00:00 +0000 UTC", preferDayFirst: true}, + {in: "31/03/2014", out: "2014-03-31 00:00:00 +0000 UTC", retryAmbiguous: true}, + {in: "31/03/2014", out: "2014-03-31 00:00:00 +0000 UTC", preferDayFirst: true}, + {in: "21/08/71", out: "1971-08-21 00:00:00 +0000 UTC", retryAmbiguous: true}, + {in: "21/08/71", out: "1971-08-21 00:00:00 +0000 UTC", preferDayFirst: true}, + {in: "1/8/71", out: "1971-01-08 00:00:00 +0000 UTC", preferDayFirst: false}, + {in: "1/8/71", out: "1971-08-01 00:00:00 +0000 UTC", preferDayFirst: true}, + {in: "8/4/2014 22:05", out: "2014-08-04 22:05:00 +0000 UTC", preferDayFirst: false}, + {in: "8/4/2014 22:05", out: "2014-04-08 22:05:00 +0000 UTC", preferDayFirst: true}, + {in: "08/04/2014 22:05", out: "2014-08-04 22:05:00 +0000 UTC", preferDayFirst: false}, + {in: "08/04/2014 22:05", out: "2014-04-08 
22:05:00 +0000 UTC", preferDayFirst: true}, + {in: "2/04/2014 03:00:51", out: "2014-02-04 03:00:51 +0000 UTC", preferDayFirst: false}, + {in: "2/04/2014 03:00:51", out: "2014-04-02 03:00:51 +0000 UTC", preferDayFirst: true}, + {in: "19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", retryAmbiguous: true}, + {in: "19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", preferDayFirst: true}, + {in: "19/03/2012 10:11:59.3186369", out: "2012-03-19 10:11:59.3186369 +0000 UTC", retryAmbiguous: true}, + {in: "19/03/2012 10:11:59.3186369", out: "2012-03-19 10:11:59.3186369 +0000 UTC", preferDayFirst: true}, + // https://github.com/araddon/dateparse/issues/105 + {in: "20/5/2006 19:51:45", out: "2006-05-20 19:51:45 +0000 UTC", retryAmbiguous: true}, + {in: "20/5/2006 19:51:45", out: "2006-05-20 19:51:45 +0000 UTC", preferDayFirst: true}, // yyyymmdd and similar {in: "2014", out: "2014-01-01 00:00:00 +0000 UTC"}, {in: "20140601", out: "2014-06-01 00:00:00 +0000 UTC"}, @@ -573,12 +601,13 @@ func TestParse(t *testing.T) { t.Fatalf("error: %s", r) } }() + parserOptions := []ParserOption{PreferMonthFirst(!th.preferDayFirst), RetryAmbiguousDateWithSwap(th.retryAmbiguous)} if len(th.loc) > 0 { loc, err := time.LoadLocation(th.loc) if err != nil { t.Fatalf("Expected to load location %q but got %v", th.loc, err) } - ts, err = ParseIn(th.in, loc) + ts, err = ParseIn(th.in, loc, parserOptions...) if err != nil { t.Fatalf("expected to parse %q but got %v", th.in, err) } @@ -592,7 +621,7 @@ func TestParse(t *testing.T) { assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) } } else { - ts = MustParse(th.in) + ts = MustParse(th.in, parserOptions...) 
got := fmt.Sprintf("%v", ts.In(time.UTC)) assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) if th.out != got { From c62ed15d7357bfe5d6a592087b107c5c95a9aa56 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Tue, 12 Dec 2023 17:42:09 -0700 Subject: [PATCH 31/62] Support PMDT and AMT time zones Also disallow PM and AM from being specified twice in the string. Fixes https://github.com/araddon/dateparse/issues/149 --- parseany.go | 28 +++++++++++++++++++++------- parseany_test.go | 30 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/parseany.go b/parseany.go index 52f5e1a..1dc0443 100644 --- a/parseany.go +++ b/parseany.go @@ -114,7 +114,6 @@ const ( timeOffset timeOffsetColon timeOffsetColonAlpha - timeAlpha timePeriod timePeriodAMPM timeZ @@ -1350,17 +1349,22 @@ iterRunes: } else { // Could be AM/PM isLower := r == 'a' || r == 'p' + isTwoLetterWord := ((i+2) == len(p.datestr) || p.nextIs(i+1, ' ')) switch { - case isLower && p.nextIs(i, 'm'): + case isLower && p.nextIs(i, 'm') && isTwoLetterWord && !p.parsedAMPM: p.coalesceTime(i) p.set(i, "pm") + p.parsedAMPM = true // skip 'm' i++ - case !isLower && p.nextIs(i, 'M'): + case !isLower && p.nextIs(i, 'M') && isTwoLetterWord && !p.parsedAMPM: p.coalesceTime(i) p.set(i, "PM") + p.parsedAMPM = true // skip 'M' i++ + default: + return p, unexpectedTail(p.datestr[i:]) } } case ' ': @@ -1526,8 +1530,11 @@ iterRunes: // timeWsAlpha // 00:12:00 PST // 15:44:11 UTC+0100 2015 - if r == 'm' || r == 'M' { - //return parse("2006-01-02 03:04:05 PM", p.datestr, loc) + isTwoLetterWord := ((i+1) == len(p.datestr) || p.nextIs(i, ' ')) + if (r == 'm' || r == 'M') && isTwoLetterWord { + if p.parsedAMPM { + return p, unexpectedTail(p.datestr[i:]) + } // This isn't a time zone after all... 
p.tzi = 0 p.stateTime = timeWsAMPM @@ -1536,6 +1543,7 @@ iterRunes: } else { p.set(i-1, "PM") } + p.parsedAMPM = true if p.hourlen == 2 { p.set(p.houri, "03") } else if p.hourlen == 1 { @@ -1668,21 +1676,26 @@ iterRunes: case 'a', 'A', 'p', 'P': // Could be AM/PM isLower := r == 'a' || r == 'p' + isTwoLetterWord := ((i+2) == len(p.datestr) || p.nextIs(i+1, ' ')) switch { - case isLower && p.nextIs(i, 'm'): + case isLower && p.nextIs(i, 'm') && isTwoLetterWord && !p.parsedAMPM: p.mslen = i - p.msi p.coalesceTime(i) p.set(i, "pm") + p.parsedAMPM = true // skip 'm' i++ p.stateTime = timePeriodAMPM - case !isLower && p.nextIs(i, 'M'): + case !isLower && p.nextIs(i, 'M') && isTwoLetterWord && !p.parsedAMPM: p.mslen = i - p.msi p.coalesceTime(i) p.set(i, "PM") + p.parsedAMPM = true // skip 'M' i++ p.stateTime = timePeriodAMPM + default: + return p, unexpectedTail(p.datestr[i:]) } default: if !unicode.IsDigit(r) { @@ -2053,6 +2066,7 @@ type parser struct { formatSetLen int datestr string fullMonth string + parsedAMPM bool skip int extra int part1Len int diff --git a/parseany_test.go b/parseany_test.go index c45a5ee..1a6398f 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -373,6 +373,19 @@ var testInputs = []dateTest{ {in: "1 October 2022 23:59", out: "2022-10-01 23:59:00 +0000 UTC"}, {in: "1 November 2022 23:59", out: "2022-11-01 23:59:00 +0000 UTC"}, {in: "1 December 2022 23:59", out: "2022-12-01 23:59:00 +0000 UTC"}, + // https://github.com/araddon/dateparse/issues/149 + {in: "2018-09-30 21:09:13 PMDT", out: "2018-09-30 21:09:13 +0000 UTC", zname: "PMDT"}, + {in: "2018-09-30 08:09:13 PM PMDT", out: "2018-09-30 20:09:13 +0000 UTC", zname: "PMDT"}, + {in: "2018-09-30 08:09:13pm PMDT", out: "2018-09-30 20:09:13 +0000 UTC", zname: "PMDT"}, + {in: "2018-09-30 21:09:13.123 PMDT", out: "2018-09-30 21:09:13.123 +0000 UTC", zname: "PMDT"}, + {in: "2018-09-30 08:09:13.123 PM PMDT", out: "2018-09-30 20:09:13.123 +0000 UTC", zname: "PMDT"}, + {in: "2018-09-30 08:09:13.123pm 
PMDT", out: "2018-09-30 20:09:13.123 +0000 UTC", zname: "PMDT"}, + {in: "2018-09-30 21:09:13 AMT", out: "2018-09-30 21:09:13 +0000 UTC", zname: "AMT"}, + {in: "2018-09-30 08:09:13 AM AMT", out: "2018-09-30 08:09:13 +0000 UTC", zname: "AMT"}, + {in: "2018-09-30 08:09:13am AMT", out: "2018-09-30 08:09:13 +0000 UTC", zname: "AMT"}, + {in: "2018-09-30 21:09:13.123 AMT", out: "2018-09-30 21:09:13.123 +0000 UTC", zname: "AMT"}, + {in: "2018-09-30 08:09:13.123 am AMT", out: "2018-09-30 08:09:13.123 +0000 UTC", zname: "AMT"}, + {in: "2018-09-30 08:09:13.123am AMT", out: "2018-09-30 08:09:13.123 +0000 UTC", zname: "AMT"}, // yyyy-mm-dd hh:mm:ss,000 {in: "2014-05-11 08:20:13,787", out: "2014-05-11 08:20:13.787 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss +0000 @@ -713,6 +726,23 @@ var testParseErrors = []dateTest{ {in: "Wayne, Bruce", err: true}, {in: "Miami, Florida", err: true}, {in: "Doe, John", err: true}, + // https://github.com/araddon/dateparse/issues/149 + {in: "2018-09-30 21:09:13PMDT", err: true}, + {in: "2018-09-30 08:09:13pm PM", err: true}, + {in: "2018-09-30 08:09:13 PM PM", err: true}, + {in: "2018-09-30 08:09:13 PMDT PM", err: true}, + {in: "2018-09-30 21:09:13.123PMDT", err: true}, + {in: "2018-09-30 08:09:13.123PM pm", err: true}, + {in: "2018-09-30 08:09:13.123 pm PM", err: true}, + {in: "2018-09-30 08:09:13.123 PMDT pm", err: true}, + {in: "2018-09-30 21:09:13AMT", err: true}, + {in: "2018-09-30 08:09:13am AM", err: true}, + {in: "2018-09-30 08:09:13 AM AM", err: true}, + {in: "2018-09-30 08:09:13 AMT AM", err: true}, + {in: "2018-09-30 21:09:13.123AMT", err: true}, + {in: "2018-09-30 08:09:13.123AM am", err: true}, + {in: "2018-09-30 08:09:13.123 am AM", err: true}, + {in: "2018-09-30 08:09:13.123 AMDT am", err: true}, } func TestParseErrors(t *testing.T) { From 49f9259ee38d07981a58acd3778f9ae8be9565c1 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Tue, 12 Dec 2023 20:18:58 -0700 Subject: [PATCH 32/62] Add support for dd[th,nd,st,rd] Month yyyy Incorporate 
PR https://github.com/araddon/dateparse/pull/128 from https://github.com/krhubert to fix https://github.com/araddon/dateparse/issues/127 --- parseany.go | 6 ++++++ parseany_test.go | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/parseany.go b/parseany.go index 1dc0443..74ca494 100644 --- a/parseany.go +++ b/parseany.go @@ -435,6 +435,12 @@ iterRunes: p.stateDate = dateDigitChineseYear case ',': return p, unknownErr(datestr) + case 's', 'S', 'r', 'R', 't', 'T', 'n', 'N': + // 1st January 2018 + // 2nd Jan 2018 23:59 + // st, rd, nd, st + p.stateDate = dateAlphaWsMonthSuffix + i-- default: continue } diff --git a/parseany_test.go b/parseany_test.go index 1a6398f..7940dd6 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -133,6 +133,14 @@ var testInputs = []dateTest{ {in: "June 2nd 2012", out: "2012-06-02 00:00:00 +0000 UTC"}, {in: "June 22nd, 2012", out: "2012-06-22 00:00:00 +0000 UTC"}, {in: "June 22nd 2012", out: "2012-06-22 00:00:00 +0000 UTC"}, + // Incorporate PR https://github.com/araddon/dateparse/pull/128 to fix https://github.com/araddon/dateparse/issues/127 + // dd[th,nd,st,rd] Month yyyy + {in: "1st September 2012", out: "2012-09-01 00:00:00 +0000 UTC"}, + {in: "2nd September 2012", out: "2012-09-02 00:00:00 +0000 UTC"}, + {in: "3rd September 2012", out: "2012-09-03 00:00:00 +0000 UTC"}, + {in: "4th September 2012", out: "2012-09-04 00:00:00 +0000 UTC"}, + {in: "2nd January 2018", out: "2018-01-02 00:00:00 +0000 UTC"}, + {in: "3nd Feb 2018 13:58:24", out: "2018-02-03 13:58:24 +0000 UTC"}, // RFC1123 = "Mon, 02 Jan 2006 15:04:05 MST" {in: "Fri, 03 Jul 2015 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, {in: "Fri, 03 Jul 2015 08:08:08 CET", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CET"}, From 301ffeee02c73f545b6e5d4078e9d5c6ef71506f Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Tue, 12 Dec 2023 21:24:17 -0700 Subject: [PATCH 33/62] Add support for mon/dd/yyyy (Oct/31/1970) --- parseany.go | 90 
++++++++++++++++++++++++++++++++++++++++++++++++ parseany_test.go | 12 ++++++- 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/parseany.go b/parseany.go index 74ca494..582e58a 100644 --- a/parseany.go +++ b/parseany.go @@ -90,6 +90,9 @@ const ( dateAlphaWsAlpha dateAlphaWsAlphaYearmaybe // 34 dateAlphaPeriodWsDigit + dateAlphaSlash + dateAlphaSlashDigit + dateAlphaSlashDigitSlash dateWeekdayComma dateWeekdayAbbrevComma ) @@ -855,6 +858,14 @@ iterRunes: // // dateAlphaPeriodWsDigit // oct. 1, 1970 + // dateAlphaSlash + // dateAlphaSlashDigit + // dateAlphaSlashDigitSlash + // Oct/ 7/1970 + // Oct/07/1970 + // February/ 7/1970 + // February/07/1970 + // // dateWeekdayComma // Monday, 02 Jan 2006 15:04:05 MST // Monday, 02-Jan-06 15:04:05 MST @@ -937,6 +948,30 @@ iterRunes: } else { return p, unknownErr(datestr) } + case r == '/': + // X + // Oct/ 7/1970 + // Oct/07/1970 + // X + // February/ 7/1970 + // February/07/1970 + // Must be a valid short or long month + if i == 3 { + p.moi = 0 + p.molen = i - p.moi + p.set(p.moi, "Jan") + p.stateDate = dateAlphaSlash + } else { + possibleFullMonth := strings.ToLower(p.datestr[:i]) + if i > 3 && isMonthFull(possibleFullMonth) { + p.moi = 0 + p.molen = i - p.moi + p.fullMonth = possibleFullMonth + p.stateDate = dateAlphaSlash + } else { + return p, unknownErr(datestr) + } + } } case dateAlphaWs: @@ -1183,6 +1218,53 @@ iterRunes: default: return p, unknownErr(datestr) } + + case dateAlphaSlash: + // Oct/ 7/1970 + // February/07/1970 + switch { + case r == ' ': + // continue + case unicode.IsDigit(r): + p.stateDate = dateAlphaSlashDigit + p.dayi = i + default: + return p, unknownErr(datestr) + } + + case dateAlphaSlashDigit: + // dateAlphaSlash: + // dateAlphaSlashDigit: + // dateAlphaSlashDigitSlash: + // Oct/ 7/1970 + // Oct/07/1970 + // February/ 7/1970 + // February/07/1970 + switch { + case r == '/': + p.yeari = i + 1 + p.daylen = i - p.dayi + if !p.setDay() { + return p, unknownErr(datestr) + } + p.stateDate 
= dateAlphaSlashDigitSlash + case unicode.IsDigit(r): + // continue + default: + return p, unknownErr(datestr) + } + + case dateAlphaSlashDigitSlash: + switch { + case unicode.IsDigit(r): + // continue + case r == ' ': + p.stateTime = timeStart + break iterRunes + default: + return p, unknownErr(datestr) + } + case dateWeekdayComma: // Monday, 02 Jan 2006 15:04:05 MST // Monday, 02 Jan 2006 15:04:05 -0700 @@ -2044,6 +2126,11 @@ iterRunes: p.setEntireFormat([]byte("2006年01月02日 15:04:05")) return p, nil + case dateAlphaSlashDigitSlash: + // Oct/ 7/1970 + // February/07/1970 + return p, nil + case dateWeekdayComma: // Monday, 02 Jan 2006 15:04:05 -0700 // Monday, 02 Jan 2006 15:04:05 +0100 @@ -2381,6 +2468,9 @@ func isDay(alpha string) bool { return false } func isMonthFull(alpha string) bool { + if len(alpha) > len("september") { + return false + } for _, month := range months { if alpha == month { return true diff --git a/parseany_test.go b/parseany_test.go index 7940dd6..190e2c1 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -304,9 +304,19 @@ var testInputs = []dateTest{ // 112.195.209.90 - - [20/Feb/2018:12:12:14 +0800] "GET / HTTP/1.1" 200 190 "-" "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Mobile Safari/537.36" "-" {in: "06/May/2008:08:11:17 -0700", out: "2008-05-06 15:11:17 +0000 UTC"}, {in: "30/May/2008:08:11:17 -0700", out: "2008-05-30 15:11:17 +0000 UTC"}, - // dd/mon/yyy hh:mm:ss tz + // dd/mon/yyyy hh:mm:ss tz {in: "06/May/2008:08:11:17 -0700", out: "2008-05-06 15:11:17 +0000 UTC"}, {in: "30/May/2008:08:11:17 -0700", out: "2008-05-30 15:11:17 +0000 UTC"}, + // mon/dd/yyyy + {in: "Oct/ 7/1970", out: "1970-10-07 00:00:00 +0000 UTC"}, + {in: "Oct/31/1970", out: "1970-10-31 00:00:00 +0000 UTC"}, + {in: "Oct/03/1970", out: "1970-10-03 00:00:00 +0000 UTC"}, + {in: "Oct/03/1970 22:33:44", out: "1970-10-03 22:33:44 +0000 UTC"}, + {in: "February/ 7/1970", out: "1970-02-07 00:00:00 
+0000 UTC"}, + {in: "February/27/1970", out: "1970-02-27 00:00:00 +0000 UTC"}, + {in: "February/03/1970", out: "1970-02-03 00:00:00 +0000 UTC"}, + {in: "February/03/1970 22:33:44.555", out: "1970-02-03 22:33:44.555 +0000 UTC"}, + {in: "February/03/1970 11:33:44.555 PM PST", out: "1970-02-03 23:33:44.555 +0000 UTC", zname: "PST"}, // yyyy-mm-dd {in: "2014-04-02", out: "2014-04-02 00:00:00 +0000 UTC"}, {in: "2014-03-31", out: "2014-03-31 00:00:00 +0000 UTC"}, From 18938f16ae75df76755cb92fded6eaa826ea70fa Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Tue, 12 Dec 2023 21:42:48 -0700 Subject: [PATCH 34/62] Implement support for yyyy mon dd (2013 May 02) Incorporate PR https://github.com/araddon/dateparse/pull/142 from https://github.com/dferstay to fix https://github.com/araddon/dateparse/issues/141 --- parseany.go | 56 +++++++++++++++++++++++++++++++++++++++++++++++- parseany_test.go | 4 ++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/parseany.go b/parseany.go index 582e58a..717c35a 100644 --- a/parseany.go +++ b/parseany.go @@ -95,6 +95,8 @@ const ( dateAlphaSlashDigitSlash dateWeekdayComma dateWeekdayAbbrevComma + dateYearWs + dateYearWsMonthWs ) const ( // Time state @@ -426,7 +428,15 @@ iterRunes: // 02 Jan 2018 23:59:34 // 12 Feb 2006, 19:17 // 12 Feb 2006, 19:17:22 - if i == 6 { + // 2013 Jan 06 15:04:05 + if i == 4 { + p.yearlen = i + p.moi = i + 1 + if !p.setYear() { + return p, unknownErr(datestr) + } + p.stateDate = dateYearWs + } else if i == 6 { p.stateDate = dateDigitSt } else { p.stateDate = dateDigitWs @@ -795,6 +805,45 @@ iterRunes: } break iterRunes } + + case dateYearWs: + // 2013 Jan 06 15:04:05 + // 2013 January 06 15:04:05 + if r == ' ' { + p.molen = i - p.moi + // Must be a valid short or long month + if p.molen == 3 { + p.set(p.moi, "Jan") + p.dayi = i + 1 + p.stateDate = dateYearWsMonthWs + } else { + possibleFullMonth := strings.ToLower(p.datestr[p.moi:(p.moi + p.molen)]) + if i > 3 && isMonthFull(possibleFullMonth) { 
+ p.fullMonth = possibleFullMonth + p.dayi = i + 1 + p.stateDate = dateYearWsMonthWs + } else { + return p, unknownErr(datestr) + } + } + } + case dateYearWsMonthWs: + // 2013 Jan 06 15:04:05 + // 2013 January 06 15:04:05 + switch r { + case ',': + p.daylen = i - p.dayi + p.setDay() + i++ + p.stateTime = timeStart + break iterRunes + case ' ': + p.daylen = i - p.dayi + p.setDay() + p.stateTime = timeStart + break iterRunes + } + case dateDigitChineseYear: // dateDigitChineseYear // 2014年04月08日 @@ -2142,6 +2191,11 @@ iterRunes: // Mon, 02 Jan 2006 15:04:05 MST return p, nil + case dateYearWsMonthWs: + // 2013 May 02 11:37:55 + // 2013 December 02 11:37:55 + return p, nil + } return p, unknownErr(datestr) diff --git a/parseany_test.go b/parseany_test.go index 190e2c1..0a545f9 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -404,6 +404,10 @@ var testInputs = []dateTest{ {in: "2018-09-30 21:09:13.123 AMT", out: "2018-09-30 21:09:13.123 +0000 UTC", zname: "AMT"}, {in: "2018-09-30 08:09:13.123 am AMT", out: "2018-09-30 08:09:13.123 +0000 UTC", zname: "AMT"}, {in: "2018-09-30 08:09:13.123am AMT", out: "2018-09-30 08:09:13.123 +0000 UTC", zname: "AMT"}, + /// yyyy mmm dd https://github.com/araddon/dateparse/issues/141 + {in: "2013 May 02 11:37:55", out: "2013-05-02 11:37:55 +0000 UTC"}, + {in: "2013 June 02 11:37:55", out: "2013-06-02 11:37:55 +0000 UTC"}, + {in: "2013 December 02 11:37:55", out: "2013-12-02 11:37:55 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss,000 {in: "2014-05-11 08:20:13,787", out: "2014-05-11 08:20:13.787 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss +0000 From fc278d32da7d449723ff481dab38a09267a4de35 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Tue, 12 Dec 2023 23:07:11 -0700 Subject: [PATCH 35/62] Incorporate support for dd-mm-yyyy (digit month) Incorporate PR https://github.com/araddon/dateparse/pull/140 from https://github.com/dferstay to fix https://github.com/araddon/dateparse/issues/139 This also fixes 
https://github.com/araddon/dateparse/issues/155 (duplicate of issue 139) PR is adapted to avoid duplicate code and validate format. --- parseany.go | 114 ++++++++++++++++++++++++++++++++--------------- parseany_test.go | 13 +++++- 2 files changed, 88 insertions(+), 39 deletions(-) diff --git a/parseany.go b/parseany.go index 717c35a..5fabaca 100644 --- a/parseany.go +++ b/parseany.go @@ -59,26 +59,28 @@ const ( dateYearDash dateYearDashAlphaDash dateYearDashDash - dateYearDashDashWs // 5 + dateYearDashDashWs // 6 dateYearDashDashT dateYearDashDashOffset dateDigitDash dateDigitDashAlpha - dateDigitDashAlphaDash // 10 + dateDigitDashAlphaDash // 11 + dateDigitDashDigit + dateDigitDashDigitDash dateDigitDot dateDigitDotDot dateDigitSlash dateDigitYearSlash - dateDigitSlashAlpha // 15 + dateDigitSlashAlpha // 18 dateDigitColon dateDigitChineseYear dateDigitChineseYearWs dateDigitWs - dateDigitWsMoYear // 20 + dateDigitWsMoYear // 23 dateAlpha dateAlphaWs dateAlphaWsDigit - dateAlphaWsDigitMore // 24 + dateAlphaWsDigitMore // 27 dateAlphaWsDigitMoreWs dateAlphaWsDigitMoreWsYear dateAlphaWsMonth @@ -88,7 +90,7 @@ const ( dateAlphaWsMore dateAlphaWsAtTime dateAlphaWsAlpha - dateAlphaWsAlphaYearmaybe // 34 + dateAlphaWsAlphaYearmaybe // 37 dateAlphaPeriodWsDigit dateAlphaSlash dateAlphaSlashDigit @@ -545,6 +547,9 @@ iterRunes: if unicode.IsLetter(r) { p.stateDate = dateDigitDashAlpha p.moi = i + } else if unicode.IsDigit(r) { + p.stateDate = dateDigitDashDigit + p.moi = i } else { return p, unknownErr(datestr) } @@ -560,10 +565,29 @@ iterRunes: p.stateDate = dateDigitDashAlphaDash } - case dateDigitDashAlphaDash: - // 13-Feb-03 ambiguous - // 28-Feb-03 ambiguous - // 29-Jun-2016 dd-month(alpha)-yyyy + case dateDigitDashDigit: + // 29-06-2026 + switch r { + case '-': + // X + // 29-06-2026 + p.molen = i - p.moi + if p.molen == 2 { + p.set(p.moi, "01") + p.yeari = i + 1 + p.stateDate = dateDigitDashDigitDash + } else { + return p, unknownErr(datestr) + } + } + + case 
dateDigitDashAlphaDash, dateDigitDashDigitDash: + // dateDigitDashAlphaDash: + // 13-Feb-03 ambiguous + // 28-Feb-03 ambiguous + // 29-Jun-2016 dd-month(alpha)-yyyy + // dateDigitDashDigitDash: + // 29-06-2026 switch r { case ' ': // we need to find if this was 4 digits, aka year @@ -581,8 +605,11 @@ iterRunes: } else if length == 2 { // We have no idea if this is // yy-mon-dd OR dd-mon-yy + // (or for dateDigitDashDigitDash, yy-mm-dd OR dd-mm-yy) // - // We are going to ASSUME (bad, bad) that it is dd-mon-yy which is a horible assumption + // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy), + // which is a horrible assumption, but seems to be the convention for + // dates that are formatted in this way. p.ambiguousMD = true p.yearlen = 2 p.set(p.yeari, "06") @@ -592,6 +619,8 @@ iterRunes: if !p.setDay() { return p, unknownErr(datestr) } + } else { + return p, unknownErr(datestr) } p.stateTime = timeStart break iterRunes @@ -2055,32 +2084,43 @@ iterRunes: case dateYearDashDashT: return p, nil - case dateDigitDashAlphaDash: - // 13-Feb-03 ambiguous - // 28-Feb-03 ambiguous - // 29-Jun-2016 - length := len(p.datestr) - (p.moi + p.molen + 1) - if length == 4 { - p.yearlen = 4 - p.set(p.yeari, "2006") - // We now also know that part1 was the day - p.dayi = 0 - p.daylen = p.part1Len - if !p.setDay() { - return p, unknownErr(datestr) - } - } else if length == 2 { - // We have no idea if this is - // yy-mon-dd OR dd-mon-yy - // - // We are going to ASSUME (bad, bad) that it is dd-mon-yy which is a horible assumption - p.ambiguousMD = true - p.yearlen = 2 - p.set(p.yeari, "06") - // We now also know that part1 was the day - p.dayi = 0 - p.daylen = p.part1Len - if !p.setDay() { + case dateDigitDashAlphaDash, dateDigitDashDigitDash: + // This has already been done if we parsed the time already + if p.stateTime == timeIgnore { + // dateDigitDashAlphaDash: + // 13-Feb-03 ambiguous + // 28-Feb-03 ambiguous + // 29-Jun-2016 + // dateDigitDashDigitDash: + // 
29-06-2026 + length := len(p.datestr) - (p.moi + p.molen + 1) + if length == 4 { + p.yearlen = 4 + p.set(p.yeari, "2006") + // We now also know that part1 was the day + p.dayi = 0 + p.daylen = p.part1Len + if !p.setDay() { + return p, unknownErr(datestr) + } + } else if length == 2 { + // We have no idea if this is + // yy-mon-dd OR dd-mon-yy + // (or for dateDigitDashDigitDash, yy-mm-dd OR dd-mm-yy) + // + // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy), + // which is a horrible assumption, but seems to be the convention for + // dates that are formatted in this way. + p.ambiguousMD = true + p.yearlen = 2 + p.set(p.yeari, "06") + // We now also know that part1 was the day + p.dayi = 0 + p.daylen = p.part1Len + if !p.setDay() { + return p, unknownErr(datestr) + } + } else { return p, unknownErr(datestr) } } diff --git a/parseany_test.go b/parseany_test.go index 0a545f9..fc1938a 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -324,10 +324,20 @@ var testInputs = []dateTest{ // yyyy-mm-dd-07:00 {in: "2020-07-20+08:00", out: "2020-07-19 16:00:00 +0000 UTC"}, {in: "2020-07-20+0800", out: "2020-07-19 16:00:00 +0000 UTC"}, - // dd-mmm-yy + // dd-mmm-yy (alpha month) {in: "28-Feb-02", out: "2002-02-28 00:00:00 +0000 UTC"}, {in: "15-Jan-18", out: "2018-01-15 00:00:00 +0000 UTC"}, {in: "15-Jan-2017", out: "2017-01-15 00:00:00 +0000 UTC"}, + {in: "28-Feb-02 15:16:17", out: "2002-02-28 15:16:17 +0000 UTC"}, + {in: "15-Jan-18 15:16:17", out: "2018-01-15 15:16:17 +0000 UTC"}, + {in: "15-Jan-2017 15:16:17", out: "2017-01-15 15:16:17 +0000 UTC"}, + // dd-mm-yy (digit month - potentially ambiguous) - https://github.com/araddon/dateparse/issues/139 + {in: "28-02-02", out: "2002-02-28 00:00:00 +0000 UTC"}, + {in: "15-01-18", out: "2018-01-15 00:00:00 +0000 UTC"}, + {in: "15-01-2017", out: "2017-01-15 00:00:00 +0000 UTC"}, + {in: "28-02-02 15:16:17", out: "2002-02-28 15:16:17 +0000 UTC"}, + {in: "15-01-18 15:16:17", out: "2018-01-15 15:16:17 +0000 UTC"}, + 
{in: "15-01-2017 15:16:17", out: "2017-01-15 15:16:17 +0000 UTC"}, // yyyy-mm {in: "2014-04", out: "2014-04-01 00:00:00 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss AM @@ -733,7 +743,6 @@ var testParseErrors = []dateTest{ {in: "oct.-7-1970", err: true}, {in: "septe. 7, 1970", err: true}, {in: "SeptemberRR 7th, 1970", err: true}, - {in: "29-06-2016", err: true}, // this is just testing the empty space up front {in: " 2018-01-02 17:08:09 -07:00", err: true}, // a semantic version number should not be interpreted as a date From df9ae2e32a78ca70fd81941021477382ad6ec27c Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Tue, 12 Dec 2023 23:19:35 -0700 Subject: [PATCH 36/62] Incorporate support for yyyymmddhhmmss.SSS Incorporate PR https://github.com/araddon/dateparse/pull/144 from https://github.com/dferstay to fix https://github.com/araddon/dateparse/issues/143 --- parseany.go | 19 +++++++++++++------ parseany_test.go | 2 ++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/parseany.go b/parseany.go index 5fabaca..ab38784 100644 --- a/parseany.go +++ b/parseany.go @@ -400,7 +400,7 @@ iterRunes: if !p.setYear() { return p, unknownErr(datestr) } - } else { + } else if i <= 2 { p.ambiguousMD = true if p.preferMonthFirst { if p.molen == 0 { @@ -421,6 +421,8 @@ iterRunes: } } } + // else this might be a unixy combined datetime of the form: + // yyyyMMddhhmmss.SSS case ' ': // 18 January 2018 @@ -2128,12 +2130,17 @@ iterRunes: return p, nil case dateDigitDot: - // 2014.05 - p.molen = i - p.moi - if !p.setMonth() { - return p, unknownErr(datestr) + if len(datestr) == len("yyyyMMddhhmmss.SSS") { // 18 + p.setEntireFormat([]byte("20060102150405.000")) + return p, nil + } else { + // 2014.05 + p.molen = i - p.moi + if !p.setMonth() { + return p, unknownErr(datestr) + } + return p, nil } - return p, nil case dateDigitDotDot: // 03.31.1981 diff --git a/parseany_test.go b/parseany_test.go index fc1938a..d78ef05 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -418,6 
+418,8 @@ var testInputs = []dateTest{ {in: "2013 May 02 11:37:55", out: "2013-05-02 11:37:55 +0000 UTC"}, {in: "2013 June 02 11:37:55", out: "2013-06-02 11:37:55 +0000 UTC"}, {in: "2013 December 02 11:37:55", out: "2013-12-02 11:37:55 +0000 UTC"}, + // https://github.com/araddon/dateparse/issues/143 + {in: "20140722105203.364", out: "2014-07-22 10:52:03.364 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss,000 {in: "2014-05-11 08:20:13,787", out: "2014-05-11 08:20:13.787 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss +0000 From 8f0059d6da414dd3f1a364e452b44dbda4d74b99 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Tue, 12 Dec 2023 23:40:07 -0700 Subject: [PATCH 37/62] Add tests to verify ambiguous cases Test cases now validates the following is true: * Fixed https://github.com/araddon/dateparse/issues/91 * Fixed https://github.com/araddon/dateparse/issues/28 (previous commits already addresses these issues, these tests ensure that these issues remain fixed) --- parseany_test.go | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/parseany_test.go b/parseany_test.go index d78ef05..229efda 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -19,6 +19,7 @@ type dateTest struct { err bool preferDayFirst bool retryAmbiguous bool + expectAmbiguous bool } var testInputs = []dateTest{ @@ -847,21 +848,30 @@ func TestParseLayout(t *testing.T) { var testParseStrict = []dateTest{ // dd-mon-yy 13-Feb-03 - {in: "03-03-14", err: true}, + {in: "03-03-14", err: true, expectAmbiguous: true}, // mm.dd.yyyy - {in: "3.3.2014", err: true}, + {in: "3.3.2014", err: true, expectAmbiguous: true}, // mm.dd.yy - {in: "08.09.71", err: true}, + {in: "08.09.71", err: true, expectAmbiguous: true}, // mm/dd/yyyy - {in: "3/5/2014", err: true}, + {in: "3/5/2014", err: true, expectAmbiguous: true}, // mm/dd/yy - {in: "08/08/71", err: true}, - {in: "8/8/71", err: true}, + {in: "08/08/71", err: true, expectAmbiguous: true}, + {in: "8/8/71", err: true, expectAmbiguous: 
true}, // mm/dd/yy hh:mm:ss - {in: "04/02/2014 04:08:09", err: true}, - {in: "4/2/2014 04:08:09", err: true}, + {in: "04/02/2014 04:08:09", err: true, expectAmbiguous: true}, + {in: "4/2/2014 04:08:09", err: true, expectAmbiguous: true}, {in: `{"hello"}`, err: true}, {in: "2009-08-12T22:15Z"}, + // https://github.com/araddon/dateparse/issues/91 + {in: "3.31.2014", err: true, expectAmbiguous: true}, + {in: "3.3.2014", err: true, expectAmbiguous: true}, + {in: "03.31.2014", err: true, expectAmbiguous: true}, + {in: "08.21.71", err: true, expectAmbiguous: true}, + {in: "3/31/2014", err: true, expectAmbiguous: true}, + {in: "3/3/2014", err: true, expectAmbiguous: true}, + {in: "03/31/2014", err: true, expectAmbiguous: true}, + {in: "08/21/71", err: true, expectAmbiguous: true}, } func TestParseStrict(t *testing.T) { @@ -871,6 +881,9 @@ func TestParseStrict(t *testing.T) { _, err := ParseStrict(th.in) if th.err { assert.NotEqual(t, nil, err) + if th.expectAmbiguous { + assert.Contains(t, err.Error(), ErrAmbiguousMMDD.Error(), "expected ambiguous") + } } else { assert.Equal(t, nil, err) } From 2b3f700718fb8492590ddc035a875d3f7d4cba60 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Wed, 13 Dec 2023 23:58:04 -0700 Subject: [PATCH 38/62] Handle format "date time (MST)" Was unable to handle standalone timezone in parentheses before. Also update tests to indicate expected timezone name for all tests that are parsed in a specific location. 
With updated logic/fixes, add tests to verify: * Fix https://github.com/araddon/dateparse/issues/71 * Fix https://github.com/araddon/dateparse/issues/72 --- parseany.go | 30 ++++++++++++++++++++------- parseany_test.go | 54 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 59 insertions(+), 25 deletions(-) diff --git a/parseany.go b/parseany.go index ab38784..ee3a422 100644 --- a/parseany.go +++ b/parseany.go @@ -106,18 +106,19 @@ const ( timeStart timeWs timeWsAlpha + timeWsAlphaRParen timeWsAlphaWs - timeWsAlphaZoneOffset // 5 + timeWsAlphaZoneOffset // 6 timeWsAlphaZoneOffsetWs timeWsAlphaZoneOffsetWsYear timeWsAlphaZoneOffsetWsExtra timeWsAMPMMaybe - timeWsAMPM // 10 + timeWsAMPM // 11 timeWsOffset - timeWsOffsetWs // 12 + timeWsOffsetWs // 13 timeWsOffsetColonAlpha timeWsOffsetColon - timeWsYear // 15 + timeWsYear // 16 timeOffset timeOffsetColon timeOffsetColonAlpha @@ -1615,6 +1616,7 @@ iterRunes: case timeWsAlpha: // 06:20:00 UTC // 06:20:00 UTC-05 + // 06:20:00 (EST) // timeWsAlphaWs // 17:57:51 MST 2009 // timeWsAlphaZoneOffset @@ -1638,17 +1640,28 @@ iterRunes: } p.stateTime = timeWsAlphaZoneOffset p.offseti = i - case ' ': + case ' ', ')': // 17:57:51 MST 2009 // 17:57:51 MST + // 06:20:00 (EST) p.tzlen = i - p.tzi if p.tzlen == 4 { p.set(p.tzi, " MST") } else if p.tzlen == 3 { p.set(p.tzi, "MST") } - p.stateTime = timeWsAlphaWs - p.yeari = i + 1 + if r == ' ' { + p.stateTime = timeWsAlphaWs + p.yeari = i + 1 + } else { + // 06:20:00 (EST) + // This must be the end of the datetime or the format is unknown + if i+1 == len(p.datestr) { + p.stateTime = timeWsAlphaRParen + } else { + return p, unknownErr(datestr) + } + } } case timeWsAlphaWs: // 17:57:51 MST 2009 @@ -1923,6 +1936,9 @@ iterRunes: return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, string(p.datestr[p.tzi:])) } + case timeWsAlphaRParen: + // continue + case timeWsAlphaWs: p.yearlen = i - p.yeari if !p.setYear() { diff --git 
a/parseany_test.go b/parseany_test.go index 229efda..dbfe149 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -60,7 +60,7 @@ var testInputs = []dateTest{ {in: "Thu May 8 17:57:51 PST 2009", out: "2009-05-08 17:57:51 +0000 UTC", zname: "PST"}, {in: "Thu May 08 17:57:51 PST 2009", out: "2009-05-08 17:57:51 +0000 UTC", zname: "PST"}, {in: "Thu May 08 17:57:51 CEST 2009", out: "2009-05-08 17:57:51 +0000 UTC", zname: "CEST"}, - {in: "Thu May 08 17:57:51 CEST 2009", out: "2009-05-08 15:57:51 +0000 UTC", loc: "Europe/Berlin"}, + {in: "Thu May 08 17:57:51 CEST 2009", out: "2009-05-08 15:57:51 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, {in: "Thu May 08 05:05:07 PST 2009", out: "2009-05-08 05:05:07 +0000 UTC", zname: "PST"}, {in: "Thu May 08 5:5:7 PST 2009", out: "2009-05-08 05:05:07 +0000 UTC", zname: "PST"}, // Day Month dd time @@ -145,7 +145,7 @@ var testInputs = []dateTest{ // RFC1123 = "Mon, 02 Jan 2006 15:04:05 MST" {in: "Fri, 03 Jul 2015 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, {in: "Fri, 03 Jul 2015 08:08:08 CET", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CET"}, - {in: "Fri, 03 Jul 2015 08:08:08 PST", out: "2015-07-03 16:08:08 +0000 UTC", loc: "America/Los_Angeles"}, + {in: "Fri, 03 Jul 2015 08:08:08 PST", out: "2015-07-03 16:08:08 +0000 UTC", loc: "America/Los_Angeles", zname: "PDT"}, {in: "Fri, 03 Jul 2015 08:08:08 PST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "PST"}, {in: "Fri, 03 Jul 2015 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, {in: "Fri, 3 Jul 2015 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, @@ -163,11 +163,11 @@ var testInputs = []dateTest{ // {in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC"}, {in: "Tue, 5 Jul 2017 04:08:03 -0700 (MST)", out: "2017-07-05 11:08:03 +0000 UTC"}, - {in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC", loc: "Europe/Berlin"}, + {in: "Tue, 11 Jul 2017 04:08:03 +0200 
(CEST)", out: "2017-07-11 02:08:03 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, // day, dd-Mon-yy hh:mm:zz TZ {in: "Fri, 03-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, {in: "Fri, 03-Jul-15 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, - {in: "Fri, 03-Jul-15 08:08:08 PST", out: "2015-07-03 16:08:08 +0000 UTC", loc: "America/Los_Angeles"}, + {in: "Fri, 03-Jul-15 08:08:08 PST", out: "2015-07-03 16:08:08 +0000 UTC", loc: "America/Los_Angeles", zname: "PDT"}, {in: "Fri, 03-Jul 2015 08:08:08 PST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "PST"}, {in: "Fri, 03-Jul 2015 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, {in: "Fri, 3-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, @@ -182,7 +182,7 @@ var testInputs = []dateTest{ {in: "Wednesday, 07-May-09 08:00:43 MST", out: "2009-05-07 08:00:43 +0000 UTC", zname: "MST"}, {in: "Wednesday, 07-May-09 08:00:43 CEST", out: "2009-05-07 08:00:43 +0000 UTC", zname: "CEST"}, {in: "Wednesday, 28-Feb-18 09:01:00 MST", out: "2018-02-28 09:01:00 +0000 UTC", zname: "MST"}, - {in: "Wednesday, 28-Feb-18 09:01:00 MST", out: "2018-02-28 16:01:00 +0000 UTC", loc: "America/Denver"}, + {in: "Wednesday, 28-Feb-18 09:01:00 MST", out: "2018-02-28 16:01:00 +0000 UTC", loc: "America/Denver", zname: "MST"}, {in: "Wednesday, 28-Feb-18 09:01:00 CEST", out: "2018-02-28 09:01:00 +0000 UTC", zname: "CEST"}, // with offset then with variations on non-zero filled stuff {in: "Monday, 02 Jan 2006 15:04:05 +0100", out: "2006-01-02 14:04:05 +0000 UTC"}, @@ -421,6 +421,11 @@ var testInputs = []dateTest{ {in: "2013 December 02 11:37:55", out: "2013-12-02 11:37:55 +0000 UTC"}, // https://github.com/araddon/dateparse/issues/143 {in: "20140722105203.364", out: "2014-07-22 10:52:03.364 +0000 UTC"}, + // https://github.com/araddon/dateparse/issues/71 and https://github.com/araddon/dateparse/issues/72 + {in: "2017-12-31T16:00:00Z", out: "2017-12-31 16:00:00 
+0000 UTC", loc: "America/Denver", zname: "UTC"}, + {in: "Jul 9, 2012 at 5:02am (EST)", out: "2012-07-09 05:02:00 +0000 UTC", zname: "EST"}, + {in: "Jul 9, 2012 at 5:02am (EST)", out: "2012-07-09 05:02:00 +0000 UTC", loc: "US/Pacific", zname: "EST"}, + {in: "Jul 9, 2012 at 5:02am (EST)", out: "2012-07-09 10:02:00 +0000 UTC", loc: "America/New_York", zname: "EDT"}, // yyyy-mm-dd hh:mm:ss,000 {in: "2014-05-11 08:20:13,787", out: "2014-05-11 08:20:13.787 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss +0000 @@ -458,10 +463,10 @@ var testInputs = []dateTest{ // yyyy-mm-dd hh:mm:ss +0000 TZ // Golang Native Format {in: "2012-08-03 18:31:59 +0000 UTC", out: "2012-08-03 18:31:59 +0000 UTC", zname: "UTC"}, - {in: "2012-08-03 13:31:59 -0600 MST", out: "2012-08-03 19:31:59 +0000 UTC", loc: "America/Denver"}, + {in: "2012-08-03 13:31:59 -0600 MST", out: "2012-08-03 19:31:59 +0000 UTC", loc: "America/Denver", zname: "MST"}, {in: "2015-02-18 00:12:00 +0000 UTC", out: "2015-02-18 00:12:00 +0000 UTC", zname: "UTC"}, {in: "2015-02-18 00:12:00 +0000 GMT", out: "2015-02-18 00:12:00 +0000 UTC", zname: "GMT"}, - {in: "2015-02-08 03:02:00 +0200 CEST", out: "2015-02-08 01:02:00 +0000 UTC", loc: "Europe/Berlin"}, + {in: "2015-02-08 03:02:00 +0200 CEST", out: "2015-02-08 01:02:00 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, {in: "2015-02-08 03:02:00 +0300 MSK", out: "2015-02-08 00:02:00 +0000 UTC", zname: "MSK"}, {in: "2015-2-08 03:02:00 +0300 MSK", out: "2015-02-08 00:02:00 +0000 UTC", zname: "MSK"}, {in: "2015-02-8 03:02:00 +0300 MSK", out: "2015-02-08 00:02:00 +0000 UTC", zname: "MSK"}, @@ -473,14 +478,16 @@ var testInputs = []dateTest{ {in: "2014-04-26 17:24:37.123456 +0000 UTC", out: "2014-04-26 17:24:37.123456 +0000 UTC", zname: "UTC"}, {in: "2014-04-26 17:24:37.12 +0000 UTC", out: "2014-04-26 17:24:37.12 +0000 UTC", zname: "UTC"}, {in: "2014-04-26 17:24:37.1 +0000 UTC", out: "2014-04-26 17:24:37.1 +0000 UTC", zname: "UTC"}, - {in: "2015-02-08 03:02:00 +0200 CEST m=+0.000000001", out: 
"2015-02-08 01:02:00 +0000 UTC", loc: "Europe/Berlin"}, + {in: "2015-02-08 03:02:00 +0200 CEST m=+0.000000001", out: "2015-02-08 01:02:00 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, {in: "2015-02-08 03:02:00 +0300 MSK m=+0.000000001", out: "2015-02-08 00:02:00 +0000 UTC", zname: "MSK"}, {in: "2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001", out: "2015-02-08 00:02:00.001 +0000 UTC", zname: "MSK"}, // yyyy-mm-dd hh:mm:ss TZ {in: "2012-08-03 18:31:59 UTC", out: "2012-08-03 18:31:59 +0000 UTC", zname: "UTC"}, {in: "2012-08-03 18:31:59 CEST", out: "2012-08-03 18:31:59 +0000 UTC", zname: "CEST"}, {in: "2014-12-16 06:20:00 GMT", out: "2014-12-16 06:20:00 +0000 UTC", zname: "GMT"}, - {in: "2012-08-03 13:31:59 MST", out: "2012-08-03 20:31:59 +0000 UTC", loc: "America/Denver"}, + {in: "2012-08-03 13:31:58 MST", out: "2012-08-03 13:31:58 +0000 UTC", zname: "MST"}, + {in: "2012-08-03 13:31:59 MST", out: "2012-08-03 20:31:59 +0000 UTC", loc: "America/Denver", zname: "MDT"}, + {in: "2012-01-03 13:31:59 MST", out: "2012-01-03 20:31:59 +0000 UTC", loc: "America/Denver", zname: "MST"}, {in: "2012-08-03 18:31:59.257000000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, {in: "2012-08-03 8:1:59.257000000 UTC", out: "2012-08-03 08:01:59.257 +0000 UTC", zname: "UTC"}, {in: "2012-8-03 18:31:59.257000000 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, @@ -493,9 +500,15 @@ var testInputs = []dateTest{ {in: "2014-04-26 17:24:37.12 CEST", out: "2014-04-26 17:24:37.12 +0000 UTC", zname: "CEST"}, {in: "2014-04-26 17:24:37.1 UTC", out: "2014-04-26 17:24:37.1 +0000 UTC", zname: "UTC"}, {in: "2014-04-26 17:24:37.1 CEST", out: "2014-04-26 17:24:37.1 +0000 UTC", zname: "CEST"}, + // Test the capturing of arbitrary time zone names even if we use a different specific location (offset will be zero, but name will be filled in) + {in: "2012-08-03 19:32:59 UTC", out: "2012-08-03 19:32:59 +0000 UTC", loc: "Europe/Berlin", zname: "UTC"}, + {in: "2012-08-03 19:32:59 
CEST", out: "2012-08-03 19:32:59 +0000 UTC", loc: "America/Denver", zname: "CEST"}, + {in: "2014-12-16 07:22:00 GMT", out: "2014-12-16 07:22:00 +0000 UTC", loc: "America/Los_Angeles", zname: "GMT"}, + {in: "2012-08-03 14:32:59 MST", out: "2012-08-03 14:32:59 +0000 UTC", loc: "America/Los_Angeles", zname: "MST"}, // This one is pretty special, it is TIMEZONE based but starts with P to emulate collions with PM {in: "2014-04-26 05:24:37 PST", out: "2014-04-26 05:24:37 +0000 UTC", zname: "PST"}, - {in: "2014-04-26 05:24:37 PST", out: "2014-04-26 13:24:37 +0000 UTC", loc: "America/Los_Angeles"}, + {in: "2014-04-26 05:24:38 PST", out: "2014-04-26 13:24:38 +0000 UTC", loc: "America/Los_Angeles", zname: "PDT"}, + {in: "2014-01-26 05:24:39 PST", out: "2014-01-26 13:24:39 +0000 UTC", loc: "America/Los_Angeles", zname: "PST"}, // yyyy-mm-dd hh:mm:ss+00:00 {in: "2012-08-03 18:31:59+00:00", out: "2012-08-03 18:31:59 +0000 UTC"}, {in: "2017-07-19 03:21:51+00:00", out: "2017-07-19 03:21:51 +0000 UTC"}, @@ -504,15 +517,16 @@ var testInputs = []dateTest{ // dd:mm:yyyy hh:mm:ss+00:00 {in: "08:03:2012 18:31:59+00:00", out: "2012-08-03 18:31:59 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss.000+00:00 PST - {in: "2012-08-03 18:31:59.000+00:00 PST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "America/Los_Angeles"}, - {in: "2012-08-03 18:31:59.000+00:00 CEST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "Europe/Berlin"}, + {in: "2012-08-03 18:31:59.000+00:00 PST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "America/Los_Angeles", zname: "PST"}, + {in: "2012-08-03 18:31:59.000+00:00 CEST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, // yyyy-mm-dd hh:mm:ss +00:00 TZ {in: "2012-08-03 18:31:59 +00:00 UTC", out: "2012-08-03 18:31:59 +0000 UTC", zname: "UTC"}, - {in: "2012-08-03 13:31:51 -07:00 MST", out: "2012-08-03 20:31:51 +0000 UTC", loc: "America/Denver"}, - {in: "2012-08-03 13:31:51 +02:00 CEST", out: "2012-08-03 11:31:51 +0000 UTC", loc: "Europe/Berlin"}, + {in: 
"2012-08-03 13:31:51 -07:00 MST", out: "2012-08-03 20:31:51 +0000 UTC", loc: "America/Denver", zname: "MST"}, + {in: "2012-08-03 13:31:51 +02:00 CEST", out: "2012-08-03 11:31:51 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, {in: "2012-08-03 18:31:59.257000000 +00:00 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, - {in: "2012-08-03 13:31:51.123 -08:00 PST", out: "2012-08-03 21:31:51.123 +0000 UTC", loc: "America/Los_Angeles"}, - {in: "2012-08-03 13:31:51.123 +02:00 CEST", out: "2012-08-03 11:31:51.123 +0000 UTC", loc: "Europe/Berlin"}, + {in: "2012-08-03 13:31:51.123 -08:00 PST", out: "2012-08-03 21:31:51.123 +0000 UTC", loc: "America/Los_Angeles", zname: "PST"}, + {in: "2012-08-03 13:31:51.123 +02:00 CEST", out: "2012-08-03 11:31:51.123 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, + {in: "2012-08-03 13:31:51.123 +02:00 CEST", out: "2012-08-03 11:31:51.123 +0000 UTC", loc: "America/Los_Angeles", zname: "CEST"}, {in: "2012-08-03 8:1:59.257000000 +00:00 UTC", out: "2012-08-03 08:01:59.257 +0000 UTC", zname: "UTC"}, {in: "2012-08-03 8:1:59.257000000 +00:00 CEST", out: "2012-08-03 08:01:59.257 +0000 UTC", zname: "CEST"}, {in: "2012-8-03 18:31:59.257000000 +00:00 UTC", out: "2012-08-03 18:31:59.257 +0000 UTC", zname: "UTC"}, @@ -613,8 +627,8 @@ var testInputs = []dateTest{ {in: "190910 11:51:49", out: "2019-09-10 11:51:49 +0000 UTC"}, // all digits: unix secs, ms etc - {in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC"}, - {in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC", loc: "America/Denver"}, + {in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC", zname: "UTC"}, + {in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC", loc: "America/Denver", zname: "MDT"}, {in: "1384216367111", out: "2013-11-12 00:32:47.111 +0000 UTC"}, {in: "1384216367111222", out: "2013-11-12 00:32:47.111222 +0000 UTC"}, {in: "1384216367111222333", out: "2013-11-12 00:32:47.111222333 +0000 UTC"}, @@ -1040,3 +1054,7 @@ func TestRetryAmbiguousDateWithSwap(t 
*testing.T) { assert.Equal(t, nil, err) assert.Equal(t, "2014-02-13 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } + +func TestDebug(t *testing.T) { + MustParse("Jul 9, 2012 at 5:02am (EST)") +} From d05b099ca64f4555c5abd252b074e938329af6af Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Thu, 14 Dec 2023 00:00:36 -0700 Subject: [PATCH 39/62] Add better timezone explanation to README.md How golang parses date strings with respect to time zones and locations can be really confusing. Document the key points that need to be understood to properly interpret the results of parsing arbitrary date strings, which may or may not have explicit time zone name or offset information include the parsed date string. --- README.md | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fe682dd..a38646d 100644 --- a/README.md +++ b/README.md @@ -9,10 +9,7 @@ Parse many date strings without knowing format in advance. Uses a scanner to re [![Build Status](https://travis-ci.org/araddon/dateparse.svg?branch=master)](https://travis-ci.org/araddon/dateparse) [![Go ReportCard](https://goreportcard.com/badge/araddon/dateparse)](https://goreportcard.com/report/araddon/dateparse) -**MM/DD/YYYY VS DD/MM/YYYY** Right now this uses mm/dd/yyyy WHEN ambiguous if this is not desired behavior, use `ParseStrict` which will fail on ambiguous date strings. - -**Timezones** The location your server is configured affects the results! See example or https://play.golang.org/p/IDHRalIyXh and last paragraph here https://golang.org/pkg/time/#Parse. - +**MM/DD/YYYY VS DD/MM/YYYY** Right now this uses mm/dd/yyyy WHEN ambiguous if this is not desired behavior, use `ParseStrict` which will fail on ambiguous date strings. This can be adjusted using the `PreferMonthFirst` parser option. 
```go @@ -29,6 +26,20 @@ layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM") ``` +Timezone Considerations +---------------------------------- + +**Timezones** The location your server is configured with affects the results! See example or https://play.golang.org/p/IDHRalIyXh and last paragraph here https://golang.org/pkg/time/#Parse. + +Important points to understand: +* If you are parsing a date string that does *not* reference a timezone, `Parse` will assume UTC, and `ParseIn` will use the specified location. +* If you are parsing a date string that *does* reference a timezone and *does* specify an explicit offset (e.g., `2012-08-03 13:31:59 -0600 MST`), then it will return a time object with a location that represents a fixed timezone that has the given offset and name (it will not validate that the timezone abbreviation specified in the date string is a potentially valid match for the given offset). + * This can lead to some potentially unexpected results. For example, consider the date string `2012-08-03 18:31:59.000+00:00 PST` -- this string has an explicit offset of `+00:00` (UTC), and so the returned time will have a location with a zero offset (18:31:59.000 UTC) even though the name of the fixed time zone associated with the returned time is `PST`.
Essentially, it will always prioritize an explicit offset as accurate over an explicit timezone name. +* If you are parsing a date string that *does* reference a timezone but *without* an explicit offset (e.g., `2012-08-03 14:32:59 MST`), then it will only recognize and map the timezone name and add an offset if you are using `ParseIn` and specify a location that knows about the given time zone abbreviation (e.g., in this example, you would need to pass the `America/Denver` location and it will recognize the `MST` and `MDT` time zone names). + * If a time zone abbreviation is recognized based on the passed location, then it will use the appropriate offset, and make any appropriate adjustment for daylight saving time (e.g., in the above example, the parsed time would actually contain a zone name of `MDT` because the date is within the range when daylight saving time is active). + * If a time zone abbreviation is *not* recognized for the passed location, then it will create a fake time zone with a *zero* offset but with the specified name. This requires further processing if you are trying to actually get the correct absolute time in the UTC time zone. + * If you receive a parsed time that has a zero offset but a non-UTC timezone name, then you should use a method to map the (sometimes ambiguous) timezone name (e.g., `"EET"`) into a location name (e.g., `"Africa/Cairo"` or `"Europe/Bucharest"`), and then reconstruct a new time object with the same date/time/nanosecond but with the properly mapped location. (Do not use the `Time.In` method to convert it to the new location, as this will treat the original time as if it was in UTC with a zero offset -- you need to reconstruct the time as if it was constructed with the proper location in the first place.)
+ cli tool for testing dateformats ---------------------------------- From 14fb9398e4a3ed137e6135c142b4db6cbdce8422 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Thu, 14 Dec 2023 22:57:42 -0700 Subject: [PATCH 40/62] Fix parsing for format (time) UTC[+-]NNNN Fixes https://github.com/araddon/dateparse/issues/158 --- parseany.go | 7 ++++++- parseany_test.go | 18 +++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/parseany.go b/parseany.go index ee3a422..0e633e4 100644 --- a/parseany.go +++ b/parseany.go @@ -1627,7 +1627,12 @@ iterRunes: // 15:44:11 UTC+0100 2015 switch r { case '+', '-': - if p.datestr[p.tzi:i] == "GMT" { + tzNameLower := strings.ToLower(p.datestr[p.tzi:i]) + if tzNameLower == "gmt" || tzNameLower == "utc" { + // This is a special form where the actual timezone isn't UTC, but is rather + // specifying that the correct offset is a specified numeric offset from UTC: + // 06:20:00 UTC-05 + // 06:20:00 GMT+02 p.tzi = 0 p.tzlen = 0 } else { diff --git a/parseany_test.go b/parseany_test.go index dbfe149..41cb6d8 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -72,6 +72,8 @@ var testInputs = []dateTest{ // ?? 
{in: "Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)", out: "2015-07-03 17:04:07 +0000 UTC"}, {in: "Fri Jul 3 2015 06:04:07 GMT+0100 (GMT Daylight Time)", out: "2015-07-03 05:04:07 +0000 UTC"}, + {in: "Fri Jul 03 2015 18:04:07 UTC+0100 (GMT Daylight Time)", out: "2015-07-03 17:04:07 +0000 UTC"}, + {in: "Fri Jul 3 2015 06:04:07 UTC+0100 (GMT Daylight Time)", out: "2015-07-03 05:04:07 +0000 UTC"}, {in: "Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time)", out: "2015-07-03 13:04:07 +0000 UTC", zname: "PST"}, {in: "Fri Jul 3 2015 06:04:07 CEST-0700 (Central European Summer Time)", out: "2015-07-03 13:04:07 +0000 UTC", zname: "CEST"}, // Month dd, yyyy at time @@ -87,7 +89,7 @@ var testInputs = []dateTest{ {in: "OCTober 17, 2012 at 18:17:16", out: "2012-10-17 18:17:16 +0000 UTC"}, {in: "noVEMBER 17, 2012 at 18:17:16", out: "2012-11-17 18:17:16 +0000 UTC"}, {in: "December 17, 2012 at 18:17:16", out: "2012-12-17 18:17:16 +0000 UTC"}, - {in: "September 17, 2012 at 5:00pm UTC-05", out: "2012-09-17 17:00:00 +0000 UTC", zname: "UTC"}, + {in: "September 17, 2012 at 5:00pm UTC-05", out: "2012-09-17 22:00:00 +0000 UTC", zname: ""}, // empty zone name, special case of UTC+NNNN {in: "September 17, 2012 at 10:09am PST-08", out: "2012-09-17 18:09:00 +0000 UTC", zname: "PST"}, {in: "September 17, 2012 at 10:09am CEST+02", out: "2012-09-17 08:09:00 +0000 UTC", zname: "CEST"}, {in: "September 17, 2012, 10:10:09", out: "2012-09-17 10:10:09 +0000 UTC"}, @@ -95,16 +97,16 @@ var testInputs = []dateTest{ {in: "May 17, 2012 AT 10:09am PST-08", out: "2012-05-17 18:09:00 +0000 UTC", zname: "PST"}, {in: "May 17, 2012 AT 10:09am CEST+02", out: "2012-05-17 08:09:00 +0000 UTC", zname: "CEST"}, // Month dd, yyyy time - {in: "September 17, 2012 5:00pm UTC-05", out: "2012-09-17 17:00:00 +0000 UTC", zname: "UTC"}, + {in: "September 17, 2012 5:00pm UTC-05", out: "2012-09-17 22:00:00 +0000 UTC", zname: ""}, {in: "September 17, 2012 10:09am PST-08", out: "2012-09-17 18:09:00 +0000 UTC", 
zname: "PST"}, {in: "September 17, 2012 10:09am CEST+02", out: "2012-09-17 08:09:00 +0000 UTC", zname: "CEST"}, {in: "September 17, 2012 09:01:00", out: "2012-09-17 09:01:00 +0000 UTC"}, // Month dd yyyy time - {in: "September 17 2012 5:00pm UTC-05", out: "2012-09-17 17:00:00 +0000 UTC", zname: "UTC"}, - {in: "September 17 2012 5:00pm UTC-0500", out: "2012-09-17 17:00:00 +0000 UTC", zname: "UTC"}, + {in: "September 17 2012 5:00pm UTC-05", out: "2012-09-17 22:00:00 +0000 UTC", zname: ""}, + {in: "September 17 2012 5:00pm UTC-0500", out: "2012-09-17 22:00:00 +0000 UTC", zname: ""}, {in: "September 17 2012 10:09am PST-08", out: "2012-09-17 18:09:00 +0000 UTC", zname: "PST"}, {in: "September 17 2012 10:09am CEST+02", out: "2012-09-17 08:09:00 +0000 UTC", zname: "CEST"}, - {in: "September 17 2012 5:00PM UTC-05", out: "2012-09-17 17:00:00 +0000 UTC", zname: "UTC"}, + {in: "September 17 2012 5:00PM UTC-05", out: "2012-09-17 22:00:00 +0000 UTC", zname: ""}, {in: "September 17 2012 10:09AM PST-08", out: "2012-09-17 18:09:00 +0000 UTC", zname: "PST"}, {in: "September 17 2012 10:09AM CEST+02", out: "2012-09-17 08:09:00 +0000 UTC", zname: "CEST"}, {in: "September 17 2012 09:01:00", out: "2012-09-17 09:01:00 +0000 UTC"}, @@ -178,6 +180,7 @@ var testInputs = []dateTest{ {in: "Fri, 03-Jul-15 8:8:8 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, // day, dd-Mon-yy hh:mm:zz TZ (text) https://github.com/araddon/dateparse/issues/116 {in: "Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00)", out: "2021-01-02 16:12:23 +0000 UTC"}, + {in: "Sun, 3 Jan 2021 00:12:23 +0800 (UTC+08:00)", out: "2021-01-02 16:12:23 +0000 UTC"}, // RFC850 = "Monday, 02-Jan-06 15:04:05 MST" {in: "Wednesday, 07-May-09 08:00:43 MST", out: "2009-05-07 08:00:43 +0000 UTC", zname: "MST"}, {in: "Wednesday, 07-May-09 08:00:43 CEST", out: "2009-05-07 08:00:43 +0000 UTC", zname: "CEST"}, @@ -378,6 +381,8 @@ var testInputs = []dateTest{ // https://github.com/araddon/dateparse/issues/157 {in: "Thu Jan 28 2021 15:28:21 
GMT+0000 (Coordinated Universal Time)", out: "2021-01-28 15:28:21 +0000 UTC"}, {in: "Thu Jan 28 2021 15:28:21 GMT+0100 (Coordinated Universal Time)", out: "2021-01-28 14:28:21 +0000 UTC"}, + {in: "Thu Jan 28 2021 15:28:21 UTC+0000 (Coordinated Universal Time)", out: "2021-01-28 15:28:21 +0000 UTC"}, + {in: "Thu Jan 28 2021 15:28:21 UTC+0100 (Coordinated Universal Time)", out: "2021-01-28 14:28:21 +0000 UTC"}, // https://github.com/araddon/dateparse/issues/130 {in: "1985-04-12T23:20:50Z", out: "1985-04-12 23:20:50 +0000 UTC"}, {in: "1985-04-12T23:20:50.52Z", out: "1985-04-12 23:20:50.52 +0000 UTC"}, @@ -635,7 +640,9 @@ var testInputs = []dateTest{ {in: "Wed, 8 Feb 2023 19:00:46 +1100 (AEDT)", out: "2023-02-08 08:00:46 +0000 UTC"}, {in: "FRI, 16 AUG 2013 9:39:51 +1000", out: "2013-08-15 23:39:51 +0000 UTC"}, + // https://github.com/araddon/dateparse/issues/158 {in: "Mon, 1 Dec 2008 14:48:22 GMT-07:00", out: "2008-12-01 21:48:22 +0000 UTC"}, + {in: "Mon, 1 Dec 2008 14:48:22 UTC-07:00", out: "2008-12-01 21:48:22 +0000 UTC"}, } func TestParse(t *testing.T) { @@ -1055,6 +1062,7 @@ func TestRetryAmbiguousDateWithSwap(t *testing.T) { assert.Equal(t, "2014-02-13 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } +// Convenience function for debugging a particular broken test case func TestDebug(t *testing.T) { MustParse("Jul 9, 2012 at 5:02am (EST)") } From 23869f345e7d53d57deb4b1c2fb3862ba80c602e Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Thu, 14 Dec 2023 23:14:26 -0700 Subject: [PATCH 41/62] Add support for mm/dd/yyyy, hh:mm:ss Incorporate PR https://github.com/araddon/dateparse/pull/156 from https://github.com/BrianLeishman and adapt to also validate the format --- parseany.go | 11 ++++++++--- parseany_test.go | 8 ++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/parseany.go b/parseany.go index 0e633e4..5c6abb1 100644 --- a/parseany.go +++ b/parseany.go @@ -254,14 +254,14 @@ func parseTime(datestr string, loc *time.Location, opts 
...ParserOption) (p *par if p != nil && p.ambiguousMD { // if it errors out with the following error, swap before we // get out of this function to reduce scope it needs to be applied on - _, err := p.parse() + _, err = p.parse() if err != nil && strings.Contains(err.Error(), "month out of range") { // create the option to reverse the preference preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst) // turn off the retry to avoid endless recursion retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false) modifiedOpts := append(opts, preferMonthFirst, retryAmbiguousDateWithSwap) - p, _ = parseTime(datestr, time.Local, modifiedOpts...) + p, err = parseTime(datestr, time.Local, modifiedOpts...) } } @@ -684,6 +684,7 @@ iterRunes: case dateDigitSlash: // 03/19/2012 10:11:59 // 04/2/2014 03:00:37 + // 04/2/2014, 03:00:37 // 3/1/2012 10:11:59 // 4/8/2014 22:05 // 3/1/2014 @@ -713,10 +714,14 @@ iterRunes: } // Note no break, we are going to pass by and re-enter this dateDigitSlash // and look for ending (space) or not (just date) - case ' ': + case ' ', ',': p.stateTime = timeStart if p.yearlen == 0 { p.yearlen = i - p.yeari + if r == ',' { + // skip the comma + i++ + } if !p.setYear() { return p, unknownErr(datestr) } diff --git a/parseany_test.go b/parseany_test.go index 41cb6d8..dccdca2 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -9,8 +9,7 @@ import ( ) func TestOne(t *testing.T) { - time.Local = time.UTC - var ts = MustParse("2020-07-20+08:00") + ts := MustParse("2020-07-20+08:00") assert.Equal(t, "2020-07-19 16:00:00 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } @@ -431,6 +430,11 @@ var testInputs = []dateTest{ {in: "Jul 9, 2012 at 5:02am (EST)", out: "2012-07-09 05:02:00 +0000 UTC", zname: "EST"}, {in: "Jul 9, 2012 at 5:02am (EST)", out: "2012-07-09 05:02:00 +0000 UTC", loc: "US/Pacific", zname: "EST"}, {in: "Jul 9, 2012 at 5:02am (EST)", out: "2012-07-09 10:02:00 +0000 UTC", loc: "America/New_York", zname: "EDT"}, + // 
https://github.com/araddon/dateparse/pull/156 + {in: "04/02/2014, 04:08:09", out: "2014-04-02 04:08:09 +0000 UTC"}, + {in: "4/2/2014, 04:08:09", out: "2014-04-02 04:08:09 +0000 UTC"}, + {in: "04/02/2014, 04:08 AM", out: "2014-04-02 04:08:00 +0000 UTC"}, + {in: "04/02/2014, 04:08 PM", out: "2014-04-02 16:08:00 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss,000 {in: "2014-05-11 08:20:13,787", out: "2014-05-11 08:20:13.787 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss +0000 From cc63421875f601bc3f15abc2e64e65f912157394 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Thu, 14 Dec 2023 23:47:31 -0700 Subject: [PATCH 42/62] Support times after yyyy.mm.dd dates Fix for this bug mentioned in https://github.com/araddon/dateparse/pull/134 Also, the other cases mentioned in this PR are not valid formats, so add them to the TestParseErrors test, to document that this is expected. --- parseany.go | 74 ++++++++++++++++++++++++++++++++++++++++++++---- parseany_test.go | 14 +++++++++ 2 files changed, 83 insertions(+), 5 deletions(-) diff --git a/parseany.go b/parseany.go index 5c6abb1..022afae 100644 --- a/parseany.go +++ b/parseany.go @@ -69,18 +69,21 @@ const ( dateDigitDashDigitDash dateDigitDot dateDigitDotDot + dateDigitDotDotWs + dateDigitDotDotT + dateDigitDotDotOffset dateDigitSlash dateDigitYearSlash - dateDigitSlashAlpha // 18 + dateDigitSlashAlpha // 21 dateDigitColon dateDigitChineseYear dateDigitChineseYearWs dateDigitWs - dateDigitWsMoYear // 23 + dateDigitWsMoYear // 26 dateAlpha dateAlphaWs dateAlphaWsDigit - dateAlphaWsDigitMore // 27 + dateAlphaWsDigitMore // 30 dateAlphaWsDigitMoreWs dateAlphaWsDigitMoreWsYear dateAlphaWsMonth @@ -90,7 +93,7 @@ const ( dateAlphaWsMore dateAlphaWsAtTime dateAlphaWsAlpha - dateAlphaWsAlphaYearmaybe // 37 + dateAlphaWsAlphaYearmaybe // 40 dateAlphaPeriodWsDigit dateAlphaSlash dateAlphaSlashDigit @@ -924,8 +927,52 @@ iterRunes: p.stateDate = dateDigitDotDot } } + case dateDigitDotDot: - // iterate all the way through + // dateYearDashDashT + // 
2006.01.02T15:04:05Z07:00 + // dateYearDashDashWs + // 2013.04.01 22:43:22 + // dateYearDashDashOffset + // 2020.07.20+00:00 + switch r { + case '+', '-': + p.offseti = i + p.daylen = i - p.dayi + p.stateDate = dateDigitDotDotOffset + if !p.setDay() { + return p, unknownErr(datestr) + } + case ' ': + p.daylen = i - p.dayi + p.stateDate = dateDigitDotDotWs + p.stateTime = timeStart + if !p.setDay() { + return p, unknownErr(datestr) + } + break iterRunes + case 'T': + p.daylen = i - p.dayi + p.stateDate = dateDigitDotDotT + p.stateTime = timeStart + if !p.setDay() { + return p, unknownErr(datestr) + } + break iterRunes + } + + case dateDigitDotDotT: + // dateYearDashDashT + // 2006-01-02T15:04:05Z07:00 + // 2020-08-17T17:00:00:000+0100 + + case dateDigitDotDotOffset: + // 2020-07-20+00:00 + switch r { + case ':': + p.set(p.offseti, "-07:00") + } + case dateAlpha: // dateAlphaWS // Mon Jan _2 15:04:05 2006 @@ -2177,6 +2224,23 @@ iterRunes: // 2018.09.30 return p, nil + case dateDigitDotDotWs: + // 2013.04.01 + return p, nil + + case dateDigitDotDotT: + return p, nil + + case dateDigitDotDotOffset: + // 2020.07.20+00:00 + switch len(p.datestr) - p.offseti { + case 5: + p.set(p.offseti, "-0700") + case 6: + p.set(p.offseti, "-07:00") + } + return p, nil + case dateDigitWsMoYear: // 2 Jan 2018 // 2 Jan 18 diff --git a/parseany_test.go b/parseany_test.go index dccdca2..bf17542 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -647,6 +647,16 @@ var testInputs = []dateTest{ // https://github.com/araddon/dateparse/issues/158 {in: "Mon, 1 Dec 2008 14:48:22 GMT-07:00", out: "2008-12-01 21:48:22 +0000 UTC"}, {in: "Mon, 1 Dec 2008 14:48:22 UTC-07:00", out: "2008-12-01 21:48:22 +0000 UTC"}, + + // Fixes for bugs mentioned in https://github.com/araddon/dateparse/pull/134 + {in: "2014.02.13", out: "2014-02-13 00:00:00 +0000 UTC"}, + {in: "2014-02-13 00:00:00", out: "2014-02-13 00:00:00 +0000 UTC"}, + {in: "2014.02.13 00:00:00", out: "2014-02-13 00:00:00 +0000 UTC"}, + {in: 
"2014.02.13 08:33:44", out: "2014-02-13 08:33:44 +0000 UTC"}, + {in: "2014.02.13T08:33:44", out: "2014-02-13 08:33:44 +0000 UTC"}, + {in: "2014.02.13T08:33:44.555", out: "2014-02-13 08:33:44.555 +0000 UTC"}, + {in: "2014.02.13T08:33:44.555 PM -0700 MST", out: "2014-02-14 03:33:44.555 +0000 UTC", zname: "MST"}, + {in: "2014.02.13-0200", out: "2014-02-13 02:00:00 +0000 UTC"}, } func TestParse(t *testing.T) { @@ -802,6 +812,10 @@ var testParseErrors = []dateTest{ {in: "2018-09-30 08:09:13.123AM am", err: true}, {in: "2018-09-30 08:09:13.123 am AM", err: true}, {in: "2018-09-30 08:09:13.123 AMDT am", err: true}, + // https://github.com/araddon/dateparse/pull/134 + {in: "2014-02-13 00:00:00 utc", err: true}, // lowercase timezones are not valid + {in: "2014-02-13t00:00:00.0z", err: true}, // lowercase 't' separator is not supported + {in: "2014-02-13T00:00:00.0z", err: true}, // lowercase 'z' zulu timezone indicator not a valid format } func TestParseErrors(t *testing.T) { From 18ec8c69f6c7f0a363d0cde8f541d4b3d71b5113 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Fri, 15 Dec 2023 17:14:03 -0700 Subject: [PATCH 43/62] Expand Chinese date format support Inspired by https://github.com/araddon/dateparse/pull/132 from https://github.com/xwjdsh -- made this more general to all time formats that could follow, and added format validation. 
Also include the related README.md touchup from https://github.com/araddon/dateparse/pull/136 --- README.md | 2 +- parseany.go | 29 ++++++++++++++++++++++++++--- parseany_test.go | 10 +++++++++- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a38646d..0ca7b6a 100644 --- a/README.md +++ b/README.md @@ -285,7 +285,7 @@ func main() { | 2014:4:02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | | 2012:03:19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | | 2012:03:19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | +| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | | 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC | | 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 | | 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC | diff --git a/parseany.go b/parseany.go index 022afae..08b4450 100644 --- a/parseany.go +++ b/parseany.go @@ -454,6 +454,11 @@ iterRunes: case '年': // Chinese Year p.stateDate = dateDigitChineseYear + p.yearlen = i - 2 + p.moi = i + 1 + if !p.setYear() { + return p, unknownErr(datestr) + } case ',': return p, unknownErr(datestr) case 's', 'S', 'r', 'R', 't', 'T', 'n', 'N': @@ -889,8 +894,26 @@ iterRunes: // 2014年04月08日 // weekday %Y年%m月%e日 %A %I:%M %p // 2013年07月18日 星期四 10:27 上午 - if r == ' ' { + switch r { + case '月': + // month + p.molen = i - p.moi - 2 + p.dayi = i + 1 + if !p.setMonth() { + return p, unknownErr(datestr) + } + case '日': + // day + p.daylen = i - p.dayi - 2 + if !p.setDay() { + return p, unknownErr(datestr) + } + case ' ': + if p.daylen <= 0 { + return p, unknownErr(datestr) + } p.stateDate = dateDigitChineseYearWs + p.stateTime = timeStart break iterRunes } case dateDigitDot: @@ -2305,11 +2328,11 @@ iterRunes: case dateDigitChineseYear: // dateDigitChineseYear // 2014年04月08日 - p.setEntireFormat([]byte("2006年01月02日")) + // 2014年4月12日 return p, nil case dateDigitChineseYearWs: - p.setEntireFormat([]byte("2006年01月02日 
15:04:05")) + // 2014年04月08日 00:00:00 ... return p, nil case dateAlphaSlashDigitSlash: diff --git a/parseany_test.go b/parseany_test.go index bf17542..c786cf4 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -216,9 +216,17 @@ var testInputs = []dateTest{ // 03 February 2013 {in: "03 February 2013", out: "2013-02-03 00:00:00 +0000 UTC"}, {in: "3 February 2013", out: "2013-02-03 00:00:00 +0000 UTC"}, - // Chinese 2014年04月18日 + // Chinese 2014年04月18日 - https://github.com/araddon/dateparse/pull/132 {in: "2014年04月08日", out: "2014-04-08 00:00:00 +0000 UTC"}, + {in: "2014年4月8日", out: "2014-04-08 00:00:00 +0000 UTC"}, {in: "2014年04月08日 19:17:22", out: "2014-04-08 19:17:22 +0000 UTC"}, + {in: "2014年04月08日 19:17:22 MDT", out: "2014-04-08 19:17:22 +0000 UTC", zname: "MDT"}, + {in: "2014年04月08日 19:17:22 MDT-0700", out: "2014-04-09 02:17:22 +0000 UTC", zname: "MDT"}, + {in: "2014年4月8日 19:17:22", out: "2014-04-08 19:17:22 +0000 UTC"}, + {in: "2014年4月8日 19:17:22 MDT", out: "2014-04-08 19:17:22 +0000 UTC", zname: "MDT"}, + {in: "2014年4月8日 19:17:22 MDT-0700", out: "2014-04-09 02:17:22 +0000 UTC", zname: "MDT"}, + {in: "2014年4月8日 10:17pm", out: "2014-04-08 22:17:00 +0000 UTC"}, + // TODO: support Chinese AM (上午) and PM (下午) indicators // mm/dd/yyyy {in: "03/31/2014", out: "2014-03-31 00:00:00 +0000 UTC"}, {in: "3/31/2014", out: "2014-03-31 00:00:00 +0000 UTC"}, From 249dd7368c9c48c25f4a2d7940cef08bc727574e Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Fri, 15 Dec 2023 17:42:07 -0700 Subject: [PATCH 44/62] Support git log format (Thu Apr 7 15:13:13 2005 -0700) Adapt commit https://github.com/araddon/dateparse/pull/92/commits/99d9682a1cbe7a14975b5b71704af8847c2684f9 from https://github.com/araddon/dateparse/pull/92 by https://github.com/jiangxin (merge timeWsYearOffset case and validate format) --- parseany.go | 30 ++++++++++++++++++++++++++---- parseany_test.go | 5 ++++- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/parseany.go b/parseany.go index 
08b4450..b2f11aa 100644 --- a/parseany.go +++ b/parseany.go @@ -122,6 +122,7 @@ const ( timeWsOffsetColonAlpha timeWsOffsetColon timeWsYear // 16 + timeWsYearOffset timeOffset timeOffsetColon timeOffsetColonAlpha @@ -997,13 +998,14 @@ iterRunes: } case dateAlpha: - // dateAlphaWS + // dateAlphaWs // Mon Jan _2 15:04:05 2006 // Mon Jan _2 15:04:05 MST 2006 // Mon Jan 02 15:04:05 -0700 2006 + // Mon Jan 02 15:04:05 2006 -0700 // Mon Aug 10 15:44:11 UTC+0100 2015 // Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) - // dateAlphaWSDigit + // dateAlphaWsDigit // May 8, 2009 5:57:51 PM // oct 1, 1970 // dateAlphaWsMonth @@ -1135,6 +1137,7 @@ iterRunes: // Mon Jan _2 15:04:05 2006 // Mon Jan _2 15:04:05 MST 2006 // Mon Jan 02 15:04:05 -0700 2006 + // Mon Jan 02 15:04:05 2006 -0700 // Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) // Mon Aug 10 15:44:11 UTC+0100 2015 // dateAlphaWsDigit @@ -1663,7 +1666,9 @@ iterRunes: // timeWsOffsetColonAlpha // 00:12:00 +00:00 UTC // timeWsYear - // 00:12:00 2008 + // 00:12:00 2008 + // timeWsYearOffset + // 00:12:00 2008 -0700 // timeZ // 15:04:05.99Z switch r { @@ -1688,6 +1693,23 @@ iterRunes: p.yeari = i } } + case timeWsYear: + // timeWsYearOffset + // 00:12:00 2008 -0700 + switch r { + case ' ': + p.yearlen = i - p.yeari + if !p.setYear() { + return p, unknownErr(datestr) + } + case '+', '-': + p.offseti = i + p.stateTime = timeWsYearOffset + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } + } case timeWsAlpha: // 06:20:00 UTC // 06:20:00 UTC-05 @@ -2049,7 +2071,7 @@ iterRunes: if p.mslen >= 10 { return p, fmt.Errorf("fractional seconds in %q too long near %q", datestr, string(p.datestr[p.msi:p.mslen])) } - case timeOffset, timeWsOffset: + case timeOffset, timeWsOffset, timeWsYearOffset: switch len(p.datestr) - p.offseti { case 3: // 19:55:00+01 (or 19:55:00 +01) diff --git a/parseany_test.go b/parseany_test.go index c786cf4..57a3998 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -443,6 
+443,9 @@ var testInputs = []dateTest{ {in: "4/2/2014, 04:08:09", out: "2014-04-02 04:08:09 +0000 UTC"}, {in: "04/02/2014, 04:08 AM", out: "2014-04-02 04:08:00 +0000 UTC"}, {in: "04/02/2014, 04:08 PM", out: "2014-04-02 16:08:00 +0000 UTC"}, + // Git log default date format - https://github.com/araddon/dateparse/pull/92 + {in: "Thu Apr 7 15:13:13 2005 -0700", out: "2005-04-07 22:13:13 +0000 UTC"}, + {in: "Tue Dec 12 23:07:11 2023 -0700", out: "2023-12-13 06:07:11 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss,000 {in: "2014-05-11 08:20:13,787", out: "2014-05-11 08:20:13.787 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss +0000 @@ -1090,5 +1093,5 @@ func TestRetryAmbiguousDateWithSwap(t *testing.T) { // Convenience function for debugging a particular broken test case func TestDebug(t *testing.T) { - MustParse("Jul 9, 2012 at 5:02am (EST)") + MustParse("Tue Dec 12 23:07:11 2023 -0700") } From 0c3943eacdeb8c201d95dc20b7bad09f28f5a3b3 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Fri, 15 Dec 2023 20:22:47 -0700 Subject: [PATCH 45/62] Support RabbitMQ log format (dd-mon-yyyy::hh:mm:ss) Adapt https://github.com/araddon/dateparse/pull/122 by https://github.com/bizy01 to add support for RMQ log format. Refactor to avoid redundant code. Add format validations. As a side note, will also support the format dd-mm-yyyy:hh:mm:ss. 
--- parseany.go | 93 ++++++++++++++++++++++++++++++++---------------- parseany_test.go | 9 ++++- 2 files changed, 71 insertions(+), 31 deletions(-) diff --git a/parseany.go b/parseany.go index b2f11aa..03cdad7 100644 --- a/parseany.go +++ b/parseany.go @@ -598,44 +598,76 @@ iterRunes: // 13-Feb-03 ambiguous // 28-Feb-03 ambiguous // 29-Jun-2016 dd-month(alpha)-yyyy + // 8-Mar-2018:: // dateDigitDashDigitDash: // 29-06-2026 + // 08-03-18:: ambiguous (dd-mm-yy or yy-mm-dd) switch r { - case ' ': - // we need to find if this was 4 digits, aka year - // or 2 digits which makes it ambiguous year/day - length := i - (p.moi + p.molen + 1) - if length == 4 { - p.yearlen = 4 - p.set(p.yeari, "2006") - // We now also know that part1 was the day - p.dayi = 0 - p.daylen = p.part1Len - if !p.setDay() { - return p, unknownErr(datestr) + case ' ', ':': + doubleColonTimeConnector := false + if r == ':' { + p.link++ + if p.link == 2 { + if i+1 < len(p.datestr) { + // only legitimate content to follow "::" is the start of the time + nextChar, _ := utf8.DecodeRuneInString(p.datestr[i+1:]) + if unicode.IsDigit(nextChar) { + doubleColonTimeConnector = true + } + } + if !doubleColonTimeConnector { + return p, unknownErr(datestr) + } } - } else if length == 2 { - // We have no idea if this is - // yy-mon-dd OR dd-mon-yy - // (or for dateDigitDashDigitDash, yy-mm-dd OR dd-mm-yy) - // - // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy), - // which is a horrible assumption, but seems to be the convention for - // dates that are formatted in this way. 
- p.ambiguousMD = true - p.yearlen = 2 - p.set(p.yeari, "06") - // We now also know that part1 was the day - p.dayi = 0 - p.daylen = p.part1Len - if !p.setDay() { + } else if p.link > 0 { + return p, unknownErr(datestr) + } + if r == ' ' || doubleColonTimeConnector { + // we need to find if this was 4 digits, aka year + // or 2 digits which makes it ambiguous year/day + var sepLen int + if doubleColonTimeConnector { + sepLen = 2 + } else { + sepLen = 1 + } + length := i - (p.moi + p.molen + sepLen) + if length == 4 { + p.yearlen = 4 + p.set(p.yeari, "2006") + // We now also know that part1 was the day + p.dayi = 0 + p.daylen = p.part1Len + if !p.setDay() { + return p, unknownErr(datestr) + } + } else if length == 2 { + // We have no idea if this is + // yy-mon-dd OR dd-mon-yy + // (or for dateDigitDashDigitDash, yy-mm-dd OR dd-mm-yy) + // + // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy), + // which is a horrible assumption, but seems to be the convention for + // dates that are formatted in this way. 
+ p.ambiguousMD = true + p.yearlen = 2 + p.set(p.yeari, "06") + // We now also know that part1 was the day + p.dayi = 0 + p.daylen = p.part1Len + if !p.setDay() { + return p, unknownErr(datestr) + } + } else { return p, unknownErr(datestr) } - } else { + p.stateTime = timeStart + break iterRunes + } + default: + if !unicode.IsDigit(r) && !unicode.IsLetter(r) && p.link > 0 { return p, unknownErr(datestr) } - p.stateTime = timeStart - break iterRunes } case dateDigitYearSlash: @@ -2397,6 +2429,7 @@ type parser struct { fullMonth string parsedAMPM bool skip int + link int extra int part1Len int yeari int diff --git a/parseany_test.go b/parseany_test.go index 57a3998..9c9b6fa 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -446,6 +446,9 @@ var testInputs = []dateTest{ // Git log default date format - https://github.com/araddon/dateparse/pull/92 {in: "Thu Apr 7 15:13:13 2005 -0700", out: "2005-04-07 22:13:13 +0000 UTC"}, {in: "Tue Dec 12 23:07:11 2023 -0700", out: "2023-12-13 06:07:11 +0000 UTC"}, + // RabbitMQ log format - https://github.com/araddon/dateparse/pull/122 + {in: "8-Mar-2018::14:09:27", out: "2018-03-08 14:09:27 +0000 UTC"}, + {in: "08-03-2018::02:09:29 PM", out: "2018-03-08 14:09:29 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss,000 {in: "2014-05-11 08:20:13,787", out: "2014-05-11 08:20:13.787 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss +0000 @@ -827,6 +830,10 @@ var testParseErrors = []dateTest{ {in: "2014-02-13 00:00:00 utc", err: true}, // lowercase timezones are not valid {in: "2014-02-13t00:00:00.0z", err: true}, // lowercase 't' separator is not supported {in: "2014-02-13T00:00:00.0z", err: true}, // lowercase 'z' zulu timezone indicator not a valid format + // Invalid variants of RabbitMQ log format + {in: "8-Mar-2018:14:09:27", err: true}, + {in: "8-Mar-2018: 14:09:27", err: true}, + {in: "8-Mar-2018:::14:09:27", err: true}, } func TestParseErrors(t *testing.T) { @@ -1093,5 +1100,5 @@ func TestRetryAmbiguousDateWithSwap(t *testing.T) { // Convenience function 
for debugging a particular broken test case func TestDebug(t *testing.T) { - MustParse("Tue Dec 12 23:07:11 2023 -0700") + MustParse("8-Mar-2018::14:09:27") } From 0d2fd5e275b55ec1292e0c727c06da4a7d017973 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Sat, 16 Dec 2023 08:18:54 -0700 Subject: [PATCH 46/62] Add broader benchmarks Uses the main test set for a broader stress test. --- bench_test.go | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/bench_test.go b/bench_test.go index 1973308..f979d95 100644 --- a/bench_test.go +++ b/bench_test.go @@ -40,6 +40,63 @@ func BenchmarkParseAny(b *testing.B) { } } +func BenchmarkBigShotgunParse(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, t := range testInputs { + // This is the non dateparse traditional approach + _, _ = parseShotgunStyle(t.in) + } + } +} + +func BenchmarkBigParseAny(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, t := range testInputs { + _, _ = ParseAny(t.in) + } + } +} + +func BenchmarkBigParseIn(b *testing.B) { + b.ReportAllocs() + loc, _ := time.LoadLocation("America/New_York") + for i := 0; i < b.N; i++ { + for _, t := range testInputs { + _, _ = ParseIn(t.in, loc) + } + } +} + +func BenchmarkBigParseRetryAmbiguous(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, t := range testInputs { + _, _ = ParseAny(t.in, RetryAmbiguousDateWithSwap(true)) + } + } +} + +func BenchmarkShotgunParseErrors(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, t := range testParseErrors { + // This is the non dateparse traditional approach + _, _ = parseShotgunStyle(t.in) + } + } +} + +func BenchmarkParseAnyErrors(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, t := range testParseErrors { + _, _ = ParseAny(t.in) + } + } +} + /* func BenchmarkParseDateString(b *testing.B) { b.ReportAllocs() From f4307ef59d19cef2308fdb74476cc0d07d1b0efa Mon Sep 
17 00:00:00 2001 From: Klondike Dragon Date: Sat, 16 Dec 2023 10:48:24 -0700 Subject: [PATCH 47/62] Heavily optimize memory allocations Uses a memory pool for parser struct and format []byte Uses a new go 1.20 feature to avoid allocations for []byte to string conversions in allowable cases. go 1.20 also fixes a go bug for parsing fractional sec after a comma, so we can eliminate a workaround. The remaining allocations are mostly unavoidable (e.g., time.Parse constructing a FixedZone location or part to strings.ToLower). Results show an 89% reduction in allocated bytes for the big benchmark cases, and for some formats an allocation can be avoided entirely. There is also a resulting 26% speedup in ns/op. Details: BEFORE: cpu: 12th Gen Intel(R) Core(TM) i7-1255U BenchmarkShotgunParse-12 19448 B/op 474 allocs/op BenchmarkParseAny-12 4736 B/op 42 allocs/op BenchmarkBigShotgunParse-12 1075049 B/op 24106 allocs/op BenchmarkBigParseAny-12 241422 B/op 2916 allocs/op BenchmarkBigParseIn-12 244195 B/op 2984 allocs/op BenchmarkBigParseRetryAmbiguous-12 260751 B/op 3715 allocs/op BenchmarkShotgunParseErrors-12 67080 B/op 1679 allocs/op BenchmarkParseAnyErrors-12 15903 B/op 200 allocs/op AFTER: BenchmarkShotgunParse-12 19448 B/op 474 allocs/op BenchmarkParseAny-12 48 B/op 2 allocs/op BenchmarkBigShotgunParse-12 1075049 B/op 24106 allocs/op BenchmarkBigParseAny-12 25394 B/op 824 allocs/op BenchmarkBigParseIn-12 28165 B/op 892 allocs/op BenchmarkBigParseRetryAmbiguous-12 37880 B/op 1502 allocs/op BenchmarkShotgunParseErrors-12 67080 B/op 1679 allocs/op BenchmarkParseAnyErrors-12 3851 B/op 117 allocs/op --- go.mod | 2 +- parseany.go | 172 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 131 insertions(+), 43 deletions(-) diff --git a/go.mod b/go.mod index baa649f..8268462 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/araddon/dateparse -go 1.19 +go 1.20 require ( github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4 diff --git 
a/parseany.go b/parseany.go index 03cdad7..6a868fe 100644 --- a/parseany.go +++ b/parseany.go @@ -7,9 +7,11 @@ import ( "fmt" "strconv" "strings" + "sync" "time" "unicode" "unicode/utf8" + "unsafe" ) // func init() { @@ -147,11 +149,22 @@ func unexpectedTail(tail string) error { return fmt.Errorf("%w %q", ErrUnexpectedTail, tail) } +// go 1.20 allows us to convert a byte slice to a string without a memory allocation. +// See https://github.com/golang/go/issues/53003#issuecomment-1140276077. +func bytesToString(b []byte) string { + if b == nil || len(b) <= 0 { + return "" + } else { + return unsafe.String(&b[0], len(b)) + } +} + // ParseAny parse an unknown date format, detect the layout. // Normal parse. Equivalent Timezone rules as time.Parse(). // NOTE: please see readme on mmdd vs ddmm ambiguous dates. func ParseAny(datestr string, opts ...ParserOption) (time.Time, error) { p, err := parseTime(datestr, nil, opts...) + defer putBackParser(p) if err != nil { return time.Time{}, err } @@ -165,6 +178,7 @@ func ParseAny(datestr string, opts ...ParserOption) (time.Time, error) { // in other locations. func ParseIn(datestr string, loc *time.Location, opts ...ParserOption) (time.Time, error) { p, err := parseTime(datestr, loc, opts...) + defer putBackParser(p) if err != nil { return time.Time{}, err } @@ -187,6 +201,7 @@ func ParseIn(datestr string, loc *time.Location, opts ...ParserOption) (time.Tim // t, err := dateparse.ParseIn("3/1/2014", denverLoc) func ParseLocal(datestr string, opts ...ParserOption) (time.Time, error) { p, err := parseTime(datestr, time.Local, opts...) + defer putBackParser(p) if err != nil { return time.Time{}, err } @@ -197,6 +212,7 @@ func ParseLocal(datestr string, opts ...ParserOption) (time.Time, error) { // Not recommended for most use-cases. func MustParse(datestr string, opts ...ParserOption) time.Time { p, err := parseTime(datestr, nil, opts...) 
+ defer putBackParser(p) if err != nil { panic(err.Error()) } @@ -214,6 +230,7 @@ func MustParse(datestr string, opts ...ParserOption) time.Time { // // layout = "2006-01-02 15:04:05" func ParseFormat(datestr string, opts ...ParserOption) (string, error) { p, err := parseTime(datestr, nil, opts...) + defer putBackParser(p) if err != nil { return "", err } @@ -228,6 +245,7 @@ func ParseFormat(datestr string, opts ...ParserOption) (string, error) { // mm/dd vs dd/mm then return an error. These return errors: 3.3.2014 , 8/8/71 etc func ParseStrict(datestr string, opts ...ParserOption) (time.Time, error) { p, err := parseTime(datestr, nil, opts...) + defer putBackParser(p) if err != nil { return time.Time{}, err } @@ -237,6 +255,8 @@ func ParseStrict(datestr string, opts ...ParserOption) (time.Time, error) { return p.parse() } +// Creates a new parser and parses the given datestr in the given loc with the given options. +// The caller must call putBackParser on the returned parser when done with it. func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *parser, err error) { p, err = newParser(datestr, loc, opts...) @@ -244,12 +264,6 @@ func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *par return } - // if this string is impossibly long, don't even try. 
longest date might be something like: - // 'Wednesday, 8 February 2023 19:00:46.999999999 +11:00 (AEDT) m=+0.000000001' - if len(datestr) > 75 { - return p, unknownErr(datestr) - } - if p.retryAmbiguousDateWithSwap { // month out of range signifies that a day/month swap is the correct solution to an ambiguous date // this is because it means that a day is being interpreted as a month and overflowing the valid value for that @@ -265,6 +279,7 @@ func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *par // turn off the retry to avoid endless recursion retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false) modifiedOpts := append(opts, preferMonthFirst, retryAmbiguousDateWithSwap) + putBackParser(p) p, err = parseTime(datestr, time.Local, modifiedOpts...) } } @@ -1105,7 +1120,9 @@ iterRunes: maybeDay := strings.ToLower(p.datestr[0:i]) if isDay(maybeDay) { // using skip throws off indices used by other code; saner to restart - return parseTime(p.datestr[i+1:], loc) + newDateStr := p.datestr[i+1:] + putBackParser(p) + return parseTime(newDateStr, loc) } p.stateDate = dateAlphaWs } @@ -1133,8 +1150,9 @@ iterRunes: p.set(0, "Jan") } else if i == 4 { // gross - newDatestr := p.datestr[0:i-1] + p.datestr[i:] - return parseTime(newDatestr, loc, opts...) + newDateStr := p.datestr[0:i-1] + p.datestr[i:] + putBackParser(p) + return parseTime(newDateStr, loc, opts...) } else { return p, unknownErr(datestr) } @@ -1332,25 +1350,33 @@ iterRunes: case 't', 'T': if p.nextIs(i, 'h') || p.nextIs(i, 'H') { if len(p.datestr) > i+2 { - return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...) + newDateStr := p.datestr[0:i] + p.datestr[i+2:] + putBackParser(p) + return parseTime(newDateStr, loc, opts...) } } case 'n', 'N': if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if len(p.datestr) > i+2 { - return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...) 
+ newDateStr := p.datestr[0:i] + p.datestr[i+2:] + putBackParser(p) + return parseTime(newDateStr, loc, opts...) } } case 's', 'S': if p.nextIs(i, 't') || p.nextIs(i, 'T') { if len(p.datestr) > i+2 { - return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...) + newDateStr := p.datestr[0:i] + p.datestr[i+2:] + putBackParser(p) + return parseTime(newDateStr, loc, opts...) } } case 'r', 'R': if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if len(p.datestr) > i+2 { - return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...) + newDateStr := p.datestr[0:i] + p.datestr[i+2:] + putBackParser(p) + return parseTime(newDateStr, loc, opts...) } } } @@ -1575,13 +1601,6 @@ iterRunes: p.houri = i } switch r { - case ',': - // hm, lets just swap out comma for period. for some reason go - // won't parse it. - // 2014-05-11 08:20:13,787 - ds := []byte(p.datestr) - ds[i] = '.' - return parseTime(string(ds), loc, opts...) case '-', '+': // 03:21:51+00:00 p.stateTime = timeOffset @@ -1597,7 +1616,8 @@ iterRunes: } } p.offseti = i - case '.': + case '.', ',': + // NOTE: go 1.20 can now parse a string that has a comma delimiter properly p.stateTime = timePeriod p.seclen = i - p.seci p.msi = i + 1 @@ -2042,12 +2062,12 @@ iterRunes: // may or may not have a space on the end if offsetLen == 7 { if p.datestr[p.offseti+6] != ' ' { - return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, string(p.datestr[p.offseti:p.offseti+offsetLen])) + return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, p.datestr[p.offseti:p.offseti+offsetLen]) } } p.set(p.offseti, "-07:00") default: - return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, string(p.datestr[p.offseti:p.offseti+offsetLen])) + return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, 
p.datestr[p.offseti:p.offseti+offsetLen]) } // process timezone switch len(p.datestr) - p.tzi { @@ -2057,7 +2077,7 @@ iterRunes: case 4: p.set(p.tzi, "MST ") default: - return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, string(p.datestr[p.tzi:])) + return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, p.datestr[p.tzi:]) } case timeWsAlpha: switch len(p.datestr) - p.tzi { @@ -2067,7 +2087,7 @@ iterRunes: case 4: p.set(p.tzi, "MST ") default: - return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, string(p.datestr[p.tzi:])) + return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, p.datestr[p.tzi:]) } case timeWsAlphaRParen: @@ -2095,13 +2115,13 @@ iterRunes: case 6: p.set(p.offseti, "-07:00") default: - return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, string(p.datestr[p.offseti:i])) + return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, p.datestr[p.offseti:i]) } case timePeriod: p.mslen = i - p.msi if p.mslen >= 10 { - return p, fmt.Errorf("fractional seconds in %q too long near %q", datestr, string(p.datestr[p.msi:p.mslen])) + return p, fmt.Errorf("fractional seconds in %q too long near %q", datestr, p.datestr[p.msi:p.mslen]) } case timeOffset, timeWsOffset, timeWsYearOffset: switch len(p.datestr) - p.offseti { @@ -2112,7 +2132,7 @@ iterRunes: // 19:55:00+0100 (or 19:55:00 +0100) p.set(p.offseti, "-0700") default: - return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, string(p.datestr[p.offseti:])) + return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, p.datestr[p.offseti:]) } case timeWsOffsetWs: @@ -2127,7 +2147,7 @@ iterRunes: // 13:31:51.999 +01:00 CEST p.set(p.tzi, "MST ") 
default: - return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, string(p.datestr[p.tzi:])) + return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, p.datestr[p.tzi:]) } } case timeOffsetColon, timeWsOffsetColon: @@ -2137,7 +2157,7 @@ iterRunes: case 6: p.set(p.offseti, "-07:00") default: - return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, string(p.datestr[p.offseti:])) + return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, p.datestr[p.offseti:]) } } p.coalesceTime(i) @@ -2452,6 +2472,46 @@ type parser struct { t *time.Time } +// something like: "Wednesday, 8 February 2023 19:00:46.999999999 +11:00 (AEDT) m=+0.000000001" +const longestPossibleDateStr = 78 + +// the format byte slice is always a little larger, in case we need to expand it to contain a full month +const formatExtraBufferBytes = 16 +const formatBufferCapacity = longestPossibleDateStr + formatExtraBufferBytes + +var parserPool = sync.Pool{ + New: func() interface{} { + // allocate a max-sized fixed-capacity format byte slice + // that will be re-used with this parser struct + return &parser{ + format: make([]byte, 0, formatBufferCapacity), + } + }, +} + +var emptyString = "" + +// Use to put a parser back into the pool in the right way +func putBackParser(p *parser) { + if p == nil { + return + } + // we'll be reusing the backing memory for the format byte slice, put it back + // to maximum capacity + if cap(p.format) == longestPossibleDateStr { + p.format = p.format[:longestPossibleDateStr] + } else { + // the parsing process replaced this, get back a new one with the right cap + p.format = make([]byte, 0, longestPossibleDateStr) + } + // clear out pointers so we don't leak memory we don't need any longer + p.loc = nil + p.datestr = emptyString + p.fullMonth = emptyString + p.t = nil + parserPool.Put(p) +} + // ParserOption 
defines a function signature implemented by options // Options defined like this accept the parser and operate on the data within type ParserOption func(*parser) error @@ -2482,18 +2542,29 @@ func AllowPartialStringMatch(allowPartialStringMatch bool) ParserOption { } } +// Creates a new parser. The caller must call putBackParser on the returned parser when done with it. func newParser(dateStr string, loc *time.Location, opts ...ParserOption) (*parser, error) { - p := &parser{ + dateStrLen := len(dateStr) + if dateStrLen > longestPossibleDateStr { + return nil, unknownErr(dateStr) + } + + // Make sure to re-use the format byte slice from the pooled parser struct + p := parserPool.Get().(*parser) + // This re-slicing is guaranteed to work because of the length check above + startingFormat := p.format[:dateStrLen] + copy(startingFormat, dateStr) + *p = parser{ stateDate: dateStart, stateTime: timeIgnore, datestr: dateStr, loc: loc, preferMonthFirst: true, retryAmbiguousDateWithSwap: false, + format: startingFormat, + // this tracks how much of the format string has been set, to make sure all of it is set + formatSetLen: 0, } - p.format = []byte(dateStr) - // this tracks how much of the format string has been set, to make sure all of it is set - p.formatSetLen = 0 // allow the options to mutate the parser fields from their defaults for _, option := range opts { @@ -2512,7 +2583,8 @@ func (p *parser) nextIs(i int, b byte) bool { } func (p *parser) setEntireFormat(format []byte) { - p.format = format + // Copy so that we don't lose this pooled format byte slice + copy(p.format, format) p.formatSetLen = len(format) } @@ -2665,8 +2737,22 @@ func (p *parser) coalesceTime(end int) { func (p *parser) setFullMonth(month string) { oldLen := len(p.format) const fullMonth = "January" - p.format = []byte(fmt.Sprintf("%s%s%s", p.format[0:p.moi], fullMonth, p.format[p.moi+len(month):])) - newLen := len(p.format) + // Do an overlapping copy so we don't lose the pooled format buffer + 
part1Len := p.moi + part3 := p.format[p.moi+len(month):] + newLen := part1Len + len(fullMonth) + len(part3) + if newLen > oldLen { + // We can re-slice this, because the capacity is guaranteed to be a little longer than any possible datestr + p.format = p.format[:newLen] + } + // first part will not change, we need to shift the third part + copy(p.format[part1Len+len(fullMonth):], part3) + copy(p.format[part1Len:], fullMonth) + // shorten the format slice now if needed + if newLen < oldLen { + p.format = p.format[:newLen] + } + if newLen > oldLen && p.formatSetLen >= p.moi { p.formatSetLen += newLen - oldLen } else if newLen < oldLen && p.formatSetLen >= p.moi { @@ -2706,14 +2792,16 @@ func (p *parser) parse() (time.Time, error) { if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) { // We can always ignore punctuation at the end of a date/time, but do not allow // any numbers or letters in the format string. - validFormatTo := findProperEnd(string(p.format), p.formatSetLen, len(p.format), false, false, true) + validFormatTo := findProperEnd(bytesToString(p.format), p.formatSetLen, len(p.format), false, false, true) if validFormatTo < len(p.format) { - return time.Time{}, unexpectedTail(string(p.format[p.formatSetLen:])) + return time.Time{}, unexpectedTail(p.datestr[p.formatSetLen:]) } } if p.skip > 0 && len(p.format) > p.skip { - p.format = p.format[p.skip:] + // copy and then re-slice to shorten to avoid losing the header of the pooled format string + copy(p.format, p.format[p.skip:]) + p.format = p.format[:len(p.format)-p.skip] p.formatSetLen -= p.skip if p.formatSetLen < 0 { p.formatSetLen = 0 @@ -2723,10 +2811,10 @@ func (p *parser) parse() (time.Time, error) { if p.loc == nil { // gou.Debugf("parse layout=%q input=%q \ntx, err := time.Parse(%q, %q)", string(p.format), p.datestr, string(p.format), p.datestr) - return time.Parse(string(p.format), p.datestr) + return time.Parse(bytesToString(p.format), p.datestr) } //gou.Debugf("parse layout=%q 
input=%q \ntx, err := time.ParseInLocation(%q, %q, %v)", string(p.format), p.datestr, string(p.format), p.datestr, p.loc) - return time.ParseInLocation(string(p.format), p.datestr, p.loc) + return time.ParseInLocation(bytesToString(p.format), p.datestr, p.loc) } func isDay(alpha string) bool { for _, day := range days { From ed5310d0c16e080ef43fd250a8f57a5fb617836e Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Sat, 16 Dec 2023 12:55:03 -0700 Subject: [PATCH 48/62] Optimize ambiguous date parsing Previously, for ambiguous date strings, it was always calling parse twice even when the first parse would have been successful. Refactor so that parsing isn't re-attempted unless the first parse fails ambiguously. Benchmark results show that with RetryAmbiguousDateWithSwap(true), it's now about 6.5% faster (ns/op) and reduces allocated bytes by 3.4%. --- parseany.go | 72 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/parseany.go b/parseany.go index 6a868fe..d9cfd0a 100644 --- a/parseany.go +++ b/parseany.go @@ -168,7 +168,7 @@ func ParseAny(datestr string, opts ...ParserOption) (time.Time, error) { if err != nil { return time.Time{}, err } - return p.parse() + return p.parse(nil, opts...) } // ParseIn with Location, equivalent to time.ParseInLocation() timezone/offset @@ -182,7 +182,7 @@ func ParseIn(datestr string, loc *time.Location, opts ...ParserOption) (time.Tim if err != nil { return time.Time{}, err } - return p.parse() + return p.parse(loc, opts...) } // ParseLocal Given an unknown date format, detect the layout, @@ -205,7 +205,7 @@ func ParseLocal(datestr string, opts ...ParserOption) (time.Time, error) { if err != nil { return time.Time{}, err } - return p.parse() + return p.parse(time.Local, opts...) } // MustParse parse a date, and panic if it can't be parsed. Used for testing. 
@@ -216,7 +216,7 @@ func MustParse(datestr string, opts ...ParserOption) time.Time { if err != nil { panic(err.Error()) } - t, err := p.parse() + t, err := p.parse(nil, opts...) if err != nil { panic(err.Error()) } @@ -234,7 +234,7 @@ func ParseFormat(datestr string, opts ...ParserOption) (string, error) { if err != nil { return "", err } - _, err = p.parse() + _, err = p.parse(nil, opts...) if err != nil { return "", err } @@ -252,7 +252,7 @@ func ParseStrict(datestr string, opts ...ParserOption) (time.Time, error) { if p.ambiguousMD { return time.Time{}, ErrAmbiguousMMDD } - return p.parse() + return p.parse(nil, opts...) } // Creates a new parser and parses the given datestr in the given loc with the given options. @@ -264,29 +264,6 @@ func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *par return } - if p.retryAmbiguousDateWithSwap { - // month out of range signifies that a day/month swap is the correct solution to an ambiguous date - // this is because it means that a day is being interpreted as a month and overflowing the valid value for that - // by retrying in this case, we can fix a common situation with no assumptions - defer func() { - if p != nil && p.ambiguousMD { - // if it errors out with the following error, swap before we - // get out of this function to reduce scope it needs to be applied on - _, err = p.parse() - if err != nil && strings.Contains(err.Error(), "month out of range") { - // create the option to reverse the preference - preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst) - // turn off the retry to avoid endless recursion - retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false) - modifiedOpts := append(opts, preferMonthFirst, retryAmbiguousDateWithSwap) - putBackParser(p) - p, err = parseTime(datestr, time.Local, modifiedOpts...) 
- } - } - - }() - } - // IMPORTANT: we may need to modify the datestr while we are parsing (e.g., to // remove pieces of the string that should be ignored during golang parsing). // We will iterate over the modified datestr, and whenever we update datestr, @@ -2584,6 +2561,12 @@ func (p *parser) nextIs(i int, b byte) bool { func (p *parser) setEntireFormat(format []byte) { // Copy so that we don't lose this pooled format byte slice + oldLen := len(p.format) + newLen := len(format) + if oldLen != newLen { + // guaranteed to work because of the allocated capacity for format buffers + p.format = p.format[:newLen] + } copy(p.format, format) p.formatSetLen = len(format) } @@ -2780,7 +2763,10 @@ func (p *parser) trimExtra(onlyTrimFormat bool) { } } -func (p *parser) parse() (time.Time, error) { +func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) (t time.Time, err error) { + if p == nil { + return time.Time{}, unknownErr("") + } if p.t != nil { return *p.t, nil } @@ -2788,6 +2774,32 @@ func (p *parser) parse() (time.Time, error) { p.setFullMonth(p.fullMonth) } + if p.retryAmbiguousDateWithSwap && p.ambiguousMD { + // month out of range signifies that a day/month swap is the correct solution to an ambiguous date + // this is because it means that a day is being interpreted as a month and overflowing the valid value for that + // by retrying in this case, we can fix a common situation with no assumptions + defer func() { + // if actual time parsing errors out with the following error, swap before we + // get out of this function to reduce scope it needs to be applied on + if err != nil && strings.Contains(err.Error(), "month out of range") { + // create the option to reverse the preference + preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst) + // turn off the retry to avoid endless recursion + retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false) + modifiedOpts := append(originalOpts, preferMonthFirst, 
retryAmbiguousDateWithSwap) + var newParser *parser + newParser, err = parseTime(p.datestr, originalLoc, modifiedOpts...) + defer putBackParser(newParser) + if err == nil { + t, err = newParser.parse(originalLoc, modifiedOpts...) + // The caller might use the format and datestr, so copy that back to the original parser + p.setEntireFormat(newParser.format) + p.datestr = newParser.datestr + } + } + }() + } + // Make sure that the entire string matched to a known format that was detected if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) { // We can always ignore punctuation at the end of a date/time, but do not allow From 23f8fa1af098ae3824a5dcb1ab3ad398b179356f Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Sat, 16 Dec 2023 13:52:00 -0700 Subject: [PATCH 49/62] Further optimize ambiguous parsing Optimize the common and special case where mm and dd are the same length, just swap in place. Avoids having to reparse the entire string. For this case, it's about 30% faster and reduces allocations by about 15%. This format is especially common, hence the reason to optimize for this case. Also fix the case for ambiguous date/time in the mm:dd:yyyy format. 
--- bench_test.go | 7 +++++ parseany.go | 82 ++++++++++++++++++++++++++++++++++++------------ parseany_test.go | 33 +++++++++++++++++-- 3 files changed, 100 insertions(+), 22 deletions(-) diff --git a/bench_test.go b/bench_test.go index f979d95..a46e5dd 100644 --- a/bench_test.go +++ b/bench_test.go @@ -97,6 +97,13 @@ func BenchmarkParseAnyErrors(b *testing.B) { } } +func BenchmarkParseAmbiguous(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + MustParse("13/02/2014 04:08:09 +0000 UTC", RetryAmbiguousDateWithSwap(true)) + } +} + /* func BenchmarkParseDateString(b *testing.B) { b.ReportAllocs() diff --git a/parseany.go b/parseany.go index d9cfd0a..307402f 100644 --- a/parseany.go +++ b/parseany.go @@ -342,6 +342,7 @@ iterRunes: // 03/31/2005 // 31/03/2005 p.ambiguousMD = true + p.ambiguousRetryable = true if p.preferMonthFirst { if p.molen == 0 { // 03/31/2005 @@ -364,8 +365,8 @@ iterRunes: } case ':': - // 03/31/2005 - // 2014/02/24 + // 03:31:2005 + // 2014:02:24 p.stateDate = dateDigitColon if i == 4 { p.yearlen = i @@ -375,6 +376,7 @@ iterRunes: } } else { p.ambiguousMD = true + p.ambiguousRetryable = true if p.preferMonthFirst { if p.molen == 0 { p.molen = i @@ -383,6 +385,14 @@ iterRunes: } p.dayi = i + 1 } + } else { + if p.daylen == 0 { + p.daylen = i + if !p.setDay() { + return p, unknownErr(datestr) + } + p.moi = i + 1 + } } } @@ -399,6 +409,7 @@ iterRunes: } } else if i <= 2 { p.ambiguousMD = true + p.ambiguousRetryable = true if p.preferMonthFirst { if p.molen == 0 { // 03.31.2005 @@ -641,7 +652,7 @@ iterRunes: // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy), // which is a horrible assumption, but seems to be the convention for // dates that are formatted in this way. 
- p.ambiguousMD = true + p.ambiguousMD = true // not retryable p.yearlen = 2 p.set(p.yeari, "06") // We now also know that part1 was the day @@ -786,6 +797,11 @@ iterRunes: if !p.setDay() { return p, unknownErr(datestr) } + } else if p.molen == 0 { + p.molen = i - p.moi + if !p.setMonth() { + return p, unknownErr(datestr) + } } break iterRunes case ':': @@ -806,6 +822,14 @@ iterRunes: } p.yeari = i + 1 } + } else { + if p.molen == 0 { + p.molen = i - p.moi + if !p.setMonth() { + return p, unknownErr(datestr) + } + p.yeari = i + 1 + } } } @@ -2260,7 +2284,7 @@ iterRunes: // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy), // which is a horrible assumption, but seems to be the convention for // dates that are formatted in this way. - p.ambiguousMD = true + p.ambiguousMD = true // not retryable p.yearlen = 2 p.set(p.yeari, "06") // We now also know that part1 was the day @@ -2417,6 +2441,7 @@ type parser struct { preferMonthFirst bool retryAmbiguousDateWithSwap bool ambiguousMD bool + ambiguousRetryable bool allowPartialStringMatch bool stateDate dateState stateTime timeState @@ -2774,7 +2799,7 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) p.setFullMonth(p.fullMonth) } - if p.retryAmbiguousDateWithSwap && p.ambiguousMD { + if p.retryAmbiguousDateWithSwap && p.ambiguousMD && p.ambiguousRetryable { // month out of range signifies that a day/month swap is the correct solution to an ambiguous date // this is because it means that a day is being interpreted as a month and overflowing the valid value for that // by retrying in this case, we can fix a common situation with no assumptions @@ -2782,19 +2807,35 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) // if actual time parsing errors out with the following error, swap before we // get out of this function to reduce scope it needs to be applied on if err != nil && strings.Contains(err.Error(), "month out of range") { - // create 
the option to reverse the preference - preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst) - // turn off the retry to avoid endless recursion - retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false) - modifiedOpts := append(originalOpts, preferMonthFirst, retryAmbiguousDateWithSwap) - var newParser *parser - newParser, err = parseTime(p.datestr, originalLoc, modifiedOpts...) - defer putBackParser(newParser) - if err == nil { - t, err = newParser.parse(originalLoc, modifiedOpts...) - // The caller might use the format and datestr, so copy that back to the original parser - p.setEntireFormat(newParser.format) - p.datestr = newParser.datestr + // simple optimized case where mm and dd can be swapped directly + if p.molen == 2 && p.daylen == 2 { + moi := p.moi + p.moi = p.dayi + p.dayi = moi + if !p.setDay() || !p.setMonth() { + err = unknownErr(p.datestr) + } else { + if p.loc == nil { + t, err = time.Parse(bytesToString(p.format), p.datestr) + } else { + t, err = time.ParseInLocation(bytesToString(p.format), p.datestr, p.loc) + } + } + } else { + // create the option to reverse the preference + preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst) + // turn off the retry to avoid endless recursion + retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false) + modifiedOpts := append(originalOpts, preferMonthFirst, retryAmbiguousDateWithSwap) + var newParser *parser + newParser, err = parseTime(p.datestr, originalLoc, modifiedOpts...) + defer putBackParser(newParser) + if err == nil { + t, err = newParser.parse(originalLoc, modifiedOpts...) 
+ // The caller might use the format and datestr, so copy that back to the original parser + p.setEntireFormat(newParser.format) + p.datestr = newParser.datestr + } } } }() @@ -2824,9 +2865,10 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) if p.loc == nil { // gou.Debugf("parse layout=%q input=%q \ntx, err := time.Parse(%q, %q)", string(p.format), p.datestr, string(p.format), p.datestr) return time.Parse(bytesToString(p.format), p.datestr) + } else { + //gou.Debugf("parse layout=%q input=%q \ntx, err := time.ParseInLocation(%q, %q, %v)", string(p.format), p.datestr, string(p.format), p.datestr, p.loc) + return time.ParseInLocation(bytesToString(p.format), p.datestr, p.loc) } - //gou.Debugf("parse layout=%q input=%q \ntx, err := time.ParseInLocation(%q, %q, %v)", string(p.format), p.datestr, string(p.format), p.datestr, p.loc) - return time.ParseInLocation(bytesToString(p.format), p.datestr, p.loc) } func isDay(alpha string) bool { for _, day := range days { diff --git a/parseany_test.go b/parseany_test.go index 9c9b6fa..e2940e7 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -254,6 +254,7 @@ var testInputs = []dateTest{ {in: "04:02:2014 04:08:09.123", out: "2014-04-02 04:08:09.123 +0000 UTC"}, {in: "04:02:2014 04:08:09.12312", out: "2014-04-02 04:08:09.12312 +0000 UTC"}, {in: "04:02:2014 04:08:09.123123", out: "2014-04-02 04:08:09.123123 +0000 UTC"}, + {in: "04:01:2014 04:08:09", out: "2014-01-04 04:08:09 +0000 UTC", preferDayFirst: true}, // mm/dd/yy hh:mm:ss AM {in: "04/02/2014 04:08:09am", out: "2014-04-02 04:08:09 +0000 UTC"}, {in: "04/02/2014 04:08:09 AM", out: "2014-04-02 04:08:09 +0000 UTC"}, @@ -537,8 +538,10 @@ var testInputs = []dateTest{ {in: "2017-07-19 03:21:51+00:00", out: "2017-07-19 03:21:51 +0000 UTC"}, // yyyy:mm:dd hh:mm:ss+00:00 {in: "2012:08:03 18:31:59+00:00", out: "2012-08-03 18:31:59 +0000 UTC"}, - // dd:mm:yyyy hh:mm:ss+00:00 + // mm:dd:yyyy hh:mm:ss+00:00 {in: "08:03:2012 18:31:59+00:00", out: 
"2012-08-03 18:31:59 +0000 UTC"}, + {in: "08:04:2012 18:31:59+00:00", out: "2012-04-08 18:31:59 +0000 UTC", preferDayFirst: true}, + {in: "24:03:2012 18:31:59+00:00", out: "2012-03-24 18:31:59 +0000 UTC", retryAmbiguous: true}, // yyyy-mm-dd hh:mm:ss.000+00:00 PST {in: "2012-08-03 18:31:59.000+00:00 PST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "America/Los_Angeles", zname: "PST"}, {in: "2012-08-03 18:31:59.000+00:00 CEST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, @@ -1068,37 +1071,63 @@ func TestPreferMonthFirst(t *testing.T) { ts, err := ParseAny("04/02/2014 04:08:09 +0000 UTC") assert.Equal(t, nil, err) assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts, err = ParseAny("4/02/2014 04:08:09 +0000 UTC") + assert.Equal(t, nil, err) + assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts, err = ParseAny("04/2/2014 04:08:09 +0000 UTC") + assert.Equal(t, nil, err) + assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) preferMonthFirstTrue := PreferMonthFirst(true) ts, err = ParseAny("04/02/2014 04:08:09 +0000 UTC", preferMonthFirstTrue) assert.Equal(t, nil, err) assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts, err = ParseAny("4/02/2014 04:08:09 +0000 UTC", preferMonthFirstTrue) + assert.Equal(t, nil, err) + assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts, err = ParseAny("04/2/2014 04:08:09 +0000 UTC", preferMonthFirstTrue) + assert.Equal(t, nil, err) + assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) // allows the day to be preferred before the month, when completely ambiguous preferMonthFirstFalse := PreferMonthFirst(false) ts, err = ParseAny("04/02/2014 04:08:09 +0000 UTC", preferMonthFirstFalse) assert.Equal(t, nil, err) assert.Equal(t, "2014-02-04 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + 
ts, err = ParseAny("4/02/2014 04:08:09 +0000 UTC", preferMonthFirstFalse) + assert.Equal(t, nil, err) + assert.Equal(t, "2014-02-04 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts, err = ParseAny("04/2/2014 04:08:09 +0000 UTC", preferMonthFirstFalse) + assert.Equal(t, nil, err) + assert.Equal(t, "2014-02-04 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } func TestRetryAmbiguousDateWithSwap(t *testing.T) { // default is false _, err := ParseAny("13/02/2014 04:08:09 +0000 UTC") assert.NotEqual(t, nil, err) + _, err = ParseAny("13/2/2014 04:08:09 +0000 UTC") + assert.NotEqual(t, nil, err) // will fail error if the month preference cannot work due to the value being larger than 12 retryAmbiguousDateWithSwapFalse := RetryAmbiguousDateWithSwap(false) _, err = ParseAny("13/02/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapFalse) assert.NotEqual(t, nil, err) + _, err = ParseAny("13/2/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapFalse) + assert.NotEqual(t, nil, err) // will retry with the other month preference if this error is detected retryAmbiguousDateWithSwapTrue := RetryAmbiguousDateWithSwap(true) ts, err := ParseAny("13/02/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapTrue) assert.Equal(t, nil, err) assert.Equal(t, "2014-02-13 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts, err = ParseAny("13/2/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapTrue) + assert.Equal(t, nil, err) + assert.Equal(t, "2014-02-13 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } // Convenience function for debugging a particular broken test case func TestDebug(t *testing.T) { - MustParse("8-Mar-2018::14:09:27") + ts := MustParse("03:08:2012 18:31:59+00:00", PreferMonthFirst(false)) + assert.Equal(t, "2012-08-03 18:31:59 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } From d2e1443c4dbb8804b3dbae51534f2b0e5a153c5e Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Sat, 16 Dec 2023 22:31:48 -0700 Subject: [PATCH 50/62] 
Comprehensive date format validation Audit every stateDate so every unexpected alternative will fail. In the process, fixed some newly found bugs: * Extend format yyyy-mon-dd to allow times to follow it. Also allow full month name. * Allow full day name before month (e.g., Monday January 4th, 2017) Relevant confirmatory test cases were added. --- parseany.go | 424 +++++++++++++++++++++++++++++++---------------- parseany_test.go | 70 +++++++- 2 files changed, 352 insertions(+), 142 deletions(-) diff --git a/parseany.go b/parseany.go index 307402f..87333ba 100644 --- a/parseany.go +++ b/parseany.go @@ -59,7 +59,7 @@ const ( dateDigit dateDigitSt dateYearDash - dateYearDashAlphaDash + dateYearDashAlpha dateYearDashDash dateYearDashDashWs // 6 dateYearDashDashT @@ -77,25 +77,23 @@ const ( dateDigitSlash dateDigitYearSlash dateDigitSlashAlpha // 21 + dateDigitSlashAlphaSlash dateDigitColon dateDigitChineseYear dateDigitChineseYearWs dateDigitWs - dateDigitWsMoYear // 26 + dateDigitWsMoYear // 27 dateAlpha dateAlphaWs dateAlphaWsDigit - dateAlphaWsDigitMore // 30 + dateAlphaWsDigitMore // 31 dateAlphaWsDigitMoreWs dateAlphaWsDigitMoreWsYear - dateAlphaWsMonth - dateAlphaWsDigitYearmaybe - dateAlphaWsMonthMore - dateAlphaWsMonthSuffix - dateAlphaWsMore - dateAlphaWsAtTime + dateAlphaWsDigitYearMaybe + dateVariousDaySuffix + dateAlphaFullMonthWs + dateAlphaFullMonthWsDayWs dateAlphaWsAlpha - dateAlphaWsAlphaYearmaybe // 40 dateAlphaPeriodWsDigit dateAlphaSlash dateAlphaSlashDigit @@ -351,6 +349,8 @@ iterRunes: return p, unknownErr(datestr) } p.dayi = i + 1 + } else { + return p, unknownErr(datestr) } } else { if p.daylen == 0 { @@ -359,9 +359,10 @@ iterRunes: return p, unknownErr(datestr) } p.moi = i + 1 + } else { + return p, unknownErr(datestr) } } - } case ':': @@ -384,6 +385,8 @@ iterRunes: return p, unknownErr(datestr) } p.dayi = i + 1 + } else { + return p, unknownErr(datestr) } } else { if p.daylen == 0 { @@ -392,6 +395,8 @@ iterRunes: return p, unknownErr(datestr) 
} p.moi = i + 1 + } else { + return p, unknownErr(datestr) } } } @@ -418,6 +423,8 @@ iterRunes: return p, unknownErr(datestr) } p.dayi = i + 1 + } else { + return p, unknownErr(datestr) } } else { if p.daylen == 0 { @@ -426,6 +433,8 @@ iterRunes: return p, unknownErr(datestr) } p.moi = i + 1 + } else { + return p, unknownErr(datestr) } } } @@ -468,10 +477,13 @@ iterRunes: case 's', 'S', 'r', 'R', 't', 'T', 'n', 'N': // 1st January 2018 // 2nd Jan 2018 23:59 - // st, rd, nd, st - p.stateDate = dateAlphaWsMonthSuffix + // st, rd, nd, th + p.stateDate = dateVariousDaySuffix i-- default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } continue } p.part1Len = i @@ -487,8 +499,9 @@ iterRunes: // 2020-08-17T17:00:00:000+0100 // dateYearDashDashWs // 2013-04-01 22:43:22 - // dateYearDashAlphaDash - // 2013-Feb-03 + // dateYearDashAlpha + // 2013-Feb-03 + // 2013-February-03 switch r { case '-': p.molen = i - p.moi @@ -499,7 +512,9 @@ iterRunes: } default: if unicode.IsLetter(r) { - p.stateDate = dateYearDashAlphaDash + p.stateDate = dateYearDashAlpha + } else if !unicode.IsDigit(r) { + return p, unknownErr(datestr) } } @@ -510,6 +525,7 @@ iterRunes: // 2013-04-01 22:43:22 // dateYearDashDashOffset // 2020-07-20+00:00 + // (these states are also reused after dateYearDashAlpha, like 2020-July-20...) 
switch r { case '+', '-': p.offseti = i @@ -534,28 +550,58 @@ iterRunes: return p, unknownErr(datestr) } break iterRunes + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } case dateYearDashDashT: // dateYearDashDashT // 2006-01-02T15:04:05Z07:00 // 2020-08-17T17:00:00:000+0100 + // (this state should never be reached, we break out when in this state) + return p, unknownErr(datestr) case dateYearDashDashOffset: // 2020-07-20+00:00 switch r { case ':': p.set(p.offseti, "-07:00") + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } - case dateYearDashAlphaDash: - // 2013-Feb-03 + case dateYearDashAlpha: + // dateYearDashAlpha + // 2013-Feb-03 + // 2013-February-03 switch r { case '-': p.molen = i - p.moi - p.set(p.moi, "Jan") - p.dayi = i + 1 + // Must be a valid short or long month + if p.molen == 3 { + p.set(p.moi, "Jan") + p.dayi = i + 1 + p.stateDate = dateYearDashDash + } else { + possibleFullMonth := strings.ToLower(p.datestr[p.moi:(p.moi + p.molen)]) + if i > 3 && isMonthFull(possibleFullMonth) { + p.fullMonth = possibleFullMonth + p.dayi = i + 1 + p.stateDate = dateYearDashDash + } else { + return p, unknownErr(datestr) + } + } + default: + if !unicode.IsLetter(r) { + return p, unknownErr(datestr) + } } + case dateDigitDash: // 13-Feb-03 // 29-Jun-2016 @@ -578,6 +624,10 @@ iterRunes: p.set(p.moi, "Jan") p.yeari = i + 1 p.stateDate = dateDigitDashAlphaDash + default: + if !unicode.IsLetter(r) { + return p, unknownErr(datestr) + } } case dateDigitDashDigit: @@ -594,6 +644,10 @@ iterRunes: } else { return p, unknownErr(datestr) } + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } case dateDigitDashAlphaDash, dateDigitDashDigitDash: @@ -668,7 +722,7 @@ iterRunes: break iterRunes } default: - if !unicode.IsDigit(r) && !unicode.IsLetter(r) && p.link > 0 { + if !unicode.IsDigit(r) { return p, unknownErr(datestr) } } @@ -697,21 +751,47 @@ iterRunes: } p.dayi = i + 1 } + default: + if 
!unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } case dateDigitSlashAlpha: // 06/May/2008 + // 06/September/2008 switch r { case '/': // | // 06/May/2008 if p.molen == 0 { - p.set(p.moi, "Jan") - p.yeari = i + 1 + p.molen = i - p.moi + if p.molen == 3 { + p.set(p.moi, "Jan") + p.yeari = i + 1 + p.stateDate = dateDigitSlashAlphaSlash + } else { + possibleFullMonth := strings.ToLower(p.datestr[p.moi:(p.moi + p.molen)]) + if i > 3 && isMonthFull(possibleFullMonth) { + p.fullMonth = possibleFullMonth + p.yeari = i + 1 + p.stateDate = dateDigitSlashAlphaSlash + } else { + return p, unknownErr(datestr) + } + } + } else { + return p, unknownErr(datestr) } - // We aren't breaking because we are going to re-use this case - // to find where the date starts, and possible time begins + default: + if !unicode.IsLetter(r) { + return p, unknownErr(datestr) + } + } + + case dateDigitSlashAlphaSlash: + switch r { case ' ': fallthrough case ':': @@ -723,6 +803,10 @@ iterRunes: } } break iterRunes + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } case dateDigitSlash: @@ -771,6 +855,10 @@ iterRunes: } } break iterRunes + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } case dateDigitColon: @@ -831,6 +919,10 @@ iterRunes: p.yeari = i + 1 } } + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } case dateDigitWs: @@ -876,6 +968,10 @@ iterRunes: p.set(p.moi, "Jan") p.stateDate = dateDigitWsMoYear } + default: + if !unicode.IsLetter(r) { + return p, unknownErr(datestr) + } } case dateDigitWsMoYear: @@ -898,6 +994,10 @@ iterRunes: return p, unknownErr(datestr) } break iterRunes + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } case dateYearWs: @@ -920,6 +1020,8 @@ iterRunes: return p, unknownErr(datestr) } } + } else if !unicode.IsLetter(r) { + return p, unknownErr(datestr) } case dateYearWsMonthWs: // 2013 Jan 06 15:04:05 @@ -936,6 +1038,10 @@ iterRunes: p.setDay() p.stateTime = 
timeStart break iterRunes + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } case dateDigitChineseYear: @@ -964,13 +1070,18 @@ iterRunes: p.stateDate = dateDigitChineseYearWs p.stateTime = timeStart break iterRunes + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } case dateDigitDot: - // This is the 2nd period // 3.31.2014 // 08.21.71 // 2014.05 // 2018.09.30 + + // This is the 2nd period if r == '.' { if p.moi == 0 { // 3.31.2014 @@ -998,14 +1109,16 @@ iterRunes: } p.stateDate = dateDigitDotDot } + } else if !unicode.IsDigit(r) { + return p, unknownErr(datestr) } case dateDigitDotDot: - // dateYearDashDashT + // dateDigitDotDotT // 2006.01.02T15:04:05Z07:00 - // dateYearDashDashWs + // dateDigitDotDotWs // 2013.04.01 22:43:22 - // dateYearDashDashOffset + // dateDigitDotDotOffset // 2020.07.20+00:00 switch r { case '+', '-': @@ -1031,18 +1144,28 @@ iterRunes: return p, unknownErr(datestr) } break iterRunes + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } case dateDigitDotDotT: - // dateYearDashDashT + // dateDigitDotDotT // 2006-01-02T15:04:05Z07:00 // 2020-08-17T17:00:00:000+0100 + // (should be unreachable, we break in this state) + return p, unknownErr(datestr) case dateDigitDotDotOffset: // 2020-07-20+00:00 switch r { case ':': p.set(p.offseti, "-07:00") + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } case dateAlpha: @@ -1056,10 +1179,9 @@ iterRunes: // dateAlphaWsDigit // May 8, 2009 5:57:51 PM // oct 1, 1970 - // dateAlphaWsMonth - // April 8, 2009 - // dateAlphaWsMore - // dateAlphaWsAtTime + // dateAlphaFullMonthWs + // January 02, 2006 3:04pm + // January 02, 2006 3:04pm MST-07 // January 02, 2006 at 3:04pm MST-07 // // dateAlphaPeriodWsDigit @@ -1085,47 +1207,45 @@ iterRunes: // Mon, 02-Jan-06 15:04:05 MST switch { case r == ' ': + // This could be a weekday or a month, detect and parse both cases. 
+ // skip & return to dateStart + // Tue 05 May 2020, 05:05:05 + // Tuesday 05 May 2020, 05:05:05 + // Mon Jan 2 15:04:05 2006 + // Monday Jan 2 15:04:05 2006 + maybeDayOrMonth := strings.ToLower(p.datestr[0:i]) + if isDay(maybeDayOrMonth) { + // using skip throws off indices used by other code; saner to restart + newDateStr := p.datestr[i+1:] + putBackParser(p) + return parseTime(newDateStr, loc) + } + // X // April 8, 2009 if i > 3 { - // Check to see if the alpha is name of month? or Day? - month := strings.ToLower(p.datestr[0:i]) - if isMonthFull(month) { + // Expecting a full month name at this point + if isMonthFull(maybeDayOrMonth) { p.moi = 0 p.molen = i - p.fullMonth = month - // len(" 31, 2018") = 9 - if len(p.datestr[i:]) < 10 { - // April 8, 2009 - p.stateDate = dateAlphaWsMonth - } else { - p.stateDate = dateAlphaWsMore - } + p.fullMonth = maybeDayOrMonth + p.stateDate = dateAlphaFullMonthWs p.dayi = i + 1 break + } else { + return p, unknownErr(datestr) } - } else { - // This is possibly ambiguous? May will parse as either though. - // So, it could return in-correct format. 
+ } else if i == 3 { // dateAlphaWs // May 05, 2005, 05:05:05 // May 05 2005, 05:05:05 // Jul 05, 2005, 05:05:05 // May 8 17:57:51 2009 // May 8 17:57:51 2009 - // skip & return to dateStart - // Tue 05 May 2020, 05:05:05 - // Mon Jan 2 15:04:05 2006 - - maybeDay := strings.ToLower(p.datestr[0:i]) - if isDay(maybeDay) { - // using skip throws off indices used by other code; saner to restart - newDateStr := p.datestr[i+1:] - putBackParser(p) - return parseTime(newDateStr, loc) - } p.stateDate = dateAlphaWs + } else { + return p, unknownErr(datestr) } case r == ',': @@ -1135,12 +1255,15 @@ iterRunes: p.stateDate = dateWeekdayAbbrevComma p.set(0, "Mon") } else { - p.stateDate = dateWeekdayComma - p.skip = i + 2 - i++ - // TODO: lets just make this "skip" as we don't need - // the mon, monday, they are all superfelous and not needed - // just lay down the skip, no need to fill and then skip + maybeDay := strings.ToLower(p.datestr[0:i]) + if isDay(maybeDay) { + p.stateDate = dateWeekdayComma + // Just skip past the weekday, it contains no valuable info + p.skip = i + 2 + i++ + } else { + return p, unknownErr(datestr) + } } case r == '.': // sept. 
28, 2017 @@ -1181,6 +1304,10 @@ iterRunes: return p, unknownErr(datestr) } } + default: + if !unicode.IsLetter(r) { + return p, unknownErr(datestr) + } } case dateAlphaWs: @@ -1208,6 +1335,10 @@ iterRunes: p.set(0, "Jan") p.stateDate = dateAlphaWsDigit p.dayi = i + case r == ' ': + // continue + default: + return p, unknownErr(datestr) } case dateAlphaWsDigit: @@ -1231,13 +1362,15 @@ iterRunes: return p, unknownErr(datestr) } p.yeari = i + 1 - p.stateDate = dateAlphaWsDigitYearmaybe + p.stateDate = dateAlphaWsDigitYearMaybe p.stateTime = timeStart } else if unicode.IsLetter(r) { - p.stateDate = dateAlphaWsMonthSuffix + p.stateDate = dateVariousDaySuffix i-- + } else if !unicode.IsDigit(r) { + return p, unknownErr(datestr) } - case dateAlphaWsDigitYearmaybe: + case dateAlphaWsDigitYearMaybe: // x // May 8 2009 5:57:51 PM // May 8 17:57:51 2009 @@ -1257,6 +1390,8 @@ iterRunes: return p, unknownErr(datestr) } break iterRunes + } else if !unicode.IsDigit(r) { + return p, unknownErr(datestr) } case dateAlphaWsDigitMore: // x @@ -1268,6 +1403,8 @@ iterRunes: if r == ' ' { p.yeari = i + 1 p.stateDate = dateAlphaWsDigitMoreWs + } else { + return p, unknownErr(datestr) } case dateAlphaWsDigitMoreWs: // x @@ -1292,58 +1429,13 @@ iterRunes: } p.stateTime = timeStart break iterRunes - } - - case dateAlphaWsMonth: - // April 8, 2009 - // April 8 2009 - switch r { - case ' ': - fallthrough - case ',': - // x - // June 8, 2009 - // x - // June 8 2009 - if p.daylen == 0 { - p.daylen = i - p.dayi - if !p.setDay() { - return p, unknownErr(datestr) - } - } - case 's', 'S', 'r', 'R', 't', 'T', 'n', 'N': - // st, rd, nd, st - i-- - p.stateDate = dateAlphaWsMonthSuffix default: - if p.daylen > 0 && p.yeari == 0 { - p.yeari = i - } - } - case dateAlphaWsMonthMore: - // X - // January 02, 2006, 15:04:05 - // January 02 2006, 15:04:05 - // January 02, 2006 15:04:05 - // January 02 2006 15:04:05 - switch r { - case ',': - p.yearlen = i - p.yeari - if !p.setYear() { + if r != '\'' && 
!unicode.IsDigit(r) { return p, unknownErr(datestr) } - p.stateTime = timeStart - i++ - break iterRunes - case ' ': - p.yearlen = i - p.yeari - if !p.setYear() { - return p, unknownErr(datestr) - } - p.stateTime = timeStart - break iterRunes } - case dateAlphaWsMonthSuffix: + + case dateVariousDaySuffix: // x // April 8th, 2009 // April 8th 2009 @@ -1356,6 +1448,7 @@ iterRunes: return parseTime(newDateStr, loc, opts...) } } + return p, unknownErr(datestr) case 'n', 'N': if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if len(p.datestr) > i+2 { @@ -1364,6 +1457,7 @@ iterRunes: return parseTime(newDateStr, loc, opts...) } } + return p, unknownErr(datestr) case 's', 'S': if p.nextIs(i, 't') || p.nextIs(i, 'T') { if len(p.datestr) > i+2 { @@ -1372,6 +1466,7 @@ iterRunes: return parseTime(newDateStr, loc, opts...) } } + return p, unknownErr(datestr) case 'r', 'R': if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if len(p.datestr) > i+2 { @@ -1380,8 +1475,12 @@ iterRunes: return parseTime(newDateStr, loc, opts...) 
} } + return p, unknownErr(datestr) + default: + return p, unknownErr(datestr) } - case dateAlphaWsMore: + + case dateAlphaFullMonthWs: // January 02, 2006, 15:04:05 // January 02 2006, 15:04:05 // January 2nd, 2006, 15:04:05 @@ -1397,8 +1496,10 @@ iterRunes: return p, unknownErr(datestr) } p.yeari = i + 2 - p.stateDate = dateAlphaWsMonthMore + p.stateDate = dateAlphaFullMonthWsDayWs i++ + } else { + return p, unknownErr(datestr) } case r == ' ': @@ -1409,7 +1510,7 @@ iterRunes: return p, unknownErr(datestr) } p.yeari = i + 1 - p.stateDate = dateAlphaWsMonthMore + p.stateDate = dateAlphaFullMonthWsDayWs case unicode.IsDigit(r): // XX // January 02, 2006, 15:04:05 @@ -1421,8 +1522,37 @@ iterRunes: if !p.setDay() { return p, unknownErr(datestr) } - p.stateDate = dateAlphaWsMonthSuffix + p.stateDate = dateVariousDaySuffix i-- + default: + return p, unknownErr(datestr) + } + case dateAlphaFullMonthWsDayWs: + // X + // January 02, 2006, 15:04:05 + // January 02 2006, 15:04:05 + // January 02, 2006 15:04:05 + // January 02 2006 15:04:05 + switch r { + case ',': + p.yearlen = i - p.yeari + if !p.setYear() { + return p, unknownErr(datestr) + } + p.stateTime = timeStart + i++ + break iterRunes + case ' ': + p.yearlen = i - p.yeari + if !p.setYear() { + return p, unknownErr(datestr) + } + p.stateTime = timeStart + break iterRunes + default: + if !unicode.IsDigit(r) { + return p, unknownErr(datestr) + } } case dateAlphaPeriodWsDigit: @@ -1504,11 +1634,19 @@ iterRunes: } else if p.yeari == 0 { p.yeari = i + 1 p.molen = i - p.moi - p.set(p.moi, "Jan") + if p.molen == 3 { + p.set(p.moi, "Jan") + } else { + return p, unknownErr(datestr) + } } else { p.stateTime = timeStart break iterRunes } + default: + if !unicode.IsDigit(r) && !unicode.IsLetter(r) { + return p, unknownErr(datestr) + } } case dateWeekdayAbbrevComma: // Mon, 02 Jan 2006 15:04:05 MST @@ -1536,7 +1674,11 @@ iterRunes: p.moi = i + 1 } else if p.yeari == 0 { p.molen = i - p.moi - offset - p.set(p.moi, "Jan") + if 
p.molen == 3 { + p.set(p.moi, "Jan") + } else { + return p, unknownErr(datestr) + } p.yeari = i + 1 } else { p.yearlen = i - p.yeari - offset @@ -1546,10 +1688,15 @@ iterRunes: p.stateTime = timeStart break iterRunes } + default: + if !unicode.IsDigit(r) && !unicode.IsLetter(r) { + return p, unknownErr(datestr) + } } default: - break iterRunes + // Reaching an unhandled state unexpectedly should always fail parsing + return p, unknownErr(datestr) } } if !p.coalesceDate(i) { @@ -2241,13 +2388,10 @@ iterRunes: } return p, nil - case dateYearDashAlphaDash: + case dateYearDashAlpha: // 2013-Feb-03 // 2013-Feb-3 - p.daylen = i - p.dayi - if !p.setDay() { - return p, unknownErr(datestr) - } + // 2013-February-3 return p, nil case dateYearDashDashWs: @@ -2347,14 +2491,16 @@ iterRunes: // 12 Feb 2006, 19:17 return p, nil - case dateAlphaWsMonth: - p.yearlen = i - p.yeari - if !p.setYear() { - return p, unknownErr(datestr) + case dateAlphaFullMonthWs: + if p.stateTime == timeIgnore && p.yearlen == 0 { + p.yearlen = i - p.yeari + if !p.setYear() { + return p, unknownErr(datestr) + } } return p, nil - case dateAlphaWsMonthMore: + case dateAlphaFullMonthWsDayWs: return p, nil case dateAlphaWsDigitMoreWs: @@ -2376,7 +2522,7 @@ iterRunes: case dateAlphaWsDigit: return p, nil - case dateAlphaWsDigitYearmaybe: + case dateAlphaWsDigitYearMaybe: return p, nil case dateDigitSlash: @@ -2385,7 +2531,7 @@ iterRunes: // 01/02/2006 return p, nil - case dateDigitSlashAlpha: + case dateDigitSlashAlphaSlash: // 03/Jun/2014 return p, nil @@ -2500,11 +2646,11 @@ func putBackParser(p *parser) { } // we'll be reusing the backing memory for the format byte slice, put it back // to maximum capacity - if cap(p.format) == longestPossibleDateStr { - p.format = p.format[:longestPossibleDateStr] + if cap(p.format) == formatBufferCapacity { + p.format = p.format[:formatBufferCapacity] } else { - // the parsing process replaced this, get back a new one with the right cap - p.format = make([]byte, 0, 
longestPossibleDateStr) + // the parsing improperly process replaced this, get back a new one with the right cap + p.format = make([]byte, 0, formatBufferCapacity) } // clear out pointers so we don't leak memory we don't need any longer p.loc = nil diff --git a/parseany_test.go b/parseany_test.go index e2940e7..e99904e 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -23,36 +23,68 @@ type dateTest struct { var testInputs = []dateTest{ {in: "oct 7, 1970", out: "1970-10-07 00:00:00 +0000 UTC"}, + {in: "oct 7, 1970 11:15:26pm", out: "1970-10-07 23:15:26 +0000 UTC"}, {in: "oct 7, '70", out: "1970-10-07 00:00:00 +0000 UTC"}, + {in: "oct 7, '70 11:15:26pm", out: "1970-10-07 23:15:26 +0000 UTC"}, {in: "Oct 7, '70", out: "1970-10-07 00:00:00 +0000 UTC"}, + {in: "Oct 7, '70 11:15:26pm", out: "1970-10-07 23:15:26 +0000 UTC"}, {in: "Oct. 7, '70", out: "1970-10-07 00:00:00 +0000 UTC"}, + {in: "Oct. 7, '70 11:15:26pm", out: "1970-10-07 23:15:26 +0000 UTC"}, {in: "oct. 7, '70", out: "1970-10-07 00:00:00 +0000 UTC"}, + {in: "oct. 7, '70 11:15:26pm", out: "1970-10-07 23:15:26 +0000 UTC"}, {in: "oct. 7, 1970", out: "1970-10-07 00:00:00 +0000 UTC"}, + {in: "oct. 7, 1970 11:15:26pm", out: "1970-10-07 23:15:26 +0000 UTC"}, {in: "Sept. 7, '70", out: "1970-09-07 00:00:00 +0000 UTC"}, + {in: "Sept. 7, '70 11:15:26pm", out: "1970-09-07 23:15:26 +0000 UTC"}, {in: "sept. 7, 1970", out: "1970-09-07 00:00:00 +0000 UTC"}, + {in: "sept. 
7, 1970 11:15:26pm", out: "1970-09-07 23:15:26 +0000 UTC"}, {in: "Feb 8, 2009 5:57:51 AM", out: "2009-02-08 05:57:51 +0000 UTC"}, {in: "May 8, 2009 5:57:51 PM", out: "2009-05-08 17:57:51 +0000 UTC"}, {in: "May 8, 2009 5:57:1 PM", out: "2009-05-08 17:57:01 +0000 UTC"}, {in: "May 8, 2009 5:7:51 PM", out: "2009-05-08 17:07:51 +0000 UTC"}, {in: "May 8, 2009, 5:7:51 PM", out: "2009-05-08 17:07:51 +0000 UTC"}, + {in: "June 8 2009", out: "2009-06-08 00:00:00 +0000 UTC"}, + {in: "June 8, 2009", out: "2009-06-08 00:00:00 +0000 UTC"}, + {in: "February 8th 2009", out: "2009-02-08 00:00:00 +0000 UTC"}, + {in: "February 8th, 2009", out: "2009-02-08 00:00:00 +0000 UTC"}, + {in: "September 3rd 2009", out: "2009-09-03 00:00:00 +0000 UTC"}, + {in: "September 3rd, 2009", out: "2009-09-03 00:00:00 +0000 UTC"}, + {in: "June 8 2009 11:15:26pm", out: "2009-06-08 23:15:26 +0000 UTC"}, + {in: "June 8, 2009 11:15:26pm", out: "2009-06-08 23:15:26 +0000 UTC"}, + {in: "February 8th 2009 11:15:26pm", out: "2009-02-08 23:15:26 +0000 UTC"}, + {in: "February 8th, 2009 11:15:26pm", out: "2009-02-08 23:15:26 +0000 UTC"}, + {in: "September 3rd 2009 11:15:26pm", out: "2009-09-03 23:15:26 +0000 UTC"}, + {in: "September 3rd, 2009 11:15:26pm", out: "2009-09-03 23:15:26 +0000 UTC"}, {in: "7 oct 70", out: "1970-10-07 00:00:00 +0000 UTC"}, + {in: "7 oct 70 11:15:26pm", out: "1970-10-07 23:15:26 +0000 UTC"}, {in: "7 oct 1970", out: "1970-10-07 00:00:00 +0000 UTC"}, + {in: "7 oct 1970 11:15:26pm", out: "1970-10-07 23:15:26 +0000 UTC"}, {in: "7 May 1970", out: "1970-05-07 00:00:00 +0000 UTC"}, + {in: "7 May 1970 11:15:26pm", out: "1970-05-07 23:15:26 +0000 UTC"}, {in: "7 Sep 1970", out: "1970-09-07 00:00:00 +0000 UTC"}, + {in: "7 Sep 1970 11:15:26pm", out: "1970-09-07 23:15:26 +0000 UTC"}, {in: "7 June 1970", out: "1970-06-07 00:00:00 +0000 UTC"}, + {in: "7 June 1970 11:15:26pm", out: "1970-06-07 23:15:26 +0000 UTC"}, {in: "7 September 1970", out: "1970-09-07 00:00:00 +0000 UTC"}, + {in: "7 September 1970 
11:15:26pm", out: "1970-09-07 23:15:26 +0000 UTC"}, // ANSIC = "Mon Jan _2 15:04:05 2006" {in: "Mon Jan 2 15:04:05 2006", out: "2006-01-02 15:04:05 +0000 UTC"}, {in: "Thu May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, {in: "Thu May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, + {in: "Monday Jan 2 15:04:05 2006", out: "2006-01-02 15:04:05 +0000 UTC"}, + {in: "Thursday May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, + {in: "Thursday May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, // ANSIC_GLIBC = "Mon 02 Jan 2006 03:04:05 PM UTC" {in: "Mon 02 Jan 2006 03:04:05 PM UTC", out: "2006-01-02 15:04:05 +0000 UTC", zname: "UTC"}, {in: "Mon 02 Jan 2006 03:04:05 PM CEST", out: "2006-01-02 15:04:05 +0000 UTC", zname: "CEST"}, {in: "Mon 30 Sep 2018 09:09:09 PM UTC", out: "2018-09-30 21:09:09 +0000 UTC", zname: "UTC"}, {in: "Mon 30 Sep 2018 09:09:09 PM CEST", out: "2018-09-30 21:09:09 +0000 UTC", zname: "CEST"}, + {in: "Mon 02 Jan 2006", out: "2006-01-02 00:00:00 +0000 UTC"}, + {in: "Monday 02 Jan 2006 03:04:05 PM UTC", out: "2006-01-02 15:04:05 +0000 UTC", zname: "UTC"}, // RubyDate = "Mon Jan 02 15:04:05 -0700 2006" {in: "Mon Jan 02 15:04:05 -0700 2006", out: "2006-01-02 22:04:05 +0000 UTC"}, {in: "Thu May 08 11:57:51 -0700 2009", out: "2009-05-08 18:57:51 +0000 UTC"}, + {in: "Thursday May 08 11:57:51 -0700 2009", out: "2009-05-08 18:57:51 +0000 UTC"}, // UnixDate = "Mon Jan _2 15:04:05 MST 2006" {in: "Mon Jan 2 15:04:05 MST 2006", out: "2006-01-02 15:04:05 +0000 UTC", zname: "MST"}, {in: "Thu May 8 17:57:51 MST 2009", out: "2009-05-08 17:57:51 +0000 UTC", zname: "MST"}, @@ -62,6 +94,7 @@ var testInputs = []dateTest{ {in: "Thu May 08 17:57:51 CEST 2009", out: "2009-05-08 15:57:51 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, {in: "Thu May 08 05:05:07 PST 2009", out: "2009-05-08 05:05:07 +0000 UTC", zname: "PST"}, {in: "Thu May 08 5:5:7 PST 2009", out: "2009-05-08 05:05:07 +0000 UTC", zname: "PST"}, + {in: "Thursday May 08 
05:05:07 PST 2009", out: "2009-05-08 05:05:07 +0000 UTC", zname: "PST"}, // Day Month dd time {in: "Mon Aug 10 15:44:11 UTC+0000 2015", out: "2015-08-10 15:44:11 +0000 UTC", zname: "UTC"}, {in: "Mon Aug 10 15:44:11 PST-0700 2015", out: "2015-08-10 22:44:11 +0000 UTC", zname: "PST"}, @@ -75,6 +108,8 @@ var testInputs = []dateTest{ {in: "Fri Jul 3 2015 06:04:07 UTC+0100 (GMT Daylight Time)", out: "2015-07-03 05:04:07 +0000 UTC"}, {in: "Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time)", out: "2015-07-03 13:04:07 +0000 UTC", zname: "PST"}, {in: "Fri Jul 3 2015 06:04:07 CEST-0700 (Central European Summer Time)", out: "2015-07-03 13:04:07 +0000 UTC", zname: "CEST"}, + {in: "Fri Jul 3 2015", out: "2015-07-03 00:00:00 +0000 UTC"}, + {in: "Fri Jul 3 2015 11:15:26pm", out: "2015-07-03 23:15:26 +0000 UTC"}, // Month dd, yyyy at time {in: "January 17, 2012 at 18:17:16", out: "2012-01-17 18:17:16 +0000 UTC"}, {in: "February 17, 2012 at 18:17:16", out: "2012-02-17 18:17:16 +0000 UTC"}, @@ -88,10 +123,12 @@ var testInputs = []dateTest{ {in: "OCTober 17, 2012 at 18:17:16", out: "2012-10-17 18:17:16 +0000 UTC"}, {in: "noVEMBER 17, 2012 at 18:17:16", out: "2012-11-17 18:17:16 +0000 UTC"}, {in: "December 17, 2012 at 18:17:16", out: "2012-12-17 18:17:16 +0000 UTC"}, + {in: "September 17 2012 at 5:00pm UTC-05", out: "2012-09-17 22:00:00 +0000 UTC", zname: ""}, // empty zone name, special case of UTC+NNNN {in: "September 17, 2012 at 5:00pm UTC-05", out: "2012-09-17 22:00:00 +0000 UTC", zname: ""}, // empty zone name, special case of UTC+NNNN {in: "September 17, 2012 at 10:09am PST-08", out: "2012-09-17 18:09:00 +0000 UTC", zname: "PST"}, {in: "September 17, 2012 at 10:09am CEST+02", out: "2012-09-17 08:09:00 +0000 UTC", zname: "CEST"}, {in: "September 17, 2012, 10:10:09", out: "2012-09-17 10:10:09 +0000 UTC"}, + {in: "May 17 2012 at 10:09am PST-08", out: "2012-05-17 18:09:00 +0000 UTC", zname: "PST"}, {in: "May 17, 2012 at 10:09am PST-08", out: "2012-05-17 18:09:00 +0000 UTC", 
zname: "PST"}, {in: "May 17, 2012 AT 10:09am PST-08", out: "2012-05-17 18:09:00 +0000 UTC", zname: "PST"}, {in: "May 17, 2012 AT 10:09am CEST+02", out: "2012-05-17 08:09:00 +0000 UTC", zname: "CEST"}, @@ -135,6 +172,21 @@ var testInputs = []dateTest{ {in: "June 2nd 2012", out: "2012-06-02 00:00:00 +0000 UTC"}, {in: "June 22nd, 2012", out: "2012-06-22 00:00:00 +0000 UTC"}, {in: "June 22nd 2012", out: "2012-06-22 00:00:00 +0000 UTC"}, + {in: "September 17th, 2012 11:15:26pm", out: "2012-09-17 23:15:26 +0000 UTC"}, + {in: "September 17th 2012 11:15:26pm", out: "2012-09-17 23:15:26 +0000 UTC"}, + {in: "September 7th, 2012 11:15:26pm", out: "2012-09-07 23:15:26 +0000 UTC"}, + {in: "September 7th 2012 11:15:26pm", out: "2012-09-07 23:15:26 +0000 UTC"}, + {in: "September 7tH 2012 11:15:26pm", out: "2012-09-07 23:15:26 +0000 UTC"}, + {in: "May 1st 2012 11:15:26pm", out: "2012-05-01 23:15:26 +0000 UTC"}, + {in: "May 1st, 2012 11:15:26pm", out: "2012-05-01 23:15:26 +0000 UTC"}, + {in: "May 21st 2012 11:15:26pm", out: "2012-05-21 23:15:26 +0000 UTC"}, + {in: "May 21st, 2012 11:15:26pm", out: "2012-05-21 23:15:26 +0000 UTC"}, + {in: "May 23rd 2012 11:15:26pm", out: "2012-05-23 23:15:26 +0000 UTC"}, + {in: "May 23rd, 2012 11:15:26pm", out: "2012-05-23 23:15:26 +0000 UTC"}, + {in: "June 2nd, 2012 11:15:26pm", out: "2012-06-02 23:15:26 +0000 UTC"}, + {in: "June 2nd 2012 11:15:26pm", out: "2012-06-02 23:15:26 +0000 UTC"}, + {in: "June 22nd, 2012 11:15:26pm", out: "2012-06-22 23:15:26 +0000 UTC"}, + {in: "June 22nd 2012 11:15:26pm", out: "2012-06-22 23:15:26 +0000 UTC"}, // Incorporate PR https://github.com/araddon/dateparse/pull/128 to fix https://github.com/araddon/dateparse/issues/127 // dd[th,nd,st,rd] Month yyyy {in: "1st September 2012", out: "2012-09-01 00:00:00 +0000 UTC"}, @@ -142,7 +194,8 @@ var testInputs = []dateTest{ {in: "3rd September 2012", out: "2012-09-03 00:00:00 +0000 UTC"}, {in: "4th September 2012", out: "2012-09-04 00:00:00 +0000 UTC"}, {in: "2nd January 
2018", out: "2018-01-02 00:00:00 +0000 UTC"}, - {in: "3nd Feb 2018 13:58:24", out: "2018-02-03 13:58:24 +0000 UTC"}, + {in: "3rd Feb 2018 13:58:24", out: "2018-02-03 13:58:24 +0000 UTC"}, + {in: "1st February 2018 13:58:24", out: "2018-02-01 13:58:24 +0000 UTC"}, // RFC1123 = "Mon, 02 Jan 2006 15:04:05 MST" {in: "Fri, 03 Jul 2015 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, {in: "Fri, 03 Jul 2015 08:08:08 CET", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CET"}, @@ -213,9 +266,14 @@ var testInputs = []dateTest{ {in: "07-Feb-04 09:07:07 +0100", out: "2004-02-07 08:07:07 +0000 UTC"}, // yyyy-mon-dd 2013-Feb-03 {in: "2013-Feb-03", out: "2013-02-03 00:00:00 +0000 UTC"}, + {in: "2013-Feb-03 09:07:08pm", out: "2013-02-03 21:07:08 +0000 UTC"}, + {in: "2013-February-03", out: "2013-02-03 00:00:00 +0000 UTC"}, + {in: "2013-February-03 09:07:08pm", out: "2013-02-03 21:07:08 +0000 UTC"}, // 03 February 2013 {in: "03 February 2013", out: "2013-02-03 00:00:00 +0000 UTC"}, + {in: "03 February 2013 09:07:08pm", out: "2013-02-03 21:07:08 +0000 UTC"}, {in: "3 February 2013", out: "2013-02-03 00:00:00 +0000 UTC"}, + {in: "3 February 2013 09:07:08pm", out: "2013-02-03 21:07:08 +0000 UTC"}, // Chinese 2014年04月18日 - https://github.com/araddon/dateparse/pull/132 {in: "2014年04月08日", out: "2014-04-08 00:00:00 +0000 UTC"}, {in: "2014年4月8日", out: "2014-04-08 00:00:00 +0000 UTC"}, @@ -316,9 +374,12 @@ var testInputs = []dateTest{ // 112.195.209.90 - - [20/Feb/2018:12:12:14 +0800] "GET / HTTP/1.1" 200 190 "-" "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Mobile Safari/537.36" "-" {in: "06/May/2008:08:11:17 -0700", out: "2008-05-06 15:11:17 +0000 UTC"}, {in: "30/May/2008:08:11:17 -0700", out: "2008-05-30 15:11:17 +0000 UTC"}, + {in: "30/October/2008:08:11:17 -0700", out: "2008-10-30 15:11:17 +0000 UTC"}, // dd/mon/yyyy hh:mm:ss tz - {in: "06/May/2008:08:11:17 -0700", out: "2008-05-06 15:11:17 +0000 
UTC"}, - {in: "30/May/2008:08:11:17 -0700", out: "2008-05-30 15:11:17 +0000 UTC"}, + {in: "06/May/2008", out: "2008-05-06 00:00:00 +0000 UTC"}, + {in: "06/May/2008 08:11:17 -0700", out: "2008-05-06 15:11:17 +0000 UTC"}, + {in: "30/May/2008 08:11:17 -0700", out: "2008-05-30 15:11:17 +0000 UTC"}, + {in: "30/September/2008 08:11:17 -0700", out: "2008-09-30 15:11:17 +0000 UTC"}, // mon/dd/yyyy {in: "Oct/ 7/1970", out: "1970-10-07 00:00:00 +0000 UTC"}, {in: "Oct/31/1970", out: "1970-10-31 00:00:00 +0000 UTC"}, @@ -387,6 +448,7 @@ var testInputs = []dateTest{ {in: "2023-01-04 12:01:59 AM", out: "2023-01-04 00:01:59 +0000 UTC"}, {in: "2023-01-04 12:01:59.765 AM", out: "2023-01-04 00:01:59.765 +0000 UTC"}, // https://github.com/araddon/dateparse/issues/157 + {in: "Thu Jan 28 2021", out: "2021-01-28 00:00:00 +0000 UTC"}, {in: "Thu Jan 28 2021 15:28:21 GMT+0000 (Coordinated Universal Time)", out: "2021-01-28 15:28:21 +0000 UTC"}, {in: "Thu Jan 28 2021 15:28:21 GMT+0100 (Coordinated Universal Time)", out: "2021-01-28 14:28:21 +0000 UTC"}, {in: "Thu Jan 28 2021 15:28:21 UTC+0000 (Coordinated Universal Time)", out: "2021-01-28 15:28:21 +0000 UTC"}, @@ -401,6 +463,7 @@ var testInputs = []dateTest{ {in: "Mon Dec 26 16:15:55.103786 2016", out: "2016-12-26 16:15:55.103786 +0000 UTC"}, // https://github.com/araddon/dateparse/issues/109 {in: "Sun, 07 Jun 2020 00:00:00 +0100", out: "2020-06-06 23:00:00 +0000 UTC"}, + {in: "Sun, 07 Jun 2020", out: "2020-06-07 00:00:00 +0000 UTC"}, // https://github.com/araddon/dateparse/issues/100#issuecomment-1118868154 {in: "1 Apr 2022 23:59", out: "2022-04-01 23:59:00 +0000 UTC"}, {in: "1 JANuary 2022 23:59", out: "2022-01-01 23:59:00 +0000 UTC"}, @@ -868,6 +931,7 @@ func TestParseLayout(t *testing.T) { // {in: "06/May/2008 15:04:05 -0700", out: "02/Jan/2006 15:04:05 -0700"}, {in: "06/May/2008:15:04:05 -0700", out: "02/Jan/2006:15:04:05 -0700"}, + {in: "06/June/2008 15:04:05 -0700", out: "02/January/2006 15:04:05 -0700"}, {in: "14 May 2019 
19:11:40.164", out: "02 Jan 2006 15:04:05.000"}, {in: "171113 14:14:20", out: "060102 15:04:05"}, From fbf07cc2744279becd699ba2ee5dbd739d97065e Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Sat, 16 Dec 2023 23:28:15 -0700 Subject: [PATCH 51/62] Optimize memory for error case New option SimpleErrorMessages that avoids allocation in the error path. It's off by default to preserve backwards compatibility. Added benchmark BenchmarkBigParseAnyErrors that takes the big set of test cases, and injects errors to make them fail at pseudo-random places. This optimization speeds up the error path runtime by 4x and reduces error path allocation bytes by 13x! --- README.md | 17 +- bench_test.go | 39 ++++- parseany.go | 407 +++++++++++++++++++++++++++-------------------- parseany_test.go | 82 +++++----- 4 files changed, 330 insertions(+), 215 deletions(-) diff --git a/README.md b/README.md index 0ca7b6a..8986738 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ Go Date Parser --------------------------- -Parse many date strings without knowing format in advance. Uses a scanner to read bytes and use a state machine to find format. Much faster than shotgun based parse methods. See [bench_test.go](https://github.com/araddon/dateparse/blob/master/bench_test.go) for performance comparison. +Parse many date strings without knowing format in advance. Uses a scanner to read bytes and use a state machine to find format. Much faster than shotgun based parse methods. See [bench_test.go](https://github.com/araddon/dateparse/blob/master/bench_test.go) for performance comparison. See the critical note below about timezones. [![Code Coverage](https://codecov.io/gh/araddon/dateparse/branch/master/graph/badge.svg)](https://codecov.io/gh/araddon/dateparse) @@ -9,7 +9,7 @@ Parse many date strings without knowing format in advance. 
Uses a scanner to re [![Build Status](https://travis-ci.org/araddon/dateparse.svg?branch=master)](https://travis-ci.org/araddon/dateparse) [![Go ReportCard](https://goreportcard.com/badge/araddon/dateparse)](https://goreportcard.com/report/araddon/dateparse) -**MM/DD/YYYY VS DD/MM/YYYY** Right now this uses mm/dd/yyyy WHEN ambiguous if this is not desired behavior, use `ParseStrict` which will fail on ambiguous date strings. This can be adjusted using the `PreferMonthFirst` parser option. +**MM/DD/YYYY VS DD/MM/YYYY** Right now this uses mm/dd/yyyy WHEN ambiguous if this is not desired behavior, use `ParseStrict` which will fail on ambiguous date strings. This can be adjusted using the `PreferMonthFirst` parser option. Some ambiguous formats can fail (e.g., trying to parse 31/03/2023 as the default month-first format `MM/DD/YYYY`), but can be automatically retried with `RetryAmbiguousDateWithSwap`. ```go @@ -21,11 +21,24 @@ t, err := dateparse.ParseStrict("3/1/2014") > returns error // Return a string that represents the layout to parse the given date-time. +// For certain highly complex date formats, ParseFormat may not be accurate, +// even if ParseAny is able to correctly parse it (e.g., anything that starts +// with a weekday). layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM") > "Jan 2, 2006 3:04:05 PM" ``` +Performance Considerations +---------------------------------- + +Internally a memory pool is used to minimize allocation overhead. If you could +be frequently parsing text that does not match any format, consider turning on +the `SimpleErrorMessages` option. This will make error messages have no +contextual details, but will reduce allocation overhead 13x and will be 4x +faster (most of the time is spent in generating a complex error message if the +option is off (default)).
+ Timezone Considerations ---------------------------------- diff --git a/bench_test.go b/bench_test.go index a46e5dd..db371c8 100644 --- a/bench_test.go +++ b/bench_test.go @@ -71,9 +71,11 @@ func BenchmarkBigParseIn(b *testing.B) { func BenchmarkBigParseRetryAmbiguous(b *testing.B) { b.ReportAllocs() + opts := []ParserOption{RetryAmbiguousDateWithSwap(true)} + b.ResetTimer() for i := 0; i < b.N; i++ { for _, t := range testInputs { - _, _ = ParseAny(t.in, RetryAmbiguousDateWithSwap(true)) + _, _ = ParseAny(t.in, opts...) } } } @@ -90,17 +92,48 @@ func BenchmarkShotgunParseErrors(b *testing.B) { func BenchmarkParseAnyErrors(b *testing.B) { b.ReportAllocs() + opts := []ParserOption{SimpleErrorMessages(true)} + b.ResetTimer() for i := 0; i < b.N; i++ { for _, t := range testParseErrors { - _, _ = ParseAny(t.in) + _, _ = ParseAny(t.in, opts...) + } + } +} + +func BenchmarkBigParseAnyErrors(b *testing.B) { + b.ReportAllocs() + + opts := []ParserOption{SimpleErrorMessages(true)} + // manufacture a bunch of different tests with random errors put in them + var testBigErrorInputs []string + for index, t := range testInputs { + b := []byte(t.in) + spread := 4 + (index % 4) + startingIndex := spread % len(b) + for i := startingIndex; i < len(b); i += spread { + b[i] = '?' + } + testBigErrorInputs = append(testBigErrorInputs, string(b)) + } + b.ResetTimer() + + for i := 0; i < b.N; i++ { + for _, in := range testBigErrorInputs { + _, err := ParseAny(in, opts...) + if err == nil { + panic(fmt.Sprintf("expected parsing to fail: %s", in)) + } } } } func BenchmarkParseAmbiguous(b *testing.B) { b.ReportAllocs() + opts := []ParserOption{RetryAmbiguousDateWithSwap(true)} + b.ResetTimer() for i := 0; i < b.N; i++ { - MustParse("13/02/2014 04:08:09 +0000 UTC", RetryAmbiguousDateWithSwap(true)) + MustParse("13/02/2014 04:08:09 +0000 UTC", opts...) 
} } diff --git a/parseany.go b/parseany.go index 87333ba..d0c516e 100644 --- a/parseany.go +++ b/parseany.go @@ -137,14 +137,25 @@ var ( ErrAmbiguousMMDD = fmt.Errorf("this date has ambiguous mm/dd vs dd/mm type format") ErrCouldntFindFormat = fmt.Errorf("could not find format for") ErrUnexpectedTail = fmt.Errorf("unexpected content after date/time: ") + ErrUnknownTZOffset = fmt.Errorf("TZ offset not recognized") + ErrUnknownTimeZone = fmt.Errorf("timezone not recognized") + ErrFracSecTooLong = fmt.Errorf("fractional seconds too long") ) -func unknownErr(datestr string) error { - return fmt.Errorf("%w %q", ErrCouldntFindFormat, datestr) +func (p *parser) unknownErr(datestr string) error { + if p == nil || !p.simpleErrorMessages { + return fmt.Errorf("%w %q", ErrCouldntFindFormat, datestr) + } else { + return ErrCouldntFindFormat + } } -func unexpectedTail(tail string) error { - return fmt.Errorf("%w %q", ErrUnexpectedTail, tail) +func (p *parser) unexpectedTail(tailStart int) error { + if p != nil && !p.simpleErrorMessages { + return fmt.Errorf("%w %q", ErrUnexpectedTail, p.datestr[tailStart:]) + } else { + return ErrUnexpectedTail + } } // go 1.20 allows us to convert a byte slice to a string without a memory allocation. @@ -283,12 +294,15 @@ iterRunes: // gou.Debugf("i=%d r=%s state=%d %s", i, string(r), p.stateDate, p.datestr) switch p.stateDate { case dateStart: - if unicode.IsDigit(r) { + // NOTE: don't use unicode.IsDigit and unicode.IsLetter here because + // we don't expect non-ANSI chars to start a valid date/time format. + // This will let us quickly reject strings that begin with any non-ANSI char. 
+ if '0' <= r && r <= '9' { p.stateDate = dateDigit - } else if unicode.IsLetter(r) { + } else if ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') { p.stateDate = dateAlpha } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateDigit: @@ -317,7 +331,7 @@ iterRunes: p.yearlen = i // since it was start of datestr, i=len p.moi = i + 1 if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateDigitYearSlash } else { @@ -332,7 +346,7 @@ iterRunes: p.daylen = 2 p.dayi = 0 if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } continue } @@ -346,21 +360,21 @@ iterRunes: // 03/31/2005 p.molen = i if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.dayi = i + 1 } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { if p.daylen == 0 { p.daylen = i if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.moi = i + 1 } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } @@ -373,7 +387,7 @@ iterRunes: p.yearlen = i p.moi = i + 1 if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { p.ambiguousMD = true @@ -382,21 +396,21 @@ iterRunes: if p.molen == 0 { p.molen = i if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.dayi = i + 1 } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { if p.daylen == 0 { p.daylen = i if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.moi = i + 1 } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } @@ -410,7 +424,7 @@ iterRunes: p.yearlen = i p.moi = i + 1 if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if i <= 2 { p.ambiguousMD = true @@ -420,21 +434,21 @@ iterRunes: // 03.31.2005 p.molen = i if 
!p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.dayi = i + 1 } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { if p.daylen == 0 { p.daylen = i if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.moi = i + 1 } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } @@ -454,7 +468,7 @@ iterRunes: p.yearlen = i p.moi = i + 1 if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateYearWs } else if i == 6 { @@ -470,10 +484,10 @@ iterRunes: p.yearlen = i - 2 p.moi = i + 1 if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case ',': - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) case 's', 'S', 'r', 'R', 't', 'T', 'n', 'N': // 1st January 2018 // 2nd Jan 2018 23:59 @@ -482,7 +496,7 @@ iterRunes: i-- default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } continue } @@ -503,18 +517,18 @@ iterRunes: // 2013-Feb-03 // 2013-February-03 switch r { - case '-': + case '-', '\u2212': p.molen = i - p.moi p.dayi = i + 1 p.stateDate = dateYearDashDash if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } default: if unicode.IsLetter(r) { p.stateDate = dateYearDashAlpha } else if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -532,14 +546,14 @@ iterRunes: p.daylen = i - p.dayi p.stateDate = dateYearDashDashOffset if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case ' ': p.daylen = i - p.dayi p.stateDate = dateYearDashDashWs p.stateTime = timeStart if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } break iterRunes case 'T': @@ -547,12 +561,12 @@ iterRunes: p.stateDate = dateYearDashDashT p.stateTime = timeStart if !p.setDay() { - return p, unknownErr(datestr) + 
return p, p.unknownErr(datestr) } break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -561,7 +575,7 @@ iterRunes: // 2006-01-02T15:04:05Z07:00 // 2020-08-17T17:00:00:000+0100 // (this state should never be reached, we break out when in this state) - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) case dateYearDashDashOffset: // 2020-07-20+00:00 @@ -570,7 +584,7 @@ iterRunes: p.set(p.offseti, "-07:00") default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -579,7 +593,7 @@ iterRunes: // 2013-Feb-03 // 2013-February-03 switch r { - case '-': + case '-', '\u2212': p.molen = i - p.moi // Must be a valid short or long month if p.molen == 3 { @@ -593,12 +607,12 @@ iterRunes: p.dayi = i + 1 p.stateDate = dateYearDashDash } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } default: if !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -612,28 +626,28 @@ iterRunes: p.stateDate = dateDigitDashDigit p.moi = i } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateDigitDashAlpha: // 13-Feb-03 // 28-Feb-03 // 29-Jun-2016 switch r { - case '-': + case '-', '\u2212': p.molen = i - p.moi p.set(p.moi, "Jan") p.yeari = i + 1 p.stateDate = dateDigitDashAlphaDash default: if !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } case dateDigitDashDigit: // 29-06-2026 switch r { - case '-': + case '-', '\u2212': // X // 29-06-2026 p.molen = i - p.moi @@ -642,11 +656,11 @@ iterRunes: p.yeari = i + 1 p.stateDate = dateDigitDashDigitDash } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -673,11 +687,11 @@ iterRunes: } } if !doubleColonTimeConnector { - return p, unknownErr(datestr) 
+ return p, p.unknownErr(datestr) } } } else if p.link > 0 { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } if r == ' ' || doubleColonTimeConnector { // we need to find if this was 4 digits, aka year @@ -696,7 +710,7 @@ iterRunes: p.dayi = 0 p.daylen = p.part1Len if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if length == 2 { // We have no idea if this is @@ -713,17 +727,17 @@ iterRunes: p.dayi = 0 p.daylen = p.part1Len if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateTime = timeStart break iterRunes } default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -739,7 +753,7 @@ iterRunes: if p.daylen == 0 { p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } break iterRunes @@ -747,13 +761,13 @@ iterRunes: if p.molen == 0 { p.molen = i - p.moi if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.dayi = i + 1 } default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -778,15 +792,15 @@ iterRunes: p.yeari = i + 1 p.stateDate = dateDigitSlashAlphaSlash } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } default: if !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -799,13 +813,13 @@ iterRunes: if p.yearlen == 0 { p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -827,7 +841,7 @@ iterRunes: if p.daylen == 0 { p.daylen = i - p.dayi if !p.setDay() { - return p, 
unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 } @@ -835,7 +849,7 @@ iterRunes: if p.molen == 0 { p.molen = i - p.moi if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 } @@ -851,13 +865,13 @@ iterRunes: i++ } if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -878,17 +892,17 @@ iterRunes: if p.yearlen == 0 { p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if p.daylen == 0 { p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if p.molen == 0 { p.molen = i - p.moi if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } break iterRunes @@ -898,7 +912,7 @@ iterRunes: if p.molen == 0 { p.molen = i - p.moi if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.dayi = i + 1 } @@ -906,7 +920,7 @@ iterRunes: if p.daylen == 0 { p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 } @@ -914,14 +928,14 @@ iterRunes: if p.molen == 0 { p.molen = i - p.moi if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 } } default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -941,7 +955,7 @@ iterRunes: p.dayi = 0 p.daylen = p.part1Len if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateTime = timeStart if i > p.daylen+len(" Sep") { // November etc @@ -956,7 +970,7 @@ iterRunes: p.fullMonth = possibleFullMonth p.stateDate = dateDigitWsMoYear } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { // If len=3, the might be 
Feb or May? Ie ambigous abbreviated but @@ -970,7 +984,7 @@ iterRunes: } default: if !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -984,19 +998,19 @@ iterRunes: case ',': p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } i++ break iterRunes case ' ': p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -1017,11 +1031,11 @@ iterRunes: p.dayi = i + 1 p.stateDate = dateYearWsMonthWs } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } else if !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateYearWsMonthWs: // 2013 Jan 06 15:04:05 @@ -1040,7 +1054,7 @@ iterRunes: break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -1055,24 +1069,24 @@ iterRunes: p.molen = i - p.moi - 2 p.dayi = i + 1 if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case '日': // day p.daylen = i - p.dayi - 2 if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case ' ': if p.daylen <= 0 { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateDigitChineseYearWs p.stateTime = timeStart break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } case dateDigitDot: @@ -1088,7 +1102,7 @@ iterRunes: p.daylen = i - p.dayi p.yeari = i + 1 if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateDigitDotDot } else if p.dayi == 0 && p.yearlen == 0 { @@ -1096,7 +1110,7 @@ iterRunes: p.molen = i - p.moi p.yeari = i + 1 if !p.setMonth() { - return p, unknownErr(datestr) + return p, 
p.unknownErr(datestr) } p.stateDate = dateDigitDotDot } else { @@ -1105,12 +1119,12 @@ iterRunes: p.molen = i - p.moi p.dayi = i + 1 if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateDigitDotDot } } else if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateDigitDotDot: @@ -1126,14 +1140,14 @@ iterRunes: p.daylen = i - p.dayi p.stateDate = dateDigitDotDotOffset if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case ' ': p.daylen = i - p.dayi p.stateDate = dateDigitDotDotWs p.stateTime = timeStart if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } break iterRunes case 'T': @@ -1141,12 +1155,12 @@ iterRunes: p.stateDate = dateDigitDotDotT p.stateTime = timeStart if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -1155,7 +1169,7 @@ iterRunes: // 2006-01-02T15:04:05Z07:00 // 2020-08-17T17:00:00:000+0100 // (should be unreachable, we break in this state) - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) case dateDigitDotDotOffset: // 2020-07-20+00:00 @@ -1164,7 +1178,7 @@ iterRunes: p.set(p.offseti, "-07:00") default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -1233,7 +1247,7 @@ iterRunes: p.dayi = i + 1 break } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if i == 3 { @@ -1245,7 +1259,7 @@ iterRunes: // May 8 17:57:51 2009 p.stateDate = dateAlphaWs } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case r == ',': @@ -1262,7 +1276,7 @@ iterRunes: p.skip = i + 2 i++ } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } case r == '.': @@ -1278,7 +1292,7 @@ iterRunes: putBackParser(p) return 
parseTime(newDateStr, loc, opts...) } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case r == '/': // X @@ -1301,12 +1315,12 @@ iterRunes: p.fullMonth = possibleFullMonth p.stateDate = dateAlphaSlash } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } default: if !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -1338,7 +1352,7 @@ iterRunes: case r == ' ': // continue default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaWsDigit: @@ -1353,13 +1367,13 @@ iterRunes: if r == ',' { p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateAlphaWsDigitMore } else if r == ' ' { p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 p.stateDate = dateAlphaWsDigitYearMaybe @@ -1368,7 +1382,7 @@ iterRunes: p.stateDate = dateVariousDaySuffix i-- } else if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaWsDigitYearMaybe: // x @@ -1387,11 +1401,11 @@ iterRunes: // must be year format, not 15:04 p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } break iterRunes } else if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaWsDigitMore: // x @@ -1404,7 +1418,7 @@ iterRunes: p.yeari = i + 1 p.stateDate = dateAlphaWsDigitMoreWs } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaWsDigitMoreWs: // x @@ -1425,13 +1439,13 @@ iterRunes: p.stateDate = dateAlphaWsDigitMoreWsYear p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateTime = timeStart break iterRunes default: if r != '\'' && !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, 
p.unknownErr(datestr) } } @@ -1448,7 +1462,7 @@ iterRunes: return parseTime(newDateStr, loc, opts...) } } - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) case 'n', 'N': if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if len(p.datestr) > i+2 { @@ -1457,7 +1471,7 @@ iterRunes: return parseTime(newDateStr, loc, opts...) } } - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) case 's', 'S': if p.nextIs(i, 't') || p.nextIs(i, 'T') { if len(p.datestr) > i+2 { @@ -1466,7 +1480,7 @@ iterRunes: return parseTime(newDateStr, loc, opts...) } } - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) case 'r', 'R': if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if len(p.datestr) > i+2 { @@ -1475,9 +1489,9 @@ iterRunes: return parseTime(newDateStr, loc, opts...) } } - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaFullMonthWs: @@ -1493,13 +1507,13 @@ iterRunes: if p.nextIs(i, ' ') { p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 2 p.stateDate = dateAlphaFullMonthWsDayWs i++ } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case r == ' ': @@ -1507,7 +1521,7 @@ iterRunes: // January 02 2006, 15:04:05 p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 p.stateDate = dateAlphaFullMonthWsDayWs @@ -1520,12 +1534,12 @@ iterRunes: // January 2nd, 2006, 15:04:05 p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateVariousDaySuffix i-- default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaFullMonthWsDayWs: // X @@ -1537,7 +1551,7 @@ iterRunes: case ',': p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateTime = 
timeStart i++ @@ -1545,13 +1559,13 @@ iterRunes: case ' ': p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateTime = timeStart break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -1564,7 +1578,7 @@ iterRunes: p.stateDate = dateAlphaWsDigit p.dayi = i default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaSlash: @@ -1577,7 +1591,7 @@ iterRunes: p.stateDate = dateAlphaSlashDigit p.dayi = i default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaSlashDigit: @@ -1593,13 +1607,13 @@ iterRunes: p.yeari = i + 1 p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateAlphaSlashDigitSlash case unicode.IsDigit(r): // continue default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaSlashDigitSlash: @@ -1610,7 +1624,7 @@ iterRunes: p.stateTime = timeStart break iterRunes default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateWeekdayComma: @@ -1624,12 +1638,12 @@ iterRunes: switch r { case ' ': fallthrough - case '-': + case '-', '\u2212': if p.moi == 0 { p.moi = i + 1 p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if p.yeari == 0 { p.yeari = i + 1 @@ -1637,7 +1651,7 @@ iterRunes: if p.molen == 3 { p.set(p.moi, "Jan") } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { p.stateTime = timeStart @@ -1645,7 +1659,7 @@ iterRunes: } default: if !unicode.IsDigit(r) && !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } case dateWeekdayAbbrevComma: @@ -1663,13 +1677,13 @@ iterRunes: offset++ } fallthrough - case '-': + case '-', '\u2212': if p.dayi == 0 { p.dayi = i + 1 } else if p.moi == 0 { p.daylen = i - 
p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.moi = i + 1 } else if p.yeari == 0 { @@ -1677,30 +1691,30 @@ iterRunes: if p.molen == 3 { p.set(p.moi, "Jan") } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 } else { p.yearlen = i - p.yeari - offset if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateTime = timeStart break iterRunes } default: if !unicode.IsDigit(r) && !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } default: // Reaching an unhandled state unexpectedly should always fail parsing - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } if !p.coalesceDate(i) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } if p.stateTime == timeStart { // increment first one, since the i++ occurs at end of loop @@ -1811,7 +1825,7 @@ iterRunes: // skip 'M' i++ default: - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } } case ' ': @@ -1900,14 +1914,14 @@ iterRunes: case ' ': p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case '+', '-': p.offseti = i p.stateTime = timeWsYearOffset default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } case timeWsAlpha: @@ -1961,7 +1975,7 @@ iterRunes: if i+1 == len(p.datestr) { p.stateTime = timeWsAlphaRParen } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } @@ -2002,7 +2016,7 @@ iterRunes: p.yearlen = i - p.yeari + 1 if p.yearlen == 4 { if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } @@ -2016,7 +2030,7 @@ iterRunes: isTwoLetterWord := ((i+1) == len(p.datestr) || p.nextIs(i, ' ')) if (r == 'm' || r == 'M') && isTwoLetterWord { if p.parsedAMPM { - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } // 
This isn't a time zone after all... p.tzi = 0 @@ -2042,7 +2056,7 @@ iterRunes: p.stateTime = timeWs } else { // unexpected garbage after AM/PM indicator, fail - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } case timeWsOffset: @@ -2092,7 +2106,7 @@ iterRunes: p.yearlen = i - p.yeari + 1 if p.yearlen == 4 { if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } case unicode.IsLetter(r): @@ -2178,11 +2192,11 @@ iterRunes: i++ p.stateTime = timePeriodAMPM default: - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } default: if !unicode.IsDigit(r) { - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } } case timePeriodAMPM: @@ -2193,11 +2207,11 @@ iterRunes: p.offseti = i p.stateTime = timeOffset default: - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } case timeZ: // nothing expected can come after Z - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } } @@ -2210,12 +2224,20 @@ iterRunes: // may or may not have a space on the end if offsetLen == 7 { if p.datestr[p.offseti+6] != ' ' { - return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, p.datestr[p.offseti:p.offseti+offsetLen]) + if p.simpleErrorMessages { + return p, ErrUnknownTZOffset + } else { + return p, fmt.Errorf("%w %q near %q (expected offset like -07:00)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:p.offseti+offsetLen]) + } } } p.set(p.offseti, "-07:00") default: - return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, p.datestr[p.offseti:p.offseti+offsetLen]) + if p.simpleErrorMessages { + return p, ErrUnknownTZOffset + } else { + return p, fmt.Errorf("%w %q near %q (expected offset like -07:00)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:p.offseti+offsetLen]) + } } // process timezone switch len(p.datestr) - p.tzi { @@ -2225,7 +2247,11 @@ iterRunes: 
case 4: p.set(p.tzi, "MST ") default: - return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, p.datestr[p.tzi:]) + if p.simpleErrorMessages { + return p, ErrUnknownTimeZone + } else { + return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:]) + } } case timeWsAlpha: switch len(p.datestr) - p.tzi { @@ -2235,7 +2261,11 @@ iterRunes: case 4: p.set(p.tzi, "MST ") default: - return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, p.datestr[p.tzi:]) + if p.simpleErrorMessages { + return p, ErrUnknownTimeZone + } else { + return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:]) + } } case timeWsAlphaRParen: @@ -2244,12 +2274,12 @@ iterRunes: case timeWsAlphaWs: p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case timeWsYear: p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case timeWsAlphaZoneOffsetWsExtra: p.trimExtra(false) @@ -2263,13 +2293,21 @@ iterRunes: case 6: p.set(p.offseti, "-07:00") default: - return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, p.datestr[p.offseti:i]) + if p.simpleErrorMessages { + return p, ErrUnknownTZOffset + } else { + return p, fmt.Errorf("%w %q near %q (must be 2 or 4 digits optional colon)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:i]) + } } case timePeriod: p.mslen = i - p.msi if p.mslen >= 10 { - return p, fmt.Errorf("fractional seconds in %q too long near %q", datestr, p.datestr[p.msi:p.mslen]) + if p.simpleErrorMessages { + return p, ErrFracSecTooLong + } else { + return p, fmt.Errorf("%w in %q near %q", ErrFracSecTooLong, datestr, p.datestr[p.msi:p.mslen]) + } } case timeOffset, timeWsOffset, timeWsYearOffset: switch len(p.datestr) - p.offseti { @@ -2280,7 
+2318,11 @@ iterRunes: // 19:55:00+0100 (or 19:55:00 +0100) p.set(p.offseti, "-0700") default: - return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, p.datestr[p.offseti:]) + if p.simpleErrorMessages { + return p, ErrUnknownTZOffset + } else { + return p, fmt.Errorf("%w %q near %q (must be 2 or 4 digits optional colon)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:]) + } } case timeWsOffsetWs: @@ -2295,7 +2337,11 @@ iterRunes: // 13:31:51.999 +01:00 CEST p.set(p.tzi, "MST ") default: - return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, p.datestr[p.tzi:]) + if p.simpleErrorMessages { + return p, ErrUnknownTimeZone + } else { + return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:]) + } } } case timeOffsetColon, timeWsOffsetColon: @@ -2305,7 +2351,11 @@ iterRunes: case 6: p.set(p.offseti, "-07:00") default: - return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, p.datestr[p.offseti:]) + if p.simpleErrorMessages { + return p, ErrUnknownTZOffset + } else { + return p, fmt.Errorf("%w %q near %q (expected offset like -07:00)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:]) + } } } p.coalesceTime(i) @@ -2352,7 +2402,7 @@ iterRunes: p.setEntireFormat([]byte("2006")) return p, nil } else if len(p.datestr) < 4 { - return p, fmt.Errorf("unrecognized format, too short %v", datestr) + return p, p.unknownErr(datestr) } if !t.IsZero() { if loc == nil { @@ -2418,7 +2468,7 @@ iterRunes: p.dayi = 0 p.daylen = p.part1Len if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if length == 2 { // We have no idea if this is @@ -2435,10 +2485,10 @@ iterRunes: p.dayi = 0 p.daylen = p.part1Len if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { - return p, unknownErr(datestr) + return p, 
p.unknownErr(datestr) } } @@ -2452,7 +2502,7 @@ iterRunes: // 2014.05 p.molen = i - p.moi if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } return p, nil } @@ -2495,7 +2545,7 @@ iterRunes: if p.stateTime == timeIgnore && p.yearlen == 0 { p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } return p, nil @@ -2507,7 +2557,7 @@ iterRunes: // oct 1, 1970 p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } return p, nil @@ -2579,7 +2629,7 @@ iterRunes: } - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } type parser struct { @@ -2589,6 +2639,7 @@ type parser struct { ambiguousMD bool ambiguousRetryable bool allowPartialStringMatch bool + simpleErrorMessages bool stateDate dateState stateTime timeState format []byte @@ -2690,11 +2741,22 @@ func AllowPartialStringMatch(allowPartialStringMatch bool) ParserOption { } } +// SimpleErrorMessages is an option that will cause returned error messages to contain less detail, +// but it will avoid allocating any memory for the custom error message. If you expect to attempt +// to parse a lot of text that is not valid, this could help reduce GC pressure. +func SimpleErrorMessages(simpleErrorMessages bool) ParserOption { + return func(p *parser) error { + p.simpleErrorMessages = simpleErrorMessages + return nil + } +} + // Creates a new parser. The caller must call putBackParser on the returned parser when done with it. 
func newParser(dateStr string, loc *time.Location, opts ...ParserOption) (*parser, error) { dateStrLen := len(dateStr) if dateStrLen > longestPossibleDateStr { - return nil, unknownErr(dateStr) + var nilParser *parser + return nil, nilParser.unknownErr(dateStr) } // Make sure to re-use the format byte slice from the pooled parser struct @@ -2936,7 +2998,8 @@ func (p *parser) trimExtra(onlyTrimFormat bool) { func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) (t time.Time, err error) { if p == nil { - return time.Time{}, unknownErr("") + var nilParser *parser + return time.Time{}, nilParser.unknownErr("") } if p.t != nil { return *p.t, nil @@ -2959,7 +3022,7 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) p.moi = p.dayi p.dayi = moi if !p.setDay() || !p.setMonth() { - err = unknownErr(p.datestr) + err = p.unknownErr(p.datestr) } else { if p.loc == nil { t, err = time.Parse(bytesToString(p.format), p.datestr) @@ -2993,7 +3056,7 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) // any numbers or letters in the format string. 
validFormatTo := findProperEnd(bytesToString(p.format), p.formatSetLen, len(p.format), false, false, true) if validFormatTo < len(p.format) { - return time.Time{}, unexpectedTail(p.datestr[p.formatSetLen:]) + return time.Time{}, p.unexpectedTail(p.formatSetLen) } } diff --git a/parseany_test.go b/parseany_test.go index e99904e..fa9b8d2 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -756,46 +756,52 @@ func TestParse(t *testing.T) { assert.NotEqual(t, nil, err) }) - for _, th := range testInputs { - t.Run(th.in, func(t *testing.T) { - var ts time.Time - defer func() { - if r := recover(); r != nil { - t.Fatalf("error: %s", r) - } - }() - parserOptions := []ParserOption{PreferMonthFirst(!th.preferDayFirst), RetryAmbiguousDateWithSwap(th.retryAmbiguous)} - if len(th.loc) > 0 { - loc, err := time.LoadLocation(th.loc) - if err != nil { - t.Fatalf("Expected to load location %q but got %v", th.loc, err) - } - ts, err = ParseIn(th.in, loc, parserOptions...) - if err != nil { - t.Fatalf("expected to parse %q but got %v", th.in, err) - } - got := fmt.Sprintf("%v", ts.In(time.UTC)) - assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) - if th.out != got { - t.Fatalf("whoops, got %s, expected %s", got, th.out) - } - if len(th.zname) > 0 { - gotZone, _ := ts.Zone() - assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) + for _, simpleErrorMessage := range []bool{false, true} { + for _, th := range testInputs { + t.Run(fmt.Sprintf("simpleerr-%v-%s", simpleErrorMessage, th.in), func(t *testing.T) { + var ts time.Time + defer func() { + if r := recover(); r != nil { + t.Fatalf("error: %s", r) + } + }() + parserOptions := []ParserOption{ + PreferMonthFirst(!th.preferDayFirst), + RetryAmbiguousDateWithSwap(th.retryAmbiguous), + SimpleErrorMessages(simpleErrorMessage), } - } else { - ts = MustParse(th.in, parserOptions...) 
- got := fmt.Sprintf("%v", ts.In(time.UTC)) - assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) - if th.out != got { - t.Fatalf("whoops, got %s, expected %s", got, th.out) - } - if len(th.zname) > 0 { - gotZone, _ := ts.Zone() - assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) + if len(th.loc) > 0 { + loc, err := time.LoadLocation(th.loc) + if err != nil { + t.Fatalf("Expected to load location %q but got %v", th.loc, err) + } + ts, err = ParseIn(th.in, loc, parserOptions...) + if err != nil { + t.Fatalf("expected to parse %q but got %v", th.in, err) + } + got := fmt.Sprintf("%v", ts.In(time.UTC)) + assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) + if th.out != got { + t.Fatalf("whoops, got %s, expected %s", got, th.out) + } + if len(th.zname) > 0 { + gotZone, _ := ts.Zone() + assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) + } + } else { + ts = MustParse(th.in, parserOptions...) + got := fmt.Sprintf("%v", ts.In(time.UTC)) + assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) + if th.out != got { + t.Fatalf("whoops, got %s, expected %s", got, th.out) + } + if len(th.zname) > 0 { + gotZone, _ := ts.Zone() + assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) + } } - } - }) + }) + } } // some errors From a45d593447506c03418cf82ed39968f6a3b456c9 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Sat, 16 Dec 2023 23:40:14 -0700 Subject: [PATCH 52/62] Optimize checks for day of week and full month Reduces CPU usage on large benchmarks by ~2%-3% and prepares for future with international month names in future. 
--- bench_test.go | 7 +++++ parseany.go | 75 ++++++++++++++++++++++----------------------------- 2 files changed, 39 insertions(+), 43 deletions(-) diff --git a/bench_test.go b/bench_test.go index db371c8..a07cb43 100644 --- a/bench_test.go +++ b/bench_test.go @@ -137,6 +137,13 @@ func BenchmarkParseAmbiguous(b *testing.B) { } } +func BenchmarkParseWeekdayAndFullMonth(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + MustParse("Monday 02 December 2006 03:04:05 PM UTC") + } +} + /* func BenchmarkParseDateString(b *testing.B) { b.ReportAllocs() diff --git a/parseany.go b/parseany.go index d0c516e..ae0535c 100644 --- a/parseany.go +++ b/parseany.go @@ -19,36 +19,36 @@ import ( // gou.SetColorOutput() // } -var days = []string{ - "mon", - "tue", - "wed", - "thu", - "fri", - "sat", - "sun", - "monday", - "tuesday", - "wednesday", - "thursday", - "friday", - "saturday", - "sunday", +var knownDays = map[string]struct{}{ + "mon": {}, + "tue": {}, + "wed": {}, + "thu": {}, + "fri": {}, + "sat": {}, + "sun": {}, + "monday": {}, + "tuesday": {}, + "wednesday": {}, + "thursday": {}, + "friday": {}, + "saturday": {}, + "sunday": {}, } -var months = []string{ - "january", - "february", - "march", - "april", - "may", - "june", - "july", - "august", - "september", - "october", - "november", - "december", +var knownMonths = map[string]struct{}{ + "january": {}, + "february": {}, + "march": {}, + "april": {}, + "may": {}, + "june": {}, + "july": {}, + "august": {}, + "september": {}, + "october": {}, + "november": {}, + "december": {}, } type dateState uint8 @@ -3080,21 +3080,10 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) } } func isDay(alpha string) bool { - for _, day := range days { - if alpha == day { - return true - } - } - return false + _, ok := knownDays[alpha] + return ok } func isMonthFull(alpha string) bool { - if len(alpha) > len("september") { - return false - } - for _, month := range months { - if alpha == month 
{ - return true - } - } - return false + _, ok := knownMonths[alpha] + return ok } From 89df0f8c4994fada621ad774e08858b3c988d646 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Mon, 18 Dec 2023 20:52:16 -0700 Subject: [PATCH 53/62] Comprehensive time validation --- parseany.go | 104 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 85 insertions(+), 19 deletions(-) diff --git a/parseany.go b/parseany.go index ae0535c..8e6b5b7 100644 --- a/parseany.go +++ b/parseany.go @@ -1769,13 +1769,12 @@ iterRunes: if p.seci == 0 { // 22:18+0530 p.minlen = i - p.mini + } else if p.seclen == 0 { + p.seclen = i - p.seci + } else if p.msi > 0 && p.mslen == 0 { + p.mslen = i - p.msi } else { - if p.seclen == 0 { - p.seclen = i - p.seci - } - if p.msi > 0 && p.mslen == 0 { - p.mslen = i - p.msi - } + return p, p.unknownErr(datestr) } p.offseti = i case '.', ',': @@ -1806,6 +1805,8 @@ iterRunes: // September 17, 2012 at 5:00pm UTC-05 i++ // skip ' ' p.houri = 0 // reset hour + } else { + return p, p.unknownErr(datestr) } } else { // Could be AM/PM @@ -1813,13 +1814,17 @@ iterRunes: isTwoLetterWord := ((i+2) == len(p.datestr) || p.nextIs(i+1, ' ')) switch { case isLower && p.nextIs(i, 'm') && isTwoLetterWord && !p.parsedAMPM: - p.coalesceTime(i) + if !p.coalesceTime(i) { + return p, p.unknownErr(datestr) + } p.set(i, "pm") p.parsedAMPM = true // skip 'm' i++ case !isLower && p.nextIs(i, 'M') && isTwoLetterWord && !p.parsedAMPM: - p.coalesceTime(i) + if !p.coalesceTime(i) { + return p, p.unknownErr(datestr) + } p.set(i, "PM") p.parsedAMPM = true // skip 'M' @@ -1829,7 +1834,9 @@ iterRunes: } } case ' ': - p.coalesceTime(i) + if !p.coalesceTime(i) { + return p, p.unknownErr(datestr) + } p.stateTime = timeWs case ':': if p.mini == 0 { @@ -1841,7 +1848,11 @@ iterRunes: } else if p.seci > 0 { // 18:31:59:257 ms uses colon, wtf p.seclen = i - p.seci - p.set(p.seci, "05") + if p.seclen == 2 { + p.set(p.seci, "05") + } else { + return p, p.unknownErr(datestr) + } p.msi 
= i + 1 // gross, gross, gross. manipulating the datestr is horrible. @@ -1861,6 +1872,8 @@ iterRunes: // 15:04:05-07:00 if r == ':' { p.stateTime = timeOffsetColon + } else if !unicode.IsDigit(r) { + return p, p.unknownErr(datestr) } case timeWs: // timeWsAlpha @@ -1905,6 +1918,10 @@ iterRunes: // 00:12:00 2008 p.stateTime = timeWsYear p.yeari = i + } else if r == '(' { + // (start of time zone description, ignore) + } else { + return p, p.unknownErr(datestr) } } case timeWsYear: @@ -1953,6 +1970,8 @@ iterRunes: p.set(p.tzi, " MST") } else if p.tzlen == 3 { p.set(p.tzi, "MST") + } else if p.tzlen > 0 { + return p, p.unknownErr(datestr) } p.stateTime = timeWsAlphaZoneOffset p.offseti = i @@ -1965,6 +1984,8 @@ iterRunes: p.set(p.tzi, " MST") } else if p.tzlen == 3 { p.set(p.tzi, "MST") + } else if p.tzlen > 0 { + return p, p.unknownErr(datestr) } if r == ' ' { p.stateTime = timeWsAlphaWs @@ -1997,6 +2018,10 @@ iterRunes: p.yeari = i + 1 } p.stateTime = timeWsAlphaZoneOffsetWs + default: + if r != ':' && !unicode.IsDigit(r) { + return p, p.unknownErr(datestr) + } } case timeWsAlphaZoneOffsetWs: // timeWsAlphaZoneOffsetWs @@ -2018,7 +2043,11 @@ iterRunes: if !p.setYear() { return p, p.unknownErr(datestr) } + } else if p.yearlen > 4 { + return p, p.unknownErr(datestr) } + } else { + return p, p.unknownErr(datestr) } case timeWsAMPMMaybe: // timeWsAMPMMaybe @@ -2045,6 +2074,8 @@ iterRunes: p.set(p.houri, "03") } else if p.hourlen == 1 { p.set(p.houri, "3") + } else { + return p, p.unknownErr(datestr) } } else { p.stateTime = timeWsAlpha @@ -2078,6 +2109,10 @@ iterRunes: p.set(p.offseti, "-0700") p.yeari = i + 1 p.stateTime = timeWsOffsetWs + default: + if !unicode.IsDigit(r) { + return p, p.unknownErr(datestr) + } } case timeWsOffsetWs: // 17:57:51 -0700 2009 @@ -2091,6 +2126,8 @@ iterRunes: if p.datestr[i-1] == 'm' { p.extra = i - 2 p.trimExtra(false) + } else { + return p, p.unknownErr(datestr) } case '+', '-', '(': // This really doesn't seem valid, but for some 
reason when round-tripping a go date @@ -2100,6 +2137,8 @@ iterRunes: p.extra = i - 1 p.stateTime = timeWsOffset p.trimExtra(false) + case ' ': + // continue default: switch { case unicode.IsDigit(r): @@ -2108,12 +2147,16 @@ iterRunes: if !p.setYear() { return p, p.unknownErr(datestr) } + } else if p.yearlen > 4 { + return p, p.unknownErr(datestr) } case unicode.IsLetter(r): // 15:04:05 -0700 MST if p.tzi == 0 { p.tzi = i } + default: + return p, p.unknownErr(datestr) } } @@ -2136,6 +2179,8 @@ iterRunes: } p.tzi = i break iterTimeRunes + } else if r != ' ' && !unicode.IsDigit(r) { + return p, p.unknownErr(datestr) } case timePeriod: // 15:04:05.999999999 @@ -2155,7 +2200,9 @@ iterRunes: switch r { case ' ': p.mslen = i - p.msi - p.coalesceTime(i) + if !p.coalesceTime(i) { + return p, p.unknownErr(datestr) + } p.stateTime = timeWs case '+', '-': p.mslen = i - p.msi @@ -2177,7 +2224,9 @@ iterRunes: switch { case isLower && p.nextIs(i, 'm') && isTwoLetterWord && !p.parsedAMPM: p.mslen = i - p.msi - p.coalesceTime(i) + if !p.coalesceTime(i) { + return p, p.unknownErr(datestr) + } p.set(i, "pm") p.parsedAMPM = true // skip 'm' @@ -2185,7 +2234,9 @@ iterRunes: p.stateTime = timePeriodAMPM case !isLower && p.nextIs(i, 'M') && isTwoLetterWord && !p.parsedAMPM: p.mslen = i - p.msi - p.coalesceTime(i) + if !p.coalesceTime(i) { + return p, p.unknownErr(datestr) + } p.set(i, "PM") p.parsedAMPM = true // skip 'M' @@ -2269,7 +2320,7 @@ iterRunes: } case timeWsAlphaRParen: - // continue + // nothing extra to do case timeWsAlphaWs: p.yearlen = i - p.yeari @@ -2358,7 +2409,9 @@ iterRunes: } } } - p.coalesceTime(i) + if !p.coalesceTime(i) { + return p, p.unknownErr(datestr) + } } switch p.stateDate { @@ -2401,7 +2454,7 @@ iterRunes: } else if len(p.datestr) == len("2014") { p.setEntireFormat([]byte("2006")) return p, nil - } else if len(p.datestr) < 4 { + } else { return p, p.unknownErr(datestr) } if !t.IsZero() { @@ -2412,6 +2465,8 @@ iterRunes: t = t.In(loc) p.t = &t return p, nil 
+ } else { + return p, p.unknownErr(datestr) } case dateDigitSt: // 171113 14:14:20 @@ -2435,6 +2490,8 @@ iterRunes: p.set(p.offseti, "-0700") case 6: p.set(p.offseti, "-07:00") + default: + return p, p.unknownErr(datestr) } return p, nil @@ -2530,6 +2587,8 @@ iterRunes: p.set(p.offseti, "-0700") case 6: p.set(p.offseti, "-07:00") + default: + return p, p.unknownErr(datestr) } return p, nil @@ -2906,7 +2965,7 @@ func (p *parser) ts() string { func (p *parser) ds() string { return fmt.Sprintf("%s d:(%d:%d) m:(%d:%d) y:(%d:%d)", p.datestr, p.dayi, p.daylen, p.moi, p.molen, p.yeari, p.yearlen) } -func (p *parser) coalesceTime(end int) { +func (p *parser) coalesceTime(end int) bool { // 03:04:05 // 15:04:05 // 3:04:05 @@ -2917,6 +2976,8 @@ func (p *parser) coalesceTime(end int) { p.set(p.houri, "15") } else if p.hourlen == 1 { p.set(p.houri, "3") + } else { + return false } } if p.mini > 0 { @@ -2925,8 +2986,10 @@ func (p *parser) coalesceTime(end int) { } if p.minlen == 2 { p.set(p.mini, "04") - } else { + } else if p.minlen == 1 { p.set(p.mini, "4") + } else { + return false } } if p.seci > 0 { @@ -2935,8 +2998,10 @@ func (p *parser) coalesceTime(end int) { } if p.seclen == 2 { p.set(p.seci, "05") - } else { + } else if p.seclen == 1 { p.set(p.seci, "5") + } else { + return false } } @@ -2949,6 +3014,7 @@ func (p *parser) coalesceTime(end int) { p.formatSetLen = endPos } } + return true } func (p *parser) setFullMonth(month string) { oldLen := len(p.format) From 7a3c9238201b1b13b41f0aca76861acc6bb93d82 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Mon, 18 Dec 2023 23:14:08 -0700 Subject: [PATCH 54/62] Fix mm.dd.yyyy (time) format --- parseany.go | 18 ++++++++++++++++-- parseany_test.go | 5 +++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/parseany.go b/parseany.go index 8e6b5b7..f1501bd 100644 --- a/parseany.go +++ b/parseany.go @@ -1143,12 +1143,26 @@ iterRunes: return p, p.unknownErr(datestr) } case ' ': + if p.daylen == 0 && p.molen > 0 
&& p.yearlen > 0 { p.daylen = i - p.dayi - p.stateDate = dateDigitDotDotWs - p.stateTime = timeStart if !p.setDay() { return p, p.unknownErr(datestr) } + } else if p.molen == 0 && p.daylen > 0 && p.yearlen > 0 { + p.molen = i - p.moi + if !p.setMonth() { + return p, p.unknownErr(datestr) + } + } else if p.yearlen == 0 && p.daylen > 0 && p.molen > 0 { + p.yearlen = i - p.yeari + if !p.setYear() { + return p, p.unknownErr(datestr) + } + } else { + return p, p.unknownErr(datestr) + } + p.stateDate = dateDigitDotDotWs + p.stateTime = timeStart break iterRunes case 'T': p.daylen = i - p.dayi diff --git a/parseany_test.go b/parseany_test.go index fa9b8d2..6284607 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -676,6 +676,7 @@ var testInputs = []dateTest{ {in: "3.31.2014", out: "2014-03-31 00:00:00 +0000 UTC"}, {in: "3.3.2014", out: "2014-03-03 00:00:00 +0000 UTC"}, {in: "03.31.2014", out: "2014-03-31 00:00:00 +0000 UTC"}, + {in: "03.31.2014 10:11:59 MST", out: "2014-03-31 10:11:59 +0000 UTC", zname: "MST"}, // mm.dd.yy {in: "08.21.71", out: "1971-08-21 00:00:00 +0000 UTC"}, // dd.mm.yyyy (see https://github.com/araddon/dateparse/issues/129 and https://github.com/araddon/dateparse/issues/28 and https://github.com/araddon/dateparse/pull/133) @@ -1198,6 +1199,6 @@ func TestRetryAmbiguousDateWithSwap(t *testing.T) { // Convenience function for debugging a particular broken test case func TestDebug(t *testing.T) { - ts := MustParse("03:08:2012 18:31:59+00:00", PreferMonthFirst(false)) - assert.Equal(t, "2012-08-03 18:31:59 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts := MustParse("03.31.2014 10:11:59 MST-0700", PreferMonthFirst(true)) + assert.Equal(t, "2014-03-31 17:11:59 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } From 65e6e8d1a93f8b60810d659969afe32c4fe71efe Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Mon, 18 Dec 2023 23:14:58 -0700 Subject: [PATCH 55/62] Add support for dd-month-year format --- parseany.go | 13 +++++++++++++ 
parseany_test.go | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/parseany.go b/parseany.go index f1501bd..586efec 100644 --- a/parseany.go +++ b/parseany.go @@ -635,9 +635,22 @@ iterRunes: switch r { case '-', '\u2212': p.molen = i - p.moi + + // Must be a valid short or long month + if p.molen == 3 { p.set(p.moi, "Jan") p.yeari = i + 1 p.stateDate = dateDigitDashAlphaDash + } else { + possibleFullMonth := strings.ToLower(p.datestr[p.moi:(p.moi + p.molen)]) + if i > 3 && isMonthFull(possibleFullMonth) { + p.fullMonth = possibleFullMonth + p.yeari = i + 1 + p.stateDate = dateDigitDashAlphaDash + } else { + return p, p.unknownErr(datestr) + } + } default: if !unicode.IsLetter(r) { return p, p.unknownErr(datestr) diff --git a/parseany_test.go b/parseany_test.go index 6284607..5ffe5fd 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -403,7 +403,7 @@ var testInputs = []dateTest{ {in: "15-Jan-2017", out: "2017-01-15 00:00:00 +0000 UTC"}, {in: "28-Feb-02 15:16:17", out: "2002-02-28 15:16:17 +0000 UTC"}, {in: "15-Jan-18 15:16:17", out: "2018-01-15 15:16:17 +0000 UTC"}, - {in: "15-Jan-2017 15:16:17", out: "2017-01-15 15:16:17 +0000 UTC"}, + {in: "15-September-2017 15:16:17", out: "2017-09-15 15:16:17 +0000 UTC"}, // dd-mm-yy (digit month - potentially ambiguous) - https://github.com/araddon/dateparse/issues/139 {in: "28-02-02", out: "2002-02-28 00:00:00 +0000 UTC"}, {in: "15-01-18", out: "2018-01-15 00:00:00 +0000 UTC"}, From 4f7e8545ec19c1ad59d6aaccb259864704888312 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Mon, 18 Dec 2023 23:19:16 -0700 Subject: [PATCH 56/62] Update example and README.md with new formats Audited all test cases to make sure an example was listed for all known formats. 
--- README.md | 451 ++++++++++++++++++++++++++++++----------------- example/main.go | 447 ++++++++++++++++++++++++++++++---------------- parseany.go | 14 +- parseany_test.go | 19 +- 4 files changed, 601 insertions(+), 330 deletions(-) diff --git a/README.md b/README.md index 8986738..8ef2e23 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ Go Date Parser --------------------------- -Parse many date strings without knowing format in advance. Uses a scanner to read bytes and use a state machine to find format. Much faster than shotgun based parse methods. See [bench_test.go](https://github.com/araddon/dateparse/blob/master/bench_test.go) for performance comparison. See the critical note below about timezones. +Parse many date strings without knowing format in advance. Validates comprehensively to avoid false positives. Uses a scanner to read bytes with a state machine to find format. Much faster than shotgun based parse methods. See [bench_test.go](https://github.com/araddon/dateparse/blob/master/bench_test.go) for performance comparison. See the critical note below about timezones. [![Code Coverage](https://codecov.io/gh/araddon/dateparse/branch/master/graph/badge.svg)](https://codecov.io/gh/araddon/dateparse) @@ -72,51 +72,81 @@ import ( "fmt" "time" - "github.com/scylladb/termtables" "github.com/araddon/dateparse" + "github.com/scylladb/termtables" ) var examples = []string{ + // mon day year (time) "May 8, 2009 5:57:51 PM", "oct 7, 1970", "oct 7, '70", "oct. 7, 1970", "oct. 
7, 70", - "Mon Jan 2 15:04:05 2006", - "Mon Jan 2 15:04:05 MST 2006", - "Mon Jan 02 15:04:05 -0700 2006", - "Monday, 02-Jan-06 15:04:05 MST", - "Mon, 02 Jan 2006 15:04:05 MST", - "Tue, 11 Jul 2017 16:28:13 +0200 (CEST)", - "Mon, 02 Jan 2006 15:04:05 -0700", - "Mon 30 Sep 2018 09:09:09 PM UTC", - "Mon Aug 10 15:44:11 UTC+0100 2015", - "Thu, 4 Jan 2018 17:53:36 +0000", - "Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)", - "Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00)", - "September 17, 2012 10:09am", - "September 17, 2012 at 10:09am PST-08", - "September 17, 2012, 10:10:09", "October 7, 1970", "October 7th, 1970", + "Sept. 7, 1970 11:15:26pm", + "Sep 7 2009 11:15:26.123 PM PST", + "September 3rd, 2009 11:15:26.123456789pm", + "September 17 2012 10:09am", + "September 17, 2012, 10:10:09", + "Sep 17, 2012 at 10:02am (EST)", + // (PST-08 will have an offset of -0800, and a zone name of "PST") + "September 17, 2012 at 10:09am PST-08", + // (UTC-0700 has the same offset as -0700, and the returned zone name will be empty) + "September 17 2012 5:00pm UTC-0700", + "September 17 2012 5:00pm GMT-0700", + // (weekday) day mon year (time) + "7 oct 70", + "7 Oct 1970", + "7 September 1970 23:15", + "7 September 1970 11:15:26pm", + "03 February 2013", "12 Feb 2006, 19:17", "12 Feb 2006 19:17", "14 May 2019 19:11:40.164", - "7 oct 70", - "7 oct 1970", - "03 February 2013", - "1 July 2013", - "2013-Feb-03", - // dd/Mon/yyy alpha Months - "06/Jan/2008:15:04:05 -0700", + "4th Sep 2012", + "1st February 2018 13:58:24", + "Mon, 02 Jan 2006 15:04:05 MST", // RFC1123 + "Mon, 02 Jan 2006 15:04:05 -0700", + "Tue, 11 Jul 2017 16:28:13 +0200 (CEST)", + "Mon 30 Sep 2018 09:09:09 PM UTC", + "Sun, 07 Jun 2020 00:00:00 +0100", + "Wed, 8 Feb 2023 19:00:46 +1100 (AEDT)", + // ANSIC and UnixDate - weekday month day time year + "Mon Jan 2 15:04:05 2006", + "Mon Jan 2 15:04:05 MST 2006", + "Monday Jan 02 15:04:05 -0700 2006", + "Mon Jan 2 15:04:05.103786 2006", + // RubyDate - weekday month day 
time offset year + "Mon Jan 02 15:04:05 -0700 2006", + // ANSIC_GLIBC - weekday day month year time + "Mon 02 Jan 2006 03:04:05 PM UTC", + "Monday 02 Jan 2006 03:04:05 PM MST", + // weekday month day time timezone-offset year + "Mon Aug 10 15:44:11 UTC+0000 2015", + // git log default date format + "Thu Apr 7 15:13:13 2005 -0700", + // Variants of the above with a (full time zone description) + "Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time)", + "Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)", + "Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00)", + // year month day + "2013 May 2", + "2013 May 02 11:37:55", + // dd/Mon/year alpha Months "06/Jan/2008 15:04:05 -0700", - // mm/dd/yy + "06/January/2008 15:04:05 -0700", + "06/Jan/2008:15:04:05 -0700", // ngnix-log + "06/January/2008:08:11:17 -0700", + // mm/dd/year (see also PreferMonthFirst and RetryAmbiguousDateWithSwap options) "3/31/2014", "03/31/2014", "08/21/71", "8/1/71", "4/8/2014 22:05", "04/08/2014 22:05", + "04/08/2014, 22:05", "4/8/14 22:05", "04/2/2014 03:00:51", "8/8/1965 12:00:00 AM", @@ -127,6 +157,10 @@ var examples = []string{ "4/02/2014 03:00:51", "03/19/2012 10:11:59", "03/19/2012 10:11:59.3186369", + // mon/dd/year + "Oct/ 7/1970", + "Oct/03/1970 22:33:44", + "February/03/1970 11:33:44.555 PM PST", // yyyy/mm/dd "2014/3/31", "2014/03/31", @@ -136,31 +170,29 @@ var examples = []string{ "2014/4/02 03:00:51", "2012/03/19 10:11:59", "2012/03/19 10:11:59.3186369", - // yyyy:mm:dd - "2014:3:31", - "2014:03:31", - "2014:4:8 22:05", - "2014:04:08 22:05", - "2014:04:2 03:00:51", - "2014:4:02 03:00:51", - "2012:03:19 10:11:59", - "2012:03:19 10:11:59.3186369", - // Chinese - "2014年04月08日", - // yyyy-mm-ddThh + // weekday, day-mon-yy time + "Fri, 03-Jul-15 08:08:08 CEST", + "Monday, 02-Jan-06 15:04:05 MST", // RFC850 + "Monday, 02 Jan 2006 15:04:05 -0600", + "02-Jan-06 15:04:05 MST", + // RFC3339 - yyyy-mm-ddThh "2006-01-02T15:04:05+0000", "2009-08-12T22:15:09-07:00", "2009-08-12T22:15:09", 
"2009-08-12T22:15:09.988", "2009-08-12T22:15:09Z", + "2009-08-12T22:15:09.52Z", "2017-07-19T03:21:51:897+0100", "2019-05-29T08:41-04", // no seconds, 2 digit TZ offset - // yyyy-mm-dd hh:mm:ss + // yyyy-mm-dd hh:mm:ss "2014-04-26 17:24:37.3186369", "2012-08-03 18:31:59.257000000", "2014-04-26 17:24:37.123", - "2013-04-01 22:43", - "2013-04-01 22:43:22", + "2014-04-01 12:01am", + "2014-04-01 12:01:59.765 AM", + "2014-04-01 12:01:59,765", + "2014-04-01 22:43", + "2014-04-01 22:43:22", "2014-12-16 06:20:00 UTC", "2014-12-16 06:20:00 GMT", "2014-04-26 05:24:37 PM", @@ -169,29 +201,74 @@ var examples = []string{ "2014-04-26 13:13:44 +09:00", "2012-08-03 18:31:59.257000000 +0000 UTC", "2015-09-30 18:48:56.35272715 +0000 UTC", - "2015-02-18 00:12:00 +0000 GMT", + "2015-02-18 00:12:00 +0000 GMT", // golang native format "2015-02-18 00:12:00 +0000 UTC", "2015-02-08 03:02:00 +0300 MSK m=+0.000000001", "2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001", "2017-07-19 03:21:51+00:00", + "2017-04-03 22:32:14.322 CET", + "2017-04-03 22:32:14,322 CET", + "2017-04-03 22:32:14:322 CET", + "2018-09-30 08:09:13.123PM PMDT", // PMDT time zone + "2018-09-30 08:09:13.123 am AMT", // AMT time zone "2014-04-26", "2014-04", "2014", - "2014-05-11 08:20:13,787", - // yyyy-mm-dd-07:00 + // yyyy-mm-dd(offset) "2020-07-20+08:00", - // mm.dd.yy + "2020-07-20+0800", + // year-mon-dd + "2013-Feb-03", + "2013-February-03 09:07:08.123", + // dd-mon-year + "03-Feb-13", + "03-Feb-2013", + "07-Feb-2004 09:07:07 +0200", + "07-February-2004 09:07:07 +0200", + // dd-mm-year (this format (common in Europe) always puts the day first, regardless of PreferMonthFirst) + "28-02-02", + "28-02-02 15:16:17", + "28-02-2002", + "28-02-2002 15:16:17", + // mm.dd.yy (see also PreferMonthFirst and RetryAmbiguousDateWithSwap options) "3.31.2014", + "03.31.14", "03.31.2014", - "08.21.71", + "03.31.2014 10:11:59 MST", + "03.31.2014 10:11:59.3186369Z", + // year.mm.dd "2014.03", "2014.03.30", - // yyyymmdd and similar + 
"2014.03.30 08:33pm", + "2014.03.30T08:33:44.555 PM -0700 MST", + "2014.03.30-0600", + // yyyy:mm:dd + "2014:3:31", + "2014:03:31", + "2014:4:8 22:05", + "2014:04:08 22:05", + "2014:04:2 03:00:51", + "2014:4:02 03:00:51", + "2012:03:19 10:11:59", + "2012:03:19 10:11:59.3186369", + // mm:dd:yyyy (see also PreferMonthFirst and RetryAmbiguousDateWithSwap options) + "08:03:2012", + "08:04:2012 18:31:59+00:00", + // yyyymmdd and similar "20140601", "20140722105203", - // yymmdd hh:mm:yy mysql log + "20140722105203.364", + // Chinese + "2014年4月25日", + "2014年04月08日", + "2014年04月08日 19:17:22 -0700", + // RabbitMQ log format + "8-Mar-2018::14:09:27", + "08-03-2018::02:09:29 PM", + // yymmdd hh:mm:yy mysql log // 080313 05:21:55 mysqld started "171113 14:14:20", + "190910 11:51:49", // unix seconds, ms, micro, nano "1332151919", "1384216367189", @@ -231,117 +308,175 @@ func main() { } /* -+-------------------------------------------------------+-----------------------------------------+ -| Input | Parsed, and Output as %v | -+-------------------------------------------------------+-----------------------------------------+ -| May 8, 2009 5:57:51 PM | 2009-05-08 17:57:51 +0000 UTC | -| oct 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| oct 7, '70 | 1970-10-07 00:00:00 +0000 UTC | -| oct. 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| oct. 
7, 70 | 1970-10-07 00:00:00 +0000 UTC | -| Mon Jan 2 15:04:05 2006 | 2006-01-02 15:04:05 +0000 UTC | -| Mon Jan 2 15:04:05 MST 2006 | 2006-01-02 15:04:05 +0000 MST | -| Mon Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | -| Monday, 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | -| Mon, 02 Jan 2006 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | -| Tue, 11 Jul 2017 16:28:13 +0200 (CEST) | 2017-07-11 16:28:13 +0200 +0200 | -| Mon, 02 Jan 2006 15:04:05 -0700 | 2006-01-02 15:04:05 -0700 -0700 | -| Mon 30 Sep 2018 09:09:09 PM UTC | 2018-09-30 21:09:09 +0000 UTC | -| Mon Aug 10 15:44:11 UTC+0100 2015 | 2015-08-10 15:44:11 +0000 UTC | -| Thu, 4 Jan 2018 17:53:36 +0000 | 2018-01-04 17:53:36 +0000 UTC | -| Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) | 2015-07-03 18:04:07 +0100 GMT | -| Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00) | 2021-01-03 00:12:23 +0800 +0800 | -| September 17, 2012 10:09am | 2012-09-17 10:09:00 +0000 UTC | -| September 17, 2012 at 10:09am PST-08 | 2012-09-17 10:09:00 -0800 PST | -| September 17, 2012, 10:10:09 | 2012-09-17 10:10:09 +0000 UTC | -| October 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| October 7th, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| 12 Feb 2006, 19:17 | 2006-02-12 19:17:00 +0000 UTC | -| 12 Feb 2006 19:17 | 2006-02-12 19:17:00 +0000 UTC | -| 14 May 2019 19:11:40.164 | 2019-05-14 19:11:40.164 +0000 UTC | -| 7 oct 70 | 1970-10-07 00:00:00 +0000 UTC | -| 7 oct 1970 | 1970-10-07 00:00:00 +0000 UTC | -| 03 February 2013 | 2013-02-03 00:00:00 +0000 UTC | -| 1 July 2013 | 2013-07-01 00:00:00 +0000 UTC | -| 2013-Feb-03 | 2013-02-03 00:00:00 +0000 UTC | -| 06/Jan/2008:15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | -| 06/Jan/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | -| 3/31/2014 | 2014-03-31 00:00:00 +0000 UTC | -| 03/31/2014 | 2014-03-31 00:00:00 +0000 UTC | -| 08/21/71 | 1971-08-21 00:00:00 +0000 UTC | -| 8/1/71 | 1971-08-01 00:00:00 +0000 UTC | -| 4/8/2014 22:05 | 2014-04-08 22:05:00 +0000 
UTC | -| 04/08/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 4/8/14 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 04/2/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 8/8/1965 12:00:00 AM | 1965-08-08 00:00:00 +0000 UTC | -| 8/8/1965 01:00:01 PM | 1965-08-08 13:00:01 +0000 UTC | -| 8/8/1965 01:00 PM | 1965-08-08 13:00:00 +0000 UTC | -| 8/8/1965 1:00 PM | 1965-08-08 13:00:00 +0000 UTC | -| 8/8/1965 12:00 AM | 1965-08-08 00:00:00 +0000 UTC | -| 4/02/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 03/19/2012 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | -| 03/19/2012 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| 2014/3/31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014/03/31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014/4/8 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014/04/08 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014/04/2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2014/4/02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2012/03/19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | -| 2012/03/19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| 2014:3:31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014:03:31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014:4:8 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014:04:08 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014:04:2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2014:4:02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2012:03:19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | -| 2012:03:19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | -| 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC | -| 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 | -| 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC | -| 2009-08-12T22:15:09.988 | 2009-08-12 22:15:09.988 +0000 UTC | -| 2009-08-12T22:15:09Z | 2009-08-12 22:15:09 +0000 UTC | -| 2017-07-19T03:21:51:897+0100 | 2017-07-19 03:21:51.897 +0100 +0100 | -| 2019-05-29T08:41-04 | 2019-05-29 08:41:00 -0400 
-0400 | -| 2014-04-26 17:24:37.3186369 | 2014-04-26 17:24:37.3186369 +0000 UTC | -| 2012-08-03 18:31:59.257000000 | 2012-08-03 18:31:59.257 +0000 UTC | -| 2014-04-26 17:24:37.123 | 2014-04-26 17:24:37.123 +0000 UTC | -| 2013-04-01 22:43 | 2013-04-01 22:43:00 +0000 UTC | -| 2013-04-01 22:43:22 | 2013-04-01 22:43:22 +0000 UTC | -| 2014-12-16 06:20:00 UTC | 2014-12-16 06:20:00 +0000 UTC | -| 2014-12-16 06:20:00 GMT | 2014-12-16 06:20:00 +0000 UTC | -| 2014-04-26 05:24:37 PM | 2014-04-26 17:24:37 +0000 UTC | -| 2014-04-26 13:13:43 +0800 | 2014-04-26 13:13:43 +0800 +0800 | -| 2014-04-26 13:13:43 +0800 +08 | 2014-04-26 13:13:43 +0800 +0800 | -| 2014-04-26 13:13:44 +09:00 | 2014-04-26 13:13:44 +0900 +0900 | -| 2012-08-03 18:31:59.257000000 +0000 UTC | 2012-08-03 18:31:59.257 +0000 UTC | -| 2015-09-30 18:48:56.35272715 +0000 UTC | 2015-09-30 18:48:56.35272715 +0000 UTC | -| 2015-02-18 00:12:00 +0000 GMT | 2015-02-18 00:12:00 +0000 UTC | -| 2015-02-18 00:12:00 +0000 UTC | 2015-02-18 00:12:00 +0000 UTC | -| 2015-02-08 03:02:00 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00 +0300 +0300 | -| 2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00.001 +0300 +0300 | -| 2017-07-19 03:21:51+00:00 | 2017-07-19 03:21:51 +0000 UTC | -| 2014-04-26 | 2014-04-26 00:00:00 +0000 UTC | -| 2014-04 | 2014-04-01 00:00:00 +0000 UTC | -| 2014 | 2014-01-01 00:00:00 +0000 UTC | -| 2014-05-11 08:20:13,787 | 2014-05-11 08:20:13.787 +0000 UTC | -| 2020-07-20+08:00 | 2020-07-20 00:00:00 +0800 +0800 | -| 3.31.2014 | 2014-03-31 00:00:00 +0000 UTC | -| 03.31.2014 | 2014-03-31 00:00:00 +0000 UTC | -| 08.21.71 | 1971-08-21 00:00:00 +0000 UTC | -| 2014.03 | 2014-03-01 00:00:00 +0000 UTC | -| 2014.03.30 | 2014-03-30 00:00:00 +0000 UTC | -| 20140601 | 2014-06-01 00:00:00 +0000 UTC | -| 20140722105203 | 2014-07-22 10:52:03 +0000 UTC | -| 171113 14:14:20 | 2017-11-13 14:14:20 +0000 UTC | -| 1332151919 | 2012-03-19 10:11:59 +0000 UTC | -| 1384216367189 | 2013-11-12 00:32:47.189 +0000 UTC | -| 
1384216367111222 | 2013-11-12 00:32:47.111222 +0000 UTC | -| 1384216367111222333 | 2013-11-12 00:32:47.111222333 +0000 UTC | -+-------------------------------------------------------+-----------------------------------------+ ++----------------------------------------------------------+-----------------------------------------+ +| Input | Parsed, and Output as %v | ++----------------------------------------------------------+-----------------------------------------+ +| May 8, 2009 5:57:51 PM | 2009-05-08 17:57:51 +0000 UTC | +| oct 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| oct 7, '70 | 1970-10-07 00:00:00 +0000 UTC | +| oct. 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| oct. 7, 70 | 1970-10-07 00:00:00 +0000 UTC | +| October 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| October 7th, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| Sept. 7, 1970 11:15:26pm | 1970-09-07 23:15:26 +0000 UTC | +| Sep 7 2009 11:15:26.123 PM PST | 2009-09-07 23:15:26.123 +0000 PST | +| September 3rd, 2009 11:15:26.123456789pm | 2009-09-03 23:15:26.123456789 +0000 UTC | +| September 17 2012 10:09am | 2012-09-17 10:09:00 +0000 UTC | +| September 17, 2012, 10:10:09 | 2012-09-17 10:10:09 +0000 UTC | +| Sep 17, 2012 at 10:02am (EST) | 2012-09-17 10:02:00 +0000 EST | +| September 17, 2012 at 10:09am PST-08 | 2012-09-17 10:09:00 -0800 PST | +| September 17 2012 5:00pm UTC-0700 | 2012-09-17 17:00:00 -0700 -0700 | +| September 17 2012 5:00pm GMT-0700 | 2012-09-17 17:00:00 -0700 -0700 | +| 7 oct 70 | 1970-10-07 00:00:00 +0000 UTC | +| 7 Oct 1970 | 1970-10-07 00:00:00 +0000 UTC | +| 7 September 1970 23:15 | 1970-09-07 23:15:00 +0000 UTC | +| 7 September 1970 11:15:26pm | 1970-09-07 23:15:26 +0000 UTC | +| 03 February 2013 | 2013-02-03 00:00:00 +0000 UTC | +| 12 Feb 2006, 19:17 | 2006-02-12 19:17:00 +0000 UTC | +| 12 Feb 2006 19:17 | 2006-02-12 19:17:00 +0000 UTC | +| 14 May 2019 19:11:40.164 | 2019-05-14 19:11:40.164 +0000 UTC | +| 4th Sep 2012 | 2012-09-04 00:00:00 +0000 UTC | +| 1st February 2018 
13:58:24 | 2018-02-01 13:58:24 +0000 UTC | +| Mon, 02 Jan 2006 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | +| Mon, 02 Jan 2006 15:04:05 -0700 | 2006-01-02 15:04:05 -0700 -0700 | +| Tue, 11 Jul 2017 16:28:13 +0200 (CEST) | 2017-07-11 16:28:13 +0200 +0200 | +| Mon 30 Sep 2018 09:09:09 PM UTC | 2018-09-30 21:09:09 +0000 UTC | +| Sun, 07 Jun 2020 00:00:00 +0100 | 2020-06-07 00:00:00 +0100 +0100 | +| Wed, 8 Feb 2023 19:00:46 +1100 (AEDT) | 2023-02-08 19:00:46 +1100 +1100 | +| Mon Jan 2 15:04:05 2006 | 2006-01-02 15:04:05 +0000 UTC | +| Mon Jan 2 15:04:05 MST 2006 | 2006-01-02 15:04:05 +0000 MST | +| Monday Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | +| Mon Jan 2 15:04:05.103786 2006 | 2006-01-02 15:04:05.103786 +0000 UTC | +| Mon Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | +| Mon 02 Jan 2006 03:04:05 PM UTC | 2006-01-02 15:04:05 +0000 UTC | +| Monday 02 Jan 2006 03:04:05 PM MST | 2006-01-02 15:04:05 +0000 MST | +| Mon Aug 10 15:44:11 UTC+0000 2015 | 2015-08-10 15:44:11 +0000 UTC | +| Thu Apr 7 15:13:13 2005 -0700 | 2005-04-07 15:13:13 -0700 -0700 | +| Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time) | 2015-07-03 06:04:07 -0700 PST | +| Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) | 2015-07-03 18:04:07 +0100 +0100 | +| Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00) | 2021-01-03 00:12:23 +0800 +0800 | +| 2013 May 2 | 2013-05-02 00:00:00 +0000 UTC | +| 2013 May 02 11:37:55 | 2013-05-02 11:37:55 +0000 UTC | +| 06/Jan/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | +| 06/January/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | +| 06/Jan/2008:15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | +| 06/January/2008:08:11:17 -0700 | 2008-01-06 08:11:17 -0700 -0700 | +| 3/31/2014 | 2014-03-31 00:00:00 +0000 UTC | +| 03/31/2014 | 2014-03-31 00:00:00 +0000 UTC | +| 08/21/71 | 1971-08-21 00:00:00 +0000 UTC | +| 8/1/71 | 1971-08-01 00:00:00 +0000 UTC | +| 4/8/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 04/08/2014 
22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 04/08/2014, 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 4/8/14 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 04/2/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 8/8/1965 12:00:00 AM | 1965-08-08 00:00:00 +0000 UTC | +| 8/8/1965 01:00:01 PM | 1965-08-08 13:00:01 +0000 UTC | +| 8/8/1965 01:00 PM | 1965-08-08 13:00:00 +0000 UTC | +| 8/8/1965 1:00 PM | 1965-08-08 13:00:00 +0000 UTC | +| 8/8/1965 12:00 AM | 1965-08-08 00:00:00 +0000 UTC | +| 4/02/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 03/19/2012 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | +| 03/19/2012 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | +| Oct/ 7/1970 | 1970-10-07 00:00:00 +0000 UTC | +| Oct/03/1970 22:33:44 | 1970-10-03 22:33:44 +0000 UTC | +| February/03/1970 11:33:44.555 PM PST | 1970-02-03 23:33:44.555 +0000 PST | +| 2014/3/31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014/03/31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014/4/8 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014/04/08 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014/04/2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2014/4/02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2012/03/19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | +| 2012/03/19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | +| Fri, 03-Jul-15 08:08:08 CEST | 2015-07-03 08:08:08 +0000 CEST | +| Monday, 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | +| Monday, 02 Jan 2006 15:04:05 -0600 | 2006-01-02 15:04:05 -0600 -0600 | +| 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | +| 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC | +| 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 | +| 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC | +| 2009-08-12T22:15:09.988 | 2009-08-12 22:15:09.988 +0000 UTC | +| 2009-08-12T22:15:09Z | 2009-08-12 22:15:09 +0000 UTC | +| 2009-08-12T22:15:09.52Z | 2009-08-12 22:15:09.52 +0000 UTC | +| 2017-07-19T03:21:51:897+0100 | 2017-07-19 03:21:51.897 +0100 
+0100 | +| 2019-05-29T08:41-04 | 2019-05-29 08:41:00 -0400 -0400 | +| 2014-04-26 17:24:37.3186369 | 2014-04-26 17:24:37.3186369 +0000 UTC | +| 2012-08-03 18:31:59.257000000 | 2012-08-03 18:31:59.257 +0000 UTC | +| 2014-04-26 17:24:37.123 | 2014-04-26 17:24:37.123 +0000 UTC | +| 2014-04-01 12:01am | 2014-04-01 00:01:00 +0000 UTC | +| 2014-04-01 12:01:59.765 AM | 2014-04-01 00:01:59.765 +0000 UTC | +| 2014-04-01 12:01:59,765 | 2014-04-01 12:01:59.765 +0000 UTC | +| 2014-04-01 22:43 | 2014-04-01 22:43:00 +0000 UTC | +| 2014-04-01 22:43:22 | 2014-04-01 22:43:22 +0000 UTC | +| 2014-12-16 06:20:00 UTC | 2014-12-16 06:20:00 +0000 UTC | +| 2014-12-16 06:20:00 GMT | 2014-12-16 06:20:00 +0000 GMT | +| 2014-04-26 05:24:37 PM | 2014-04-26 17:24:37 +0000 UTC | +| 2014-04-26 13:13:43 +0800 | 2014-04-26 13:13:43 +0800 +0800 | +| 2014-04-26 13:13:43 +0800 +08 | 2014-04-26 13:13:43 +0800 +0800 | +| 2014-04-26 13:13:44 +09:00 | 2014-04-26 13:13:44 +0900 +0900 | +| 2012-08-03 18:31:59.257000000 +0000 UTC | 2012-08-03 18:31:59.257 +0000 UTC | +| 2015-09-30 18:48:56.35272715 +0000 UTC | 2015-09-30 18:48:56.35272715 +0000 UTC | +| 2015-02-18 00:12:00 +0000 GMT | 2015-02-18 00:12:00 +0000 GMT | +| 2015-02-18 00:12:00 +0000 UTC | 2015-02-18 00:12:00 +0000 UTC | +| 2015-02-08 03:02:00 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00 +0300 MSK | +| 2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00.001 +0300 MSK | +| 2017-07-19 03:21:51+00:00 | 2017-07-19 03:21:51 +0000 UTC | +| 2017-04-03 22:32:14.322 CET | 2017-04-03 22:32:14.322 +0000 CET | +| 2017-04-03 22:32:14,322 CET | 2017-04-03 22:32:14.322 +0000 CET | +| 2017-04-03 22:32:14:322 CET | 2017-04-03 22:32:14.322 +0000 CET | +| 2018-09-30 08:09:13.123PM PMDT | 2018-09-30 20:09:13.123 +0000 PMDT | +| 2018-09-30 08:09:13.123 am AMT | 2018-09-30 08:09:13.123 +0000 AMT | +| 2014-04-26 | 2014-04-26 00:00:00 +0000 UTC | +| 2014-04 | 2014-04-01 00:00:00 +0000 UTC | +| 2014 | 2014-01-01 00:00:00 +0000 UTC | +| 2020-07-20+08:00 
| 2020-07-20 00:00:00 +0800 +0800 | +| 2020-07-20+0800 | 2020-07-20 00:00:00 +0800 +0800 | +| 2013-Feb-03 | 2013-02-03 00:00:00 +0000 UTC | +| 2013-February-03 09:07:08.123 | 2013-02-03 09:07:08.123 +0000 UTC | +| 03-Feb-13 | 2013-02-03 00:00:00 +0000 UTC | +| 03-Feb-2013 | 2013-02-03 00:00:00 +0000 UTC | +| 07-Feb-2004 09:07:07 +0200 | 2004-02-07 09:07:07 +0200 +0200 | +| 07-February-2004 09:07:07 +0200 | 2004-02-07 09:07:07 +0200 +0200 | +| 28-02-02 | 2002-02-28 00:00:00 +0000 UTC | +| 28-02-02 15:16:17 | 2002-02-28 15:16:17 +0000 UTC | +| 28-02-2002 | 2002-02-28 00:00:00 +0000 UTC | +| 28-02-2002 15:16:17 | 2002-02-28 15:16:17 +0000 UTC | +| 3.31.2014 | 2014-03-31 00:00:00 +0000 UTC | +| 03.31.14 | 2014-03-31 00:00:00 +0000 UTC | +| 03.31.2014 | 2014-03-31 00:00:00 +0000 UTC | +| 03.31.2014 10:11:59 MST | 2014-03-31 10:11:59 +0000 MST | +| 03.31.2014 10:11:59.3186369Z | 2014-03-31 10:11:59.3186369 +0000 UTC | +| 2014.03 | 2014-03-01 00:00:00 +0000 UTC | +| 2014.03.30 | 2014-03-30 00:00:00 +0000 UTC | +| 2014.03.30 08:33pm | 2014-03-30 20:33:00 +0000 UTC | +| 2014.03.30T08:33:44.555 PM -0700 MST | 2014-03-30 20:33:44.555 -0700 MST | +| 2014.03.30-0600 | 2014-03-30 00:00:00 -0600 -0600 | +| 2014:3:31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014:03:31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014:4:8 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014:04:08 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014:04:2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2014:4:02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2012:03:19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | +| 2012:03:19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | +| 08:03:2012 | 2012-08-03 00:00:00 +0000 UTC | +| 08:04:2012 18:31:59+00:00 | 2012-08-04 18:31:59 +0000 UTC | +| 20140601 | 2014-06-01 00:00:00 +0000 UTC | +| 20140722105203 | 2014-07-22 10:52:03 +0000 UTC | +| 20140722105203.364 | 2014-07-22 10:52:03.364 +0000 UTC | +| 2014年4月25日 | 2014-04-25 00:00:00 +0000 UTC | +| 2014年04月08日 | 2014-04-08 
00:00:00 +0000 UTC | +| 2014年04月08日 19:17:22 -0700 | 2014-04-08 19:17:22 -0700 -0700 | +| 8-Mar-2018::14:09:27 | 2018-03-08 14:09:27 +0000 UTC | +| 08-03-2018::02:09:29 PM | 2018-03-08 14:09:29 +0000 UTC | +| 171113 14:14:20 | 2017-11-13 14:14:20 +0000 UTC | +| 190910 11:51:49 | 2019-09-10 11:51:49 +0000 UTC | +| 1332151919 | 2012-03-19 10:11:59 +0000 UTC | +| 1384216367189 | 2013-11-12 00:32:47.189 +0000 UTC | +| 1384216367111222 | 2013-11-12 00:32:47.111222 +0000 UTC | +| 1384216367111222333 | 2013-11-12 00:32:47.111222333 +0000 UTC | ++----------------------------------------------------------+-----------------------------------------+ */ ``` diff --git a/example/main.go b/example/main.go index 9896f04..d670f14 100644 --- a/example/main.go +++ b/example/main.go @@ -10,46 +10,76 @@ import ( ) var examples = []string{ + // mon day year (time) "May 8, 2009 5:57:51 PM", "oct 7, 1970", "oct 7, '70", "oct. 7, 1970", "oct. 7, 70", - "Mon Jan 2 15:04:05 2006", - "Mon Jan 2 15:04:05 MST 2006", - "Mon Jan 02 15:04:05 -0700 2006", - "Monday, 02-Jan-06 15:04:05 MST", - "Mon, 02 Jan 2006 15:04:05 MST", - "Tue, 11 Jul 2017 16:28:13 +0200 (CEST)", - "Mon, 02 Jan 2006 15:04:05 -0700", - "Mon 30 Sep 2018 09:09:09 PM UTC", - "Mon Aug 10 15:44:11 UTC+0100 2015", - "Thu, 4 Jan 2018 17:53:36 +0000", - "Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)", - "Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00)", - "September 17, 2012 10:09am", - "September 17, 2012 at 10:09am PST-08", - "September 17, 2012, 10:10:09", "October 7, 1970", "October 7th, 1970", + "Sept. 
7, 1970 11:15:26pm", + "Sep 7 2009 11:15:26.123 PM PST", + "September 3rd, 2009 11:15:26.123456789pm", + "September 17 2012 10:09am", + "September 17, 2012, 10:10:09", + "Sep 17, 2012 at 10:02am (EST)", + // (PST-08 will have an offset of -0800, and a zone name of "PST") + "September 17, 2012 at 10:09am PST-08", + // (UTC-0700 has the same offset as -0700, and the returned zone name will be empty) + "September 17 2012 5:00pm UTC-0700", + "September 17 2012 5:00pm GMT-0700", + // (weekday) day mon year (time) + "7 oct 70", + "7 Oct 1970", + "7 September 1970 23:15", + "7 September 1970 11:15:26pm", + "03 February 2013", "12 Feb 2006, 19:17", "12 Feb 2006 19:17", "14 May 2019 19:11:40.164", - "7 oct 70", - "7 oct 1970", - "03 February 2013", - "1 July 2013", - "2013-Feb-03", - // dd/Mon/yyy alpha Months - "06/Jan/2008:15:04:05 -0700", + "4th Sep 2012", + "1st February 2018 13:58:24", + "Mon, 02 Jan 2006 15:04:05 MST", // RFC1123 + "Mon, 02 Jan 2006 15:04:05 -0700", + "Tue, 11 Jul 2017 16:28:13 +0200 (CEST)", + "Mon 30 Sep 2018 09:09:09 PM UTC", + "Sun, 07 Jun 2020 00:00:00 +0100", + "Wed, 8 Feb 2023 19:00:46 +1100 (AEDT)", + // ANSIC and UnixDate - weekday month day time year + "Mon Jan 2 15:04:05 2006", + "Mon Jan 2 15:04:05 MST 2006", + "Monday Jan 02 15:04:05 -0700 2006", + "Mon Jan 2 15:04:05.103786 2006", + // RubyDate - weekday month day time offset year + "Mon Jan 02 15:04:05 -0700 2006", + // ANSIC_GLIBC - weekday day month year time + "Mon 02 Jan 2006 03:04:05 PM UTC", + "Monday 02 Jan 2006 03:04:05 PM MST", + // weekday month day time timezone-offset year + "Mon Aug 10 15:44:11 UTC+0000 2015", + // git log default date format + "Thu Apr 7 15:13:13 2005 -0700", + // Variants of the above with a (full time zone description) + "Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time)", + "Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)", + "Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00)", + // year month day + "2013 May 2", + "2013 May 02 11:37:55", + // 
dd/Mon/year alpha Months "06/Jan/2008 15:04:05 -0700", - // mm/dd/yy + "06/January/2008 15:04:05 -0700", + "06/Jan/2008:15:04:05 -0700", // ngnix-log + "06/January/2008:08:11:17 -0700", + // mm/dd/year (see also PreferMonthFirst and RetryAmbiguousDateWithSwap options) "3/31/2014", "03/31/2014", "08/21/71", "8/1/71", "4/8/2014 22:05", "04/08/2014 22:05", + "04/08/2014, 22:05", "4/8/14 22:05", "04/2/2014 03:00:51", "8/8/1965 12:00:00 AM", @@ -60,6 +90,10 @@ var examples = []string{ "4/02/2014 03:00:51", "03/19/2012 10:11:59", "03/19/2012 10:11:59.3186369", + // mon/dd/year + "Oct/ 7/1970", + "Oct/03/1970 22:33:44", + "February/03/1970 11:33:44.555 PM PST", // yyyy/mm/dd "2014/3/31", "2014/03/31", @@ -69,31 +103,29 @@ var examples = []string{ "2014/4/02 03:00:51", "2012/03/19 10:11:59", "2012/03/19 10:11:59.3186369", - // yyyy:mm:dd - "2014:3:31", - "2014:03:31", - "2014:4:8 22:05", - "2014:04:08 22:05", - "2014:04:2 03:00:51", - "2014:4:02 03:00:51", - "2012:03:19 10:11:59", - "2012:03:19 10:11:59.3186369", - // Chinese - "2014年04月08日", - // yyyy-mm-ddThh + // weekday, day-mon-yy time + "Fri, 03-Jul-15 08:08:08 CEST", + "Monday, 02-Jan-06 15:04:05 MST", // RFC850 + "Monday, 02 Jan 2006 15:04:05 -0600", + "02-Jan-06 15:04:05 MST", + // RFC3339 - yyyy-mm-ddThh "2006-01-02T15:04:05+0000", "2009-08-12T22:15:09-07:00", "2009-08-12T22:15:09", "2009-08-12T22:15:09.988", "2009-08-12T22:15:09Z", + "2009-08-12T22:15:09.52Z", "2017-07-19T03:21:51:897+0100", "2019-05-29T08:41-04", // no seconds, 2 digit TZ offset - // yyyy-mm-dd hh:mm:ss + // yyyy-mm-dd hh:mm:ss "2014-04-26 17:24:37.3186369", "2012-08-03 18:31:59.257000000", "2014-04-26 17:24:37.123", - "2013-04-01 22:43", - "2013-04-01 22:43:22", + "2014-04-01 12:01am", + "2014-04-01 12:01:59.765 AM", + "2014-04-01 12:01:59,765", + "2014-04-01 22:43", + "2014-04-01 22:43:22", "2014-12-16 06:20:00 UTC", "2014-12-16 06:20:00 GMT", "2014-04-26 05:24:37 PM", @@ -102,29 +134,74 @@ var examples = []string{ "2014-04-26 13:13:44 
+09:00", "2012-08-03 18:31:59.257000000 +0000 UTC", "2015-09-30 18:48:56.35272715 +0000 UTC", - "2015-02-18 00:12:00 +0000 GMT", + "2015-02-18 00:12:00 +0000 GMT", // golang native format "2015-02-18 00:12:00 +0000 UTC", "2015-02-08 03:02:00 +0300 MSK m=+0.000000001", "2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001", "2017-07-19 03:21:51+00:00", + "2017-04-03 22:32:14.322 CET", + "2017-04-03 22:32:14,322 CET", + "2017-04-03 22:32:14:322 CET", + "2018-09-30 08:09:13.123PM PMDT", // PMDT time zone + "2018-09-30 08:09:13.123 am AMT", // AMT time zone "2014-04-26", "2014-04", "2014", - "2014-05-11 08:20:13,787", - // yyyy-mm-dd-07:00 + // yyyy-mm-dd(offset) "2020-07-20+08:00", - // mm.dd.yy + "2020-07-20+0800", + // year-mon-dd + "2013-Feb-03", + "2013-February-03 09:07:08.123", + // dd-mon-year + "03-Feb-13", + "03-Feb-2013", + "07-Feb-2004 09:07:07 +0200", + "07-February-2004 09:07:07 +0200", + // dd-mm-year (this format (common in Europe) always puts the day first, regardless of PreferMonthFirst) + "28-02-02", + "28-02-02 15:16:17", + "28-02-2002", + "28-02-2002 15:16:17", + // mm.dd.yy (see also PreferMonthFirst and RetryAmbiguousDateWithSwap options) "3.31.2014", + "03.31.14", "03.31.2014", - "08.21.71", + "03.31.2014 10:11:59 MST", + "03.31.2014 10:11:59.3186369Z", + // year.mm.dd "2014.03", "2014.03.30", - // yyyymmdd and similar + "2014.03.30 08:33pm", + "2014.03.30T08:33:44.555 PM -0700 MST", + "2014.03.30-0600", + // yyyy:mm:dd + "2014:3:31", + "2014:03:31", + "2014:4:8 22:05", + "2014:04:08 22:05", + "2014:04:2 03:00:51", + "2014:4:02 03:00:51", + "2012:03:19 10:11:59", + "2012:03:19 10:11:59.3186369", + // mm:dd:yyyy (see also PreferMonthFirst and RetryAmbiguousDateWithSwap options) + "08:03:2012", + "08:04:2012 18:31:59+00:00", + // yyyymmdd and similar "20140601", "20140722105203", - // yymmdd hh:mm:yy mysql log + "20140722105203.364", + // Chinese + "2014年4月25日", + "2014年04月08日", + "2014年04月08日 19:17:22 -0700", + // RabbitMQ log format + 
"8-Mar-2018::14:09:27", + "08-03-2018::02:09:29 PM", + // yymmdd hh:mm:yy mysql log // 080313 05:21:55 mysqld started "171113 14:14:20", + "190910 11:51:49", // unix seconds, ms, micro, nano "1332151919", "1384216367189", @@ -164,115 +241,173 @@ func main() { } /* -+-------------------------------------------------------+-----------------------------------------+ -| Input | Parsed, and Output as %v | -+-------------------------------------------------------+-----------------------------------------+ -| May 8, 2009 5:57:51 PM | 2009-05-08 17:57:51 +0000 UTC | -| oct 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| oct 7, '70 | 1970-10-07 00:00:00 +0000 UTC | -| oct. 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| oct. 7, 70 | 1970-10-07 00:00:00 +0000 UTC | -| Mon Jan 2 15:04:05 2006 | 2006-01-02 15:04:05 +0000 UTC | -| Mon Jan 2 15:04:05 MST 2006 | 2006-01-02 15:04:05 +0000 MST | -| Mon Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | -| Monday, 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | -| Mon, 02 Jan 2006 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | -| Tue, 11 Jul 2017 16:28:13 +0200 (CEST) | 2017-07-11 16:28:13 +0200 +0200 | -| Mon, 02 Jan 2006 15:04:05 -0700 | 2006-01-02 15:04:05 -0700 -0700 | -| Mon 30 Sep 2018 09:09:09 PM UTC | 2018-09-30 21:09:09 +0000 UTC | -| Mon Aug 10 15:44:11 UTC+0100 2015 | 2015-08-10 15:44:11 +0000 UTC | -| Thu, 4 Jan 2018 17:53:36 +0000 | 2018-01-04 17:53:36 +0000 UTC | -| Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) | 2015-07-03 18:04:07 +0100 GMT | -| Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00) | 2021-01-03 00:12:23 +0800 +0800 | -| September 17, 2012 10:09am | 2012-09-17 10:09:00 +0000 UTC | -| September 17, 2012 at 10:09am PST-08 | 2012-09-17 10:09:00 -0800 PST | -| September 17, 2012, 10:10:09 | 2012-09-17 10:10:09 +0000 UTC | -| October 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| October 7th, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| 12 Feb 2006, 19:17 | 2006-02-12 19:17:00 +0000 UTC | -| 12 Feb 
2006 19:17 | 2006-02-12 19:17:00 +0000 UTC | -| 14 May 2019 19:11:40.164 | 2019-05-14 19:11:40.164 +0000 UTC | -| 7 oct 70 | 1970-10-07 00:00:00 +0000 UTC | -| 7 oct 1970 | 1970-10-07 00:00:00 +0000 UTC | -| 03 February 2013 | 2013-02-03 00:00:00 +0000 UTC | -| 1 July 2013 | 2013-07-01 00:00:00 +0000 UTC | -| 2013-Feb-03 | 2013-02-03 00:00:00 +0000 UTC | -| 06/Jan/2008:15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | -| 06/Jan/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | -| 3/31/2014 | 2014-03-31 00:00:00 +0000 UTC | -| 03/31/2014 | 2014-03-31 00:00:00 +0000 UTC | -| 08/21/71 | 1971-08-21 00:00:00 +0000 UTC | -| 8/1/71 | 1971-08-01 00:00:00 +0000 UTC | -| 4/8/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 04/08/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 4/8/14 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 04/2/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 8/8/1965 12:00:00 AM | 1965-08-08 00:00:00 +0000 UTC | -| 8/8/1965 01:00:01 PM | 1965-08-08 13:00:01 +0000 UTC | -| 8/8/1965 01:00 PM | 1965-08-08 13:00:00 +0000 UTC | -| 8/8/1965 1:00 PM | 1965-08-08 13:00:00 +0000 UTC | -| 8/8/1965 12:00 AM | 1965-08-08 00:00:00 +0000 UTC | -| 4/02/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 03/19/2012 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | -| 03/19/2012 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| 2014/3/31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014/03/31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014/4/8 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014/04/08 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014/04/2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2014/4/02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2012/03/19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | -| 2012/03/19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| 2014:3:31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014:03:31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014:4:8 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014:04:08 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 
2014:04:2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2014:4:02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2012:03:19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | -| 2012:03:19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | -| 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC | -| 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 | -| 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC | -| 2009-08-12T22:15:09.988 | 2009-08-12 22:15:09.988 +0000 UTC | -| 2009-08-12T22:15:09Z | 2009-08-12 22:15:09 +0000 UTC | -| 2017-07-19T03:21:51:897+0100 | 2017-07-19 03:21:51.897 +0100 +0100 | -| 2019-05-29T08:41-04 | 2019-05-29 08:41:00 -0400 -0400 | -| 2014-04-26 17:24:37.3186369 | 2014-04-26 17:24:37.3186369 +0000 UTC | -| 2012-08-03 18:31:59.257000000 | 2012-08-03 18:31:59.257 +0000 UTC | -| 2014-04-26 17:24:37.123 | 2014-04-26 17:24:37.123 +0000 UTC | -| 2013-04-01 22:43 | 2013-04-01 22:43:00 +0000 UTC | -| 2013-04-01 22:43:22 | 2013-04-01 22:43:22 +0000 UTC | -| 2014-12-16 06:20:00 UTC | 2014-12-16 06:20:00 +0000 UTC | -| 2014-12-16 06:20:00 GMT | 2014-12-16 06:20:00 +0000 UTC | -| 2014-04-26 05:24:37 PM | 2014-04-26 17:24:37 +0000 UTC | -| 2014-04-26 13:13:43 +0800 | 2014-04-26 13:13:43 +0800 +0800 | -| 2014-04-26 13:13:43 +0800 +08 | 2014-04-26 13:13:43 +0800 +0800 | -| 2014-04-26 13:13:44 +09:00 | 2014-04-26 13:13:44 +0900 +0900 | -| 2012-08-03 18:31:59.257000000 +0000 UTC | 2012-08-03 18:31:59.257 +0000 UTC | -| 2015-09-30 18:48:56.35272715 +0000 UTC | 2015-09-30 18:48:56.35272715 +0000 UTC | -| 2015-02-18 00:12:00 +0000 GMT | 2015-02-18 00:12:00 +0000 UTC | -| 2015-02-18 00:12:00 +0000 UTC | 2015-02-18 00:12:00 +0000 UTC | -| 2015-02-08 03:02:00 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00 +0300 +0300 | -| 2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00.001 +0300 +0300 | -| 2017-07-19 03:21:51+00:00 | 2017-07-19 03:21:51 +0000 UTC | -| 2014-04-26 | 2014-04-26 
00:00:00 +0000 UTC | -| 2014-04 | 2014-04-01 00:00:00 +0000 UTC | -| 2014 | 2014-01-01 00:00:00 +0000 UTC | -| 2014-05-11 08:20:13,787 | 2014-05-11 08:20:13.787 +0000 UTC | -| 2020-07-20+08:00 | 2020-07-20 00:00:00 +0800 +0800 | -| 3.31.2014 | 2014-03-31 00:00:00 +0000 UTC | -| 03.31.2014 | 2014-03-31 00:00:00 +0000 UTC | -| 08.21.71 | 1971-08-21 00:00:00 +0000 UTC | -| 2014.03 | 2014-03-01 00:00:00 +0000 UTC | -| 2014.03.30 | 2014-03-30 00:00:00 +0000 UTC | -| 20140601 | 2014-06-01 00:00:00 +0000 UTC | -| 20140722105203 | 2014-07-22 10:52:03 +0000 UTC | -| 171113 14:14:20 | 2017-11-13 14:14:20 +0000 UTC | -| 1332151919 | 2012-03-19 10:11:59 +0000 UTC | -| 1384216367189 | 2013-11-12 00:32:47.189 +0000 UTC | -| 1384216367111222 | 2013-11-12 00:32:47.111222 +0000 UTC | -| 1384216367111222333 | 2013-11-12 00:32:47.111222333 +0000 UTC | -+-------------------------------------------------------+-----------------------------------------+ ++----------------------------------------------------------+-----------------------------------------+ +| Input | Parsed, and Output as %v | ++----------------------------------------------------------+-----------------------------------------+ +| May 8, 2009 5:57:51 PM | 2009-05-08 17:57:51 +0000 UTC | +| oct 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| oct 7, '70 | 1970-10-07 00:00:00 +0000 UTC | +| oct. 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| oct. 7, 70 | 1970-10-07 00:00:00 +0000 UTC | +| October 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| October 7th, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| Sept. 
7, 1970 11:15:26pm | 1970-09-07 23:15:26 +0000 UTC | +| Sep 7 2009 11:15:26.123 PM PST | 2009-09-07 23:15:26.123 +0000 PST | +| September 3rd, 2009 11:15:26.123456789pm | 2009-09-03 23:15:26.123456789 +0000 UTC | +| September 17 2012 10:09am | 2012-09-17 10:09:00 +0000 UTC | +| September 17, 2012, 10:10:09 | 2012-09-17 10:10:09 +0000 UTC | +| Sep 17, 2012 at 10:02am (EST) | 2012-09-17 10:02:00 +0000 EST | +| September 17, 2012 at 10:09am PST-08 | 2012-09-17 10:09:00 -0800 PST | +| September 17 2012 5:00pm UTC-0700 | 2012-09-17 17:00:00 -0700 -0700 | +| September 17 2012 5:00pm GMT-0700 | 2012-09-17 17:00:00 -0700 -0700 | +| 7 oct 70 | 1970-10-07 00:00:00 +0000 UTC | +| 7 Oct 1970 | 1970-10-07 00:00:00 +0000 UTC | +| 7 September 1970 23:15 | 1970-09-07 23:15:00 +0000 UTC | +| 7 September 1970 11:15:26pm | 1970-09-07 23:15:26 +0000 UTC | +| 03 February 2013 | 2013-02-03 00:00:00 +0000 UTC | +| 12 Feb 2006, 19:17 | 2006-02-12 19:17:00 +0000 UTC | +| 12 Feb 2006 19:17 | 2006-02-12 19:17:00 +0000 UTC | +| 14 May 2019 19:11:40.164 | 2019-05-14 19:11:40.164 +0000 UTC | +| 4th Sep 2012 | 2012-09-04 00:00:00 +0000 UTC | +| 1st February 2018 13:58:24 | 2018-02-01 13:58:24 +0000 UTC | +| Mon, 02 Jan 2006 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | +| Mon, 02 Jan 2006 15:04:05 -0700 | 2006-01-02 15:04:05 -0700 -0700 | +| Tue, 11 Jul 2017 16:28:13 +0200 (CEST) | 2017-07-11 16:28:13 +0200 +0200 | +| Mon 30 Sep 2018 09:09:09 PM UTC | 2018-09-30 21:09:09 +0000 UTC | +| Sun, 07 Jun 2020 00:00:00 +0100 | 2020-06-07 00:00:00 +0100 +0100 | +| Wed, 8 Feb 2023 19:00:46 +1100 (AEDT) | 2023-02-08 19:00:46 +1100 +1100 | +| Mon Jan 2 15:04:05 2006 | 2006-01-02 15:04:05 +0000 UTC | +| Mon Jan 2 15:04:05 MST 2006 | 2006-01-02 15:04:05 +0000 MST | +| Monday Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | +| Mon Jan 2 15:04:05.103786 2006 | 2006-01-02 15:04:05.103786 +0000 UTC | +| Mon Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | +| Mon 02 Jan 2006 03:04:05 
PM UTC | 2006-01-02 15:04:05 +0000 UTC | +| Monday 02 Jan 2006 03:04:05 PM MST | 2006-01-02 15:04:05 +0000 MST | +| Mon Aug 10 15:44:11 UTC+0000 2015 | 2015-08-10 15:44:11 +0000 UTC | +| Thu Apr 7 15:13:13 2005 -0700 | 2005-04-07 15:13:13 -0700 -0700 | +| Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time) | 2015-07-03 06:04:07 -0700 PST | +| Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) | 2015-07-03 18:04:07 +0100 +0100 | +| Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00) | 2021-01-03 00:12:23 +0800 +0800 | +| 2013 May 2 | 2013-05-02 00:00:00 +0000 UTC | +| 2013 May 02 11:37:55 | 2013-05-02 11:37:55 +0000 UTC | +| 06/Jan/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | +| 06/January/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | +| 06/Jan/2008:15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | +| 06/January/2008:08:11:17 -0700 | 2008-01-06 08:11:17 -0700 -0700 | +| 3/31/2014 | 2014-03-31 00:00:00 +0000 UTC | +| 03/31/2014 | 2014-03-31 00:00:00 +0000 UTC | +| 08/21/71 | 1971-08-21 00:00:00 +0000 UTC | +| 8/1/71 | 1971-08-01 00:00:00 +0000 UTC | +| 4/8/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 04/08/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 04/08/2014, 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 4/8/14 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 04/2/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 8/8/1965 12:00:00 AM | 1965-08-08 00:00:00 +0000 UTC | +| 8/8/1965 01:00:01 PM | 1965-08-08 13:00:01 +0000 UTC | +| 8/8/1965 01:00 PM | 1965-08-08 13:00:00 +0000 UTC | +| 8/8/1965 1:00 PM | 1965-08-08 13:00:00 +0000 UTC | +| 8/8/1965 12:00 AM | 1965-08-08 00:00:00 +0000 UTC | +| 4/02/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 03/19/2012 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | +| 03/19/2012 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | +| Oct/ 7/1970 | 1970-10-07 00:00:00 +0000 UTC | +| Oct/03/1970 22:33:44 | 1970-10-03 22:33:44 +0000 UTC | +| February/03/1970 11:33:44.555 PM PST | 1970-02-03 23:33:44.555 +0000 
PST | +| 2014/3/31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014/03/31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014/4/8 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014/04/08 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014/04/2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2014/4/02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2012/03/19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | +| 2012/03/19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | +| Fri, 03-Jul-15 08:08:08 CEST | 2015-07-03 08:08:08 +0000 CEST | +| Monday, 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | +| Monday, 02 Jan 2006 15:04:05 -0600 | 2006-01-02 15:04:05 -0600 -0600 | +| 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | +| 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC | +| 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 | +| 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC | +| 2009-08-12T22:15:09.988 | 2009-08-12 22:15:09.988 +0000 UTC | +| 2009-08-12T22:15:09Z | 2009-08-12 22:15:09 +0000 UTC | +| 2009-08-12T22:15:09.52Z | 2009-08-12 22:15:09.52 +0000 UTC | +| 2017-07-19T03:21:51:897+0100 | 2017-07-19 03:21:51.897 +0100 +0100 | +| 2019-05-29T08:41-04 | 2019-05-29 08:41:00 -0400 -0400 | +| 2014-04-26 17:24:37.3186369 | 2014-04-26 17:24:37.3186369 +0000 UTC | +| 2012-08-03 18:31:59.257000000 | 2012-08-03 18:31:59.257 +0000 UTC | +| 2014-04-26 17:24:37.123 | 2014-04-26 17:24:37.123 +0000 UTC | +| 2014-04-01 12:01am | 2014-04-01 00:01:00 +0000 UTC | +| 2014-04-01 12:01:59.765 AM | 2014-04-01 00:01:59.765 +0000 UTC | +| 2014-04-01 12:01:59,765 | 2014-04-01 12:01:59.765 +0000 UTC | +| 2014-04-01 22:43 | 2014-04-01 22:43:00 +0000 UTC | +| 2014-04-01 22:43:22 | 2014-04-01 22:43:22 +0000 UTC | +| 2014-12-16 06:20:00 UTC | 2014-12-16 06:20:00 +0000 UTC | +| 2014-12-16 06:20:00 GMT | 2014-12-16 06:20:00 +0000 GMT | +| 2014-04-26 05:24:37 PM | 2014-04-26 17:24:37 +0000 UTC | +| 2014-04-26 13:13:43 +0800 | 2014-04-26 13:13:43 +0800 +0800 | +| 2014-04-26 13:13:43 
+0800 +08 | 2014-04-26 13:13:43 +0800 +0800 | +| 2014-04-26 13:13:44 +09:00 | 2014-04-26 13:13:44 +0900 +0900 | +| 2012-08-03 18:31:59.257000000 +0000 UTC | 2012-08-03 18:31:59.257 +0000 UTC | +| 2015-09-30 18:48:56.35272715 +0000 UTC | 2015-09-30 18:48:56.35272715 +0000 UTC | +| 2015-02-18 00:12:00 +0000 GMT | 2015-02-18 00:12:00 +0000 GMT | +| 2015-02-18 00:12:00 +0000 UTC | 2015-02-18 00:12:00 +0000 UTC | +| 2015-02-08 03:02:00 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00 +0300 MSK | +| 2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00.001 +0300 MSK | +| 2017-07-19 03:21:51+00:00 | 2017-07-19 03:21:51 +0000 UTC | +| 2017-04-03 22:32:14.322 CET | 2017-04-03 22:32:14.322 +0000 CET | +| 2017-04-03 22:32:14,322 CET | 2017-04-03 22:32:14.322 +0000 CET | +| 2017-04-03 22:32:14:322 CET | 2017-04-03 22:32:14.322 +0000 CET | +| 2018-09-30 08:09:13.123PM PMDT | 2018-09-30 20:09:13.123 +0000 PMDT | +| 2018-09-30 08:09:13.123 am AMT | 2018-09-30 08:09:13.123 +0000 AMT | +| 2014-04-26 | 2014-04-26 00:00:00 +0000 UTC | +| 2014-04 | 2014-04-01 00:00:00 +0000 UTC | +| 2014 | 2014-01-01 00:00:00 +0000 UTC | +| 2020-07-20+08:00 | 2020-07-20 00:00:00 +0800 +0800 | +| 2020-07-20+0800 | 2020-07-20 00:00:00 +0800 +0800 | +| 2013-Feb-03 | 2013-02-03 00:00:00 +0000 UTC | +| 2013-February-03 09:07:08.123 | 2013-02-03 09:07:08.123 +0000 UTC | +| 03-Feb-13 | 2013-02-03 00:00:00 +0000 UTC | +| 03-Feb-2013 | 2013-02-03 00:00:00 +0000 UTC | +| 07-Feb-2004 09:07:07 +0200 | 2004-02-07 09:07:07 +0200 +0200 | +| 07-February-2004 09:07:07 +0200 | 2004-02-07 09:07:07 +0200 +0200 | +| 28-02-02 | 2002-02-28 00:00:00 +0000 UTC | +| 28-02-02 15:16:17 | 2002-02-28 15:16:17 +0000 UTC | +| 28-02-2002 | 2002-02-28 00:00:00 +0000 UTC | +| 28-02-2002 15:16:17 | 2002-02-28 15:16:17 +0000 UTC | +| 3.31.2014 | 2014-03-31 00:00:00 +0000 UTC | +| 03.31.14 | 2014-03-31 00:00:00 +0000 UTC | +| 03.31.2014 | 2014-03-31 00:00:00 +0000 UTC | +| 03.31.2014 10:11:59 MST | 2014-03-31 10:11:59 +0000 
MST | +| 03.31.2014 10:11:59.3186369Z | 2014-03-31 10:11:59.3186369 +0000 UTC | +| 2014.03 | 2014-03-01 00:00:00 +0000 UTC | +| 2014.03.30 | 2014-03-30 00:00:00 +0000 UTC | +| 2014.03.30 08:33pm | 2014-03-30 20:33:00 +0000 UTC | +| 2014.03.30T08:33:44.555 PM -0700 MST | 2014-03-30 20:33:44.555 -0700 MST | +| 2014.03.30-0600 | 2014-03-30 00:00:00 -0600 -0600 | +| 2014:3:31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014:03:31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014:4:8 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014:04:08 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014:04:2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2014:4:02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2012:03:19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | +| 2012:03:19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | +| 08:03:2012 | 2012-08-03 00:00:00 +0000 UTC | +| 08:04:2012 18:31:59+00:00 | 2012-08-04 18:31:59 +0000 UTC | +| 20140601 | 2014-06-01 00:00:00 +0000 UTC | +| 20140722105203 | 2014-07-22 10:52:03 +0000 UTC | +| 20140722105203.364 | 2014-07-22 10:52:03.364 +0000 UTC | +| 2014年4月25日 | 2014-04-25 00:00:00 +0000 UTC | +| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | +| 2014年04月08日 19:17:22 -0700 | 2014-04-08 19:17:22 -0700 -0700 | +| 8-Mar-2018::14:09:27 | 2018-03-08 14:09:27 +0000 UTC | +| 08-03-2018::02:09:29 PM | 2018-03-08 14:09:29 +0000 UTC | +| 171113 14:14:20 | 2017-11-13 14:14:20 +0000 UTC | +| 190910 11:51:49 | 2019-09-10 11:51:49 +0000 UTC | +| 1332151919 | 2012-03-19 10:11:59 +0000 UTC | +| 1384216367189 | 2013-11-12 00:32:47.189 +0000 UTC | +| 1384216367111222 | 2013-11-12 00:32:47.111222 +0000 UTC | +| 1384216367111222333 | 2013-11-12 00:32:47.111222333 +0000 UTC | ++----------------------------------------------------------+-----------------------------------------+ */ diff --git a/parseany.go b/parseany.go index 586efec..46bad4a 100644 --- a/parseany.go +++ b/parseany.go @@ -638,9 +638,9 @@ iterRunes: // Must be a valid short or long month if p.molen == 3 { - 
p.set(p.moi, "Jan") - p.yeari = i + 1 - p.stateDate = dateDigitDashAlphaDash + p.set(p.moi, "Jan") + p.yeari = i + 1 + p.stateDate = dateDigitDashAlphaDash } else { possibleFullMonth := strings.ToLower(p.datestr[p.moi:(p.moi + p.molen)]) if i > 3 && isMonthFull(possibleFullMonth) { @@ -1157,10 +1157,10 @@ iterRunes: } case ' ': if p.daylen == 0 && p.molen > 0 && p.yearlen > 0 { - p.daylen = i - p.dayi - if !p.setDay() { - return p, p.unknownErr(datestr) - } + p.daylen = i - p.dayi + if !p.setDay() { + return p, p.unknownErr(datestr) + } } else if p.molen == 0 && p.daylen > 0 && p.yearlen > 0 { p.molen = i - p.moi if !p.setMonth() { diff --git a/parseany_test.go b/parseany_test.go index 5ffe5fd..000faf2 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -192,7 +192,7 @@ var testInputs = []dateTest{ {in: "1st September 2012", out: "2012-09-01 00:00:00 +0000 UTC"}, {in: "2nd September 2012", out: "2012-09-02 00:00:00 +0000 UTC"}, {in: "3rd September 2012", out: "2012-09-03 00:00:00 +0000 UTC"}, - {in: "4th September 2012", out: "2012-09-04 00:00:00 +0000 UTC"}, + {in: "4th Sep 2012", out: "2012-09-04 00:00:00 +0000 UTC"}, {in: "2nd January 2018", out: "2018-01-02 00:00:00 +0000 UTC"}, {in: "3rd Feb 2018 13:58:24", out: "2018-02-03 13:58:24 +0000 UTC"}, {in: "1st February 2018 13:58:24", out: "2018-02-01 13:58:24 +0000 UTC"}, @@ -268,8 +268,9 @@ var testInputs = []dateTest{ {in: "2013-Feb-03", out: "2013-02-03 00:00:00 +0000 UTC"}, {in: "2013-Feb-03 09:07:08pm", out: "2013-02-03 21:07:08 +0000 UTC"}, {in: "2013-February-03", out: "2013-02-03 00:00:00 +0000 UTC"}, - {in: "2013-February-03 09:07:08pm", out: "2013-02-03 21:07:08 +0000 UTC"}, + {in: "2013-February-03 09:07:08.123", out: "2013-02-03 09:07:08.123 +0000 UTC"}, // 03 February 2013 + {in: "13 Feb 2013", out: "2013-02-13 00:00:00 +0000 UTC"}, {in: "03 February 2013", out: "2013-02-03 00:00:00 +0000 UTC"}, {in: "03 February 2013 09:07:08pm", out: "2013-02-03 21:07:08 +0000 UTC"}, {in: "3 February 2013", out: 
"2013-02-03 00:00:00 +0000 UTC"}, @@ -279,7 +280,7 @@ var testInputs = []dateTest{ {in: "2014年4月8日", out: "2014-04-08 00:00:00 +0000 UTC"}, {in: "2014年04月08日 19:17:22", out: "2014-04-08 19:17:22 +0000 UTC"}, {in: "2014年04月08日 19:17:22 MDT", out: "2014-04-08 19:17:22 +0000 UTC", zname: "MDT"}, - {in: "2014年04月08日 19:17:22 MDT-0700", out: "2014-04-09 02:17:22 +0000 UTC", zname: "MDT"}, + {in: "2014年04月08日 19:17:22 -0700", out: "2014-04-09 02:17:22 +0000 UTC"}, {in: "2014年4月8日 19:17:22", out: "2014-04-08 19:17:22 +0000 UTC"}, {in: "2014年4月8日 19:17:22 MDT", out: "2014-04-08 19:17:22 +0000 UTC", zname: "MDT"}, {in: "2014年4月8日 19:17:22 MDT-0700", out: "2014-04-09 02:17:22 +0000 UTC", zname: "MDT"}, @@ -492,11 +493,10 @@ var testInputs = []dateTest{ {in: "2018-09-30 08:09:13.123 am AMT", out: "2018-09-30 08:09:13.123 +0000 UTC", zname: "AMT"}, {in: "2018-09-30 08:09:13.123am AMT", out: "2018-09-30 08:09:13.123 +0000 UTC", zname: "AMT"}, /// yyyy mmm dd https://github.com/araddon/dateparse/issues/141 + {in: "2013 May 2", out: "2013-05-02 00:00:00 +0000 UTC"}, {in: "2013 May 02 11:37:55", out: "2013-05-02 11:37:55 +0000 UTC"}, {in: "2013 June 02 11:37:55", out: "2013-06-02 11:37:55 +0000 UTC"}, {in: "2013 December 02 11:37:55", out: "2013-12-02 11:37:55 +0000 UTC"}, - // https://github.com/araddon/dateparse/issues/143 - {in: "20140722105203.364", out: "2014-07-22 10:52:03.364 +0000 UTC"}, // https://github.com/araddon/dateparse/issues/71 and https://github.com/araddon/dateparse/issues/72 {in: "2017-12-31T16:00:00Z", out: "2017-12-31 16:00:00 +0000 UTC", loc: "America/Denver", zname: "UTC"}, {in: "Jul 9, 2012 at 5:02am (EST)", out: "2012-07-09 05:02:00 +0000 UTC", zname: "EST"}, @@ -515,6 +515,7 @@ var testInputs = []dateTest{ {in: "08-03-2018::02:09:29 PM", out: "2018-03-08 14:09:29 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss,000 {in: "2014-05-11 08:20:13,787", out: "2014-05-11 08:20:13.787 +0000 UTC"}, + {in: "2014-05-11 08:20:13:787", out: "2014-05-11 08:20:13.787 +0000 UTC"}, // 
yyyy-mm-dd hh:mm:ss +0000 {in: "2012-08-03 18:31:59 +0000", out: "2012-08-03 18:31:59 +0000 UTC"}, {in: "2012-08-03 13:31:59 -0600", out: "2012-08-03 19:31:59 +0000 UTC"}, @@ -532,8 +533,6 @@ var testInputs = []dateTest{ {in: "2018-06-29 19:09:57.77297118 +0300 +0300", out: "2018-06-29 16:09:57.77297118 +0000 UTC"}, {in: "2018-06-29 19:09:57 +0300 +03", out: "2018-06-29 16:09:57 +0000 UTC"}, {in: "2018-06-29 19:09:57 +0300 +0300", out: "2018-06-29 16:09:57 +0000 UTC"}, - - // 13:31:51.999 -07:00 MST // yyyy-mm-dd hh:mm:ss +00:00 {in: "2012-08-03 18:31:59 +00:00", out: "2012-08-03 18:31:59 +0000 UTC"}, {in: "2014-05-01 08:02:13 +00:00", out: "2014-05-01 08:02:13 +0000 UTC"}, @@ -669,9 +668,9 @@ var testInputs = []dateTest{ {in: "2009-08-12T22:15:09.99999999Z", out: "2009-08-12 22:15:09.99999999 +0000 UTC"}, {in: "2009-08-12T22:15:9.99999999Z", out: "2009-08-12 22:15:09.99999999 +0000 UTC"}, // yyyy.mm + {in: "2014", out: "2014-01-01 00:00:00 +0000 UTC"}, {in: "2014.05", out: "2014-05-01 00:00:00 +0000 UTC"}, {in: "2018.09.30", out: "2018-09-30 00:00:00 +0000 UTC"}, - // mm.dd.yyyy {in: "3.31.2014", out: "2014-03-31 00:00:00 +0000 UTC"}, {in: "3.3.2014", out: "2014-03-03 00:00:00 +0000 UTC"}, @@ -709,6 +708,8 @@ var testInputs = []dateTest{ {in: "2014", out: "2014-01-01 00:00:00 +0000 UTC"}, {in: "20140601", out: "2014-06-01 00:00:00 +0000 UTC"}, {in: "20140722105203", out: "2014-07-22 10:52:03 +0000 UTC"}, + // https://github.com/araddon/dateparse/issues/143 + {in: "20140722105203.364", out: "2014-07-22 10:52:03.364 +0000 UTC"}, // yymmdd hh:mm:yy mysql log https://github.com/araddon/dateparse/issues/119 // 080313 05:21:55 mysqld started // 080313 5:21:55 InnoDB: Started; log sequence number 0 43655 @@ -723,12 +724,12 @@ var testInputs = []dateTest{ {in: "1384216367111222", out: "2013-11-12 00:32:47.111222 +0000 UTC"}, {in: "1384216367111222333", out: "2013-11-12 00:32:47.111222333 +0000 UTC"}, + // other {in: "Wed, 8 Feb 2023 19:00:46 +1100 (AEDT)", out: 
"2023-02-08 08:00:46 +0000 UTC"}, {in: "FRI, 16 AUG 2013 9:39:51 +1000", out: "2013-08-15 23:39:51 +0000 UTC"}, // https://github.com/araddon/dateparse/issues/158 {in: "Mon, 1 Dec 2008 14:48:22 GMT-07:00", out: "2008-12-01 21:48:22 +0000 UTC"}, {in: "Mon, 1 Dec 2008 14:48:22 UTC-07:00", out: "2008-12-01 21:48:22 +0000 UTC"}, - // Fixes for bugs mentioned in https://github.com/araddon/dateparse/pull/134 {in: "2014.02.13", out: "2014-02-13 00:00:00 +0000 UTC"}, {in: "2014-02-13 00:00:00", out: "2014-02-13 00:00:00 +0000 UTC"}, From 4d76f597be8f9b93aeddc67acb051c3585a701bc Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Mon, 18 Dec 2023 23:40:08 -0700 Subject: [PATCH 57/62] Fix ambiguous mm/dd that start with weekday Options were not being properly passed to recursive parseTime call. --- parseany.go | 2 +- parseany_test.go | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/parseany.go b/parseany.go index 46bad4a..76f14a9 100644 --- a/parseany.go +++ b/parseany.go @@ -1259,7 +1259,7 @@ iterRunes: // using skip throws off indices used by other code; saner to restart newDateStr := p.datestr[i+1:] putBackParser(p) - return parseTime(newDateStr, loc) + return parseTime(newDateStr, loc, opts...) } // X diff --git a/parseany_test.go b/parseany_test.go index 000faf2..6eff9cf 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -701,6 +701,9 @@ var testInputs = []dateTest{ {in: "19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", preferDayFirst: true}, {in: "19/03/2012 10:11:59.3186369", out: "2012-03-19 10:11:59.3186369 +0000 UTC", retryAmbiguous: true}, {in: "19/03/2012 10:11:59.3186369", out: "2012-03-19 10:11:59.3186369 +0000 UTC", preferDayFirst: true}, + // For certain parse modes that restart parsing, make sure that parsing options are passed along! 
+ {in: "Monday 19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", retryAmbiguous: true}, + {in: "Monday 19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", preferDayFirst: true}, // https://github.com/araddon/dateparse/issues/105 {in: "20/5/2006 19:51:45", out: "2006-05-20 19:51:45 +0000 UTC", retryAmbiguous: true}, {in: "20/5/2006 19:51:45", out: "2006-05-20 19:51:45 +0000 UTC", preferDayFirst: true}, From 5cb27939bdcf933d569bd015904559884d91adcf Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Mon, 18 Dec 2023 23:52:17 -0700 Subject: [PATCH 58/62] Update benchmark results --- bench_test.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/bench_test.go b/bench_test.go index a07cb43..b5b1f7a 100644 --- a/bench_test.go +++ b/bench_test.go @@ -9,9 +9,6 @@ import ( /* go test -bench Parse -BenchmarkShotgunParse 50000 37588 ns/op 13258 B/op 167 allocs/op -BenchmarkDateparseParseAny 500000 5752 ns/op 0 B/op 0 allocs/op - // Aarons Laptop Lenovo 900 Feb 2018 BenchmarkShotgunParse-4 50000 30045 ns/op 13136 B/op 169 allocs/op BenchmarkParseAny-4 200000 8627 ns/op 144 B/op 3 allocs/op @@ -20,6 +17,20 @@ BenchmarkParseAny-4 200000 8627 ns/op 144 B/op 3 allo BenchmarkShotgunParse-8 50000 33940 ns/op 13136 B/op 169 allocs/op BenchmarkParseAny-8 200000 10146 ns/op 912 B/op 29 allocs/op BenchmarkParseDateString-8 10000 123077 ns/op 208 B/op 13 allocs/op + +// Klondike Dragon Dec 2023 +cpu: 12th Gen Intel(R) Core(TM) i7-1255U +BenchmarkShotgunParse-12 62788 18113 ns/op 19448 B/op 474 allocs/op +BenchmarkParseAny-12 347020 3455 ns/op 48 B/op 2 allocs/op +BenchmarkBigShotgunParse-12 1226 951271 ns/op 1214937 B/op 27245 allocs/op +BenchmarkBigParseAny-12 4234 267893 ns/op 27492 B/op 961 allocs/op +BenchmarkBigParseIn-12 4032 280900 ns/op 30422 B/op 1033 allocs/op +BenchmarkBigParseRetryAmbiguous-12 4453 282475 ns/op 29558 B/op 1030 allocs/op +BenchmarkShotgunParseErrors-12 19240 62641 ns/op 67080 B/op 1679 allocs/op 
+BenchmarkParseAnyErrors-12 185677 6179 ns/op 752 B/op 23 allocs/op +BenchmarkBigParseAnyErrors-12 26688 44885 ns/op 480 B/op 94 allocs/op +BenchmarkParseAmbiguous-12 1590302 752.9 ns/op 296 B/op 7 allocs/op +BenchmarkParseWeekdayAndFullMonth-12 2141109 555.0 ns/op 16 B/op 2 allocs/op */ func BenchmarkShotgunParse(b *testing.B) { b.ReportAllocs() From 9f7bdf710107b65a1a198021edd29c17df7b95e1 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Tue, 19 Dec 2023 21:50:19 -0700 Subject: [PATCH 59/62] Update go doc --- dateparse/main.go | 2 +- parseany.go | 24 +++++++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/dateparse/main.go b/dateparse/main.go index 6c3c20a..f103658 100644 --- a/dateparse/main.go +++ b/dateparse/main.go @@ -6,8 +6,8 @@ import ( "os" "time" - "github.com/scylladb/termtables" "github.com/araddon/dateparse" + "github.com/scylladb/termtables" ) var ( diff --git a/parseany.go b/parseany.go index 76f14a9..c192f3e 100644 --- a/parseany.go +++ b/parseany.go @@ -1,6 +1,20 @@ // Package dateparse parses date-strings without knowing the format // in advance, using a fast lex based approach to eliminate shotgun -// attempts. It leans towards US style dates when there is a conflict. +// attempts. Validates comprehensively to avoid false positives. +// +// By default it leans towards US style dates when there is a +// conflict. This can be adjusted using the `PreferMonthFirst` +// parser option. Some ambiguous formats can fail (e.g., trying to +// parse `31/03/2023` as the default month-first format +// `MM/DD/YYYY`), but can be automatically retried with +// `RetryAmbiguousDateWithSwap`. +// +// Consider turning on the `SimpleErrorMessages` option if you +// will be attempting to parse many strings that do not match any +// known format and you need to maximize performance. +// +// See README.md for key points on how timezone/location parsing +// works in go, as this can be counterintuitive initially.
package dateparse import ( @@ -184,7 +198,7 @@ func ParseAny(datestr string, opts ...ParserOption) (time.Time, error) { // rules. Using location arg, if timezone/offset info exists in the // datestring, it uses the given location rules for any zone interpretation. // That is, MST means one thing when using America/Denver and something else -// in other locations. +// in other locations. See README for a more detailed explanation. func ParseIn(datestr string, loc *time.Location, opts ...ParserOption) (time.Time, error) { p, err := parseTime(datestr, loc, opts...) defer putBackParser(p) @@ -232,9 +246,13 @@ func MustParse(datestr string, opts ...ParserOption) time.Time { return t } -// ParseFormat parse's an unknown date-time string and returns a layout +// ParseFormat parses an unknown date-time string and returns a layout // string that can parse this (and exact same format) other date-time strings. // +// In certain edge cases, this may produce a format string of a different +// length than the input string. If this happens, it's an edge case that +// requires individually parsing each time. +// // layout, err := dateparse.ParseFormat("2013-02-01 00:00:00") // // layout = "2006-01-02 15:04:05" func ParseFormat(datestr string, opts ...ParserOption) (string, error) { From fd21b1ee3e37ffb5eec6c9e0b36d92c9886c008c Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Sat, 23 Dec 2023 20:04:38 -0700 Subject: [PATCH 60/62] Allow weekday prefix for most date formats This is implemented now using the "skip" parser field, indicating to skip the first N characters. This also avoids a recursive parse in one more case (more efficient). This simplifies the state machine a little bit, while the rest of the code needs to properly account for the value of the skip field. Also allow whitespace prefix without penalty. Modify the test suite to pseudo-randomly add a weekday prefix to the formats that allow it (all except the purely numeric ones).
--- parseany.go | 276 +++++++++++++++++------------------------------ parseany_test.go | 146 ++++++++++++++----------- 2 files changed, 182 insertions(+), 240 deletions(-) diff --git a/parseany.go b/parseany.go index c192f3e..b96a8b7 100644 --- a/parseany.go +++ b/parseany.go @@ -112,8 +112,6 @@ const ( dateAlphaSlash dateAlphaSlashDigit dateAlphaSlashDigitSlash - dateWeekdayComma - dateWeekdayAbbrevComma dateYearWs dateYearWsMonthWs ) @@ -308,10 +306,13 @@ iterRunes: if bytesConsumed > 1 { i += bytesConsumed - 1 } + adjustedI := i - p.skip // gou.Debugf("i=%d r=%s state=%d %s", i, string(r), p.stateDate, p.datestr) switch p.stateDate { case dateStart: + // Note that we can reach this state either at the very start of the string, + // or after skipping something (like a weekday, etc). // NOTE: don't use unicode.IsDigit and unicode.IsLetter here because // we don't expect non-ANSI chars to start a valid date/time format. // This will let us quickly reject strings that begin with any non-ANSI char. 
@@ -319,6 +320,10 @@ iterRunes: p.stateDate = dateDigit } else if ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') { p.stateDate = dateAlpha + } else if r == ' ' { + // we can safely ignore whitespace at the start of strings (helps with + // situations where we skipped a weekday and came back to this state) + p.skip = i + 1 } else { return p, p.unknownErr(datestr) } @@ -330,12 +335,12 @@ iterRunes: // 2013-Feb-03 // 13-Feb-03 // 29-Jun-2016 - if i == 4 { + if adjustedI == 4 { p.stateDate = dateYearDash - p.yeari = 0 - p.yearlen = i + p.yeari = p.skip + p.yearlen = i - p.skip p.moi = i + 1 - p.set(0, "2006") + p.set(p.skip, "2006") } else { p.stateDate = dateDigitDash } @@ -344,9 +349,10 @@ iterRunes: // 03/31/2005 // 2014/02/24 p.stateDate = dateDigitSlash - if i == 4 { + if adjustedI == 4 { // 2014/02/24 - Year first / - p.yearlen = i // since it was start of datestr, i=len + p.yeari = p.skip + p.yearlen = i - p.skip p.moi = i + 1 if !p.setYear() { return p, p.unknownErr(datestr) @@ -362,7 +368,7 @@ iterRunes: p.stateDate = dateDigitSlashAlpha p.moi = i + 1 p.daylen = 2 - p.dayi = 0 + p.dayi = p.skip if !p.setDay() { return p, p.unknownErr(datestr) } @@ -376,7 +382,8 @@ iterRunes: if p.preferMonthFirst { if p.molen == 0 { // 03/31/2005 - p.molen = i + p.moi = p.skip + p.molen = i - p.skip if !p.setMonth() { return p, p.unknownErr(datestr) } @@ -386,7 +393,8 @@ iterRunes: } } else { if p.daylen == 0 { - p.daylen = i + p.dayi = p.skip + p.daylen = i - p.skip if !p.setDay() { return p, p.unknownErr(datestr) } @@ -401,8 +409,9 @@ iterRunes: // 03:31:2005 // 2014:02:24 p.stateDate = dateDigitColon - if i == 4 { - p.yearlen = i + if adjustedI == 4 { + p.yeari = p.skip + p.yearlen = i - p.skip p.moi = i + 1 if !p.setYear() { return p, p.unknownErr(datestr) @@ -412,7 +421,8 @@ iterRunes: p.ambiguousRetryable = true if p.preferMonthFirst { if p.molen == 0 { - p.molen = i + p.moi = p.skip + p.molen = i - p.skip if !p.setMonth() { return p, p.unknownErr(datestr) } @@ -422,7 
+432,8 @@ iterRunes: } } else { if p.daylen == 0 { - p.daylen = i + p.dayi = p.skip + p.daylen = i - p.skip if !p.setDay() { return p, p.unknownErr(datestr) } @@ -438,19 +449,21 @@ iterRunes: // 08.21.71 // 2014.05 p.stateDate = dateDigitDot - if i == 4 { - p.yearlen = i + if adjustedI == 4 { + p.yeari = p.skip + p.yearlen = i - p.skip p.moi = i + 1 if !p.setYear() { return p, p.unknownErr(datestr) } - } else if i <= 2 { + } else if adjustedI <= 2 { p.ambiguousMD = true p.ambiguousRetryable = true if p.preferMonthFirst { if p.molen == 0 { // 03.31.2005 - p.molen = i + p.moi = p.skip + p.molen = i - p.skip if !p.setMonth() { return p, p.unknownErr(datestr) } @@ -460,7 +473,8 @@ iterRunes: } } else { if p.daylen == 0 { - p.daylen = i + p.dayi = p.skip + p.daylen = i - p.skip if !p.setDay() { return p, p.unknownErr(datestr) } @@ -482,24 +496,26 @@ iterRunes: // 12 Feb 2006, 19:17 // 12 Feb 2006, 19:17:22 // 2013 Jan 06 15:04:05 - if i == 4 { - p.yearlen = i + if adjustedI == 4 { + p.yeari = p.skip + p.yearlen = i - p.skip p.moi = i + 1 if !p.setYear() { return p, p.unknownErr(datestr) } p.stateDate = dateYearWs - } else if i == 6 { + } else if adjustedI == 6 { p.stateDate = dateDigitSt } else { p.stateDate = dateDigitWs - p.dayi = 0 - p.daylen = i + p.dayi = p.skip + p.daylen = i - p.skip } case '年': // Chinese Year p.stateDate = dateDigitChineseYear - p.yearlen = i - 2 + p.yeari = p.skip + p.yearlen = i - 2 - p.skip p.moi = i + 1 if !p.setYear() { return p, p.unknownErr(datestr) @@ -518,10 +534,10 @@ iterRunes: } continue } - p.part1Len = i + p.part1Len = i - p.skip case dateDigitSt: - p.set(0, "060102") + p.set(p.skip, "060102") i = i - 1 p.stateTime = timeStart break iterRunes @@ -738,7 +754,7 @@ iterRunes: p.yearlen = 4 p.set(p.yeari, "2006") // We now also know that part1 was the day - p.dayi = 0 + p.dayi = p.skip p.daylen = p.part1Len if !p.setDay() { return p, p.unknownErr(datestr) @@ -755,7 +771,7 @@ iterRunes: p.yearlen = 2 p.set(p.yeari, "06") // We now also 
know that part1 was the day - p.dayi = 0 + p.dayi = p.skip p.daylen = p.part1Len if !p.setDay() { return p, p.unknownErr(datestr) @@ -983,13 +999,13 @@ iterRunes: case ' ': p.yeari = i + 1 //p.yearlen = 4 - p.dayi = 0 + p.dayi = p.skip p.daylen = p.part1Len if !p.setDay() { return p, p.unknownErr(datestr) } p.stateTime = timeStart - if i > p.daylen+len(" Sep") { // November etc + if adjustedI > p.daylen+len(" Sep") { // November etc // If this is a legit full month, then change the string we're parsing // to compensate for the longest month, and do the same with the format string. We // must maintain a corresponding length/content and this is the easiest @@ -1252,18 +1268,6 @@ iterRunes: // Oct/07/1970 // February/ 7/1970 // February/07/1970 - // - // dateWeekdayComma - // Monday, 02 Jan 2006 15:04:05 MST - // Monday, 02-Jan-06 15:04:05 MST - // Monday, 02 Jan 2006 15:04:05 -0700 - // Monday, 02 Jan 2006 15:04:05 +0100 - // dateWeekdayAbbrevComma - // Mon, 02 Jan 2006 15:04:05 MST - // Mon, 02 Jan 2006 15:04:05 -0700 - // Thu, 13 Jul 2017 08:58:40 +0100 - // Tue, 11 Jul 2017 16:28:13 +0200 (CEST) - // Mon, 02-Jan-06 15:04:05 MST switch { case r == ' ': // This could be a weekday or a month, detect and parse both cases. @@ -1272,21 +1276,17 @@ iterRunes: // Tuesday 05 May 2020, 05:05:05 // Mon Jan 2 15:04:05 2006 // Monday Jan 2 15:04:05 2006 - maybeDayOrMonth := strings.ToLower(p.datestr[0:i]) + maybeDayOrMonth := strings.ToLower(p.datestr[p.skip:i]) if isDay(maybeDayOrMonth) { - // using skip throws off indices used by other code; saner to restart - newDateStr := p.datestr[i+1:] - putBackParser(p) - return parseTime(newDateStr, loc, opts...) 
- } - - // X - // April 8, 2009 - if i > 3 { + p.skip = i + 1 + p.stateDate = dateStart + } else if adjustedI > 3 { + // X + // April 8, 2009 // Expecting a full month name at this point if isMonthFull(maybeDayOrMonth) { - p.moi = 0 - p.molen = i + p.moi = p.skip + p.molen = i - p.skip p.fullMonth = maybeDayOrMonth p.stateDate = dateAlphaFullMonthWs p.dayi = i + 1 @@ -1295,7 +1295,7 @@ iterRunes: return p, p.unknownErr(datestr) } - } else if i == 3 { + } else if adjustedI == 3 { // dateAlphaWs // May 05, 2005, 05:05:05 // May 05 2005, 05:05:05 @@ -1309,14 +1309,11 @@ iterRunes: case r == ',': // Mon, 02 Jan 2006 - - if i == 3 { - p.stateDate = dateWeekdayAbbrevComma - p.set(0, "Mon") - } else { - maybeDay := strings.ToLower(p.datestr[0:i]) + // Monday, 02 Jan 2006 + if adjustedI >= 3 && p.nextIs(i, ' ') { + maybeDay := strings.ToLower(p.datestr[p.skip:i]) if isDay(maybeDay) { - p.stateDate = dateWeekdayComma + p.stateDate = dateStart // Just skip past the weekday, it contains no valuable info p.skip = i + 2 i++ @@ -1328,12 +1325,13 @@ iterRunes: // sept. 28, 2017 // jan. 28, 2017 p.stateDate = dateAlphaPeriodWsDigit - if i == 3 { - p.molen = i - p.set(0, "Jan") - } else if i == 4 { + if adjustedI == 3 { + p.moi = p.skip + p.molen = i - p.skip + p.set(p.skip, "Jan") + } else if adjustedI == 4 { // gross - newDateStr := p.datestr[0:i-1] + p.datestr[i:] + newDateStr := p.datestr[p.skip:i-1] + p.datestr[i:] putBackParser(p) return parseTime(newDateStr, loc, opts...) 
} else { @@ -1347,15 +1345,15 @@ iterRunes: // February/ 7/1970 // February/07/1970 // Must be a valid short or long month - if i == 3 { - p.moi = 0 + if adjustedI == 3 { + p.moi = p.skip p.molen = i - p.moi p.set(p.moi, "Jan") p.stateDate = dateAlphaSlash } else { - possibleFullMonth := strings.ToLower(p.datestr[:i]) - if i > 3 && isMonthFull(possibleFullMonth) { - p.moi = 0 + possibleFullMonth := strings.ToLower(p.datestr[p.skip:i]) + if adjustedI > 3 && isMonthFull(possibleFullMonth) { + p.moi = p.skip p.molen = i - p.moi p.fullMonth = possibleFullMonth p.stateDate = dateAlphaSlash @@ -1385,13 +1383,24 @@ iterRunes: // May 08 17:57:51 2009 // oct 1, 1970 // oct 7, '70 + // (this state is only entered if the skip-adjusted length is 3) switch { case unicode.IsLetter(r): - p.set(0, "Mon") - p.stateDate = dateAlphaWsAlpha - p.set(i, "Jan") + // have to have a day of week and then at least a 3 digit month to follow + if adjustedI >= 3 && (i+3) < len(p.datestr) { + maybeDay := strings.ToLower(p.datestr[p.skip:i]) + if isDay(maybeDay) { + p.skip = i + p.stateDate = dateAlphaWsAlpha + p.set(i, "Jan") + } else { + return p, p.unknownErr(datestr) + } + } else { + return p, p.unknownErr(datestr) + } case unicode.IsDigit(r): - p.set(0, "Jan") + p.set(p.skip, "Jan") p.stateDate = dateAlphaWsDigit p.dayi = i case r == ' ': @@ -1438,9 +1447,9 @@ iterRunes: // Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) if r == ':' { // Guessed wrong; was not a year + p.yeari = 0 i = i - 3 p.stateDate = dateAlphaWsDigit - p.yeari = 0 break iterRunes } else if r == ' ' { // must be year format, not 15:04 @@ -1502,7 +1511,7 @@ iterRunes: case 't', 'T': if p.nextIs(i, 'h') || p.nextIs(i, 'H') { if len(p.datestr) > i+2 { - newDateStr := p.datestr[0:i] + p.datestr[i+2:] + newDateStr := p.datestr[p.skip:i] + p.datestr[i+2:] putBackParser(p) return parseTime(newDateStr, loc, opts...) 
} @@ -1511,7 +1520,7 @@ iterRunes: case 'n', 'N': if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if len(p.datestr) > i+2 { - newDateStr := p.datestr[0:i] + p.datestr[i+2:] + newDateStr := p.datestr[p.skip:i] + p.datestr[i+2:] putBackParser(p) return parseTime(newDateStr, loc, opts...) } @@ -1520,7 +1529,7 @@ iterRunes: case 's', 'S': if p.nextIs(i, 't') || p.nextIs(i, 'T') { if len(p.datestr) > i+2 { - newDateStr := p.datestr[0:i] + p.datestr[i+2:] + newDateStr := p.datestr[p.skip:i] + p.datestr[i+2:] putBackParser(p) return parseTime(newDateStr, loc, opts...) } @@ -1529,7 +1538,7 @@ iterRunes: case 'r', 'R': if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if len(p.datestr) > i+2 { - newDateStr := p.datestr[0:i] + p.datestr[i+2:] + newDateStr := p.datestr[p.skip:i] + p.datestr[i+2:] putBackParser(p) return parseTime(newDateStr, loc, opts...) } @@ -1672,87 +1681,6 @@ iterRunes: return p, p.unknownErr(datestr) } - case dateWeekdayComma: - // Monday, 02 Jan 2006 15:04:05 MST - // Monday, 02 Jan 2006 15:04:05 -0700 - // Monday, 02 Jan 2006 15:04:05 +0100 - // Monday, 02-Jan-06 15:04:05 MST - if p.dayi == 0 { - p.dayi = i - } - switch r { - case ' ': - fallthrough - case '-', '\u2212': - if p.moi == 0 { - p.moi = i + 1 - p.daylen = i - p.dayi - if !p.setDay() { - return p, p.unknownErr(datestr) - } - } else if p.yeari == 0 { - p.yeari = i + 1 - p.molen = i - p.moi - if p.molen == 3 { - p.set(p.moi, "Jan") - } else { - return p, p.unknownErr(datestr) - } - } else { - p.stateTime = timeStart - break iterRunes - } - default: - if !unicode.IsDigit(r) && !unicode.IsLetter(r) { - return p, p.unknownErr(datestr) - } - } - case dateWeekdayAbbrevComma: - // Mon, 02 Jan 2006 15:04:05 MST - // Mon, 02 Jan 2006 15:04:05 -0700 - // Thu, 13 Jul 2017 08:58:40 +0100 - // Thu, 4 Jan 2018 17:53:36 +0000 - // Tue, 11 Jul 2017 16:28:13 +0200 (CEST) - // Mon, 02-Jan-06 15:04:05 MST - var offset int - switch r { - case ' ': - for i+1 < len(p.datestr) && p.datestr[i+1] == ' ' { - i++ - offset++ - } - 
fallthrough - case '-', '\u2212': - if p.dayi == 0 { - p.dayi = i + 1 - } else if p.moi == 0 { - p.daylen = i - p.dayi - if !p.setDay() { - return p, p.unknownErr(datestr) - } - p.moi = i + 1 - } else if p.yeari == 0 { - p.molen = i - p.moi - offset - if p.molen == 3 { - p.set(p.moi, "Jan") - } else { - return p, p.unknownErr(datestr) - } - p.yeari = i + 1 - } else { - p.yearlen = i - p.yeari - offset - if !p.setYear() { - return p, p.unknownErr(datestr) - } - p.stateTime = timeStart - break iterRunes - } - default: - if !unicode.IsDigit(r) && !unicode.IsLetter(r) { - return p, p.unknownErr(datestr) - } - } - default: // Reaching an unhandled state unexpectedly should always fail parsing return p, p.unknownErr(datestr) @@ -2567,7 +2495,7 @@ iterRunes: p.yearlen = 4 p.set(p.yeari, "2006") // We now also know that part1 was the day - p.dayi = 0 + p.dayi = p.skip p.daylen = p.part1Len if !p.setDay() { return p, p.unknownErr(datestr) @@ -2584,7 +2512,7 @@ iterRunes: p.yearlen = 2 p.set(p.yeari, "06") // We now also know that part1 was the day - p.dayi = 0 + p.dayi = p.skip p.daylen = p.part1Len if !p.setDay() { return p, p.unknownErr(datestr) @@ -2715,17 +2643,6 @@ iterRunes: // February/07/1970 return p, nil - case dateWeekdayComma: - // Monday, 02 Jan 2006 15:04:05 -0700 - // Monday, 02 Jan 2006 15:04:05 +0100 - // Monday, 02-Jan-06 15:04:05 MST - return p, nil - - case dateWeekdayAbbrevComma: - // Mon, 02-Jan-06 15:04:05 MST - // Mon, 02 Jan 2006 15:04:05 MST - return p, nil - case dateYearWsMonthWs: // 2013 May 02 11:37:55 // 2013 December 02 11:37:55 @@ -3129,8 +3046,9 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) if err != nil && strings.Contains(err.Error(), "month out of range") { // simple optimized case where mm and dd can be swapped directly if p.molen == 2 && p.daylen == 2 { - moi := p.moi - p.moi = p.dayi + // skipped bytes have already been removed, so compensate for that + moi := p.moi - p.skip + p.moi = p.dayi - 
p.skip p.dayi = moi if !p.setDay() || !p.setMonth() { err = p.unknownErr(p.datestr) diff --git a/parseany_test.go b/parseany_test.go index 6eff9cf..54a83d7 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -19,6 +19,7 @@ type dateTest struct { preferDayFirst bool retryAmbiguous bool expectAmbiguous bool + allowWeekdayPrefix bool } var testInputs = []dateTest{ @@ -81,6 +82,7 @@ var testInputs = []dateTest{ {in: "Mon 30 Sep 2018 09:09:09 PM CEST", out: "2018-09-30 21:09:09 +0000 UTC", zname: "CEST"}, {in: "Mon 02 Jan 2006", out: "2006-01-02 00:00:00 +0000 UTC"}, {in: "Monday 02 Jan 2006 03:04:05 PM UTC", out: "2006-01-02 15:04:05 +0000 UTC", zname: "UTC"}, + {in: "SUNDAY, July 05 2015", out: "2015-07-05 00:00:00 +0000 UTC", zname: "UTC"}, // RubyDate = "Mon Jan 02 15:04:05 -0700 2006" {in: "Mon Jan 02 15:04:05 -0700 2006", out: "2006-01-02 22:04:05 +0000 UTC"}, {in: "Thu May 08 11:57:51 -0700 2009", out: "2009-05-08 18:57:51 +0000 UTC"}, @@ -222,8 +224,9 @@ var testInputs = []dateTest{ {in: "Fri, 03-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, {in: "Fri, 03-Jul-15 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, {in: "Fri, 03-Jul-15 08:08:08 PST", out: "2015-07-03 16:08:08 +0000 UTC", loc: "America/Los_Angeles", zname: "PDT"}, - {in: "Fri, 03-Jul 2015 08:08:08 PST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "PST"}, - {in: "Fri, 03-Jul 2015 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, + {in: "Fri, 03-Jul-2015", out: "2015-07-03 00:00:00 +0000 UTC"}, + {in: "Fri, 03-Jul-2015 08:08:08 PST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "PST"}, + {in: "Fri, 03-Jul-2015 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, {in: "Fri, 3-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, {in: "Fri, 3-Jul-15 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, {in: "Fri, 03-Jul-15 8:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", 
zname: "MST"}, @@ -697,22 +700,22 @@ var testInputs = []dateTest{ {in: "08/04/2014 22:05", out: "2014-04-08 22:05:00 +0000 UTC", preferDayFirst: true}, {in: "2/04/2014 03:00:51", out: "2014-02-04 03:00:51 +0000 UTC", preferDayFirst: false}, {in: "2/04/2014 03:00:51", out: "2014-04-02 03:00:51 +0000 UTC", preferDayFirst: true}, - {in: "19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", retryAmbiguous: true}, - {in: "19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", preferDayFirst: true}, - {in: "19/03/2012 10:11:59.3186369", out: "2012-03-19 10:11:59.3186369 +0000 UTC", retryAmbiguous: true}, + {in: "19/03/2012 10:11:56", out: "2012-03-19 10:11:56 +0000 UTC", retryAmbiguous: true}, + {in: "19/03/2012 10:11:57", out: "2012-03-19 10:11:57 +0000 UTC", preferDayFirst: true}, + {in: "19/03/2012 10:11:58.3186369", out: "2012-03-19 10:11:58.3186369 +0000 UTC", retryAmbiguous: true}, {in: "19/03/2012 10:11:59.3186369", out: "2012-03-19 10:11:59.3186369 +0000 UTC", preferDayFirst: true}, // For certain parse modes that restart parsing, make sure that parsing options are passed along! 
- {in: "Monday 19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", retryAmbiguous: true}, - {in: "Monday 19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", preferDayFirst: true}, + {in: "Monday 19/03/2012 10:11:50", out: "2012-03-19 10:11:50 +0000 UTC", retryAmbiguous: true}, + {in: "Monday 19/03/2012 10:11:51", out: "2012-03-19 10:11:51 +0000 UTC", preferDayFirst: true}, // https://github.com/araddon/dateparse/issues/105 {in: "20/5/2006 19:51:45", out: "2006-05-20 19:51:45 +0000 UTC", retryAmbiguous: true}, {in: "20/5/2006 19:51:45", out: "2006-05-20 19:51:45 +0000 UTC", preferDayFirst: true}, // yyyymmdd and similar - {in: "2014", out: "2014-01-01 00:00:00 +0000 UTC"}, - {in: "20140601", out: "2014-06-01 00:00:00 +0000 UTC"}, - {in: "20140722105203", out: "2014-07-22 10:52:03 +0000 UTC"}, + {in: "2014", out: "2014-01-01 00:00:00 +0000 UTC", allowWeekdayPrefix: false}, + {in: "20140601", out: "2014-06-01 00:00:00 +0000 UTC", allowWeekdayPrefix: false}, + {in: "20140722105203", out: "2014-07-22 10:52:03 +0000 UTC", allowWeekdayPrefix: false}, // https://github.com/araddon/dateparse/issues/143 - {in: "20140722105203.364", out: "2014-07-22 10:52:03.364 +0000 UTC"}, + {in: "20140722105203.364", out: "2014-07-22 10:52:03.364 +0000 UTC", allowWeekdayPrefix: false}, // yymmdd hh:mm:yy mysql log https://github.com/araddon/dateparse/issues/119 // 080313 05:21:55 mysqld started // 080313 5:21:55 InnoDB: Started; log sequence number 0 43655 @@ -721,11 +724,11 @@ var testInputs = []dateTest{ {in: "190910 11:51:49", out: "2019-09-10 11:51:49 +0000 UTC"}, // all digits: unix secs, ms etc - {in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC", zname: "UTC"}, - {in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC", loc: "America/Denver", zname: "MDT"}, - {in: "1384216367111", out: "2013-11-12 00:32:47.111 +0000 UTC"}, - {in: "1384216367111222", out: "2013-11-12 00:32:47.111222 +0000 UTC"}, - {in: "1384216367111222333", out: "2013-11-12 00:32:47.111222333 
+0000 UTC"}, + {in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC", zname: "UTC", allowWeekdayPrefix: false}, + {in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC", loc: "America/Denver", zname: "MDT", allowWeekdayPrefix: false}, + {in: "1384216367111", out: "2013-11-12 00:32:47.111 +0000 UTC", allowWeekdayPrefix: false}, + {in: "1384216367111222", out: "2013-11-12 00:32:47.111222 +0000 UTC", allowWeekdayPrefix: false}, + {in: "1384216367111222333", out: "2013-11-12 00:32:47.111222333 +0000 UTC", allowWeekdayPrefix: false}, // other {in: "Wed, 8 Feb 2023 19:00:46 +1100 (AEDT)", out: "2023-02-08 08:00:46 +0000 UTC"}, @@ -742,6 +745,10 @@ var testInputs = []dateTest{ {in: "2014.02.13T08:33:44.555", out: "2014-02-13 08:33:44.555 +0000 UTC"}, {in: "2014.02.13T08:33:44.555 PM -0700 MST", out: "2014-02-14 03:33:44.555 +0000 UTC", zname: "MST"}, {in: "2014.02.13-0200", out: "2014-02-13 02:00:00 +0000 UTC"}, + // Whitespace up front is now allowed + {in: " 2018-01-02 17:08:09 -07:00", out: "2018-01-03 00:08:09 +0000 UTC"}, + {in: " 2018-01-02 17:08:09 -07:00", out: "2018-01-03 00:08:09 +0000 UTC"}, + {in: " 2018-01-02 17:08:09 -07:00", out: "2018-01-03 00:08:09 +0000 UTC"}, } func TestParse(t *testing.T) { @@ -761,51 +768,70 @@ func TestParse(t *testing.T) { assert.NotEqual(t, nil, err) }) + allDays := make([]string, 0, len(knownDays)) + for day := range knownDays { + allDays = append(allDays, day) + } + + i := 0 for _, simpleErrorMessage := range []bool{false, true} { - for _, th := range testInputs { - t.Run(fmt.Sprintf("simpleerr-%v-%s", simpleErrorMessage, th.in), func(t *testing.T) { - var ts time.Time - defer func() { - if r := recover(); r != nil { - t.Fatalf("error: %s", r) + for _, addWeekday := range []bool{false, true} { + for _, th := range testInputs { + i++ + prefix := "" + if addWeekday && th.allowWeekdayPrefix { + prefix = allDays[i%len(allDays)] + if i%2 == 1 { + prefix += "," } - }() - parserOptions := []ParserOption{ - 
PreferMonthFirst(!th.preferDayFirst), - RetryAmbiguousDateWithSwap(th.retryAmbiguous), - SimpleErrorMessages(simpleErrorMessage), + prefix += " " } - if len(th.loc) > 0 { - loc, err := time.LoadLocation(th.loc) - if err != nil { - t.Fatalf("Expected to load location %q but got %v", th.loc, err) - } - ts, err = ParseIn(th.in, loc, parserOptions...) - if err != nil { - t.Fatalf("expected to parse %q but got %v", th.in, err) - } - got := fmt.Sprintf("%v", ts.In(time.UTC)) - assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) - if th.out != got { - t.Fatalf("whoops, got %s, expected %s", got, th.out) - } - if len(th.zname) > 0 { - gotZone, _ := ts.Zone() - assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) - } - } else { - ts = MustParse(th.in, parserOptions...) - got := fmt.Sprintf("%v", ts.In(time.UTC)) - assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) - if th.out != got { - t.Fatalf("whoops, got %s, expected %s", got, th.out) + fullInput := prefix + th.in + + t.Run(fmt.Sprintf("simpleerr-%v-%s", simpleErrorMessage, fullInput), func(t *testing.T) { + var ts time.Time + defer func() { + if r := recover(); r != nil { + t.Fatalf("error: %s", r) + } + }() + parserOptions := []ParserOption{ + PreferMonthFirst(!th.preferDayFirst), + RetryAmbiguousDateWithSwap(th.retryAmbiguous), + SimpleErrorMessages(simpleErrorMessage), } - if len(th.zname) > 0 { - gotZone, _ := ts.Zone() - assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) + if len(th.loc) > 0 { + loc, err := time.LoadLocation(th.loc) + if err != nil { + t.Fatalf("Expected to load location %q but got %v", th.loc, err) + } + ts, err = ParseIn(fullInput, loc, parserOptions...) 
+ if err != nil { + t.Fatalf("expected to parse %q but got %v", fullInput, err) + } + got := fmt.Sprintf("%v", ts.In(time.UTC)) + assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, fullInput) + if th.out != got { + t.Fatalf("whoops, got %s, expected %s", got, th.out) + } + if len(th.zname) > 0 { + gotZone, _ := ts.Zone() + assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, fullInput) + } + } else { + ts = MustParse(fullInput, parserOptions...) + got := fmt.Sprintf("%v", ts.In(time.UTC)) + assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, fullInput) + if th.out != got { + t.Fatalf("whoops, got %s, expected %s", got, th.out) + } + if len(th.zname) > 0 { + gotZone, _ := ts.Zone() + assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, fullInput) + } } - } - }) + }) + } } } @@ -868,12 +894,10 @@ var testParseErrors = []dateTest{ {in: `{"hello"}`, err: true}, {in: "2009-15-12T22:15Z", err: true}, {in: "5,000-9,999", err: true}, - {in: "xyzq-baad"}, + {in: "xyzq-baad", err: true}, {in: "oct.-7-1970", err: true}, {in: "septe. 
7, 1970", err: true}, {in: "SeptemberRR 7th, 1970", err: true}, - // this is just testing the empty space up front - {in: " 2018-01-02 17:08:09 -07:00", err: true}, // a semantic version number should not be interpreted as a date {in: "1.22.3-78888", err: true}, // a semantic version number that starts with a date should not be interpreted as a date @@ -1203,6 +1227,6 @@ func TestRetryAmbiguousDateWithSwap(t *testing.T) { // Convenience function for debugging a particular broken test case func TestDebug(t *testing.T) { - ts := MustParse("03.31.2014 10:11:59 MST-0700", PreferMonthFirst(true)) - assert.Equal(t, "2014-03-31 17:11:59 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts := MustParse("Monday 19/03/2012 00:00:00", RetryAmbiguousDateWithSwap(true)) + assert.Equal(t, "2012-03-19 00:00:00 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } From c4de5d4f6a89afee2bbf3e273da5cace1d06aca0 Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Sat, 30 Dec 2023 01:10:43 -0700 Subject: [PATCH 61/62] Unify/fix timezone offset/name states * Merge duplicate states (fixes lots of edge cases) * Support for +00:00 is consistent with +0000 now * Support (timezone description) after any offset/name * Update tests to cover positive/negative cases * Update example with new supported formats --- README.md | 369 ++++++++++++++++++---------------- example/main.go | 369 ++++++++++++++++++---------------- parseany.go | 503 ++++++++++++++++++++++++++--------------------- parseany_test.go | 86 +++++++- 4 files changed, 763 insertions(+), 564 deletions(-) diff --git a/README.md b/README.md index 8ef2e23..4e0f1aa 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,15 @@ var examples = []string{ "Mon Aug 10 15:44:11 UTC+0000 2015", // git log default date format "Thu Apr 7 15:13:13 2005 -0700", + // variants of git log default date format + "Thu Apr 7 15:13:13 2005 -07:00", + "Thu Apr 7 15:13:13 2005 -07:00 PST", + "Thu Apr 7 15:13:13 2005 -07:00 PST (Pacific Standard Time)", + "Thu Apr 
7 15:13:13 -0700 2005", + "Thu Apr 7 15:13:13 -07:00 2005", + "Thu Apr 7 15:13:13 -0700 PST 2005", + "Thu Apr 7 15:13:13 -07:00 PST 2005", + "Thu Apr 7 15:13:13 PST 2005", // Variants of the above with a (full time zone description) "Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time)", "Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)", @@ -149,11 +158,15 @@ var examples = []string{ "04/08/2014, 22:05", "4/8/14 22:05", "04/2/2014 03:00:51", - "8/8/1965 12:00:00 AM", - "8/8/1965 01:00:01 PM", - "8/8/1965 01:00 PM", "8/8/1965 1:00 PM", + "8/8/1965 01:00 PM", "8/8/1965 12:00 AM", + "8/8/1965 12:00:00AM", + "8/8/1965 01:00:01 PM", + "8/8/1965 01:00:01PM -0700", + "8/8/1965 13:00:01 -0700 PST", + "8/8/1965 01:00:01 PM -0700 PST", + "8/8/1965 01:00:01 PM -07:00 PST (Pacific Standard Time)", "4/02/2014 03:00:51", "03/19/2012 10:11:59", "03/19/2012 10:11:59.3186369", @@ -308,175 +321,187 @@ func main() { } /* -+----------------------------------------------------------+-----------------------------------------+ -| Input | Parsed, and Output as %v | -+----------------------------------------------------------+-----------------------------------------+ -| May 8, 2009 5:57:51 PM | 2009-05-08 17:57:51 +0000 UTC | -| oct 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| oct 7, '70 | 1970-10-07 00:00:00 +0000 UTC | -| oct. 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| oct. 7, 70 | 1970-10-07 00:00:00 +0000 UTC | -| October 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| October 7th, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| Sept. 
7, 1970 11:15:26pm | 1970-09-07 23:15:26 +0000 UTC | -| Sep 7 2009 11:15:26.123 PM PST | 2009-09-07 23:15:26.123 +0000 PST | -| September 3rd, 2009 11:15:26.123456789pm | 2009-09-03 23:15:26.123456789 +0000 UTC | -| September 17 2012 10:09am | 2012-09-17 10:09:00 +0000 UTC | -| September 17, 2012, 10:10:09 | 2012-09-17 10:10:09 +0000 UTC | -| Sep 17, 2012 at 10:02am (EST) | 2012-09-17 10:02:00 +0000 EST | -| September 17, 2012 at 10:09am PST-08 | 2012-09-17 10:09:00 -0800 PST | -| September 17 2012 5:00pm UTC-0700 | 2012-09-17 17:00:00 -0700 -0700 | -| September 17 2012 5:00pm GMT-0700 | 2012-09-17 17:00:00 -0700 -0700 | -| 7 oct 70 | 1970-10-07 00:00:00 +0000 UTC | -| 7 Oct 1970 | 1970-10-07 00:00:00 +0000 UTC | -| 7 September 1970 23:15 | 1970-09-07 23:15:00 +0000 UTC | -| 7 September 1970 11:15:26pm | 1970-09-07 23:15:26 +0000 UTC | -| 03 February 2013 | 2013-02-03 00:00:00 +0000 UTC | -| 12 Feb 2006, 19:17 | 2006-02-12 19:17:00 +0000 UTC | -| 12 Feb 2006 19:17 | 2006-02-12 19:17:00 +0000 UTC | -| 14 May 2019 19:11:40.164 | 2019-05-14 19:11:40.164 +0000 UTC | -| 4th Sep 2012 | 2012-09-04 00:00:00 +0000 UTC | -| 1st February 2018 13:58:24 | 2018-02-01 13:58:24 +0000 UTC | -| Mon, 02 Jan 2006 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | -| Mon, 02 Jan 2006 15:04:05 -0700 | 2006-01-02 15:04:05 -0700 -0700 | -| Tue, 11 Jul 2017 16:28:13 +0200 (CEST) | 2017-07-11 16:28:13 +0200 +0200 | -| Mon 30 Sep 2018 09:09:09 PM UTC | 2018-09-30 21:09:09 +0000 UTC | -| Sun, 07 Jun 2020 00:00:00 +0100 | 2020-06-07 00:00:00 +0100 +0100 | -| Wed, 8 Feb 2023 19:00:46 +1100 (AEDT) | 2023-02-08 19:00:46 +1100 +1100 | -| Mon Jan 2 15:04:05 2006 | 2006-01-02 15:04:05 +0000 UTC | -| Mon Jan 2 15:04:05 MST 2006 | 2006-01-02 15:04:05 +0000 MST | -| Monday Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | -| Mon Jan 2 15:04:05.103786 2006 | 2006-01-02 15:04:05.103786 +0000 UTC | -| Mon Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | -| Mon 02 Jan 2006 03:04:05 
PM UTC | 2006-01-02 15:04:05 +0000 UTC | -| Monday 02 Jan 2006 03:04:05 PM MST | 2006-01-02 15:04:05 +0000 MST | -| Mon Aug 10 15:44:11 UTC+0000 2015 | 2015-08-10 15:44:11 +0000 UTC | -| Thu Apr 7 15:13:13 2005 -0700 | 2005-04-07 15:13:13 -0700 -0700 | -| Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time) | 2015-07-03 06:04:07 -0700 PST | -| Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) | 2015-07-03 18:04:07 +0100 +0100 | -| Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00) | 2021-01-03 00:12:23 +0800 +0800 | -| 2013 May 2 | 2013-05-02 00:00:00 +0000 UTC | -| 2013 May 02 11:37:55 | 2013-05-02 11:37:55 +0000 UTC | -| 06/Jan/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | -| 06/January/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | -| 06/Jan/2008:15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | -| 06/January/2008:08:11:17 -0700 | 2008-01-06 08:11:17 -0700 -0700 | -| 3/31/2014 | 2014-03-31 00:00:00 +0000 UTC | -| 03/31/2014 | 2014-03-31 00:00:00 +0000 UTC | -| 08/21/71 | 1971-08-21 00:00:00 +0000 UTC | -| 8/1/71 | 1971-08-01 00:00:00 +0000 UTC | -| 4/8/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 04/08/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 04/08/2014, 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 4/8/14 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 04/2/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 8/8/1965 12:00:00 AM | 1965-08-08 00:00:00 +0000 UTC | -| 8/8/1965 01:00:01 PM | 1965-08-08 13:00:01 +0000 UTC | -| 8/8/1965 01:00 PM | 1965-08-08 13:00:00 +0000 UTC | -| 8/8/1965 1:00 PM | 1965-08-08 13:00:00 +0000 UTC | -| 8/8/1965 12:00 AM | 1965-08-08 00:00:00 +0000 UTC | -| 4/02/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 03/19/2012 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | -| 03/19/2012 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| Oct/ 7/1970 | 1970-10-07 00:00:00 +0000 UTC | -| Oct/03/1970 22:33:44 | 1970-10-03 22:33:44 +0000 UTC | -| February/03/1970 11:33:44.555 PM PST | 1970-02-03 23:33:44.555 +0000 
PST | -| 2014/3/31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014/03/31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014/4/8 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014/04/08 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014/04/2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2014/4/02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2012/03/19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | -| 2012/03/19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| Fri, 03-Jul-15 08:08:08 CEST | 2015-07-03 08:08:08 +0000 CEST | -| Monday, 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | -| Monday, 02 Jan 2006 15:04:05 -0600 | 2006-01-02 15:04:05 -0600 -0600 | -| 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | -| 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC | -| 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 | -| 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC | -| 2009-08-12T22:15:09.988 | 2009-08-12 22:15:09.988 +0000 UTC | -| 2009-08-12T22:15:09Z | 2009-08-12 22:15:09 +0000 UTC | -| 2009-08-12T22:15:09.52Z | 2009-08-12 22:15:09.52 +0000 UTC | -| 2017-07-19T03:21:51:897+0100 | 2017-07-19 03:21:51.897 +0100 +0100 | -| 2019-05-29T08:41-04 | 2019-05-29 08:41:00 -0400 -0400 | -| 2014-04-26 17:24:37.3186369 | 2014-04-26 17:24:37.3186369 +0000 UTC | -| 2012-08-03 18:31:59.257000000 | 2012-08-03 18:31:59.257 +0000 UTC | -| 2014-04-26 17:24:37.123 | 2014-04-26 17:24:37.123 +0000 UTC | -| 2014-04-01 12:01am | 2014-04-01 00:01:00 +0000 UTC | -| 2014-04-01 12:01:59.765 AM | 2014-04-01 00:01:59.765 +0000 UTC | -| 2014-04-01 12:01:59,765 | 2014-04-01 12:01:59.765 +0000 UTC | -| 2014-04-01 22:43 | 2014-04-01 22:43:00 +0000 UTC | -| 2014-04-01 22:43:22 | 2014-04-01 22:43:22 +0000 UTC | -| 2014-12-16 06:20:00 UTC | 2014-12-16 06:20:00 +0000 UTC | -| 2014-12-16 06:20:00 GMT | 2014-12-16 06:20:00 +0000 GMT | -| 2014-04-26 05:24:37 PM | 2014-04-26 17:24:37 +0000 UTC | -| 2014-04-26 13:13:43 +0800 | 2014-04-26 13:13:43 +0800 +0800 | -| 2014-04-26 13:13:43 
+0800 +08 | 2014-04-26 13:13:43 +0800 +0800 | -| 2014-04-26 13:13:44 +09:00 | 2014-04-26 13:13:44 +0900 +0900 | -| 2012-08-03 18:31:59.257000000 +0000 UTC | 2012-08-03 18:31:59.257 +0000 UTC | -| 2015-09-30 18:48:56.35272715 +0000 UTC | 2015-09-30 18:48:56.35272715 +0000 UTC | -| 2015-02-18 00:12:00 +0000 GMT | 2015-02-18 00:12:00 +0000 GMT | -| 2015-02-18 00:12:00 +0000 UTC | 2015-02-18 00:12:00 +0000 UTC | -| 2015-02-08 03:02:00 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00 +0300 MSK | -| 2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00.001 +0300 MSK | -| 2017-07-19 03:21:51+00:00 | 2017-07-19 03:21:51 +0000 UTC | -| 2017-04-03 22:32:14.322 CET | 2017-04-03 22:32:14.322 +0000 CET | -| 2017-04-03 22:32:14,322 CET | 2017-04-03 22:32:14.322 +0000 CET | -| 2017-04-03 22:32:14:322 CET | 2017-04-03 22:32:14.322 +0000 CET | -| 2018-09-30 08:09:13.123PM PMDT | 2018-09-30 20:09:13.123 +0000 PMDT | -| 2018-09-30 08:09:13.123 am AMT | 2018-09-30 08:09:13.123 +0000 AMT | -| 2014-04-26 | 2014-04-26 00:00:00 +0000 UTC | -| 2014-04 | 2014-04-01 00:00:00 +0000 UTC | -| 2014 | 2014-01-01 00:00:00 +0000 UTC | -| 2020-07-20+08:00 | 2020-07-20 00:00:00 +0800 +0800 | -| 2020-07-20+0800 | 2020-07-20 00:00:00 +0800 +0800 | -| 2013-Feb-03 | 2013-02-03 00:00:00 +0000 UTC | -| 2013-February-03 09:07:08.123 | 2013-02-03 09:07:08.123 +0000 UTC | -| 03-Feb-13 | 2013-02-03 00:00:00 +0000 UTC | -| 03-Feb-2013 | 2013-02-03 00:00:00 +0000 UTC | -| 07-Feb-2004 09:07:07 +0200 | 2004-02-07 09:07:07 +0200 +0200 | -| 07-February-2004 09:07:07 +0200 | 2004-02-07 09:07:07 +0200 +0200 | -| 28-02-02 | 2002-02-28 00:00:00 +0000 UTC | -| 28-02-02 15:16:17 | 2002-02-28 15:16:17 +0000 UTC | -| 28-02-2002 | 2002-02-28 00:00:00 +0000 UTC | -| 28-02-2002 15:16:17 | 2002-02-28 15:16:17 +0000 UTC | -| 3.31.2014 | 2014-03-31 00:00:00 +0000 UTC | -| 03.31.14 | 2014-03-31 00:00:00 +0000 UTC | -| 03.31.2014 | 2014-03-31 00:00:00 +0000 UTC | -| 03.31.2014 10:11:59 MST | 2014-03-31 10:11:59 +0000 
MST | -| 03.31.2014 10:11:59.3186369Z | 2014-03-31 10:11:59.3186369 +0000 UTC | -| 2014.03 | 2014-03-01 00:00:00 +0000 UTC | -| 2014.03.30 | 2014-03-30 00:00:00 +0000 UTC | -| 2014.03.30 08:33pm | 2014-03-30 20:33:00 +0000 UTC | -| 2014.03.30T08:33:44.555 PM -0700 MST | 2014-03-30 20:33:44.555 -0700 MST | -| 2014.03.30-0600 | 2014-03-30 00:00:00 -0600 -0600 | -| 2014:3:31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014:03:31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014:4:8 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014:04:08 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014:04:2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2014:4:02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2012:03:19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | -| 2012:03:19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| 08:03:2012 | 2012-08-03 00:00:00 +0000 UTC | -| 08:04:2012 18:31:59+00:00 | 2012-08-04 18:31:59 +0000 UTC | -| 20140601 | 2014-06-01 00:00:00 +0000 UTC | -| 20140722105203 | 2014-07-22 10:52:03 +0000 UTC | -| 20140722105203.364 | 2014-07-22 10:52:03.364 +0000 UTC | -| 2014年4月25日 | 2014-04-25 00:00:00 +0000 UTC | -| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | -| 2014年04月08日 19:17:22 -0700 | 2014-04-08 19:17:22 -0700 -0700 | -| 8-Mar-2018::14:09:27 | 2018-03-08 14:09:27 +0000 UTC | -| 08-03-2018::02:09:29 PM | 2018-03-08 14:09:29 +0000 UTC | -| 171113 14:14:20 | 2017-11-13 14:14:20 +0000 UTC | -| 190910 11:51:49 | 2019-09-10 11:51:49 +0000 UTC | -| 1332151919 | 2012-03-19 10:11:59 +0000 UTC | -| 1384216367189 | 2013-11-12 00:32:47.189 +0000 UTC | -| 1384216367111222 | 2013-11-12 00:32:47.111222 +0000 UTC | -| 1384216367111222333 | 2013-11-12 00:32:47.111222333 +0000 UTC | -+----------------------------------------------------------+-----------------------------------------+ ++------------------------------------------------------------+-----------------------------------------+ +| Input | Parsed, and Output as %v | 
++------------------------------------------------------------+-----------------------------------------+ +| May 8, 2009 5:57:51 PM | 2009-05-08 17:57:51 +0000 UTC | +| oct 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| oct 7, '70 | 1970-10-07 00:00:00 +0000 UTC | +| oct. 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| oct. 7, 70 | 1970-10-07 00:00:00 +0000 UTC | +| October 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| October 7th, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| Sept. 7, 1970 11:15:26pm | 1970-09-07 23:15:26 +0000 UTC | +| Sep 7 2009 11:15:26.123 PM PST | 2009-09-07 23:15:26.123 +0000 PST | +| September 3rd, 2009 11:15:26.123456789pm | 2009-09-03 23:15:26.123456789 +0000 UTC | +| September 17 2012 10:09am | 2012-09-17 10:09:00 +0000 UTC | +| September 17, 2012, 10:10:09 | 2012-09-17 10:10:09 +0000 UTC | +| Sep 17, 2012 at 10:02am (EST) | 2012-09-17 10:02:00 +0000 EST | +| September 17, 2012 at 10:09am PST-08 | 2012-09-17 10:09:00 -0800 PST | +| September 17 2012 5:00pm UTC-0700 | 2012-09-17 17:00:00 -0700 -0700 | +| September 17 2012 5:00pm GMT-0700 | 2012-09-17 17:00:00 -0700 -0700 | +| 7 oct 70 | 1970-10-07 00:00:00 +0000 UTC | +| 7 Oct 1970 | 1970-10-07 00:00:00 +0000 UTC | +| 7 September 1970 23:15 | 1970-09-07 23:15:00 +0000 UTC | +| 7 September 1970 11:15:26pm | 1970-09-07 23:15:26 +0000 UTC | +| 03 February 2013 | 2013-02-03 00:00:00 +0000 UTC | +| 12 Feb 2006, 19:17 | 2006-02-12 19:17:00 +0000 UTC | +| 12 Feb 2006 19:17 | 2006-02-12 19:17:00 +0000 UTC | +| 14 May 2019 19:11:40.164 | 2019-05-14 19:11:40.164 +0000 UTC | +| 4th Sep 2012 | 2012-09-04 00:00:00 +0000 UTC | +| 1st February 2018 13:58:24 | 2018-02-01 13:58:24 +0000 UTC | +| Mon, 02 Jan 2006 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | +| Mon, 02 Jan 2006 15:04:05 -0700 | 2006-01-02 15:04:05 -0700 -0700 | +| Tue, 11 Jul 2017 16:28:13 +0200 (CEST) | 2017-07-11 16:28:13 +0200 +0200 | +| Mon 30 Sep 2018 09:09:09 PM UTC | 2018-09-30 21:09:09 +0000 UTC | +| Sun, 07 Jun 2020 00:00:00 +0100 | 
2020-06-07 00:00:00 +0100 +0100 | +| Wed, 8 Feb 2023 19:00:46 +1100 (AEDT) | 2023-02-08 19:00:46 +1100 +1100 | +| Mon Jan 2 15:04:05 2006 | 2006-01-02 15:04:05 +0000 UTC | +| Mon Jan 2 15:04:05 MST 2006 | 2006-01-02 15:04:05 +0000 MST | +| Monday Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | +| Mon Jan 2 15:04:05.103786 2006 | 2006-01-02 15:04:05.103786 +0000 UTC | +| Mon Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | +| Mon 02 Jan 2006 03:04:05 PM UTC | 2006-01-02 15:04:05 +0000 UTC | +| Monday 02 Jan 2006 03:04:05 PM MST | 2006-01-02 15:04:05 +0000 MST | +| Mon Aug 10 15:44:11 UTC+0000 2015 | 2015-08-10 15:44:11 +0000 UTC | +| Thu Apr 7 15:13:13 2005 -0700 | 2005-04-07 15:13:13 -0700 -0700 | +| Thu Apr 7 15:13:13 2005 -07:00 | 2005-04-07 15:13:13 -0700 -0700 | +| Thu Apr 7 15:13:13 2005 -07:00 PST | 2005-04-07 15:13:13 -0700 PST | +| Thu Apr 7 15:13:13 2005 -07:00 PST (Pacific Standard Time) | 2005-04-07 15:13:13 -0700 PST | +| Thu Apr 7 15:13:13 -0700 2005 | 2005-04-07 15:13:13 -0700 -0700 | +| Thu Apr 7 15:13:13 -07:00 2005 | 2005-04-07 15:13:13 -0700 -0700 | +| Thu Apr 7 15:13:13 -0700 PST 2005 | 2005-04-07 15:13:13 -0700 PST | +| Thu Apr 7 15:13:13 -07:00 PST 2005 | 2005-04-07 15:13:13 -0700 PST | +| Thu Apr 7 15:13:13 PST 2005 | 2005-04-07 15:13:13 +0000 PST | +| Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time) | 2015-07-03 06:04:07 -0700 PST | +| Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) | 2015-07-03 18:04:07 +0100 +0100 | +| Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00) | 2021-01-03 00:12:23 +0800 +0800 | +| 2013 May 2 | 2013-05-02 00:00:00 +0000 UTC | +| 2013 May 02 11:37:55 | 2013-05-02 11:37:55 +0000 UTC | +| 06/Jan/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | +| 06/January/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | +| 06/Jan/2008:15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | +| 06/January/2008:08:11:17 -0700 | 2008-01-06 08:11:17 -0700 -0700 | +| 3/31/2014 | 2014-03-31 
00:00:00 +0000 UTC | +| 03/31/2014 | 2014-03-31 00:00:00 +0000 UTC | +| 08/21/71 | 1971-08-21 00:00:00 +0000 UTC | +| 8/1/71 | 1971-08-01 00:00:00 +0000 UTC | +| 4/8/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 04/08/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 04/08/2014, 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 4/8/14 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 04/2/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 8/8/1965 1:00 PM | 1965-08-08 13:00:00 +0000 UTC | +| 8/8/1965 01:00 PM | 1965-08-08 13:00:00 +0000 UTC | +| 8/8/1965 12:00 AM | 1965-08-08 00:00:00 +0000 UTC | +| 8/8/1965 12:00:00AM | 1965-08-08 00:00:00 +0000 UTC | +| 8/8/1965 01:00:01 PM | 1965-08-08 13:00:01 +0000 UTC | +| 8/8/1965 01:00:01PM -0700 | 1965-08-08 13:00:01 -0700 -0700 | +| 8/8/1965 13:00:01 -0700 PST | 1965-08-08 13:00:01 -0700 PST | +| 8/8/1965 01:00:01 PM -0700 PST | 1965-08-08 13:00:01 -0700 PST | +| 8/8/1965 01:00:01 PM -07:00 PST (Pacific Standard Time) | 1965-08-08 13:00:01 -0700 PST | +| 4/02/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 03/19/2012 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | +| 03/19/2012 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | +| Oct/ 7/1970 | 1970-10-07 00:00:00 +0000 UTC | +| Oct/03/1970 22:33:44 | 1970-10-03 22:33:44 +0000 UTC | +| February/03/1970 11:33:44.555 PM PST | 1970-02-03 23:33:44.555 +0000 PST | +| 2014/3/31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014/03/31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014/4/8 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014/04/08 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014/04/2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2014/4/02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2012/03/19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | +| 2012/03/19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | +| Fri, 03-Jul-15 08:08:08 CEST | 2015-07-03 08:08:08 +0000 CEST | +| Monday, 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | +| Monday, 02 Jan 2006 15:04:05 -0600 | 2006-01-02 
15:04:05 -0600 -0600 | +| 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | +| 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC | +| 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 | +| 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC | +| 2009-08-12T22:15:09.988 | 2009-08-12 22:15:09.988 +0000 UTC | +| 2009-08-12T22:15:09Z | 2009-08-12 22:15:09 +0000 UTC | +| 2009-08-12T22:15:09.52Z | 2009-08-12 22:15:09.52 +0000 UTC | +| 2017-07-19T03:21:51:897+0100 | 2017-07-19 03:21:51.897 +0100 +0100 | +| 2019-05-29T08:41-04 | 2019-05-29 08:41:00 -0400 -0400 | +| 2014-04-26 17:24:37.3186369 | 2014-04-26 17:24:37.3186369 +0000 UTC | +| 2012-08-03 18:31:59.257000000 | 2012-08-03 18:31:59.257 +0000 UTC | +| 2014-04-26 17:24:37.123 | 2014-04-26 17:24:37.123 +0000 UTC | +| 2014-04-01 12:01am | 2014-04-01 00:01:00 +0000 UTC | +| 2014-04-01 12:01:59.765 AM | 2014-04-01 00:01:59.765 +0000 UTC | +| 2014-04-01 12:01:59,765 | 2014-04-01 12:01:59.765 +0000 UTC | +| 2014-04-01 22:43 | 2014-04-01 22:43:00 +0000 UTC | +| 2014-04-01 22:43:22 | 2014-04-01 22:43:22 +0000 UTC | +| 2014-12-16 06:20:00 UTC | 2014-12-16 06:20:00 +0000 UTC | +| 2014-12-16 06:20:00 GMT | 2014-12-16 06:20:00 +0000 GMT | +| 2014-04-26 05:24:37 PM | 2014-04-26 17:24:37 +0000 UTC | +| 2014-04-26 13:13:43 +0800 | 2014-04-26 13:13:43 +0800 +0800 | +| 2014-04-26 13:13:43 +0800 +08 | 2014-04-26 13:13:43 +0800 +0800 | +| 2014-04-26 13:13:44 +09:00 | 2014-04-26 13:13:44 +0900 +0900 | +| 2012-08-03 18:31:59.257000000 +0000 UTC | 2012-08-03 18:31:59.257 +0000 UTC | +| 2015-09-30 18:48:56.35272715 +0000 UTC | 2015-09-30 18:48:56.35272715 +0000 UTC | +| 2015-02-18 00:12:00 +0000 GMT | 2015-02-18 00:12:00 +0000 GMT | +| 2015-02-18 00:12:00 +0000 UTC | 2015-02-18 00:12:00 +0000 UTC | +| 2015-02-08 03:02:00 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00 +0300 MSK | +| 2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00.001 +0300 MSK | +| 2017-07-19 03:21:51+00:00 | 2017-07-19 03:21:51 
+0000 UTC | +| 2017-04-03 22:32:14.322 CET | 2017-04-03 22:32:14.322 +0000 CET | +| 2017-04-03 22:32:14,322 CET | 2017-04-03 22:32:14.322 +0000 CET | +| 2017-04-03 22:32:14:322 CET | 2017-04-03 22:32:14.322 +0000 CET | +| 2018-09-30 08:09:13.123PM PMDT | 2018-09-30 20:09:13.123 +0000 PMDT | +| 2018-09-30 08:09:13.123 am AMT | 2018-09-30 08:09:13.123 +0000 AMT | +| 2014-04-26 | 2014-04-26 00:00:00 +0000 UTC | +| 2014-04 | 2014-04-01 00:00:00 +0000 UTC | +| 2014 | 2014-01-01 00:00:00 +0000 UTC | +| 2020-07-20+08:00 | 2020-07-20 00:00:00 +0800 +0800 | +| 2020-07-20+0800 | 2020-07-20 00:00:00 +0800 +0800 | +| 2013-Feb-03 | 2013-02-03 00:00:00 +0000 UTC | +| 2013-February-03 09:07:08.123 | 2013-02-03 09:07:08.123 +0000 UTC | +| 03-Feb-13 | 2013-02-03 00:00:00 +0000 UTC | +| 03-Feb-2013 | 2013-02-03 00:00:00 +0000 UTC | +| 07-Feb-2004 09:07:07 +0200 | 2004-02-07 09:07:07 +0200 +0200 | +| 07-February-2004 09:07:07 +0200 | 2004-02-07 09:07:07 +0200 +0200 | +| 28-02-02 | 2002-02-28 00:00:00 +0000 UTC | +| 28-02-02 15:16:17 | 2002-02-28 15:16:17 +0000 UTC | +| 28-02-2002 | 2002-02-28 00:00:00 +0000 UTC | +| 28-02-2002 15:16:17 | 2002-02-28 15:16:17 +0000 UTC | +| 3.31.2014 | 2014-03-31 00:00:00 +0000 UTC | +| 03.31.14 | 2014-03-31 00:00:00 +0000 UTC | +| 03.31.2014 | 2014-03-31 00:00:00 +0000 UTC | +| 03.31.2014 10:11:59 MST | 2014-03-31 10:11:59 +0000 MST | +| 03.31.2014 10:11:59.3186369Z | 2014-03-31 10:11:59.3186369 +0000 UTC | +| 2014.03 | 2014-03-01 00:00:00 +0000 UTC | +| 2014.03.30 | 2014-03-30 00:00:00 +0000 UTC | +| 2014.03.30 08:33pm | 2014-03-30 20:33:00 +0000 UTC | +| 2014.03.30T08:33:44.555 PM -0700 MST | 2014-03-30 20:33:44.555 -0700 MST | +| 2014.03.30-0600 | 2014-03-30 00:00:00 -0600 -0600 | +| 2014:3:31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014:03:31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014:4:8 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014:04:08 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014:04:2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2014:4:02 
03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2012:03:19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | +| 2012:03:19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | +| 08:03:2012 | 2012-08-03 00:00:00 +0000 UTC | +| 08:04:2012 18:31:59+00:00 | 2012-08-04 18:31:59 +0000 UTC | +| 20140601 | 2014-06-01 00:00:00 +0000 UTC | +| 20140722105203 | 2014-07-22 10:52:03 +0000 UTC | +| 20140722105203.364 | 2014-07-22 10:52:03.364 +0000 UTC | +| 2014年4月25日 | 2014-04-25 00:00:00 +0000 UTC | +| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | +| 2014年04月08日 19:17:22 -0700 | 2014-04-08 19:17:22 -0700 -0700 | +| 8-Mar-2018::14:09:27 | 2018-03-08 14:09:27 +0000 UTC | +| 08-03-2018::02:09:29 PM | 2018-03-08 14:09:29 +0000 UTC | +| 171113 14:14:20 | 2017-11-13 14:14:20 +0000 UTC | +| 190910 11:51:49 | 2019-09-10 11:51:49 +0000 UTC | +| 1332151919 | 2012-03-19 10:11:59 +0000 UTC | +| 1384216367189 | 2013-11-12 00:32:47.189 +0000 UTC | +| 1384216367111222 | 2013-11-12 00:32:47.111222 +0000 UTC | +| 1384216367111222333 | 2013-11-12 00:32:47.111222333 +0000 UTC | ++------------------------------------------------------------+-----------------------------------------+ */ ``` diff --git a/example/main.go b/example/main.go index d670f14..b10857c 100644 --- a/example/main.go +++ b/example/main.go @@ -60,6 +60,15 @@ var examples = []string{ "Mon Aug 10 15:44:11 UTC+0000 2015", // git log default date format "Thu Apr 7 15:13:13 2005 -0700", + // variants of git log default date format + "Thu Apr 7 15:13:13 2005 -07:00", + "Thu Apr 7 15:13:13 2005 -07:00 PST", + "Thu Apr 7 15:13:13 2005 -07:00 PST (Pacific Standard Time)", + "Thu Apr 7 15:13:13 -0700 2005", + "Thu Apr 7 15:13:13 -07:00 2005", + "Thu Apr 7 15:13:13 -0700 PST 2005", + "Thu Apr 7 15:13:13 -07:00 PST 2005", + "Thu Apr 7 15:13:13 PST 2005", // Variants of the above with a (full time zone description) "Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time)", "Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)", @@ -82,11 
+91,15 @@ var examples = []string{ "04/08/2014, 22:05", "4/8/14 22:05", "04/2/2014 03:00:51", - "8/8/1965 12:00:00 AM", - "8/8/1965 01:00:01 PM", - "8/8/1965 01:00 PM", "8/8/1965 1:00 PM", + "8/8/1965 01:00 PM", "8/8/1965 12:00 AM", + "8/8/1965 12:00:00AM", + "8/8/1965 01:00:01 PM", + "8/8/1965 01:00:01PM -0700", + "8/8/1965 13:00:01 -0700 PST", + "8/8/1965 01:00:01 PM -0700 PST", + "8/8/1965 01:00:01 PM -07:00 PST (Pacific Standard Time)", "4/02/2014 03:00:51", "03/19/2012 10:11:59", "03/19/2012 10:11:59.3186369", @@ -241,173 +254,185 @@ func main() { } /* -+----------------------------------------------------------+-----------------------------------------+ -| Input | Parsed, and Output as %v | -+----------------------------------------------------------+-----------------------------------------+ -| May 8, 2009 5:57:51 PM | 2009-05-08 17:57:51 +0000 UTC | -| oct 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| oct 7, '70 | 1970-10-07 00:00:00 +0000 UTC | -| oct. 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| oct. 7, 70 | 1970-10-07 00:00:00 +0000 UTC | -| October 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| October 7th, 1970 | 1970-10-07 00:00:00 +0000 UTC | -| Sept. 
7, 1970 11:15:26pm | 1970-09-07 23:15:26 +0000 UTC | -| Sep 7 2009 11:15:26.123 PM PST | 2009-09-07 23:15:26.123 +0000 PST | -| September 3rd, 2009 11:15:26.123456789pm | 2009-09-03 23:15:26.123456789 +0000 UTC | -| September 17 2012 10:09am | 2012-09-17 10:09:00 +0000 UTC | -| September 17, 2012, 10:10:09 | 2012-09-17 10:10:09 +0000 UTC | -| Sep 17, 2012 at 10:02am (EST) | 2012-09-17 10:02:00 +0000 EST | -| September 17, 2012 at 10:09am PST-08 | 2012-09-17 10:09:00 -0800 PST | -| September 17 2012 5:00pm UTC-0700 | 2012-09-17 17:00:00 -0700 -0700 | -| September 17 2012 5:00pm GMT-0700 | 2012-09-17 17:00:00 -0700 -0700 | -| 7 oct 70 | 1970-10-07 00:00:00 +0000 UTC | -| 7 Oct 1970 | 1970-10-07 00:00:00 +0000 UTC | -| 7 September 1970 23:15 | 1970-09-07 23:15:00 +0000 UTC | -| 7 September 1970 11:15:26pm | 1970-09-07 23:15:26 +0000 UTC | -| 03 February 2013 | 2013-02-03 00:00:00 +0000 UTC | -| 12 Feb 2006, 19:17 | 2006-02-12 19:17:00 +0000 UTC | -| 12 Feb 2006 19:17 | 2006-02-12 19:17:00 +0000 UTC | -| 14 May 2019 19:11:40.164 | 2019-05-14 19:11:40.164 +0000 UTC | -| 4th Sep 2012 | 2012-09-04 00:00:00 +0000 UTC | -| 1st February 2018 13:58:24 | 2018-02-01 13:58:24 +0000 UTC | -| Mon, 02 Jan 2006 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | -| Mon, 02 Jan 2006 15:04:05 -0700 | 2006-01-02 15:04:05 -0700 -0700 | -| Tue, 11 Jul 2017 16:28:13 +0200 (CEST) | 2017-07-11 16:28:13 +0200 +0200 | -| Mon 30 Sep 2018 09:09:09 PM UTC | 2018-09-30 21:09:09 +0000 UTC | -| Sun, 07 Jun 2020 00:00:00 +0100 | 2020-06-07 00:00:00 +0100 +0100 | -| Wed, 8 Feb 2023 19:00:46 +1100 (AEDT) | 2023-02-08 19:00:46 +1100 +1100 | -| Mon Jan 2 15:04:05 2006 | 2006-01-02 15:04:05 +0000 UTC | -| Mon Jan 2 15:04:05 MST 2006 | 2006-01-02 15:04:05 +0000 MST | -| Monday Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | -| Mon Jan 2 15:04:05.103786 2006 | 2006-01-02 15:04:05.103786 +0000 UTC | -| Mon Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | -| Mon 02 Jan 2006 03:04:05 
PM UTC | 2006-01-02 15:04:05 +0000 UTC | -| Monday 02 Jan 2006 03:04:05 PM MST | 2006-01-02 15:04:05 +0000 MST | -| Mon Aug 10 15:44:11 UTC+0000 2015 | 2015-08-10 15:44:11 +0000 UTC | -| Thu Apr 7 15:13:13 2005 -0700 | 2005-04-07 15:13:13 -0700 -0700 | -| Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time) | 2015-07-03 06:04:07 -0700 PST | -| Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) | 2015-07-03 18:04:07 +0100 +0100 | -| Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00) | 2021-01-03 00:12:23 +0800 +0800 | -| 2013 May 2 | 2013-05-02 00:00:00 +0000 UTC | -| 2013 May 02 11:37:55 | 2013-05-02 11:37:55 +0000 UTC | -| 06/Jan/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | -| 06/January/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | -| 06/Jan/2008:15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | -| 06/January/2008:08:11:17 -0700 | 2008-01-06 08:11:17 -0700 -0700 | -| 3/31/2014 | 2014-03-31 00:00:00 +0000 UTC | -| 03/31/2014 | 2014-03-31 00:00:00 +0000 UTC | -| 08/21/71 | 1971-08-21 00:00:00 +0000 UTC | -| 8/1/71 | 1971-08-01 00:00:00 +0000 UTC | -| 4/8/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 04/08/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 04/08/2014, 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 4/8/14 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 04/2/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 8/8/1965 12:00:00 AM | 1965-08-08 00:00:00 +0000 UTC | -| 8/8/1965 01:00:01 PM | 1965-08-08 13:00:01 +0000 UTC | -| 8/8/1965 01:00 PM | 1965-08-08 13:00:00 +0000 UTC | -| 8/8/1965 1:00 PM | 1965-08-08 13:00:00 +0000 UTC | -| 8/8/1965 12:00 AM | 1965-08-08 00:00:00 +0000 UTC | -| 4/02/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 03/19/2012 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | -| 03/19/2012 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| Oct/ 7/1970 | 1970-10-07 00:00:00 +0000 UTC | -| Oct/03/1970 22:33:44 | 1970-10-03 22:33:44 +0000 UTC | -| February/03/1970 11:33:44.555 PM PST | 1970-02-03 23:33:44.555 +0000 
PST | -| 2014/3/31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014/03/31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014/4/8 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014/04/08 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014/04/2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2014/4/02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2012/03/19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | -| 2012/03/19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| Fri, 03-Jul-15 08:08:08 CEST | 2015-07-03 08:08:08 +0000 CEST | -| Monday, 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | -| Monday, 02 Jan 2006 15:04:05 -0600 | 2006-01-02 15:04:05 -0600 -0600 | -| 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | -| 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC | -| 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 | -| 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC | -| 2009-08-12T22:15:09.988 | 2009-08-12 22:15:09.988 +0000 UTC | -| 2009-08-12T22:15:09Z | 2009-08-12 22:15:09 +0000 UTC | -| 2009-08-12T22:15:09.52Z | 2009-08-12 22:15:09.52 +0000 UTC | -| 2017-07-19T03:21:51:897+0100 | 2017-07-19 03:21:51.897 +0100 +0100 | -| 2019-05-29T08:41-04 | 2019-05-29 08:41:00 -0400 -0400 | -| 2014-04-26 17:24:37.3186369 | 2014-04-26 17:24:37.3186369 +0000 UTC | -| 2012-08-03 18:31:59.257000000 | 2012-08-03 18:31:59.257 +0000 UTC | -| 2014-04-26 17:24:37.123 | 2014-04-26 17:24:37.123 +0000 UTC | -| 2014-04-01 12:01am | 2014-04-01 00:01:00 +0000 UTC | -| 2014-04-01 12:01:59.765 AM | 2014-04-01 00:01:59.765 +0000 UTC | -| 2014-04-01 12:01:59,765 | 2014-04-01 12:01:59.765 +0000 UTC | -| 2014-04-01 22:43 | 2014-04-01 22:43:00 +0000 UTC | -| 2014-04-01 22:43:22 | 2014-04-01 22:43:22 +0000 UTC | -| 2014-12-16 06:20:00 UTC | 2014-12-16 06:20:00 +0000 UTC | -| 2014-12-16 06:20:00 GMT | 2014-12-16 06:20:00 +0000 GMT | -| 2014-04-26 05:24:37 PM | 2014-04-26 17:24:37 +0000 UTC | -| 2014-04-26 13:13:43 +0800 | 2014-04-26 13:13:43 +0800 +0800 | -| 2014-04-26 13:13:43 
+0800 +08 | 2014-04-26 13:13:43 +0800 +0800 | -| 2014-04-26 13:13:44 +09:00 | 2014-04-26 13:13:44 +0900 +0900 | -| 2012-08-03 18:31:59.257000000 +0000 UTC | 2012-08-03 18:31:59.257 +0000 UTC | -| 2015-09-30 18:48:56.35272715 +0000 UTC | 2015-09-30 18:48:56.35272715 +0000 UTC | -| 2015-02-18 00:12:00 +0000 GMT | 2015-02-18 00:12:00 +0000 GMT | -| 2015-02-18 00:12:00 +0000 UTC | 2015-02-18 00:12:00 +0000 UTC | -| 2015-02-08 03:02:00 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00 +0300 MSK | -| 2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00.001 +0300 MSK | -| 2017-07-19 03:21:51+00:00 | 2017-07-19 03:21:51 +0000 UTC | -| 2017-04-03 22:32:14.322 CET | 2017-04-03 22:32:14.322 +0000 CET | -| 2017-04-03 22:32:14,322 CET | 2017-04-03 22:32:14.322 +0000 CET | -| 2017-04-03 22:32:14:322 CET | 2017-04-03 22:32:14.322 +0000 CET | -| 2018-09-30 08:09:13.123PM PMDT | 2018-09-30 20:09:13.123 +0000 PMDT | -| 2018-09-30 08:09:13.123 am AMT | 2018-09-30 08:09:13.123 +0000 AMT | -| 2014-04-26 | 2014-04-26 00:00:00 +0000 UTC | -| 2014-04 | 2014-04-01 00:00:00 +0000 UTC | -| 2014 | 2014-01-01 00:00:00 +0000 UTC | -| 2020-07-20+08:00 | 2020-07-20 00:00:00 +0800 +0800 | -| 2020-07-20+0800 | 2020-07-20 00:00:00 +0800 +0800 | -| 2013-Feb-03 | 2013-02-03 00:00:00 +0000 UTC | -| 2013-February-03 09:07:08.123 | 2013-02-03 09:07:08.123 +0000 UTC | -| 03-Feb-13 | 2013-02-03 00:00:00 +0000 UTC | -| 03-Feb-2013 | 2013-02-03 00:00:00 +0000 UTC | -| 07-Feb-2004 09:07:07 +0200 | 2004-02-07 09:07:07 +0200 +0200 | -| 07-February-2004 09:07:07 +0200 | 2004-02-07 09:07:07 +0200 +0200 | -| 28-02-02 | 2002-02-28 00:00:00 +0000 UTC | -| 28-02-02 15:16:17 | 2002-02-28 15:16:17 +0000 UTC | -| 28-02-2002 | 2002-02-28 00:00:00 +0000 UTC | -| 28-02-2002 15:16:17 | 2002-02-28 15:16:17 +0000 UTC | -| 3.31.2014 | 2014-03-31 00:00:00 +0000 UTC | -| 03.31.14 | 2014-03-31 00:00:00 +0000 UTC | -| 03.31.2014 | 2014-03-31 00:00:00 +0000 UTC | -| 03.31.2014 10:11:59 MST | 2014-03-31 10:11:59 +0000 
MST | -| 03.31.2014 10:11:59.3186369Z | 2014-03-31 10:11:59.3186369 +0000 UTC | -| 2014.03 | 2014-03-01 00:00:00 +0000 UTC | -| 2014.03.30 | 2014-03-30 00:00:00 +0000 UTC | -| 2014.03.30 08:33pm | 2014-03-30 20:33:00 +0000 UTC | -| 2014.03.30T08:33:44.555 PM -0700 MST | 2014-03-30 20:33:44.555 -0700 MST | -| 2014.03.30-0600 | 2014-03-30 00:00:00 -0600 -0600 | -| 2014:3:31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014:03:31 | 2014-03-31 00:00:00 +0000 UTC | -| 2014:4:8 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014:04:08 22:05 | 2014-04-08 22:05:00 +0000 UTC | -| 2014:04:2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2014:4:02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | -| 2012:03:19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | -| 2012:03:19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| 08:03:2012 | 2012-08-03 00:00:00 +0000 UTC | -| 08:04:2012 18:31:59+00:00 | 2012-08-04 18:31:59 +0000 UTC | -| 20140601 | 2014-06-01 00:00:00 +0000 UTC | -| 20140722105203 | 2014-07-22 10:52:03 +0000 UTC | -| 20140722105203.364 | 2014-07-22 10:52:03.364 +0000 UTC | -| 2014年4月25日 | 2014-04-25 00:00:00 +0000 UTC | -| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | -| 2014年04月08日 19:17:22 -0700 | 2014-04-08 19:17:22 -0700 -0700 | -| 8-Mar-2018::14:09:27 | 2018-03-08 14:09:27 +0000 UTC | -| 08-03-2018::02:09:29 PM | 2018-03-08 14:09:29 +0000 UTC | -| 171113 14:14:20 | 2017-11-13 14:14:20 +0000 UTC | -| 190910 11:51:49 | 2019-09-10 11:51:49 +0000 UTC | -| 1332151919 | 2012-03-19 10:11:59 +0000 UTC | -| 1384216367189 | 2013-11-12 00:32:47.189 +0000 UTC | -| 1384216367111222 | 2013-11-12 00:32:47.111222 +0000 UTC | -| 1384216367111222333 | 2013-11-12 00:32:47.111222333 +0000 UTC | -+----------------------------------------------------------+-----------------------------------------+ ++------------------------------------------------------------+-----------------------------------------+ +| Input | Parsed, and Output as %v | 
++------------------------------------------------------------+-----------------------------------------+ +| May 8, 2009 5:57:51 PM | 2009-05-08 17:57:51 +0000 UTC | +| oct 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| oct 7, '70 | 1970-10-07 00:00:00 +0000 UTC | +| oct. 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| oct. 7, 70 | 1970-10-07 00:00:00 +0000 UTC | +| October 7, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| October 7th, 1970 | 1970-10-07 00:00:00 +0000 UTC | +| Sept. 7, 1970 11:15:26pm | 1970-09-07 23:15:26 +0000 UTC | +| Sep 7 2009 11:15:26.123 PM PST | 2009-09-07 23:15:26.123 +0000 PST | +| September 3rd, 2009 11:15:26.123456789pm | 2009-09-03 23:15:26.123456789 +0000 UTC | +| September 17 2012 10:09am | 2012-09-17 10:09:00 +0000 UTC | +| September 17, 2012, 10:10:09 | 2012-09-17 10:10:09 +0000 UTC | +| Sep 17, 2012 at 10:02am (EST) | 2012-09-17 10:02:00 +0000 EST | +| September 17, 2012 at 10:09am PST-08 | 2012-09-17 10:09:00 -0800 PST | +| September 17 2012 5:00pm UTC-0700 | 2012-09-17 17:00:00 -0700 -0700 | +| September 17 2012 5:00pm GMT-0700 | 2012-09-17 17:00:00 -0700 -0700 | +| 7 oct 70 | 1970-10-07 00:00:00 +0000 UTC | +| 7 Oct 1970 | 1970-10-07 00:00:00 +0000 UTC | +| 7 September 1970 23:15 | 1970-09-07 23:15:00 +0000 UTC | +| 7 September 1970 11:15:26pm | 1970-09-07 23:15:26 +0000 UTC | +| 03 February 2013 | 2013-02-03 00:00:00 +0000 UTC | +| 12 Feb 2006, 19:17 | 2006-02-12 19:17:00 +0000 UTC | +| 12 Feb 2006 19:17 | 2006-02-12 19:17:00 +0000 UTC | +| 14 May 2019 19:11:40.164 | 2019-05-14 19:11:40.164 +0000 UTC | +| 4th Sep 2012 | 2012-09-04 00:00:00 +0000 UTC | +| 1st February 2018 13:58:24 | 2018-02-01 13:58:24 +0000 UTC | +| Mon, 02 Jan 2006 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | +| Mon, 02 Jan 2006 15:04:05 -0700 | 2006-01-02 15:04:05 -0700 -0700 | +| Tue, 11 Jul 2017 16:28:13 +0200 (CEST) | 2017-07-11 16:28:13 +0200 +0200 | +| Mon 30 Sep 2018 09:09:09 PM UTC | 2018-09-30 21:09:09 +0000 UTC | +| Sun, 07 Jun 2020 00:00:00 +0100 | 
2020-06-07 00:00:00 +0100 +0100 | +| Wed, 8 Feb 2023 19:00:46 +1100 (AEDT) | 2023-02-08 19:00:46 +1100 +1100 | +| Mon Jan 2 15:04:05 2006 | 2006-01-02 15:04:05 +0000 UTC | +| Mon Jan 2 15:04:05 MST 2006 | 2006-01-02 15:04:05 +0000 MST | +| Monday Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | +| Mon Jan 2 15:04:05.103786 2006 | 2006-01-02 15:04:05.103786 +0000 UTC | +| Mon Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 | +| Mon 02 Jan 2006 03:04:05 PM UTC | 2006-01-02 15:04:05 +0000 UTC | +| Monday 02 Jan 2006 03:04:05 PM MST | 2006-01-02 15:04:05 +0000 MST | +| Mon Aug 10 15:44:11 UTC+0000 2015 | 2015-08-10 15:44:11 +0000 UTC | +| Thu Apr 7 15:13:13 2005 -0700 | 2005-04-07 15:13:13 -0700 -0700 | +| Thu Apr 7 15:13:13 2005 -07:00 | 2005-04-07 15:13:13 -0700 -0700 | +| Thu Apr 7 15:13:13 2005 -07:00 PST | 2005-04-07 15:13:13 -0700 PST | +| Thu Apr 7 15:13:13 2005 -07:00 PST (Pacific Standard Time) | 2005-04-07 15:13:13 -0700 PST | +| Thu Apr 7 15:13:13 -0700 2005 | 2005-04-07 15:13:13 -0700 -0700 | +| Thu Apr 7 15:13:13 -07:00 2005 | 2005-04-07 15:13:13 -0700 -0700 | +| Thu Apr 7 15:13:13 -0700 PST 2005 | 2005-04-07 15:13:13 -0700 PST | +| Thu Apr 7 15:13:13 -07:00 PST 2005 | 2005-04-07 15:13:13 -0700 PST | +| Thu Apr 7 15:13:13 PST 2005 | 2005-04-07 15:13:13 +0000 PST | +| Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time) | 2015-07-03 06:04:07 -0700 PST | +| Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) | 2015-07-03 18:04:07 +0100 +0100 | +| Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00) | 2021-01-03 00:12:23 +0800 +0800 | +| 2013 May 2 | 2013-05-02 00:00:00 +0000 UTC | +| 2013 May 02 11:37:55 | 2013-05-02 11:37:55 +0000 UTC | +| 06/Jan/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | +| 06/January/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | +| 06/Jan/2008:15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 | +| 06/January/2008:08:11:17 -0700 | 2008-01-06 08:11:17 -0700 -0700 | +| 3/31/2014 | 2014-03-31 
00:00:00 +0000 UTC | +| 03/31/2014 | 2014-03-31 00:00:00 +0000 UTC | +| 08/21/71 | 1971-08-21 00:00:00 +0000 UTC | +| 8/1/71 | 1971-08-01 00:00:00 +0000 UTC | +| 4/8/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 04/08/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 04/08/2014, 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 4/8/14 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 04/2/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 8/8/1965 1:00 PM | 1965-08-08 13:00:00 +0000 UTC | +| 8/8/1965 01:00 PM | 1965-08-08 13:00:00 +0000 UTC | +| 8/8/1965 12:00 AM | 1965-08-08 00:00:00 +0000 UTC | +| 8/8/1965 12:00:00AM | 1965-08-08 00:00:00 +0000 UTC | +| 8/8/1965 01:00:01 PM | 1965-08-08 13:00:01 +0000 UTC | +| 8/8/1965 01:00:01PM -0700 | 1965-08-08 13:00:01 -0700 -0700 | +| 8/8/1965 13:00:01 -0700 PST | 1965-08-08 13:00:01 -0700 PST | +| 8/8/1965 01:00:01 PM -0700 PST | 1965-08-08 13:00:01 -0700 PST | +| 8/8/1965 01:00:01 PM -07:00 PST (Pacific Standard Time) | 1965-08-08 13:00:01 -0700 PST | +| 4/02/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 03/19/2012 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | +| 03/19/2012 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | +| Oct/ 7/1970 | 1970-10-07 00:00:00 +0000 UTC | +| Oct/03/1970 22:33:44 | 1970-10-03 22:33:44 +0000 UTC | +| February/03/1970 11:33:44.555 PM PST | 1970-02-03 23:33:44.555 +0000 PST | +| 2014/3/31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014/03/31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014/4/8 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014/04/08 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014/04/2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2014/4/02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2012/03/19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | +| 2012/03/19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | +| Fri, 03-Jul-15 08:08:08 CEST | 2015-07-03 08:08:08 +0000 CEST | +| Monday, 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | +| Monday, 02 Jan 2006 15:04:05 -0600 | 2006-01-02 
15:04:05 -0600 -0600 | +| 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | +| 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC | +| 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 | +| 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC | +| 2009-08-12T22:15:09.988 | 2009-08-12 22:15:09.988 +0000 UTC | +| 2009-08-12T22:15:09Z | 2009-08-12 22:15:09 +0000 UTC | +| 2009-08-12T22:15:09.52Z | 2009-08-12 22:15:09.52 +0000 UTC | +| 2017-07-19T03:21:51:897+0100 | 2017-07-19 03:21:51.897 +0100 +0100 | +| 2019-05-29T08:41-04 | 2019-05-29 08:41:00 -0400 -0400 | +| 2014-04-26 17:24:37.3186369 | 2014-04-26 17:24:37.3186369 +0000 UTC | +| 2012-08-03 18:31:59.257000000 | 2012-08-03 18:31:59.257 +0000 UTC | +| 2014-04-26 17:24:37.123 | 2014-04-26 17:24:37.123 +0000 UTC | +| 2014-04-01 12:01am | 2014-04-01 00:01:00 +0000 UTC | +| 2014-04-01 12:01:59.765 AM | 2014-04-01 00:01:59.765 +0000 UTC | +| 2014-04-01 12:01:59,765 | 2014-04-01 12:01:59.765 +0000 UTC | +| 2014-04-01 22:43 | 2014-04-01 22:43:00 +0000 UTC | +| 2014-04-01 22:43:22 | 2014-04-01 22:43:22 +0000 UTC | +| 2014-12-16 06:20:00 UTC | 2014-12-16 06:20:00 +0000 UTC | +| 2014-12-16 06:20:00 GMT | 2014-12-16 06:20:00 +0000 GMT | +| 2014-04-26 05:24:37 PM | 2014-04-26 17:24:37 +0000 UTC | +| 2014-04-26 13:13:43 +0800 | 2014-04-26 13:13:43 +0800 +0800 | +| 2014-04-26 13:13:43 +0800 +08 | 2014-04-26 13:13:43 +0800 +0800 | +| 2014-04-26 13:13:44 +09:00 | 2014-04-26 13:13:44 +0900 +0900 | +| 2012-08-03 18:31:59.257000000 +0000 UTC | 2012-08-03 18:31:59.257 +0000 UTC | +| 2015-09-30 18:48:56.35272715 +0000 UTC | 2015-09-30 18:48:56.35272715 +0000 UTC | +| 2015-02-18 00:12:00 +0000 GMT | 2015-02-18 00:12:00 +0000 GMT | +| 2015-02-18 00:12:00 +0000 UTC | 2015-02-18 00:12:00 +0000 UTC | +| 2015-02-08 03:02:00 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00 +0300 MSK | +| 2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00.001 +0300 MSK | +| 2017-07-19 03:21:51+00:00 | 2017-07-19 03:21:51 
+0000 UTC | +| 2017-04-03 22:32:14.322 CET | 2017-04-03 22:32:14.322 +0000 CET | +| 2017-04-03 22:32:14,322 CET | 2017-04-03 22:32:14.322 +0000 CET | +| 2017-04-03 22:32:14:322 CET | 2017-04-03 22:32:14.322 +0000 CET | +| 2018-09-30 08:09:13.123PM PMDT | 2018-09-30 20:09:13.123 +0000 PMDT | +| 2018-09-30 08:09:13.123 am AMT | 2018-09-30 08:09:13.123 +0000 AMT | +| 2014-04-26 | 2014-04-26 00:00:00 +0000 UTC | +| 2014-04 | 2014-04-01 00:00:00 +0000 UTC | +| 2014 | 2014-01-01 00:00:00 +0000 UTC | +| 2020-07-20+08:00 | 2020-07-20 00:00:00 +0800 +0800 | +| 2020-07-20+0800 | 2020-07-20 00:00:00 +0800 +0800 | +| 2013-Feb-03 | 2013-02-03 00:00:00 +0000 UTC | +| 2013-February-03 09:07:08.123 | 2013-02-03 09:07:08.123 +0000 UTC | +| 03-Feb-13 | 2013-02-03 00:00:00 +0000 UTC | +| 03-Feb-2013 | 2013-02-03 00:00:00 +0000 UTC | +| 07-Feb-2004 09:07:07 +0200 | 2004-02-07 09:07:07 +0200 +0200 | +| 07-February-2004 09:07:07 +0200 | 2004-02-07 09:07:07 +0200 +0200 | +| 28-02-02 | 2002-02-28 00:00:00 +0000 UTC | +| 28-02-02 15:16:17 | 2002-02-28 15:16:17 +0000 UTC | +| 28-02-2002 | 2002-02-28 00:00:00 +0000 UTC | +| 28-02-2002 15:16:17 | 2002-02-28 15:16:17 +0000 UTC | +| 3.31.2014 | 2014-03-31 00:00:00 +0000 UTC | +| 03.31.14 | 2014-03-31 00:00:00 +0000 UTC | +| 03.31.2014 | 2014-03-31 00:00:00 +0000 UTC | +| 03.31.2014 10:11:59 MST | 2014-03-31 10:11:59 +0000 MST | +| 03.31.2014 10:11:59.3186369Z | 2014-03-31 10:11:59.3186369 +0000 UTC | +| 2014.03 | 2014-03-01 00:00:00 +0000 UTC | +| 2014.03.30 | 2014-03-30 00:00:00 +0000 UTC | +| 2014.03.30 08:33pm | 2014-03-30 20:33:00 +0000 UTC | +| 2014.03.30T08:33:44.555 PM -0700 MST | 2014-03-30 20:33:44.555 -0700 MST | +| 2014.03.30-0600 | 2014-03-30 00:00:00 -0600 -0600 | +| 2014:3:31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014:03:31 | 2014-03-31 00:00:00 +0000 UTC | +| 2014:4:8 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014:04:08 22:05 | 2014-04-08 22:05:00 +0000 UTC | +| 2014:04:2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2014:4:02 
03:00:51 | 2014-04-02 03:00:51 +0000 UTC | +| 2012:03:19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | +| 2012:03:19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | +| 08:03:2012 | 2012-08-03 00:00:00 +0000 UTC | +| 08:04:2012 18:31:59+00:00 | 2012-08-04 18:31:59 +0000 UTC | +| 20140601 | 2014-06-01 00:00:00 +0000 UTC | +| 20140722105203 | 2014-07-22 10:52:03 +0000 UTC | +| 20140722105203.364 | 2014-07-22 10:52:03.364 +0000 UTC | +| 2014年4月25日 | 2014-04-25 00:00:00 +0000 UTC | +| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | +| 2014年04月08日 19:17:22 -0700 | 2014-04-08 19:17:22 -0700 -0700 | +| 8-Mar-2018::14:09:27 | 2018-03-08 14:09:27 +0000 UTC | +| 08-03-2018::02:09:29 PM | 2018-03-08 14:09:29 +0000 UTC | +| 171113 14:14:20 | 2017-11-13 14:14:20 +0000 UTC | +| 190910 11:51:49 | 2019-09-10 11:51:49 +0000 UTC | +| 1332151919 | 2012-03-19 10:11:59 +0000 UTC | +| 1384216367189 | 2013-11-12 00:32:47.189 +0000 UTC | +| 1384216367111222 | 2013-11-12 00:32:47.111222 +0000 UTC | +| 1384216367111222333 | 2013-11-12 00:32:47.111222333 +0000 UTC | ++------------------------------------------------------------+-----------------------------------------+ */ diff --git a/parseany.go b/parseany.go index b96a8b7..7e6327f 100644 --- a/parseany.go +++ b/parseany.go @@ -123,21 +123,19 @@ const ( timeWsAlpha timeWsAlphaRParen timeWsAlphaWs - timeWsAlphaZoneOffset // 6 + timeWsAlphaWsYear + timeWsAlphaZoneOffset // 7 timeWsAlphaZoneOffsetWs timeWsAlphaZoneOffsetWsYear - timeWsAlphaZoneOffsetWsExtra + timeWsOffsetWsTZDescInParen // overloaded, can come from timeWsAlphaWs, timeWsAlphaZoneOffsetWs, timeWsOffsetWs, timeWsOffsetWsAlphaZoneWs timeWsAMPMMaybe - timeWsAMPM // 11 - timeWsOffset - timeWsOffsetWs // 13 - timeWsOffsetColonAlpha - timeWsOffsetColon - timeWsYear // 16 - timeWsYearOffset - timeOffset - timeOffsetColon - timeOffsetColonAlpha + timeWsAMPM // 12 + timeWsOffset // overloaded, can come from timeWs or timeWsYear + timeWsOffsetWs // 14 + timeWsOffsetWsYear // 
overloaded, can come from timeWsOffsetWs or timeWsOffsetWsAlphaZoneWs (ensures year is only set once) + timeWsOffsetWsAlphaZone + timeWsOffsetWsAlphaZoneWs + timeWsYear timePeriod timePeriodAMPM timeZ @@ -613,13 +611,8 @@ iterRunes: case dateYearDashDashOffset: // 2020-07-20+00:00 - switch r { - case ':': - p.set(p.offseti, "-07:00") - default: - if !unicode.IsDigit(r) { - return p, p.unknownErr(datestr) - } + if r != ':' && !unicode.IsDigit(r) { + return p, p.unknownErr(datestr) } case dateYearDashAlpha: @@ -1234,13 +1227,8 @@ iterRunes: case dateDigitDotDotOffset: // 2020-07-20+00:00 - switch r { - case ':': - p.set(p.offseti, "-07:00") - default: - if !unicode.IsDigit(r) { - return p, p.unknownErr(datestr) - } + if r != ':' && !unicode.IsDigit(r) { + return p, p.unknownErr(datestr) } case dateAlpha: @@ -1251,6 +1239,7 @@ iterRunes: // Mon Jan 02 15:04:05 2006 -0700 // Mon Aug 10 15:44:11 UTC+0100 2015 // Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) + // Fri Jul 03 2015 18:04:07 GMT+01:00 (GMT Daylight Time) // dateAlphaWsDigit // May 8, 2009 5:57:51 PM // oct 1, 1970 @@ -1374,6 +1363,7 @@ iterRunes: // Mon Jan 02 15:04:05 -0700 2006 // Mon Jan 02 15:04:05 2006 -0700 // Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) + // Fri Jul 03 2015 18:04:07 GMT+01:00 (GMT Daylight Time) // Mon Aug 10 15:44:11 UTC+0100 2015 // dateAlphaWsDigit // May 8, 2009 5:57:51 PM @@ -1445,6 +1435,7 @@ iterRunes: // May 8 17:57:51 2009 // May 08 17:57:51 2009 // Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) + // Jul 03 2015 18:04:07 GMT+01:00 (GMT Daylight Time) if r == ':' { // Guessed wrong; was not a year p.yeari = 0 @@ -1702,7 +1693,6 @@ iterRunes: } } - iterTimeRunes: for ; i < len(p.datestr); i++ { r := rune(p.datestr[i]) @@ -1718,14 +1708,15 @@ iterRunes: // 05:24:37 PM // 06:20:00 UTC // 06:20:00 UTC-05 - // 00:12:00 +0000 UTC - // 22:18:00 +0000 UTC m=+0.000000001 - // 15:04:05 -0700 - // 15:04:05 -07:00 - // 15:04:05 2008 - // timeOffset - // 03:21:51+00:00 
- // 19:55:00+0100 + // timeWsYear + // 15:04:05 2008 + // timeWsOffset + // 00:12:00 +0000 UTC + // 22:18:00 +0000 UTC m=+0.000000001 + // 03:21:51+00:00 + // 19:55:00+0100 + // 15:04:05 -0700 + // 15:04:05 -07:00 // timePeriod // 17:24:37.3186369 // 00:07:31.945167 @@ -1738,7 +1729,8 @@ iterRunes: switch r { case '-', '+': // 03:21:51+00:00 - p.stateTime = timeOffset + p.offseti = i + p.stateTime = timeWsOffset if p.seci == 0 { // 22:18+0530 p.minlen = i - p.mini @@ -1746,10 +1738,11 @@ iterRunes: p.seclen = i - p.seci } else if p.msi > 0 && p.mslen == 0 { p.mslen = i - p.msi + } else if p.parsedAMPM { + // time fully parsed, plus AM/PM indicator, this is OK } else { return p, p.unknownErr(datestr) } - p.offseti = i case '.', ',': // NOTE: go 1.20 can now parse a string that has a comma delimiter properly p.stateTime = timePeriod @@ -1782,9 +1775,9 @@ iterRunes: return p, p.unknownErr(datestr) } } else { - // Could be AM/PM + // Could be AM/PM (followed by whitespace or an offset) isLower := r == 'a' || r == 'p' - isTwoLetterWord := ((i+2) == len(p.datestr) || p.nextIs(i+1, ' ')) + isTwoLetterWord := ((i+2) == len(p.datestr) || (len(p.datestr) > i+2 && (p.datestr[i+2] == ' ' || p.datestr[i+2] == '+' || p.datestr[i+2] == '-'))) switch { case isLower && p.nextIs(i, 'm') && isTwoLetterWord && !p.parsedAMPM: if !p.coalesceTime(i) { @@ -1838,37 +1831,27 @@ iterRunes: p.stateTime = timePeriod } } - case timeOffset: - // 19:55:00+0100 - // timeOffsetColon - // 15:04:05+07:00 - // 15:04:05-07:00 - if r == ':' { - p.stateTime = timeOffsetColon - } else if !unicode.IsDigit(r) { - return p, p.unknownErr(datestr) - } case timeWs: // timeWsAlpha // 06:20:00 UTC // 06:20:00 UTC-05 // 15:44:11 UTC+0100 2015 // 18:04:07 GMT+0100 (GMT Daylight Time) + // 18:04:07 GMT+01:00 (GMT Daylight Time) // 17:57:51 MST 2009 // timeWsAMPMMaybe // 05:24:37 PM // timeWsOffset // 15:04:05 -0700 // 00:12:00 +0000 UTC - // timeWsOffsetColon - // 15:04:05 -07:00 - // 17:57:51 -0700 2009 - // 
timeWsOffsetColonAlpha - // 00:12:00 +00:00 UTC + // 15:04:05 -07:00 + // 17:57:51 -0700 2009 + // 00:12:00 +00:00 UTC // timeWsYear // 00:12:00 2008 - // timeWsYearOffset + // merge to state timeWsOffset // 00:12:00 2008 -0700 + // 00:12:00 2008 -07:00 // timeZ // 15:04:05.99Z switch r { @@ -1889,8 +1872,12 @@ iterRunes: p.stateTime = timeWsAlpha } else if unicode.IsDigit(r) { // 00:12:00 2008 - p.stateTime = timeWsYear - p.yeari = i + if p.yeari == 0 { + p.stateTime = timeWsYear + p.yeari = i + } else { + return p, p.unknownErr(datestr) + } } else if r == '(' { // (start of time zone description, ignore) } else { @@ -1898,19 +1885,33 @@ iterRunes: } } case timeWsYear: - // timeWsYearOffset + // merge to state timeWsOffset // 00:12:00 2008 -0700 + // 00:12:00 2008 -07:00 switch r { case ' ': - p.yearlen = i - p.yeari - if !p.setYear() { - return p, p.unknownErr(datestr) + if p.yearlen == 0 { + p.yearlen = i - p.yeari + if !p.setYear() { + return p, p.unknownErr(datestr) + } + } else { + // allow multiple trailing whitespace } case '+', '-': - p.offseti = i - p.stateTime = timeWsYearOffset + // The year must be followed by a space before an offset! 
+ if p.yearlen > 0 { + p.offseti = i + p.stateTime = timeWsOffset + } else { + return p, p.unknownErr(datestr) + } default: - if !unicode.IsDigit(r) { + if unicode.IsDigit(r) { + if p.yearlen > 0 { + return p, p.unknownErr(datestr) + } + } else { return p, p.unknownErr(datestr) } } @@ -1919,11 +1920,13 @@ iterRunes: // 06:20:00 UTC-05 // 06:20:00 (EST) // timeWsAlphaWs - // 17:57:51 MST 2009 + // timeWsAlphaWsYear + // 17:57:51 MST 2009 // timeWsAlphaZoneOffset // timeWsAlphaZoneOffsetWs // timeWsAlphaZoneOffsetWsExtra // 18:04:07 GMT+0100 (GMT Daylight Time) + // 18:04:07 GMT+01:00 (GMT Daylight Time) // timeWsAlphaZoneOffsetWsYear // 15:44:11 UTC+0100 2015 switch r { @@ -1962,7 +1965,6 @@ iterRunes: } if r == ' ' { p.stateTime = timeWsAlphaWs - p.yeari = i + 1 } else { // 06:20:00 (EST) // This must be the end of the datetime or the format is unknown @@ -1974,7 +1976,23 @@ iterRunes: } } case timeWsAlphaWs: + // timeWsAlphaWsYear // 17:57:51 MST 2009 + if unicode.IsDigit(r) { + if p.yeari == 0 { + p.yeari = i + } else { + return p, p.unknownErr(datestr) + } + p.stateTime = timeWsAlphaWsYear + } else if r == '(' { + p.extra = i - 1 + p.stateTime = timeWsOffsetWsTZDescInParen + } + case timeWsAlphaWsYear: + if !unicode.IsDigit(r) { + return p, p.unknownErr(datestr) + } case timeWsAlphaZoneOffset: // 06:20:00 UTC-05 @@ -1982,13 +2000,13 @@ iterRunes: // timeWsAlphaZoneOffsetWs // timeWsAlphaZoneOffsetWsExtra // 18:04:07 GMT+0100 (GMT Daylight Time) + // 18:04:07 GMT+01:00 (GMT Daylight Time) // timeWsAlphaZoneOffsetWsYear // 15:44:11 UTC+0100 2015 switch r { case ' ': - p.set(p.offseti, "-0700") - if p.yeari == 0 { - p.yeari = i + 1 + if err := p.setTZOffset(i, datestr); err != nil { + return p, err } p.stateTime = timeWsAlphaZoneOffsetWs default: @@ -2000,14 +2018,36 @@ iterRunes: // timeWsAlphaZoneOffsetWs // timeWsAlphaZoneOffsetWsExtra // 18:04:07 GMT+0100 (GMT Daylight Time) + // 18:04:07 GMT+01:00 (GMT Daylight Time) // timeWsAlphaZoneOffsetWsYear // 15:44:11 
UTC+0100 2015 if unicode.IsDigit(r) { - p.stateTime = timeWsAlphaZoneOffsetWsYear - } else { + if p.yeari == 0 { + p.yeari = i + p.stateTime = timeWsAlphaZoneOffsetWsYear + } else { + return p, p.unknownErr(datestr) + } + } else if r == '(' { p.extra = i - 1 - p.stateTime = timeWsAlphaZoneOffsetWsExtra + p.stateTime = timeWsOffsetWsTZDescInParen + } else { + return p, p.unknownErr(datestr) + } + case timeWsOffsetWsTZDescInParen: + // timeWsAlphaZoneOffsetWs + // timeWsAlphaZoneOffsetWsExtra + // 18:04:07 GMT+0100 (GMT Daylight Time) + // 18:04:07 GMT+01:00 (GMT Daylight Time) + if r == '(' { + return p, p.unknownErr(datestr) + } else if r == ')' { + // must be the end + if i != len(p.datestr)-1 { + return p, p.unknownErr(datestr) + } } + // any other char is OK case timeWsAlphaZoneOffsetWsYear: // 15:44:11 UTC+0100 2015 if unicode.IsDigit(r) { @@ -2069,18 +2109,24 @@ iterRunes: // timeWsOffsetWsOffset // 17:57:51 -0700 -07 // timeWsOffsetWs - // 17:57:51 -0700 2009 - // 00:12:00 +0000 UTC - // timeWsOffsetColon // 15:04:05 -07:00 - // timeWsOffsetColonAlpha + // timeWsOffsetWsYear + // 17:57:51 -0700 2009 + // timeWsOffsetWsAlphaZone + // 00:12:00 +0000 UTC // 00:12:00 +00:00 UTC + // timeWsOffsetWsAlphaZoneWs --> timeWsOffsetWsYear (overloaded) + // 00:12:00 +00:00 UTC 2009 + // timeWsOffsetWsTZDescInParen + // 00:12:00 +00:00 UTC (Universal Coordinated Time) switch r { case ':': - p.stateTime = timeWsOffsetColon + // Parse the case where an offset has a colon the same as timeWsOffset! 
+ // continue case ' ': - p.set(p.offseti, "-0700") - p.yeari = i + 1 + if err := p.setTZOffset(i, datestr); err != nil { + return p, err + } p.stateTime = timeWsOffsetWs default: if !unicode.IsDigit(r) { @@ -2088,73 +2134,139 @@ iterRunes: } } case timeWsOffsetWs: - // 17:57:51 -0700 2009 - // 00:12:00 +0000 UTC - // 22:18:00.001 +0000 UTC m=+0.000000001 + // timeWsOffsetWs + // timeWsOffsetWsYear + // 17:57:51 -0700 2009 + // 17:57:51 -07:00 2009 + // timeWsOffsetWsAlphaZone + // 00:12:00 +0000 UTC + // 00:12:00 +00:00 UTC + // 22:18:00.001 +0000 UTC m=+0.000000001 + // 22:18:00.001 +00:00 UTC m=+0.000000001 // w Extra // 17:57:51 -0700 -07 + // 17:57:51 -07:00 -07 + // 22:18:00.001 +0000 m=+0.000000001 + // 00:00:00 +0300 (European Daylight Time) + // 00:00:00 +03:00 (European Daylight Time) switch r { - case '=': - // eff you golang - if p.datestr[i-1] == 'm' { - p.extra = i - 2 - p.trimExtra(false) - } else { - return p, p.unknownErr(datestr) - } - case '+', '-', '(': + case '+', '-': // This really doesn't seem valid, but for some reason when round-tripping a go date // their is an extra +03 printed out. seems like go bug to me, but, parsing anyway. 
// 00:00:00 +0300 +03 // 00:00:00 +0300 +0300 + // 00:00:00 +03:00 +03 + // 00:00:00 +03:00 +0300 p.extra = i - 1 - p.stateTime = timeWsOffset p.trimExtra(false) + p.stateTime = timeWsOffset + case '(': + // 00:00:00 +0300 (European Daylight Time) + // 00:00:00 +03:00 (European Daylight Time) + p.extra = i - 1 + p.stateTime = timeWsOffsetWsTZDescInParen case ' ': // continue default: switch { case unicode.IsDigit(r): - p.yearlen = i - p.yeari + 1 - if p.yearlen == 4 { - if !p.setYear() { - return p, p.unknownErr(datestr) - } - } else if p.yearlen > 4 { + if p.yeari == 0 { + p.yeari = i + } else { return p, p.unknownErr(datestr) } + p.stateTime = timeWsOffsetWsYear case unicode.IsLetter(r): - // 15:04:05 -0700 MST - if p.tzi == 0 { - p.tzi = i + if r == 'm' && p.nextIs(i, '=') { + // 22:18:00.001 +0000 UTC m=+0.000000001 + // 22:18:00.001 +00:00 UTC m=+0.000000001 + // very strange syntax! + p.extra = i - 1 + p.trimExtra(false) + } else { + // 15:04:05 -0700 MST + // 15:04:05 -07:00 MST + // 15:04:05 -07:00 MST (Mountain Standard Time) + // 15:04:05 -07:00 MST 2006 + if p.tzi == 0 { + p.tzi = i + } else { + return p, p.unknownErr(datestr) + } + p.stateTime = timeWsOffsetWsAlphaZone } default: return p, p.unknownErr(datestr) } } - case timeOffsetColon, timeWsOffsetColon: - // timeOffsetColon - // 15:04:05-07:00 - // timeOffsetColonAlpha - // 2015-02-18 00:12:00+00:00 UTC - // timeWsOffsetColon - // 15:04:05 -07:00 - // timeWsOffsetColonAlpha - // 2015-02-18 00:12:00 +00:00 UTC - if unicode.IsLetter(r) { - // TODO: do we need to handle the m=+0.000000001 case? 
- // 2015-02-18 00:12:00 +00:00 UTC - if p.stateTime == timeWsOffsetColon { - p.stateTime = timeWsOffsetColonAlpha + case timeWsOffsetWsAlphaZone: + switch { + case r == ' ': + if p.tzi > 0 { + p.tzlen = i - p.tzi + switch p.tzlen { + case 3: + // 13:31:51.999 +01:00 CET + p.set(p.tzi, "MST") + case 4: + // 13:31:51.999 +01:00 CEST + p.set(p.tzi, "MST ") + default: + if p.simpleErrorMessages { + return p, ErrUnknownTimeZone + } else { + return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:p.tzi+p.tzlen]) + } + } } else { - p.stateTime = timeOffsetColonAlpha + return p, p.unknownErr(datestr) } - p.tzi = i - break iterTimeRunes - } else if r != ' ' && !unicode.IsDigit(r) { + p.stateTime = timeWsOffsetWsAlphaZoneWs + case unicode.IsLetter(r): + // continue + } + + case timeWsOffsetWsAlphaZoneWs: + switch r { + case '=': + // 22:18:00.001 +0000 UTC m=+0.000000001 + // very strange syntax! + if p.datestr[i-1] == 'm' { + p.extra = i - 2 + p.trimExtra(false) + } else { + return p, p.unknownErr(datestr) + } + case '(': + // 00:00:00 -0600 MDT (Mountain Daylight Time) + // 00:00:00 -06:00 MDT (Mountain Daylight Time) + p.extra = i - 1 + p.stateTime = timeWsOffsetWsTZDescInParen + case ' ': + // continue (extra whitespace) + case 'm': + if !p.nextIs(i, '=') { + return p, p.unknownErr(datestr) + } + default: + if unicode.IsDigit(r) { + if p.yeari == 0 { + p.yeari = i + } else { + return p, p.unknownErr(datestr) + } + p.stateTime = timeWsOffsetWsYear + } else { + return p, p.unknownErr(datestr) + } + } + + case timeWsOffsetWsYear: + if !unicode.IsDigit(r) { return p, p.unknownErr(datestr) } + case timePeriod: // 15:04:05.999999999 // 15:04:05.999999999 @@ -2167,7 +2279,7 @@ iterRunes: // 00:07:31.945167 // 18:31:59.257000000 // 00:00:00.000 - // (note: if we have an offset (+/-) or whitespace (Ws) after this state, re-enter the timeWs or timeOffset + // (note: if we have an offset (+/-) or whitespace (Ws) after this 
state, re-enter the timeWs or timeWsOffset // state above so that we do not have to duplicate all of the logic again for this parsing just because we // have parsed a fractional second...) switch r { @@ -2180,7 +2292,7 @@ iterRunes: case '+', '-': p.mslen = i - p.msi p.offseti = i - p.stateTime = timeOffset + p.stateTime = timeWsOffset case 'Z': p.stateTime = timeZ p.mslen = i - p.msi @@ -2229,7 +2341,7 @@ iterRunes: p.stateTime = timeWs case '+', '-': p.offseti = i - p.stateTime = timeOffset + p.stateTime = timeWsOffset default: return p, p.unexpectedTail(i) } @@ -2240,43 +2352,6 @@ iterRunes: } switch p.stateTime { - case timeOffsetColonAlpha, timeWsOffsetColonAlpha: - // process offset - offsetLen := i - p.offseti - switch offsetLen { - case 6, 7: - // may or may not have a space on the end - if offsetLen == 7 { - if p.datestr[p.offseti+6] != ' ' { - if p.simpleErrorMessages { - return p, ErrUnknownTZOffset - } else { - return p, fmt.Errorf("%w %q near %q (expected offset like -07:00)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:p.offseti+offsetLen]) - } - } - } - p.set(p.offseti, "-07:00") - default: - if p.simpleErrorMessages { - return p, ErrUnknownTZOffset - } else { - return p, fmt.Errorf("%w %q near %q (expected offset like -07:00)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:p.offseti+offsetLen]) - } - } - // process timezone - switch len(p.datestr) - p.tzi { - case 3: - // 13:31:51.999 +01:00 CET - p.set(p.tzi, "MST") - case 4: - p.set(p.tzi, "MST ") - default: - if p.simpleErrorMessages { - return p, ErrUnknownTimeZone - } else { - return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:]) - } - } case timeWsAlpha: switch len(p.datestr) - p.tzi { case 3: @@ -2295,33 +2370,21 @@ iterRunes: case timeWsAlphaRParen: // nothing extra to do - case timeWsAlphaWs: + case timeWsYear, timeWsAlphaWsYear: p.yearlen = i - p.yeari if !p.setYear() { return p, p.unknownErr(datestr) } - case timeWsYear: 
- p.yearlen = i - p.yeari - if !p.setYear() { + case timeWsOffsetWsTZDescInParen: + // The last character must be a closing ')' + if len(p.datestr) <= 0 || p.datestr[i-1] != ')' { return p, p.unknownErr(datestr) } - case timeWsAlphaZoneOffsetWsExtra: p.trimExtra(false) case timeWsAlphaZoneOffset: // 06:20:00 UTC-05 - switch i - p.offseti { - case 2, 3, 4: - p.set(p.offseti, "-07") - case 5: - p.set(p.offseti, "-0700") - case 6: - p.set(p.offseti, "-07:00") - default: - if p.simpleErrorMessages { - return p, ErrUnknownTZOffset - } else { - return p, fmt.Errorf("%w %q near %q (must be 2 or 4 digits optional colon)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:i]) - } + if err := p.setTZOffset(i, datestr); err != nil { + return p, err } case timePeriod: @@ -2333,24 +2396,26 @@ iterRunes: return p, fmt.Errorf("%w in %q near %q", ErrFracSecTooLong, datestr, p.datestr[p.msi:p.mslen]) } } - case timeOffset, timeWsOffset, timeWsYearOffset: - switch len(p.datestr) - p.offseti { - case 3: - // 19:55:00+01 (or 19:55:00 +01) - p.set(p.offseti, "-07") - case 5: - // 19:55:00+0100 (or 19:55:00 +0100) - p.set(p.offseti, "-0700") - default: - if p.simpleErrorMessages { - return p, ErrUnknownTZOffset - } else { - return p, fmt.Errorf("%w %q near %q (must be 2 or 4 digits optional colon)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:]) - } + case timeWsOffset: + // 17:57:51 -07:00 (or 19:55:00.799 +01:00) + // 15:04:05+07:00 (or 19:55:00.799+01:00) + // 17:57:51 2006 -07:00 (or 19:55:00.799 +01:00) + if err := p.setTZOffset(len(p.datestr), datestr); err != nil { + return p, err } - case timeWsOffsetWs: + case timeWsOffsetWsYear: // 17:57:51 -0700 2009 + p.yearlen = len(p.datestr) - p.yeari + if p.yearlen == 4 { + if !p.setYear() { + return p, p.unknownErr(datestr) + } + } else if p.yearlen > 4 { + return p, p.unknownErr(datestr) + } + + case timeWsOffsetWsAlphaZone: // 00:12:00 +0000 UTC if p.tzi > 0 { switch len(p.datestr) - p.tzi { @@ -2367,19 +2432,8 @@ iterRunes: 
return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:]) } } - } - case timeOffsetColon, timeWsOffsetColon: - // 17:57:51 -07:00 (or 19:55:00.799 +01:00) - // 15:04:05+07:00 (or 19:55:00.799+01:00) - switch len(p.datestr) - p.offseti { - case 6: - p.set(p.offseti, "-07:00") - default: - if p.simpleErrorMessages { - return p, ErrUnknownTZOffset - } else { - return p, fmt.Errorf("%w %q near %q (expected offset like -07:00)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:]) - } + } else { + return p, p.unknownErr(datestr) } } if !p.coalesceTime(i) { @@ -2458,13 +2512,8 @@ iterRunes: case dateYearDashDashOffset: /// 2020-07-20+00:00 - switch len(p.datestr) - p.offseti { - case 5: - p.set(p.offseti, "-0700") - case 6: - p.set(p.offseti, "-07:00") - default: - return p, p.unknownErr(datestr) + if err := p.setTZOffset(len(p.datestr), datestr); err != nil { + return p, err } return p, nil @@ -2555,13 +2604,8 @@ iterRunes: case dateDigitDotDotOffset: // 2020.07.20+00:00 - switch len(p.datestr) - p.offseti { - case 5: - p.set(p.offseti, "-0700") - case 6: - p.set(p.offseti, "-07:00") - default: - return p, p.unknownErr(datestr) + if err := p.setTZOffset(len(p.datestr), datestr); err != nil { + return p, err } return p, nil @@ -2840,6 +2884,7 @@ func (p *parser) set(start int, val string) { p.formatSetLen = endingPos } } + func (p *parser) setMonth() bool { if p.molen == 2 { p.set(p.moi, "01") @@ -2863,6 +2908,7 @@ func (p *parser) setDay() bool { return false } } + func (p *parser) setYear() bool { if p.yearlen == 2 { p.set(p.yeari, "06") @@ -2875,6 +2921,25 @@ func (p *parser) setYear() bool { } } +func (p *parser) setTZOffset(i int, datestr string) error { + offsetlen := i - p.offseti + switch offsetlen { + case 3: + p.set(p.offseti, "-07") + case 5: + p.set(p.offseti, "-0700") + case 6: + p.set(p.offseti, "-07:00") + default: + if p.simpleErrorMessages { + return ErrUnknownTZOffset + } else { + return 
fmt.Errorf("%w %q near %q (must be 2 or 4 digits optional colon)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:i]) + } + } + return nil +} + // Find the proper end of the current component (scanning chars starting from start and going // up until the end, and either returning at end or returning the first character that is // not allowed, as determined by allowNumeric, allowAlpha, and allowOther) diff --git a/parseany_test.go b/parseany_test.go index 54a83d7..52e4e42 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -105,11 +105,19 @@ var testInputs = []dateTest{ {in: "Mon Aug 1 5:44:11 CEST+0200 2015", out: "2015-08-01 03:44:11 +0000 UTC", zname: "CEST"}, // ?? {in: "Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)", out: "2015-07-03 17:04:07 +0000 UTC"}, + {in: "Fri Jul 03 2015 18:04:07 GMT+01:00 (GMT Daylight Time)", out: "2015-07-03 17:04:07 +0000 UTC"}, {in: "Fri Jul 3 2015 06:04:07 GMT+0100 (GMT Daylight Time)", out: "2015-07-03 05:04:07 +0000 UTC"}, {in: "Fri Jul 03 2015 18:04:07 UTC+0100 (GMT Daylight Time)", out: "2015-07-03 17:04:07 +0000 UTC"}, {in: "Fri Jul 3 2015 06:04:07 UTC+0100 (GMT Daylight Time)", out: "2015-07-03 05:04:07 +0000 UTC"}, {in: "Fri Jul 3 2015 06:04:07 PST-0700 (Pacific Daylight Time)", out: "2015-07-03 13:04:07 +0000 UTC", zname: "PST"}, + {in: "Fri Jul 3 2015 06:04:07 PST-07:00 (Pacific Daylight Time)", out: "2015-07-03 13:04:07 +0000 UTC", zname: "PST"}, {in: "Fri Jul 3 2015 06:04:07 CEST-0700 (Central European Summer Time)", out: "2015-07-03 13:04:07 +0000 UTC", zname: "CEST"}, + {in: "Fri Jul 03 2015 18:04:07 GMT (GMT Daylight Time)", out: "2015-07-03 18:04:07 +0000 UTC", zname: "GMT"}, + {in: "Fri Jul 3 2015 06:04:07 +0100 (GMT Daylight Time)", out: "2015-07-03 05:04:07 +0000 UTC"}, + {in: "Fri Jul 03 2015 18:04:07 UTC (GMT Daylight Time)", out: "2015-07-03 18:04:07 +0000 UTC"}, + {in: "Fri Jul 3 2015 06:04:07 +01:00 (GMT Daylight Time)", out: "2015-07-03 05:04:07 +0000 UTC"}, + {in: "Fri Jul 3 2015 06:04:07 PST 
(Pacific Daylight Time)", out: "2015-07-03 06:04:07 +0000 UTC", zname: "PST"}, + {in: "Fri Jul 3 2015 06:04:07 -07:00 (Pacific Daylight Time)", out: "2015-07-03 13:04:07 +0000 UTC"}, {in: "Fri Jul 3 2015", out: "2015-07-03 00:00:00 +0000 UTC"}, {in: "Fri Jul 3 2015 11:15:26pm", out: "2015-07-03 23:15:26 +0000 UTC"}, // Month dd, yyyy at time @@ -342,6 +350,14 @@ var testInputs = []dateTest{ {in: "04/02/2014 04:08:09.123 AM", out: "2014-04-02 04:08:09.123 +0000 UTC"}, {in: "04/02/2014 04:08:09.123PM", out: "2014-04-02 16:08:09.123 +0000 UTC"}, {in: "04/02/2014 04:08:09.123 PM", out: "2014-04-02 16:08:09.123 +0000 UTC"}, + {in: "04/02/2014 04:08:09pm-0700", out: "2014-04-02 23:08:09 +0000 UTC"}, + {in: "04/02/2014 04:08:09PM-0700 PST", out: "2014-04-02 23:08:09 +0000 UTC", zname: "PST"}, + {in: "04/02/2014 04:08:09pm-0700 PST (Pacific Standard Time)", out: "2014-04-02 23:08:09 +0000 UTC", zname: "PST"}, + {in: "04/02/2014 04:08:09pm-0700 (Pacific Standard Time)", out: "2014-04-02 23:08:09 +0000 UTC"}, + {in: "04/02/2014 04:08:09am+02:00", out: "2014-04-02 02:08:09 +0000 UTC"}, + {in: "04/02/2014 04:08:09AM+02:00 CET", out: "2014-04-02 02:08:09 +0000 UTC", zname: "CET"}, + {in: "04/02/2014 04:08:09am+02:00 CET (Central European Time)", out: "2014-04-02 02:08:09 +0000 UTC", zname: "CET"}, + {in: "04/02/2014 04:08:09am+02:00 (Central European Time)", out: "2014-04-02 02:08:09 +0000 UTC"}, // yyyy/mm/dd {in: "2014/04/02", out: "2014-04-02 00:00:00 +0000 UTC"}, {in: "2014/03/31", out: "2014-03-31 00:00:00 +0000 UTC"}, @@ -513,6 +529,15 @@ var testInputs = []dateTest{ // Git log default date format - https://github.com/araddon/dateparse/pull/92 {in: "Thu Apr 7 15:13:13 2005 -0700", out: "2005-04-07 22:13:13 +0000 UTC"}, {in: "Tue Dec 12 23:07:11 2023 -0700", out: "2023-12-13 06:07:11 +0000 UTC"}, + // Variants with different offset formats, or that place the year after the offset and/or timezone + {in: "Thu Apr 7 15:13:13 2005 -07:00", out: "2005-04-07 22:13:13 +0000 
UTC"}, + {in: "Thu Apr 7 15:13:13 2005 -07:00 PST", out: "2005-04-07 22:13:13 +0000 UTC", zname: "PST"}, + {in: "Thu Apr 7 15:13:13 2005 -07:00 PST (Pacific Standard Time)", out: "2005-04-07 22:13:13 +0000 UTC", zname: "PST"}, + {in: "Thu Apr 7 15:13:13 -0700 2005", out: "2005-04-07 22:13:13 +0000 UTC"}, + {in: "Thu Apr 7 15:13:13 -07:00 2005", out: "2005-04-07 22:13:13 +0000 UTC"}, + {in: "Thu Apr 7 15:13:13 -0700 PST 2005", out: "2005-04-07 22:13:13 +0000 UTC", zname: "PST"}, + {in: "Thu Apr 7 15:13:13 -07:00 PST 2005", out: "2005-04-07 22:13:13 +0000 UTC", zname: "PST"}, + {in: "Thu Apr 7 15:13:13 PST 2005", out: "2005-04-07 15:13:13 +0000 UTC", zname: "PST"}, // RabbitMQ log format - https://github.com/araddon/dateparse/pull/122 {in: "8-Mar-2018::14:09:27", out: "2018-03-08 14:09:27 +0000 UTC"}, {in: "08-03-2018::02:09:29 PM", out: "2018-03-08 14:09:29 +0000 UTC"}, @@ -531,6 +556,8 @@ var testInputs = []dateTest{ {in: "2014-04-26 17:24:37.1 +0000", out: "2014-04-26 17:24:37.1 +0000 UTC"}, {in: "2014-05-11 08:20:13 +0000", out: "2014-05-11 08:20:13 +0000 UTC"}, {in: "2014-05-11 08:20:13 +0530", out: "2014-05-11 02:50:13 +0000 UTC"}, + {in: "2014-05-11 08:20:13 +0530 m=+0.000000001", out: "2014-05-11 02:50:13 +0000 UTC"}, + {in: "2014-05-11 08:20:13.123456 +0530 m=+0.000000001", out: "2014-05-11 02:50:13.123456 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss +0300 +03 ?? issue author said this is from golang? 
{in: "2018-06-29 19:09:57.77297118 +0300 +03", out: "2018-06-29 16:09:57.77297118 +0000 UTC"}, {in: "2018-06-29 19:09:57.77297118 +0300 +0300", out: "2018-06-29 16:09:57.77297118 +0000 UTC"}, @@ -549,6 +576,8 @@ var testInputs = []dateTest{ {in: "2014-04-26 17:24:37.123456 +00:00", out: "2014-04-26 17:24:37.123456 +0000 UTC"}, {in: "2014-04-26 17:24:37.12 +00:00", out: "2014-04-26 17:24:37.12 +0000 UTC"}, {in: "2014-04-26 17:24:37.1 +00:00", out: "2014-04-26 17:24:37.1 +0000 UTC"}, + {in: "2014-04-26 17:24:37 +00:00 m=+0.000000001", out: "2014-04-26 17:24:37 +0000 UTC"}, + {in: "2014-04-26 17:24:37.123456 +00:00 m=+0.000000001", out: "2014-04-26 17:24:37.123456 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss +0000 TZ // Golang Native Format {in: "2012-08-03 18:31:59 +0000 UTC", out: "2012-08-03 18:31:59 +0000 UTC", zname: "UTC"}, @@ -570,6 +599,9 @@ var testInputs = []dateTest{ {in: "2015-02-08 03:02:00 +0200 CEST m=+0.000000001", out: "2015-02-08 01:02:00 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, {in: "2015-02-08 03:02:00 +0300 MSK m=+0.000000001", out: "2015-02-08 00:02:00 +0000 UTC", zname: "MSK"}, {in: "2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001", out: "2015-02-08 00:02:00.001 +0000 UTC", zname: "MSK"}, + // Variant with colon in offset + {in: "2015-02-08 03:02:00 +02:00 CEST", out: "2015-02-08 01:02:00 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, + {in: "2015-02-08 03:02:00 +02:00 CEST (Central European Standard Time)", out: "2015-02-08 01:02:00 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, // yyyy-mm-dd hh:mm:ss TZ {in: "2012-08-03 18:31:59 UTC", out: "2012-08-03 18:31:59 +0000 UTC", zname: "UTC"}, {in: "2012-08-03 18:31:59 CEST", out: "2012-08-03 18:31:59 +0000 UTC", zname: "CEST"}, @@ -663,6 +695,8 @@ var testInputs = []dateTest{ // yyyy-mm-ddThh:mm:ss:000+0000 - weird format with additional colon in front of milliseconds {in: "2012-08-17T18:31:59:257", out: "2012-08-17 18:31:59.257 +0000 UTC"}, // https://github.com/araddon/dateparse/issues/137 
{in: "2012-08-17T18:31:59:257+0100", out: "2012-08-17 17:31:59.257 +0000 UTC"}, // https://github.com/araddon/dateparse/issues/117 + {in: "2012-08-17T18:31:59:257+0200 CET", out: "2012-08-17 16:31:59.257 +0000 UTC", zname: "CET"}, + {in: "2012-08-17T18:31:59:257+0200 CET (Central European Time)", out: "2012-08-17 16:31:59.257 +0000 UTC", zname: "CET"}, // yyyy-mm-ddThh:mm:ssZ {in: "2009-08-12T22:15Z", out: "2009-08-12 22:15:00 +0000 UTC"}, {in: "2009-08-12T22:15:09Z", out: "2009-08-12 22:15:09 +0000 UTC"}, @@ -788,7 +822,7 @@ func TestParse(t *testing.T) { } fullInput := prefix + th.in - t.Run(fmt.Sprintf("simpleerr-%v-%s", simpleErrorMessage, fullInput), func(t *testing.T) { + t.Run(fmt.Sprintf("simpleerr-%v-addweekday-%v-%s", simpleErrorMessage, addWeekday, fullInput), func(t *testing.T) { var ts time.Time defer func() { if r := recover(); r != nil { @@ -935,6 +969,56 @@ var testParseErrors = []dateTest{ {in: "8-Mar-2018:14:09:27", err: true}, {in: "8-Mar-2018: 14:09:27", err: true}, {in: "8-Mar-2018:::14:09:27", err: true}, + // Invalid repeated year + {in: "Thu Apr 7 15:13:13 2005 2004", err: true}, + {in: "Thu Apr 7 15:13:13 2005 2004 ", err: true}, + {in: "Thu Apr 7 15:13:13 2005-0700", err: true}, + {in: "Thu Apr 7 15:13:13 2005-07:00", err: true}, + {in: "Thu Apr 7 15:13:13 2005 -0700 2005", err: true}, + {in: "Thu Apr 7 15:13:13 2005 -0700 PST 2005", err: true}, + {in: "Thu Apr 7 15:13:13 2005 -07:00 2005", err: true}, + {in: "Thu Apr 7 15:13:13 2005 -07:00 PST 2005", err: true}, + // Invalid offsets + {in: "Fri Jul 03 2015 18:04:07 GMT+0", err: true}, + {in: "Fri Jul 03 2015 18:04:07 GMT+000", err: true}, + {in: "Fri Jul 03 2015 18:04:07 GMT+0:100", err: true}, + {in: "Fri Jul 03 2015 18:04:07 GMT+010:0", err: true}, + {in: "Fri Jul 03 2015 18:04:07 GMT+01000", err: true}, + {in: "Fri Jul 03 2015 18:04:07 GMT+01:000", err: true}, + {in: "Fri Jul 03 2015 18:04:07 +0", err: true}, + {in: "Fri Jul 03 2015 18:04:07 +000", err: true}, + {in: "Fri Jul 03 2015 
18:04:07 +0:100", err: true}, + {in: "Fri Jul 03 2015 18:04:07 +010:0", err: true}, + {in: "Fri Jul 03 2015 18:04:07 +01000", err: true}, + {in: "Fri Jul 03 2015 18:04:07 +01:000", err: true}, + // Invalid extra words on the end (or invalid time zone description) + {in: "2018-09-30 21:09:13 (Universal Coordinated Time)", err: true}, + {in: "2018-09-30 21:09:13pm (Universal Coordinated Time)", err: true}, + {in: "Fri Jul 3 2015 06:04:07 GMT+0100 blah", err: true}, + {in: "Fri Jul 3 2015 06:04:07 GMT+0100 hello world", err: true}, + {in: "Fri Jul 03 2015 18:04:07 UTC+0100 GMT Daylight Time", err: true}, + {in: "Fri Jul 3 2015 06:04:07 UTC+0100 (GMT", err: true}, + {in: "Fri Jul 3 2015 06:04:07 PST-0700 (Pacific (Daylight) Time)", err: true}, + {in: "Fri Jul 3 2015 06:04:07 CEST-0700 (Central European Summer Time) extra", err: true}, + {in: "Fri Jul 3 2015 06:04:07 +0100 blah", err: true}, + {in: "Fri Jul 3 2015 06:04:07 +0100 hello world", err: true}, + {in: "Fri Jul 03 2015 18:04:07 +0100 GMT Daylight Time", err: true}, + {in: "Fri Jul 3 2015 06:04:07 +0100 (GMT", err: true}, + {in: "Fri Jul 3 2015 06:04:07 -0700 (Pacific (Daylight) Time)", err: true}, + {in: "Fri Jul 3 2015 06:04:07 -0700 (Central European Summer Time) extra", err: true}, + {in: "Fri Jul 3 2015 06:04:07 +01:00 blah", err: true}, + {in: "Fri Jul 3 2015 06:04:07 +01:00 hello world", err: true}, + {in: "Fri Jul 03 2015 18:04:07 +01:00 GMT Daylight Time", err: true}, + {in: "Fri Jul 3 2015 06:04:07 +01:00 (GMT", err: true}, + {in: "Fri Jul 3 2015 06:04:07 -07:00 (Pacific (Daylight) Time)", err: true}, + {in: "Fri Jul 3 2015 06:04:07 -07:00 (Central European Summer Time) extra", err: true}, + {in: "Fri Jul 03 2015 18:04:07 GMT GMT", err: true}, + {in: "Fri Jul 3 2015 06:04:07 PMT blah", err: true}, + {in: "Fri Jul 3 2015 06:04:07 PMT hello world", err: true}, + {in: "Fri Jul 03 2015 18:04:07 AMT GMT Daylight Time", err: true}, + {in: "Fri Jul 3 2015 06:04:07 UTC (GMT", err: true}, + {in: "Fri Jul 3 2015 
06:04:07 PST (Pacific (Daylight) Time)", err: true}, + {in: "Fri Jul 3 2015 06:04:07 CEST (Central European Summer Time) extra", err: true}, } func TestParseErrors(t *testing.T) { From d5b3c60e9ba4592739ea99c2f097bdcc2d9371eb Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Sat, 30 Dec 2023 12:10:37 -0700 Subject: [PATCH 62/62] Cleanup handling of TZ name parsing Fully support the format where a TZ name is in parentheses after the time (and possibly after an offset). This fixes the broken case where a 4 character TZ name was in parentheses after a time. --- parseany.go | 160 ++++++++++++++++++++++++++++------------------- parseany_test.go | 19 ++++-- 2 files changed, 109 insertions(+), 70 deletions(-) diff --git a/parseany.go b/parseany.go index 7e6327f..30188c8 100644 --- a/parseany.go +++ b/parseany.go @@ -1895,9 +1895,8 @@ iterRunes: if !p.setYear() { return p, p.unknownErr(datestr) } - } else { - // allow multiple trailing whitespace } + // else allow multiple trailing whitespace case '+', '-': // The year must be followed by a space before an offset! 
if p.yearlen > 0 { @@ -1942,12 +1941,10 @@ iterRunes: } else { p.tzlen = i - p.tzi } - if p.tzlen == 4 { - p.set(p.tzi, " MST") - } else if p.tzlen == 3 { - p.set(p.tzi, "MST") - } else if p.tzlen > 0 { - return p, p.unknownErr(datestr) + if p.tzlen > 0 { + if err := p.setTZName(datestr); err != nil { + return p, err + } } p.stateTime = timeWsAlphaZoneOffset p.offseti = i @@ -1956,12 +1953,8 @@ iterRunes: // 17:57:51 MST // 06:20:00 (EST) p.tzlen = i - p.tzi - if p.tzlen == 4 { - p.set(p.tzi, " MST") - } else if p.tzlen == 3 { - p.set(p.tzi, "MST") - } else if p.tzlen > 0 { - return p, p.unknownErr(datestr) + if err := p.setTZName(datestr); err != nil { + return p, err } if r == ' ' { p.stateTime = timeWsAlphaWs @@ -2205,19 +2198,8 @@ iterRunes: case r == ' ': if p.tzi > 0 { p.tzlen = i - p.tzi - switch p.tzlen { - case 3: - // 13:31:51.999 +01:00 CET - p.set(p.tzi, "MST") - case 4: - // 13:31:51.999 +01:00 CEST - p.set(p.tzi, "MST ") - default: - if p.simpleErrorMessages { - return p, ErrUnknownTimeZone - } else { - return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:p.tzi+p.tzlen]) - } + if err := p.setTZName(datestr); err != nil { + return p, err } } else { return p, p.unknownErr(datestr) @@ -2353,18 +2335,9 @@ iterRunes: switch p.stateTime { case timeWsAlpha: - switch len(p.datestr) - p.tzi { - case 3: - // 13:31:51.999 +01:00 CET - p.set(p.tzi, "MST") - case 4: - p.set(p.tzi, "MST ") - default: - if p.simpleErrorMessages { - return p, ErrUnknownTimeZone - } else { - return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:]) - } + p.tzlen = i - p.tzi + if err := p.setTZName(datestr); err != nil { + return p, err } case timeWsAlphaRParen: @@ -2377,10 +2350,26 @@ iterRunes: } case timeWsOffsetWsTZDescInParen: // The last character must be a closing ')' - if len(p.datestr) <= 0 || p.datestr[i-1] != ')' { + if i <= 0 || p.datestr[i-1] != ')' { return p, 
p.unknownErr(datestr) } - p.trimExtra(false) + // As a special case, if we don't yet have a timezone name, + // and the content in the paren is 3-4 characters, then treat + // this as a time zone name instead + if len(p.datestr) >= p.extra+1+3+1 { + parenContentsLen := (i - 1) - (p.extra + 2) + if p.tzi == 0 && (parenContentsLen >= 3 && parenContentsLen <= 4) { + p.tzi = p.extra + 2 + p.tzlen = parenContentsLen + if err := p.setTZName(datestr); err != nil { + return p, err + } + p.extra = 0 + } + } + if p.extra > 0 { + p.trimExtra(false) + } case timeWsAlphaZoneOffset: // 06:20:00 UTC-05 if err := p.setTZOffset(i, datestr); err != nil { @@ -2418,19 +2407,9 @@ iterRunes: case timeWsOffsetWsAlphaZone: // 00:12:00 +0000 UTC if p.tzi > 0 { - switch len(p.datestr) - p.tzi { - case 3: - // 13:31:51.999 +01:00 CET - p.set(p.tzi, "MST") - case 4: - // 13:31:51.999 +01:00 CEST - p.set(p.tzi, "MST ") - default: - if p.simpleErrorMessages { - return p, ErrUnknownTimeZone - } else { - return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:]) - } + p.tzlen = i - p.tzi + if err := p.setTZName(datestr); err != nil { + return p, err } } else { return p, p.unknownErr(datestr) @@ -2940,6 +2919,44 @@ func (p *parser) setTZOffset(i int, datestr string) error { return nil } +func (p *parser) setTZName(datestr string) error { + switch p.tzlen { + case 3: + p.set(p.tzi, "MST") + case 4: + p.set(p.tzi, "MST ") + default: + if p.simpleErrorMessages { + return ErrUnknownTimeZone + } else { + return fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:p.tzi+p.tzlen]) + } + } + return nil +} + +// Removes the characters at the given range from the format string. +// Fills the end of the format string with spaces rather than shortening it. 
+func (p *parser) removeRangeFromFormat(i, numBytes int) { + if i < 0 || i >= len(p.format) { + return + } + var startErase int + afterRemovedRange := i + numBytes + bytesToCopy := len(p.format) - afterRemovedRange + if bytesToCopy <= 0 { + // nothing to copy, erase everything from the removal point + startErase = i + } else { + copy(p.format[i:], p.format[afterRemovedRange:]) + startErase = i + bytesToCopy + } + // fill in spaces to erase the moved content in its old location + for index := startErase; index < len(p.format); index++ { + p.format[index] = ' ' + } +} + // Find the proper end of the current component (scanning chars starting from start and going // up until the end, and either returning at end or returning the first character that is // not allowed, as determined by allowNumeric, allowAlpha, and allowOther) @@ -3097,6 +3114,26 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) if p.t != nil { return *p.t, nil } + + // Make sure that the entire string matched to a known format that was detected + if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) { + // We can always ignore punctuation at the end of a date/time, but do not allow + // any numbers or letters in the format string. + validFormatTo := findProperEnd(bytesToString(p.format), p.formatSetLen, len(p.format), false, false, true) + if validFormatTo < len(p.format) { + return time.Time{}, p.unexpectedTail(p.formatSetLen) + } + } + + // Special case where the TZ name is 4 characters long and followed by punctuation, will cause parsing problems + // with the format 'MST ' (will expect a whitespace that isn't there after 4 char timezone). Most robust + // solution is to remove the extra whitespace. Even though it will cause offsets after this point to not match + // between the datestr and format string, it's not an issue at this point. 
+ if p.tzlen == 4 && p.tzi+4 < len(p.format) && p.format[p.tzi+3] == ' ' && p.format[p.tzi+4] != ' ' { + p.removeRangeFromFormat(p.tzi+3, 1) + } + + // If we have a full month name, update the format string to use it (can change length of format string) if len(p.fullMonth) > 0 { p.setFullMonth(p.fullMonth) } @@ -3110,7 +3147,7 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) // get out of this function to reduce scope it needs to be applied on if err != nil && strings.Contains(err.Error(), "month out of range") { // simple optimized case where mm and dd can be swapped directly - if p.molen == 2 && p.daylen == 2 { + if p.molen == 2 && p.daylen == 2 && len(p.fullMonth) <= 0 && (p.tzi == 0 || (p.moi < p.tzi && p.dayi < p.tzi)) { // skipped bytes have already been removed, so compensate for that moi := p.moi - p.skip p.moi = p.dayi - p.skip @@ -3144,17 +3181,10 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) }() } - // Make sure that the entire string matched to a known format that was detected - if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) { - // We can always ignore punctuation at the end of a date/time, but do not allow - // any numbers or letters in the format string. 
- validFormatTo := findProperEnd(bytesToString(p.format), p.formatSetLen, len(p.format), false, false, true) - if validFormatTo < len(p.format) { - return time.Time{}, p.unexpectedTail(p.formatSetLen) - } + if p.skip > len(p.format) { + p.skip = len(p.format) } - - if p.skip > 0 && len(p.format) > p.skip { + if p.skip > 0 { // copy and then re-slice to shorten to avoid losing the header of the pooled format string copy(p.format, p.format[p.skip:]) p.format = p.format[:len(p.format)-p.skip] diff --git a/parseany_test.go b/parseany_test.go index 52e4e42..a9f1368 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -225,9 +225,11 @@ var testInputs = []dateTest{ {in: "Thu, 03 Jul 2017 8:08:04 +0100", out: "2017-07-03 07:08:04 +0000 UTC"}, {in: "Thu, 03 Jul 2017 8:8:4 +0100", out: "2017-07-03 07:08:04 +0000 UTC"}, // - {in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC"}, - {in: "Tue, 5 Jul 2017 04:08:03 -0700 (MST)", out: "2017-07-05 11:08:03 +0000 UTC"}, + {in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC", zname: "CEST"}, + {in: "Tue, 5 Jul 2017 04:08:03 -0700 (MST)", out: "2017-07-05 11:08:03 +0000 UTC", zname: "MST"}, {in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, + {in: "Tue, 11 Jul 2017 04:08:03 (CEST)", out: "2017-07-11 04:08:03 +0000 UTC", zname: "CEST"}, + {in: "Tue, 5 Jul 2017 04:08:03 (MST)", out: "2017-07-05 04:08:03 +0000 UTC", zname: "MST"}, // day, dd-Mon-yy hh:mm:zz TZ {in: "Fri, 03-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, {in: "Fri, 03-Jul-15 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, @@ -330,14 +332,18 @@ var testInputs = []dateTest{ {in: "04/02/2014 04:08:09 AM", out: "2014-04-02 04:08:09 +0000 UTC"}, {in: "04/02/2014 04:08:09AM PST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "PST"}, {in: "04/02/2014 04:08:09 AM PST", out: "2014-04-02 04:08:09 
+0000 UTC", zname: "PST"}, + {in: "04/02/2014 04:08:09 AM (PST)", out: "2014-04-02 04:08:09 +0000 UTC", zname: "PST"}, {in: "04/02/2014 04:08:09AM CEST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"}, {in: "04/02/2014 04:08:09 AM CEST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"}, + {in: "04/02/2014 04:08:09 AM (CEST)", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"}, {in: "04/02/2014 04:08:09pm", out: "2014-04-02 16:08:09 +0000 UTC"}, {in: "04/02/2014 04:08:09 PM", out: "2014-04-02 16:08:09 +0000 UTC"}, {in: "04/02/2014 04:08:09PM PST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "PST"}, {in: "04/02/2014 04:08:09 PM PST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "PST"}, + {in: "04/02/2014 04:08:09 PM (PST)", out: "2014-04-02 16:08:09 +0000 UTC", zname: "PST"}, {in: "04/02/2014 04:08:09pm CEST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "CEST"}, {in: "04/02/2014 04:08:09 PM CEST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "CEST"}, + {in: "04/02/2014 04:08:09 PM (CEST)", out: "2014-04-02 16:08:09 +0000 UTC", zname: "CEST"}, {in: "04/02/2014 04:08am", out: "2014-04-02 04:08:00 +0000 UTC"}, {in: "04/02/2014 04:08 AM", out: "2014-04-02 04:08:00 +0000 UTC"}, {in: "04/02/2014 04:08pm", out: "2014-04-02 16:08:00 +0000 UTC"}, @@ -822,7 +828,7 @@ func TestParse(t *testing.T) { } fullInput := prefix + th.in - t.Run(fmt.Sprintf("simpleerr-%v-addweekday-%v-%s", simpleErrorMessage, addWeekday, fullInput), func(t *testing.T) { + t.Run(fmt.Sprintf("simpleerr-%v/addweekday-%v/%s", simpleErrorMessage, addWeekday, fullInput), func(t *testing.T) { var ts time.Time defer func() { if r := recover(); r != nil { @@ -1167,6 +1173,9 @@ func TestInLocation(t *testing.T) { ts = MustParse("Tue, 5 Jul 2017 16:28:13 -0700 (MST)") assert.Equal(t, "2017-07-05 23:28:13 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts = MustParse("Tue, 5 Jul 2017 16:28:13 +0300 (CEST)") + assert.Equal(t, "2017-07-05 13:28:13 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + // 
Now we are going to use ParseIn() and see that it gives different answer // with different zone, offset time.Local = nil @@ -1311,6 +1320,6 @@ func TestRetryAmbiguousDateWithSwap(t *testing.T) { // Convenience function for debugging a particular broken test case func TestDebug(t *testing.T) { - ts := MustParse("Monday 19/03/2012 00:00:00", RetryAmbiguousDateWithSwap(true)) - assert.Equal(t, "2012-03-19 00:00:00 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts := MustParse("September 17, 2012 at 10:09am CEST+02", RetryAmbiguousDateWithSwap(true)) + assert.Equal(t, "2012-09-17 08:09:00 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) }