22
33from datetime import datetime , timedelta , timezone
44
5+ import numpy as np
56import pandas as pd
67import pytest
78
@@ -67,31 +68,70 @@ def test_date_str_tokenizer(input_str, expected_mask):
6768 assert _tokenize_date_str (input_str ).masked_str == expected_mask
6869
6970
@pytest.mark.parametrize("must_match_all", [False, True])
def test_infer_from_series(must_match_all):
    """A Series of month/day/year strings is inferred as ``%m/%d/%Y``.

    The expected format is the same for both values of ``must_match_all``.
    """
    raw_values = [
        "12/20/2020",
        "10/17/2020",
        "08/10/2020",
        "01/22/2020",
        "09/01/2020",
    ]
    series = pd.Series(raw_values)
    inferred = _infer_from_series(series, False, must_match_all=must_match_all)
    assert inferred == "%m/%d/%Y"
7377
7478
@pytest.mark.parametrize("must_match_all", [False, True])
def test_infer_from_bad_date(must_match_all):
    """No format is inferred when none of the values looks like a date.

    The result is ``None`` for both values of ``must_match_all``.
    """
    garbage = pd.Series(["#NAME?", "1000#", "Jim", "3", "$moola"])
    inferred = _infer_from_series(garbage, False, must_match_all=must_match_all)
    assert inferred is None
7883
7984
def test_infer_from_some_bad_date():
    """With ``must_match_all=False`` one valid date among junk still yields a format."""
    mostly_garbage = pd.Series(["#NAME?", "1000#", "Jim", "3", "10/17/2020"])
    inferred = _infer_from_series(mostly_garbage, False, must_match_all=False)
    assert inferred == "%m/%d/%Y"
88+
89+
def test_infer_from_some_bad_date_with_match_all():
    """With ``must_match_all=True`` junk values alongside one date yield ``None``."""
    mostly_garbage = pd.Series(["#NAME?", "1000#", "Jim", "3", "10/17/2020"])
    inferred = _infer_from_series(mostly_garbage, False, must_match_all=True)
    assert inferred is None
93+
94+
@pytest.mark.parametrize("must_match_all", [False, True])
def test_infer_from_12_hour(must_match_all):
    """12-hour clock strings with an AM/PM marker are inferred as ``%I:%M %p``.

    The expected format is the same for both values of ``must_match_all``.
    """
    clock_times = pd.Series(["8:15 AM", "9:20 PM", "1:55 PM"])
    inferred = _infer_from_series(clock_times, False, must_match_all=must_match_all)
    assert inferred == "%I:%M %p"
99+
100+
@pytest.mark.parametrize("with_suffix", [True, False])
@pytest.mark.parametrize("must_match_all", [False, True])
def test_detect_datetimes(with_suffix, must_match_all, test_df):
    """Only the ``dates`` and ``iso`` columns of ``test_df`` are detected.

    Runs over every combination of ``with_suffix`` and ``must_match_all``
    and expects identical results for all of them.
    """
    # Based on the values in the DF, we assert the `with_suffix` flag
    # should not change any of the results
    check = detect_datetimes(
        test_df, with_suffix=with_suffix, must_match_all=must_match_all
    )
    assert set(check.column_names) == {"dates", "iso"}
    # A column that was not detected has no info entry at all.
    assert check.get_column_info("random") is None

    dates = check.get_column_info("dates")
    assert dates.name == "dates"
    assert dates.inferred_format == "%m/%d/%Y"

    iso = check.get_column_info("iso")
    assert iso.name == "iso"
    # No trailing whitespace in the expected format: it must match the
    # identical assertion made by the sibling NaN-handling test.
    assert iso.inferred_format == "%Y-%m-%dT%X.%f"
88119
89120
90121@pytest .mark .parametrize ("with_suffix" , [True , False ])
91- def test_detect_datetimes (with_suffix , test_df ):
122+ @pytest .mark .parametrize ("must_match_all" , [False , True ])
123+ def test_detect_datetimes_with_nans (with_suffix , must_match_all , test_df ):
124+ # Create a copy to prevent modification to the session-scoped fixture
125+ # object.
126+ test_df = test_df .copy ()
127+ # Blank out first row
128+ test_df .iloc [0 , :] = np .nan
129+
92130 # Based on the values in the DF, we assert the `with_suffix` flag
93131 # should not change any of the results
94- check = detect_datetimes (test_df , with_suffix = with_suffix )
132+ check = detect_datetimes (
133+ test_df , with_suffix = with_suffix , must_match_all = must_match_all
134+ )
95135 assert set (check .column_names ) == {"dates" , "iso" }
96136 assert check .get_column_info ("random" ) is None
97137
@@ -104,27 +144,41 @@ def test_detect_datetimes(with_suffix, test_df):
104144 assert iso .inferred_format == "%Y-%m-%dT%X.%f"
105145
106146
@pytest.mark.parametrize("must_match_all", [False, True])
def test_infer_with_suffix(must_match_all):
    """Trailing timezone suffixes are carried verbatim into the inferred format.

    Checks the ``Z``, ``+00:00`` and ``-00:00`` suffixes in turn, for both
    values of ``must_match_all``.
    """
    zulu_stamps = [
        "2020-12-20T00:00:00Z",
        "2020-10-17T00:00:00Z",
        "2020-08-10T00:00:00Z",
        "2020-01-22T00:00:00Z",
        "2020-09-01T00:00:00Z",
    ]
    # (suffix substituted for "Z", format expected back from inference)
    cases = [
        ("Z", "%Y-%m-%dT%XZ"),
        ("+00:00", "%Y-%m-%dT%X+00:00"),
        ("-00:00", "%Y-%m-%dT%X-00:00"),
    ]
    for suffix, expected_format in cases:
        # Replacing "Z" with "Z" is a no-op, so the first case uses the
        # stamps unchanged.
        series = pd.Series([stamp.replace("Z", suffix) for stamp in zulu_stamps])
        inferred = _infer_from_series(series, True, must_match_all=must_match_all)
        assert inferred == expected_format
122173
123174
124- def test_detect_datetimes_with_suffix (test_df ):
175+ @pytest .mark .parametrize ("must_match_all" , [False , True ])
176+ def test_detect_datetimes_with_suffix (must_match_all , test_df ):
177+ # Prevent modification of the session-scoped fixture object
178+ test_df = test_df .copy ()
125179 # Add a TZ suffix of "Z" to the iso strings
126180 test_df ["iso" ] = test_df ["iso" ].astype ("string" ).apply (lambda val : val + "Z" )
127- check = detect_datetimes (test_df , with_suffix = True )
181+ check = detect_datetimes (test_df , with_suffix = True , must_match_all = must_match_all )
128182 assert set (check .column_names ) == {"dates" , "iso" }
129183
130184 iso = check .get_column_info ("iso" )
@@ -134,7 +188,8 @@ def test_detect_datetimes_with_suffix(test_df):
134188 assert iso .inferred_format == "%Y-%m-%dT%X.%fZ"
135189
136190
137- def test_detect_datetimes_custom_formats ():
191+ @pytest .mark .parametrize ("must_match_all" , [False , True ])
192+ def test_detect_datetimes_custom_formats (must_match_all ):
138193 df = pd .DataFrame (
139194 {
140195 "str" : ["a" , "b" , "c" ],
@@ -151,7 +206,7 @@ def test_detect_datetimes_custom_formats():
151206 }
152207 )
153208
154- check = detect_datetimes (df )
209+ check = detect_datetimes (df , must_match_all = must_match_all )
155210
156211 assert set (check .column_names ) == {
157212 "dateandtime" ,
0 commit comments