@@ -253,7 +253,7 @@ def _calendar_month_middles(year):
253
253
"""list of middle day of each month, used by Linke turbidity lookup"""
254
254
# remove mdays[0] since January starts at mdays[1]
255
255
# make local copy of mdays since we need to change February for leap years
256
- mdays = np .array (calendar .mdays [1 :])
256
+ mdays = np .array (calendar .mdays [1 :])
257
257
ydays = 365
258
258
# handle leap years
259
259
if calendar .isleap (year ):
@@ -533,3 +533,190 @@ def _calc_d(w, aod700, p):
533
533
d = - 0.337 * aod700 ** 2 + 0.63 * aod700 + 0.116 + dp * np .log (p / p0 )
534
534
535
535
return d
536
+
537
+
538
+ def detect_clearsky (measured , clearsky , times , window_length ,
539
+ mean_diff = 75 , max_diff = 75 ,
540
+ lower_line_length = - 5 , upper_line_length = 10 ,
541
+ var_diff = 0.005 , slope_dev = 8 , max_iterations = 20 ,
542
+ return_components = False ):
543
+ """
544
+ Detects clear sky times according to the algorithm developed by Reno
545
+ and Hansen for GHI measurements [1]. The algorithm was designed and
546
+ validated for analyzing GHI time series only. Users may attempt to
547
+ apply it to other types of time series data using different filter
548
+ settings, but should be skeptical of the results.
549
+
550
+ The algorithm detects clear sky times by comparing statistics for a
551
+ measured time series and an expected clearsky time series.
552
+ Statistics are calculated using a sliding time window (e.g., 10
553
+ minutes). An iterative algorithm identifies clear periods, uses the
554
+ identified periods to estimate bias in the clearsky data, scales the
555
+ clearsky data and repeats.
556
+
557
+ Clear times are identified by meeting 5 criteria. Default values for
558
+ these thresholds are appropriate for 10 minute windows of 1 minute
559
+ GHI data.
560
+
561
+ Parameters
562
+ ----------
563
+ measured : array or Series
564
+ Time series of measured values.
565
+ clearsky : array or Series
566
+ Time series of the expected clearsky values.
567
+ times : DatetimeIndex
568
+ Times of measured and clearsky values.
569
+ window_length : int
570
+ Length of sliding time window in minutes. Must be greater than 2
571
+ periods.
572
+ mean_diff : float
573
+ Threshold value for agreement between mean values of measured
574
+ and clearsky in each interval, see Eq. 6 in [1].
575
+ max_diff : float
576
+ Threshold value for agreement between maxima of measured and
577
+ clearsky values in each interval, see Eq. 7 in [1].
578
+ lower_line_length : float
579
+ Lower limit of line length criterion from Eq. 8 in [1].
580
+ Criterion satisfied when
581
+ lower_line_length < line length difference < upper_line_length
582
+ upper_line_length : float
583
+ Upper limit of line length criterion from Eq. 8 in [1].
584
+ var_diff : float
585
+ Threshold value in Hz for the agreement between normalized
586
+ standard deviations of rate of change in irradiance, see Eqs. 9
587
+ through 11 in [1].
588
+ slope_dev : float
589
+ Threshold value for agreement between the largest magnitude of
590
+ change in successive values, see Eqs. 12 through 14 in [1].
591
+ max_iterations : int
592
+ Maximum number of times to apply a different scaling factor to
593
+ the clearsky and redetermine clear_samples. Must be 1 or larger.
594
+ return_components : bool
595
+ Controls if additional output should be returned. See below.
596
+
597
+ Returns
598
+ -------
599
+ clear_samples : array or Series
600
+ Boolean array or Series of whether or not the given time is
601
+ clear. Return type is the same as the input type.
602
+
603
+ components : OrderedDict, optional
604
+ Dict of arrays of whether or not the given time window is clear
605
+ for each condition. Only provided if return_components is True.
606
+
607
+ alpha : scalar, optional
608
+ Scaling factor applied to the clearsky_ghi to obtain the
609
+ detected clear_samples. Only provided if return_components is
610
+ True.
611
+
612
+ References
613
+ ----------
614
+ [1] Reno, M.J. and C.W. Hansen, "Identification of periods of clear
615
+ sky irradiance in time series of GHI measurements" Renewable Energy,
616
+ v90, p. 520-531, 2016.
617
+
618
+ Notes
619
+ -----
620
+ Initial implementation in MATLAB by Matthew Reno. Modifications for
621
+ computational efficiency by Joshua Patrick and Curtis Martin. Ported
622
+ to Python by Will Holmgren, Tony Lorenzo, and Cliff Hansen.
623
+
624
+ Differences from MATLAB version:
625
+
626
+ * no support for unequal times
627
+ * automatically determines sample_interval
628
+ * requires a reference clear sky series instead calculating one
629
+ from a user supplied location and UTCoffset
630
+ * parameters are controllable via keyword arguments
631
+ * option to return individual test components and clearsky scaling
632
+ parameter
633
+ """
634
+
635
+ # calculate deltas in units of minutes (matches input window_length units)
636
+ deltas = np .diff (times ) / np .timedelta64 (1 , '60s' )
637
+
638
+ # determine the unique deltas and if we can proceed
639
+ unique_deltas = np .unique (deltas )
640
+ if len (unique_deltas ) == 1 :
641
+ sample_interval = unique_deltas [0 ]
642
+ else :
643
+ raise NotImplementedError ('algorithm does not yet support unequal ' \
644
+ 'times. consider resampling your data.' )
645
+
646
+ samples_per_window = int (window_length / sample_interval )
647
+
648
+ # generate matrix of integers for creating windows with indexing
649
+ from scipy .linalg import hankel
650
+ H = hankel (np .arange (samples_per_window ),
651
+ np .arange (samples_per_window - 1 , len (times )))
652
+
653
+ # calculate measurement statistics
654
+ meas_mean = np .mean (measured [H ], axis = 0 )
655
+ meas_max = np .max (measured [H ], axis = 0 )
656
+ meas_slope = np .diff (measured [H ], n = 1 , axis = 0 )
657
+ # matlab std function normalizes by N-1, so set ddof=1 here
658
+ meas_slope_nstd = np .std (meas_slope , axis = 0 , ddof = 1 ) / meas_mean
659
+ meas_slope_max = np .max (np .abs (meas_slope ), axis = 0 )
660
+ meas_line_length = np .sum (np .sqrt (
661
+ meas_slope * meas_slope + sample_interval * sample_interval ), axis = 0 )
662
+
663
+ # calculate clear sky statistics
664
+ clear_mean = np .mean (clearsky [H ], axis = 0 )
665
+ clear_max = np .max (clearsky [H ], axis = 0 )
666
+ clear_slope = np .diff (clearsky [H ], n = 1 , axis = 0 )
667
+ clear_slope_max = np .max (np .abs (clear_slope ), axis = 0 )
668
+
669
+ from scipy .optimize import minimize_scalar
670
+
671
+ alpha = 1
672
+ for iteration in range (max_iterations ):
673
+ clear_line_length = np .sum (np .sqrt (
674
+ alpha * alpha * clear_slope * clear_slope +
675
+ sample_interval * sample_interval ), axis = 0 )
676
+
677
+ line_diff = meas_line_length - clear_line_length
678
+
679
+ # evaluate comparison criteria
680
+ c1 = np .abs (meas_mean - alpha * clear_mean ) < mean_diff
681
+ c2 = np .abs (meas_max - alpha * clear_max ) < max_diff
682
+ c3 = (line_diff > lower_line_length ) & (line_diff < upper_line_length )
683
+ c4 = meas_slope_nstd < var_diff
684
+ c5 = (meas_slope_max - alpha * clear_slope_max ) < slope_dev
685
+ c6 = (clear_mean != 0 ) & ~ np .isnan (clear_mean )
686
+ clear_windows = c1 & c2 & c3 & c4 & c5 & c6
687
+
688
+ # create array to return
689
+ clear_samples = np .full_like (measured , False , dtype = 'bool' )
690
+ # find the samples contained in any window classified as clear
691
+ clear_samples [np .unique (H [:, clear_windows ])] = True
692
+
693
+ # find a new alpha
694
+ previous_alpha = alpha
695
+ clear_meas = measured [clear_samples ]
696
+ clear_clear = clearsky [clear_samples ]
697
+ def rmse (alpha ):
698
+ return np .sqrt (np .mean ((clear_meas - alpha * clear_clear )** 2 ))
699
+ alpha = minimize_scalar (rmse ).x
700
+ if round (alpha * 10000 ) == round (previous_alpha * 10000 ):
701
+ break
702
+ else :
703
+ import warnings
704
+ warnings .warn ('failed to converge after %s iterations' \
705
+ % max_iterations , RuntimeWarning )
706
+
707
+ # be polite about returning the same type as was input
708
+ if isinstance (measured , pd .Series ):
709
+ clear_samples = pd .Series (clear_samples , index = times )
710
+
711
+ if return_components :
712
+ components = OrderedDict ()
713
+ components ['mean_diff' ] = c1
714
+ components ['max_diff' ] = c2
715
+ components ['line_length' ] = c3
716
+ components ['slope_nstd' ] = c4
717
+ components ['slope_max' ] = c5
718
+ components ['mean_nan' ] = c6
719
+ components ['windows' ] = clear_windows
720
+ return clear_samples , components , alpha
721
+ else :
722
+ return clear_samples
0 commit comments