Changed the plot(object,diagnostic=TRUE) for cpt.range objects

Added in the upper bound on the pen.value.full for CROPS output
rkillick · Oct 14, 2024 · b053862 · b053862
1 parent 9f80bae
commit b053862
Show file tree

Hide file tree

Showing 4 changed files with 16 additions and 13 deletions.
diff --git a/NEWS b/NEWS
@@ -5,6 +5,8 @@ Version 2.3
 * Added some generics for cpt.range that were missing before; nseg, seg.len.  Previously these inherited from cpt, but now these can be called on the @cpts slot (default) or optionally with a ncpts argument for the specified number of changepoints.
 * Moved the fit functions outside of the param functions.  Previously these were unnecessarily repeated across cpt, cpt.reg and cpt.range classes.  This necessitated the adding of the generics that were missing for cpt.range so it worked seamlessly across classes.
 * Repeated code to calculate the cpts from a ncpts argument in cpt.range methods was removed.
+* The diagnostic plot for cpt.range objects (plot(object,diagnostic=TRUE)) has been changed to be more informative.  The axes have been swapped and instead of a line graph, a stepped graph is used which better reflects that you can't get fractional numbers of changepoints (type="s").  This can be overridden with another type if preferred.  Reworded the man file for plot-methods to reflect this change.
+* Added the upper bound on the tested penalty values to the pen.value.full slot for CROPS output.  Previously this was automatically removed and so you needed the original function call to know the upper bound that was tested.
 
 Version 2.2.5
 =============

diff --git a/R/class_input.R b/R/class_input.R
@@ -4,11 +4,11 @@ class_input <- function(data, cpttype, method, test.stat, penalty, pen.value, mi
   }else{
     ans=new("cpt")
   }
-  
+
   data.set(ans)=data;cpttype(ans)=cpttype;method(ans)=method; test.stat(ans)=test.stat;pen.type(ans)=penalty;pen.value(ans)=pen.value;minseglen(ans)=minseglen;ans@date=date();
   if(penalty!="CROPS"){ # crops is only one that doesn't give a single set of cpts
     cpts(ans)=out[[2]]
-    
+
     if(param.estimates==TRUE){
       if(test.stat == "Gamma"){
       ans=param(ans, shape)
@@ -17,7 +17,7 @@ class_input <- function(data, cpttype, method, test.stat, penalty, pen.value, mi
       }
     }
   }
-  
+
   if(method=="PELT"){
       ncpts.max(ans)=Inf
   }
@@ -27,26 +27,26 @@ class_input <- function(data, cpttype, method, test.stat, penalty, pen.value, mi
   else{
     ncpts.max(ans)=Q
   }
-  
+
   if(method=="BinSeg"){
     l=list()
     for(i in 1:(length(out$cps)/2)){
-      l[[i]] = out$cps[1,1:i] 
+      l[[i]] = out$cps[1,1:i]
     }
     m = t(sapply(l, '[', 1:max(sapply(l, length))))
-    
+
     cpts.full(ans)=m
     pen.value.full(ans)=out$cps[2,]
   }else if(method=="SegNeigh"){
     cpts.full(ans)=out$cps[-1,]
     pen.value.full(ans)=-diff(out$like.Q)
   }else if(penalty=="CROPS"){
     m = t(sapply(out[[2]], '[', 1:max(sapply(out[[2]], length))))
-    
+
     cpts.full(ans) = m
-    pen.value.full(ans) = out[[1]][1,]
+    pen.value.full(ans) = c(out[[1]][1,],pen.value[2]) # add in the final penalty in the range as this is removed as a duplicate set of changepoints
     if(test.stat=="Gamma"){param.est(ans)$shape=shape}
   }
-  
+
   return(ans)
 }
diff --git a/R/cpt.class.R b/R/cpt.class.R
@@ -668,12 +668,13 @@ setClass("cpt",slots=list(data.set="ts", cpttype="character", method="character"
 	setMethod("plot","cpt.range",function(x,ncpts=NA,diagnostic=FALSE,cpt.col='red',cpt.width=1,cpt.style=1,...){
 	  if(diagnostic==TRUE){
 	    n.changepoints = apply(cpts.full(x), 1, function(x) sum(x > 0, na.rm = TRUE))
+	    n.changepoints=c(n.changepoints,n.changepoints[length(n.changepoints)]) # repeat the last value as this is also the number of changes for the upper pen.value tested
 	    penalty.values = pen.value.full(x)
 	    if (is.null(list(...)$type)) {
 	      # By default, the diagnostic plot is drawn as a step function (type = "s").
-	      plot(x = n.changepoints, y = penalty.values, xlab = 'Number of Changepoints', ylab = 'Penalty Value', type = "l", ...)
+	      plot(x = penalty.values, y = n.changepoints, type="s",ylab = 'Number of Changepoints', xlab = 'Penalty Value', ...)
 	    } else {
-	      plot(x = n.changepoints, y = penalty.values, xlab = 'Number of Changepoints', ylab = 'Penalty Value', ...)
+	      plot(x = penalty.values, y = n.changepoints,ylab = 'Number of Changepoints', xlab = 'Penalty Value', ...)
 	    }
 	    return(invisible(NULL))
 	  }

diff --git a/man/plot-methods.Rd b/man/plot-methods.Rd
@@ -20,7 +20,7 @@
 	Plots the data and identifies the changepoints using vertical lines (change in variance), horizontal lines (change in mean).  Optional arguments to control the lines: \code{cpt.col} equivalent to \code{col} to change the colour of the changepoint line; \code{cpt.width} equivalent to \code{lwd} to change the width of the changepoint line; \code{cpt.style} equivalent to \code{lty} to change the style of the line.
 }
 \item{\code{signature(x = "cpt.range")}}{
-	As for the \code{cpt} objects except for two optional arguments, \code{ncpts} and \code{diagnostic}.  The \code{ncpts} option allows you to specify a plot of the segmentation with \code{ncpts} changepoints in, i.e. the optimal may be specified as 10 changes but you want to plot the segmentation with 5 changes (provided a segmentation with 5 changes is listed in \code{cpts.full(x)}.  The \code{diagnostic} option when set to \code{TRUE} plots the number of changepoints in each segmentation against the change in test statistic when adding that change.  This can aide the decision on the number of changepoints as when a true changepoint is added the cost increases/decreases rapidly, but when a changepoint due to noise is added the change is small.  This is akin to a scree plot in principal component analysis.  The idea is that someone may choose to create a plot using \code{diagnostic=TRUE}, identify the appropriate number of changes and then replot using \code{ncpts} to visualize that segmentation.
+	As for the \code{cpt} objects except for two optional arguments, \code{ncpts} and \code{diagnostic}.  The \code{ncpts} option allows you to specify a plot of the segmentation with \code{ncpts} changepoints in, i.e. the optimal may be specified as 10 changes but you want to plot the segmentation with 5 changes (provided a segmentation with 5 changes is listed in \code{cpts.full(x)}).  The \code{diagnostic} option when set to \code{TRUE} plots the number of changepoints in each segmentation against the penalty values that give that number of changepoints.  This can aid the decision on the number of changepoints as when a true changepoint is added the cost decreases considerably so it creates a stable region where several penalty values give the same number of changepoints, but when a changepoint due to noise is added the change in cost is small and so a small change in penalty value can vary the number of changes a lot.  This is akin to a scree plot in principal component analysis.  The idea is that someone may choose to create a plot using \code{diagnostic=TRUE}, identify the appropriate number of changes and then replot using \code{ncpts} to visualize that segmentation.
 }
 \item{\code{signature(x = "cpt.reg")}}{
 	Plotting is only valid for one regressor.  Plots the regressor against the response and identifies the changepoints using horizontal lines.  Optional arguments to control the lines: \code{cpt.col} equivalent to \code{col} to change the colour of the changepoint line; \code{cpt.width} equivalent to \code{lwd} to change the width of the changepoint line; \code{cpt.style} equivalent to \code{lty} to change the style of the line.
@@ -30,4 +30,4 @@
 \keyword{methods}
 \keyword{plot}
 \keyword{cpt}
-\keyword{internal} 
+\keyword{internal}