Persistence 1D inc. Reconstruct1D  1.1
Finding extrema in one dimensional data, filtering them by persistence and reconstructing smooth functions
 All Classes Namespaces Files Functions Variables Macros Pages
persistence1d.hpp
Go to the documentation of this file.
1 /*! \file persistence1d.hpp
2  Actual code.
3 */
4 
5 #ifndef PERSISTENCE_H
6 #define PERSISTENCE_H
7 
8 #include <assert.h>
9 #include <algorithm>
10 #include <iostream>
11 #include <iterator>
12 #include <vector>
13 
14 #define NO_COLOR -1
15 #define RESIZE_FACTOR 20
16 #define MATLAB_INDEX_FACTOR 1
17 
18 namespace p1d
19 {
20 
21 /** Used to sort data according to its absolute value and refer to its original index in the Data vector.
22 
23  A collection of TIdxAndData is sorted according to its data value (if values are equal, according
24  to indices). The index allows access back to the vertex in the Data vector.
25 */
27 {
28  TIdxAndData():Idx(-1),Data(0){}
29 
30  bool operator<(const TIdxAndData& other) const
31  {
32  if (Data < other.Data) return true;
33  if (Data > other.Data) return false;
34  return (Idx < other.Idx);
35  }
36 
37  ///The index of the vertex within the Data vector.
38  int Idx;
39 
40  ///Vertex data value from the original Data vector sent as an argument to RunPersistence.
41  float Data;
42 };
43 
44 
45 /*! Defines a component within the data domain.
46  A component is created at a local minimum - a vertex whose value is smaller than both of its neighboring
47  vertices' values.
48 */
49 struct TComponent
50 {
51  ///A component is defined by the indices of its edges.
52  ///Both variables hold the respective indices of the vertices in Data vector.
53  ///All vertices between them are considered to belong to this component.
56 
57  ///The index of the local minimum within the component as longs as its alive.
58  int MinIndex;
59 
60  ///The value of the Data[MinIndex].
61  float MinValue; //redundant, but makes life easier
62 
63  ///Set to true when a component is created. Once components are merged,
64  ///the destroyed component Alive value is set to false.
65  ///Used to verify correctness of algorithm.
66  bool Alive;
67 };
68 
69 
70 /** A pair of matched local minimum and local maximum
71  that define a component above a certain persistence threshold.
72  The persistence value is their (absolute) data difference.
73 */
75 {
76  ///Index of local minimum, as per Data vector.
77  int MinIndex;
78 
79  ///Index of local maximum, as per Data vector.
80  int MaxIndex;
81 
82  ///The persistence of the two extrema.
83  ///Data[MaxIndex] - Data[MinIndex]
84  ///Guaranteed to be >= 0.
85  float Persistence;
86 
87  bool operator<(const TPairedExtrema& other) const
88  {
89  if (Persistence < other.Persistence) return true;
90  if (Persistence > other.Persistence) return false;
91  return (MinIndex < other.MinIndex);
92  }
93 };
94 
95 
96 
97 /*! Finds extrema and their persistence in one-dimensional data.
98 
99  Local minima and local maxima are extracted, paired,
100  and sorted according to their persistence.
101  The global minimum is extracted as well.
102 
103  We assume a connected one-dimensional domain.
104  Think of "data on a line", or a function f(x) over some domain xmin <= x <= xmax.
105 */
107 {
108 public:
110  {
111  }
112 
114  {
115  }
116 
117  /*!
118  Call this function with a vector of one dimensional data to find extrema features in the data.
119  The function runs once for, results can be retrieved with different persistent thresholds without
120  further data processing.
121 
122  Input data vector is assumed to be of legal size and legal values.
123 
124  Use PrintResults, GetPairedExtrema or GetExtremaIndices to get results of the function.
125 
126  @param[in] InputData Vector of data to find features on, ordered according to its axis.
127  */
128  bool RunPersistence(const std::vector<float>& InputData)
129  {
130  Data = InputData;
131  Init();
132 
133  //If a user runs this on an empty vector, then they should not get the results of the previous run.
134  if (Data.empty()) return false;
135 
137  Watershed();
139 #ifdef _DEBUG
141 #endif
142  return true;
143  }
144 
145 
146 
147  /*!
148  Prints the contents of the TPairedExtrema vector.
149  If called directly with a TPairedExtrema vector, the global minimum is not printed.
150 
151  @param[in] pairs Vector of pairs to be printed.
152  */
153  void PrintPairs(const std::vector<TPairedExtrema>& pairs) const
154  {
155  for (std::vector<TPairedExtrema>::const_iterator it = pairs.begin();
156  it != pairs.end(); it++)
157  {
158  std::cout << "Persistence: " << (*it).Persistence
159  << " minimum index: " << (*it).MinIndex
160  << " maximum index: " << (*it).MaxIndex
161  << std::endl;
162  }
163  }
164 
165  /*!
166  Prints the global minimum and all paired extrema whose persistence is greater or equal to threshold.
167  By default, all pairs are printed. Supports Matlab indexing.
168 
169  @param[in] threshold Threshold value for pair persistence.
170  @param[in] matlabIndexing Use Matlab indexing for printing.
171  */
172  void PrintResults(const float threshold = 0.0, const bool matlabIndexing = false) const
173  {
174  if (threshold < 0)
175  {
176  std::cout << "Error. Threshold value must be greater than or equal to 0" << std::endl;
177  }
178  if (threshold==0 && !matlabIndexing)
179  {
181  }
182  else
183  {
184  std::vector<TPairedExtrema> pairs;
185  GetPairedExtrema(pairs, threshold, matlabIndexing);
186  PrintPairs(pairs);
187  }
188 
189  std::cout << "Global minimum value: " << GetGlobalMinimumValue()
190  << " index: " << GetGlobalMinimumIndex(matlabIndexing)
191  << std::endl;
192  }
193 
194 
195  /*!
196  Use this method to get the results of RunPersistence.
197  Returned pairs are sorted according to persistence, from least to most persistent.
198 
199  @param[out] pairs Destination vector for PairedExtrema
200  @param[in] threshold Minimal persistence value of returned features. All PairedExtrema
201  with persistence equal to or above this value will be returned.
202  If left to default, all PairedMaxima will be returned.
203 
204  @param[in] matlabIndexing Set this to true to change all indices of features to Matlab's 1-indexing.
205  */
206  bool GetPairedExtrema(std::vector<TPairedExtrema> & pairs, const float threshold = 0, const bool matlabIndexing = false) const
207  {
208  //make sure the user does not use previous results that do not match the data
209  pairs.clear();
210 
211  if (PairedExtrema.empty() || threshold < 0.0) return false;
212 
213  std::vector<TPairedExtrema>::const_iterator lower_bound = FilterByPersistence(threshold);
214 
215  if (lower_bound == PairedExtrema.end()) return false;
216 
217  pairs = std::vector<TPairedExtrema>(lower_bound, PairedExtrema.end());
218 
219  if (matlabIndexing) //match matlab indices by adding one
220  {
221  for (std::vector<TPairedExtrema>::iterator p = pairs.begin(); p != pairs.end(); p++)
222  {
223  (*p).MinIndex += MATLAB_INDEX_FACTOR;
224  (*p).MaxIndex += MATLAB_INDEX_FACTOR;
225  }
226  }
227  return true;
228  }
229 
230  /*!
231  Use this method to get two vectors with all indices of PairedExterma.
232  Returns false if no paired features were found.
233  Returned vectors have the same length.
234  Overwrites any data contained in min, max vectors.
235 
236  @param[out] min Vector of indices of paired local minima.
237  @param[out] max Vector of indices of paired local maxima.
238  @param[in] threshold Return only indices for pairs whose persistence is greater than or equal to threshold.
239  @param[in] matlabIndexing Set this to true to change all indices to match Matlab's 1-indexing.
240 */
241  bool GetExtremaIndices(std::vector<int> & min, std::vector<int> & max, const float threshold = 0, const bool matlabIndexing = false) const
242  {
243  //before doing anything, make sure the user does not use old results
244  min.clear();
245  max.clear();
246 
247  if (PairedExtrema.empty() || threshold < 0.0) return false;
248 
249  min.reserve(PairedExtrema.size());
250  max.reserve(PairedExtrema.size());
251 
252  int matlabIndexFactor = 0;
253  if (matlabIndexing) matlabIndexFactor = MATLAB_INDEX_FACTOR;
254 
255  std::vector<TPairedExtrema>::const_iterator lower_bound = FilterByPersistence(threshold);
256 
257  for (std::vector<TPairedExtrema>::const_iterator p = lower_bound; p != PairedExtrema.end(); p++)
258  {
259  min.push_back((*p).MinIndex + matlabIndexFactor);
260  max.push_back((*p).MaxIndex + matlabIndexFactor);
261  }
262  return true;
263  }
264  /*!
265  Returns the index of the global minimum.
266  The global minimum does not get paired and is not returned
267  via GetPairedExtrema and GetExtremaIndices.
268  */
269  int GetGlobalMinimumIndex(const bool matlabIndexing = false) const
270  {
271  if (Components.empty()) return -1;
272 
273  assert(Components.front().Alive);
274  if (matlabIndexing)
275  {
276  return Components.front().MinIndex + 1;
277  }
278 
279  return Components.front().MinIndex;
280  }
281 
282  /*!
283  Returns the value of the global minimum.
284  The global minimum does not get paired and is not returned
285  via GetPairedExtrema and GetExtremaIndices.
286  */
287  float GetGlobalMinimumValue() const
288  {
289  if (Components.empty()) return 0;
290 
291  assert(Components.front().Alive);
292  return Components.front().MinValue;
293  }
294  /*!
295  Runs basic sanity checks on results of RunPersistence:
296  - Number of unique minima = number of unique maxima - 1 (Morse property)
297  - All returned indices are unique (no index is returned as two extrema)
298  - Global minimum is within domain indices or at default value
299  - Global minimum is not returned as any other extrema.
300  - Global minimum is not paired.
301 
302  Returns true if run results pass these sanity checks.
303  */
305  {
306  bool flag = true;
307  std::vector<int> min, max;
308  std::vector<int> combinedIndices;
309 
310  GetExtremaIndices(min, max);
311 
312  int globalMinIdx = GetGlobalMinimumIndex();
313 
314  std::sort(min.begin(), min.end());
315  std::sort(max.begin(), max.end());
316  combinedIndices.reserve(min.size() + max.size());
317  std::set_union(min.begin(), min.end(), max.begin(), max.end(), std::inserter(combinedIndices, combinedIndices.begin()));
318 
319  //check the combined unique indices are equal to size of min and max
320  if (combinedIndices.size() != (min.size() + max.size()) ||
321  std::binary_search(combinedIndices.begin(), combinedIndices.end(), globalMinIdx) == true)
322  {
323  flag = false;
324  }
325 
326  if ((globalMinIdx > (int)Data.size()-1) || (globalMinIdx < -1)) flag = false;
327  if (globalMinIdx == -1 && min.size() != 0) flag = false;
328 
329  std::vector<int>::iterator minUniqueEnd = std::unique(min.begin(), min.end());
330  std::vector<int>::iterator maxUniqueEnd = std::unique(max.begin(), max.end());
331 
332  if (minUniqueEnd != min.end() ||
333  maxUniqueEnd != max.end() ||
334  (minUniqueEnd - min.begin()) != (maxUniqueEnd - max.begin()))
335  {
336  flag = false;
337  }
338 
339  return flag;
340  }
341 
342 protected:
343  /*!
344  Contain a copy of the original input data.
345  */
346  std::vector<float> Data;
347 
348 
349  /*!
350  Contains a copy the value and index pairs of Data, sorted according to the data values.
351  */
352  std::vector<TIdxAndData> SortedData;
353 
354 
355  /*!
356  Contains the Component assignment for each vertex in Data.
357  Only edges of destroyed components are updated to the new component color.
358  The Component values in this vector are invalid at the end of the algorithm.
359  */
360  std::vector<int> Colors; //need to init to empty
361 
362 
363  /*!
364  A vector of Components.
365  The component index within the vector is used as its Colors in the Watershed function.
366  */
367  std::vector<TComponent> Components;
368 
369 
370  /*!
371  A vector of paired extrema features - always a minimum and a maximum.
372  */
373  std::vector<TPairedExtrema> PairedExtrema;
374 
375 
376  unsigned int TotalComponents; //keeps track of component vector size and newest component "color"
377  bool AliveComponentsVerified; //Index of global minimum in Data vector. This minimum is never paired.
378 
379 
380  /*!
381  Merges two components by doing the following:
382 
383  - Destroys component with smaller hub (sets Alive=false).
384  - Updates surviving component's edges to span the destroyed component's region.
385  - Updates the destroyed component's edge vertex colors to the survivor's color in Colors[].
386 
387  @param[in] firstIdx,secondIdx Indices of components to be merged. Their order does not matter.
388  */
389  void MergeComponents(const int firstIdx, const int secondIdx)
390  {
391  int survivorIdx, destroyedIdx;
392  //survivor - component whose hub is bigger
393  if (Components[firstIdx].MinValue < Components[secondIdx].MinValue)
394  {
395  survivorIdx = firstIdx;
396  destroyedIdx = secondIdx;
397  }
398  else if (Components[firstIdx].MinValue > Components[secondIdx].MinValue)
399  {
400  survivorIdx = secondIdx;
401  destroyedIdx = firstIdx;
402  }
403  else if (firstIdx < secondIdx) // Both components min values are equal, destroy component on the right
404  // This is done to fit with the left-to-right total ordering of the values
405  {
406  survivorIdx = firstIdx;
407  destroyedIdx = secondIdx;
408  }
409  else
410  {
411  survivorIdx = secondIdx;
412  destroyedIdx = firstIdx;
413  }
414 
415  //survivor and destroyed are decided, now destroy!
416  Components[destroyedIdx].Alive = false;
417 
418  //Update the color of the edges of the destroyed component to the color of the surviving component.
419  Colors[Components[destroyedIdx].RightEdgeIndex] = survivorIdx;
420  Colors[Components[destroyedIdx].LeftEdgeIndex] = survivorIdx;
421 
422  //Update the relevant edge index of surviving component, such that it contains the destroyed component's region.
423  if (Components[survivorIdx].MinIndex > Components[destroyedIdx].MinIndex) //destroyed index to the left of survivor, update left edge
424  {
425  Components[survivorIdx].LeftEdgeIndex = Components[destroyedIdx].LeftEdgeIndex;
426  }
427  else
428  {
429  Components[survivorIdx].RightEdgeIndex = Components[destroyedIdx].RightEdgeIndex;
430  }
431  }
432 
433  /*!
434  Creates a new PairedExtrema from the two indices, and adds it to PairedFeatures.
435 
436  @param[in] firstIdx, secondIdx Indices of vertices to be paired. Order does not matter.
437  */
438  void CreatePairedExtrema(const int firstIdx, const int secondIdx)
439  {
440  TPairedExtrema pair;
441 
442  //There might be a potential bug here, todo (we're checking data, not sorted data)
443  //example case: 1 1 1 1 1 1 -5 might remove if after else
444  if (Data[firstIdx] > Data[secondIdx])
445  {
446  pair.MaxIndex = firstIdx;
447  pair.MinIndex = secondIdx;
448  }
449  else if (Data[secondIdx] > Data[firstIdx])
450  {
451  pair.MaxIndex = secondIdx;
452  pair.MinIndex = firstIdx;
453  }
454  //both values are equal, choose the left one as the min
455  else if (firstIdx < secondIdx)
456  {
457  pair.MinIndex = firstIdx;
458  pair.MaxIndex = secondIdx;
459  }
460  else
461  {
462  pair.MinIndex = secondIdx;
463  pair.MaxIndex = firstIdx;
464  }
465 
466  pair.Persistence = Data[pair.MaxIndex] - Data[pair.MinIndex];
467 
468 #ifdef _DEBUG
469  assert(pair.Persistence >= 0);
470 #endif
471  if (PairedExtrema.capacity() == PairedExtrema.size())
472  {
473  PairedExtrema.reserve(PairedExtrema.size() * 2 + 1);
474  }
475 
476  PairedExtrema.push_back(pair);
477  }
478 
479 
480  // Changing the alignment of the next Doxygen comment block breaks its formatting.
481 
482  /*! Creates a new component at a local minimum.
483 
484  Neighboring vertices are assumed to have no color.
485  - Adds a new component to the components vector,
486  - Initializes its edges and minimum index to minIdx.
487  - Updates Colors[minIdx] to the component's color.
488 
489  @param[in] minIdx Index of a local minimum.
490  */
491  void CreateComponent(const int minIdx)
492  {
493  TComponent comp;
494  comp.Alive = true;
495  comp.LeftEdgeIndex = minIdx;
496  comp.RightEdgeIndex = minIdx;
497  comp.MinIndex = minIdx;
498  comp.MinValue = Data[minIdx];
499 
500  //place at the end of component vector and get the current size
501  if (Components.capacity() <= TotalComponents)
502  {
503  Components.reserve(2 * TotalComponents + 1);
504  }
505 
506  Components.push_back(comp);
507  Colors[minIdx] = TotalComponents;
508  TotalComponents++;
509  }
510 
511 
512  /*!
513  Extends the component's region by one vertex:
514 
515  - Updates the matching component's edge to dataIdx..
516  - updates Colors[dataIdx] to the component's color.
517 
518  @param[in] componentIdx Index of component (the value of a neighboring vertex in Colors[]).
519  @param[in] dataIdx Index of vertex which the component is extended to.
520  */
521  void ExtendComponent(const int componentIdx, const int dataIdx)
522  {
523 #ifdef _DEUBG
524  assert(Components[componentIdx].Alive == true)
525 #endif
526 
527  //extend to the left
528  if (dataIdx + 1 == Components[componentIdx].LeftEdgeIndex)
529  {
530  Components[componentIdx].LeftEdgeIndex = dataIdx;
531  }
532  //extend to the right
533  else if (dataIdx - 1 == Components[componentIdx].RightEdgeIndex)
534  {
535  Components[componentIdx].RightEdgeIndex = dataIdx;
536  }
537  else
538  {
539 #ifdef _DEUBG
540  std::string errorMessage = "ExtendComponent: index mismatch. Data index: ";
541  errorMessage += std::to_string((long long)dataIdx);
542  throw (errorMessage);
543 #endif
544  }
545 
546  Colors[dataIdx] = componentIdx;
547  }
548 
549 
550  /*!
551  Initializes main data structures used in class:
552  - Sets Colors[] to NO_COLOR
553  - Reserves memory for Components and PairedExtrema
554 
555  Note: SortedData is should be created before, separately, using CreateIndexValueVector()
556  */
557  void Init()
558  {
559  SortedData.clear();
560  SortedData.reserve(Data.size());
561 
562  Colors.clear();
563  Colors.resize(Data.size());
564  std::fill(Colors.begin(), Colors.end(), NO_COLOR);
565 
566  int vectorSize = (int)(Data.size()/RESIZE_FACTOR) + 1; //starting reserved size >= 1 at least
567 
568  Components.clear();
569  Components.reserve(vectorSize);
570 
571  PairedExtrema.clear();
572  PairedExtrema.reserve(vectorSize);
573 
574  TotalComponents = 0;
575  AliveComponentsVerified = false;
576  }
577 
578 
579  /*!
580  Creates SortedData vector.
581  Assumes Data is already set.
582  */
584  {
585  if (Data.size()==0) return;
586 
587  for (std::vector<float>::size_type i = 0; i != Data.size(); i++)
588  {
589  TIdxAndData dataidxpair;
590 
591  //this is going to make problems
592  dataidxpair.Data = Data[i];
593  dataidxpair.Idx = (int)i;
594 
595  SortedData.push_back(dataidxpair);
596  }
597 
598  std::sort(SortedData.begin(), SortedData.end());
599  }
600 
601 
602  /*!
603  Main algorithm - all of the work happen here.
604 
605  Use only after calling CreateIndexValueVector and Init functions.
606 
607  Iterates over each vertex in the graph according to their ordered values:
608  - Creates a segment for each local minima
609  - Extends a segment is data has only one neighboring component
610  - Merges segments and creates new PairedExtrema when a vertex has two neighboring components.
611  */
612  void Watershed()
613  {
614  if (SortedData.size()==1)
615  {
616  CreateComponent(0);
617  return;
618  }
619 
620  for (std::vector<TIdxAndData>::iterator p = SortedData.begin(); p != SortedData.end(); p++)
621  {
622  int i = (*p).Idx;
623 
624  //left most vertex - no left neighbor
625  //two options - either local minimum, or extend component
626  if (i==0)
627  {
628  if (Colors[i+1] == NO_COLOR)
629  {
630  CreateComponent(i);
631  }
632  else
633  {
634  ExtendComponent(Colors[i+1], i); //in this case, local max as well
635  }
636 
637  continue;
638  }
639  else if (i == Colors.size()-1) //right most vertex - look only to the left
640  {
641  if (Colors[i-1] == NO_COLOR)
642  {
643  CreateComponent(i);
644  }
645  else
646  {
647  ExtendComponent(Colors[i-1], i);
648  }
649  continue;
650  }
651 
652  //look left and right
653  if (Colors[i-1] == NO_COLOR && Colors[i+1] == NO_COLOR) //local minimum - create new component
654  {
655  CreateComponent(i);
656  }
657  else if (Colors[i-1] != NO_COLOR && Colors[i+1] == NO_COLOR) //single neighbor on the left - extnd
658  {
659  ExtendComponent(Colors[i-1], i);
660  }
661  else if (Colors[i-1] == NO_COLOR && Colors[i+1] != NO_COLOR) //single component on the right - extend
662  {
663  ExtendComponent(Colors[i+1], i);
664  }
665  else if (Colors[i-1] != NO_COLOR && Colors[i+1] != NO_COLOR) //local maximum - merge components
666  {
667  int leftComp, rightComp;
668 
669  leftComp = Colors[i-1];
670  rightComp = Colors[i+1];
671 
672  //choose component with smaller hub destroyed component
673  if (Components[rightComp].MinValue < Components[leftComp].MinValue) //left component has smaller hub
674  {
675  CreatePairedExtrema(Components[leftComp].MinIndex, i);
676  }
677  else //either right component has smaller hub, or hubs are equal - destroy right component.
678  {
679  CreatePairedExtrema(Components[rightComp].MinIndex, i);
680  }
681 
682  MergeComponents(leftComp, rightComp);
683  Colors[i] = Colors[i-1]; //color should be correct at both sides at this point
684  }
685  }
686  }
687 
688 
689  /*!
690  Sorts the PairedExtrema list according to the persistence of the features.
691  Orders features with equal persistence according the the index of their minima.
692  */
694  {
695  std::sort(PairedExtrema.begin(), PairedExtrema.end());
696  }
697 
698 
699  /*!
700  Returns an iterator to the first element in PairedExtrema whose persistence is bigger or equal to threshold.
701  If threshold is set to 0, returns an iterator to the first object in PairedExtrema.
702 
703  @param[in] threshold Minimum persistence of features to be returned.
704  */
705  std::vector<TPairedExtrema>::const_iterator FilterByPersistence(const float threshold = 0) const
706  {
707  if (threshold == 0 || threshold < 0) return PairedExtrema.begin();
708 
709  TPairedExtrema searchPair;
710  searchPair.Persistence = threshold;
711  searchPair.MaxIndex = 0;
712  searchPair.MinIndex = 0;
713  return(lower_bound(PairedExtrema.begin(), PairedExtrema.end(), searchPair));
714  }
715  /*!
716  Runs at the end of RunPersistence, after Watershed.
717  Algorithm results should be as followed:
718  - All but one components should not be Alive.
719  - The Alive component contains the global minimum.
720  - The Alive component should be the first component in the Component vector
721  */
723  {
724  //verify that the Alive component is component #0 (contains global minimum by definition)
725  if ((*Components.begin()).Alive != true)
726  {
727 
728 #ifndef _DEBUG
729  return false;
730 #endif
731 #ifdef _DEBUG
732  throw "Error. Component 0 is not Alive, assumed to contain global minimum";
733 #endif
734  }
735 
736  for (std::vector<TComponent>::const_iterator it = Components.begin()+1; it != Components.end(); it++)
737  {
738  if ((*it).Alive == true)
739  {
740 
741 #ifndef _DEBUG
742  return false;
743 #endif
744 #ifdef _DEBUG
745  throw "Error. Found more than one alive component";
746 #endif
747  }
748  }
749 
750  return true;
751  }
752 };
753 }
754 #endif