Finding Peaks

It didn't compile

No tests

Inconsistent indenting and formatting

Useless comments and commented-out code

Non-descriptive naming

Loooong functions

Poor structure

Cumbersome UI

It didn't compile

No tests

Inconsistent indenting and formatting

Useless comments and commented-out code

Non-descriptive naming

Loooong functions

Poor structure

Cumbersome UI

read_data()

print_data()

rainflow_engine()

a

y

calculate()

void Rainflow::calculate(void) {

vector<float> a; double slope1; double slope2; ymax = 0.; nkv = 0; k = 0; // a[k]=y[k]; a.push_back(y[k]); k = 1; for (i = 1; i < (y.size() - 1); i++) { slope1 = (y[i] - y[i - 1]); slope2 = (y[i + 1] - y[i]); if ((slope1 * slope2) <= 0. && fabs(slope1) > 0.) { a.push_back(y[i]); k++; } } a.push_back(y.back()); k++;

a

for

a

a

a

peaks

ymax

nkv

k

peaks

peaks

k

peaks.size()

i

i

print_data()

void Rainflow::find_peaks(vector<float> *peaks) { peaks->push_back(y[0]); unsigned i; for (i = 1; i < (y.size() - 1); i++) { double slope1 = (y[i] - y[i - 1]); double slope2 = (y[i + 1] - y[i]); if ((slope1 * slope2) <= 0. && fabs(slope1) > 0.) { peaks->push_back(y[i]); } } peaks->push_back(y.back()); }

calculate()

last_a = peaks.size() - 1; hold = last_a; long WIDTH = 4; long HEIGHT = 0; B.resize(HEIGHT); unsigned i; for (i = 0; i < HEIGHT; i++) { B[i].resize(WIDTH); } // printf(" H=%ld W=%ld ",HEIGHT,WIDTH); // getch();

last_a

hold

hold

HEIGHT

mina = 100000; maxa = -mina; unsigned i; for (i = 0; i < peaks.size(); i++) { if (peaks[i] < mina) { mina = peaks[i]; } if (peaks[i] > maxa) { maxa = peaks[i]; } // fprintf(pFile[5]," %8.4g

",a[i]); } num = long(maxa - mina) + 1;

n = 0; unsigned i = 0; j = 1; sum = 0; kv = 0; long LLL = peaks.size(); std::vector<float> row(4); printf("

percent completed

"); ymax = 0.; nkv = 0; while (1) { // ... bulk of code omitted for now if ((j + 1) >= peaks.size()) { break; } }

n

j

i + 1

i + 1

total_cycles

calculate()

calculate()

total_cycles

kv

size()

LLL

nkv

LLL

row

ymax

row

ymax

max_cycle_amplitude

Finding Cycles

while ((i + 2) < peaks.size()) { Y = (fabs(peaks[i] - peaks[i + 1])); X = (fabs(peaks[i + 1] - peaks[i + 2])); if (X >= Y && Y > 0 && Y < 1.0e+20) { if (Y > max_cycle_amplitude) { max_cycle_amplitude = Y; } if (i == 0) { total_cycles += 0.5; row[3] = peaks[i + 1]; row[2] = peaks[i]; row[1] = 0.5; row[0] = Y; B.push_back(row); // printf("1 %8.4g %8.4g %8.4g %8.4g //

",B[kv][0],B[kv][1],B[kv][2],B[kv][3]); peaks.erase(peaks.begin()); i = 0; } else { total_cycles += 1; row[3] = peaks[i + 1]; row[2] = peaks[i]; row[1] = 1.; row[0] = Y; B.push_back(row); // printf("2 %8.4g %8.4g %8.4g %8.4g //

",B[kv][0],B[kv][1],B[kv][2],B[kv][3]); peaks.erase(peaks.begin() + (i + 1)); peaks.erase(peaks.begin() + i); i = 0; } } else { i++; } }

total_cycles

max_cycle_amplitude

float max_cycle_amplitude = 0.; float total_cycles = 0.; for (i = 0; i < B.size(); i++) { Y = B[i][0]; if (Y > max_cycle_amplitude) { max_cycle_amplitude = Y; } total_cycles += B[i][1]; }

B

cycles

Y

X

delta1

delta2

delta1

amplitude

row[3] = peaks[i + 1]; row[2] = peaks[i]; row[1] = (i == 0) ? 0.5 : 1.0; row[0] = delta1; cycles.push_back(row); peaks.erase(peaks.begin() + i); if (i != 0) { peaks.erase(peaks.begin() + i); } i = 0;

row

struct

typedef struct Cycle { float peak1; float peak2; float size; float amplitude; } Cycle;

struct

row

find_cycles()

void Rainflow::find_cycles(vector<float> &peaks, vector<cycle> * cycles) { unsigned i = 0; Cycle cycle; while ((i + 2) < peaks.size()) { float delta1 = (fabs(peaks[i] - peaks[i + 1])); float delta2 = (fabs(peaks[i + 1] - peaks[i + 2])); if (delta2 >= delta1 && delta1 > 0 && delta1 < 1.0e+20) { cycle.peak1 = peaks[i]; cycle.peak2 = peaks[i + 1]; cycle.size = (i == 0) ? 0.5 : 1.0; cycle.amplitude = delta1; cycles->push_back(cycle); peaks.erase(peaks.begin() + i); if (i != 0) { peaks.erase(peaks.begin() + i); } i = 0; } else { i++; } } for (i = 0; i < peaks.size(); i++) { float delta = (fabs(peaks[i] - peaks[i + 1])); if (delta > 0. && delta < 1.0e+20) { cycle.peak1 = peaks[i]; cycle.peak2 = peaks[i + 1]; cycle.size = 0.5; cycle.amplitude = delta; cycles->push_back(cycle); } } }

calculate()

Calculating Cycle Statistics

float amplitude = cycles[i].amplitude; if (amplitude > max_cycle_amplitude) { max_cycle_amplitude = amplitude; }

max_cycle_amplitude = max(max_cycle_amplitude, cycles[i].amplitude);

L[1] = 0; L[2] = 2.5; L[3] = 5; L[4] = 10; L[5] = 15; L[6] = 20; L[7] = 30; L[8] = 40; L[9] = 50; L[10] = 60; L[11] = 70; L[12] = 80; L[13] = 90; L[14] = 100; for (ijk = 1; ijk <= 14; ijk++) { L[ijk] *= max_cycle_amplitude / 100.; C[ijk] = 0.; AverageMean[ijk] = 0.; MaxPeak[ijk] = -1.0e+20; MinValley[ijk] = 1.0e+20; MaxAmp[ijk] = -1.0e+20; AverageAmp[ijk] = 1.0e+20; // printf(" L[%ld]=%g max_cycle_amplitude=%8.4g

",ijk,L[ijk],max_cycle_amplitude); } // getch(); for (ijk = 13; ijk >= 0; ijk--) { AverageAmp[ijk] = 0.; }

L

_bins

C

_cycle_counts

_average_means

_max_peaks

_min_valleys

_max_amps

_average_amps

ijk

i

_bins

L

double bin_percentages[] = {0, 2.5, 5, 10, 15, 20, 30, 40, 50, 60, 70, 80, 90, 100}; size_t bin_count = sizeof(bin_percentages)/sizeof(bin_percentages[0]); _bins.resize(bin_count); for (i = 0; i < 14; i++) { _bins[i] = bin_percentages[i] * max_cycle_amplitude / 100.; } _cycle_counts.assign(bin_count, 0.); _average_means.assign(bin_count, 0.); _max_peaks.assign(bin_count, -1.0e+20); _min_valleys.assign(bin_count, 1.0e+20); _max_amps.assign(bin_count, -1.0e+20); _average_amps.assign(bin_count, 0.);

calculate()

for (i = 0; i < cycles.size(); i++) { float amplitude = cycles[i].amplitude; // printf(" %ld %ld %10.4e \t %3.1f

",i,kv,delta1,cycles[i][1]); // printf("i=%d delta1=%g

",i,Y); for (ijk = 12; ijk >= 0; ijk--) { // printf(" %8.4g %8.4g %8.4g

",delta1,L[ijk],L[ijk+1]); if (amplitude >= _bins[ijk] && amplitude <= _bins[ijk + 1]) { _cycle_counts[ijk] = _cycle_counts[ijk] + cycles[i].size; _average_means[ijk] += cycles[i].size * (cycles[i].peak1 + cycles[i].peak2) * 0.5; // weighted average if (cycles[i].peak1 > _max_peaks[ijk]) { _max_peaks[ijk] = cycles[i].peak1; } if (cycles[i].peak2 > _max_peaks[ijk]) { _max_peaks[ijk] = cycles[i].peak2; } if (cycles[i].peak1 < _min_valleys[ijk]) { _min_valleys[ijk] = cycles[i].peak1; } if (cycles[i].peak2 < _min_valleys[ijk]) { _min_valleys[ijk] = cycles[i].peak2; } if (amplitude > _max_amps[ijk]) { _max_amps[ijk] = amplitude; } _average_amps[ijk] += cycles[i].size * amplitude * 0.5; break; } } } for (ijk = 0; ijk < 13; ijk++) { if (_cycle_counts[ijk] > 0) { _average_means[ijk] /= _cycle_counts[ijk]; _average_amps[ijk] /= _cycle_counts[ijk]; } _max_amps[ijk] /= 2.; if (_cycle_counts[ijk] < 0.5) { _average_amps[ijk] = 0.; _max_amps[ijk] = 0.; _average_means[ijk] = 0.; _min_valleys[ijk] = 0.; _max_peaks[ijk] = 0.; } // printf(" %8.4g %8.4g %8.4g %8.4g %8.4g //

",_average_amps[ijk],_max_amps[ijk],_average_means[ijk],_min_valleys[ijk],_max_peaks[ijk]); } printf("



Total Cycles = %g NP=%ld max_cycle_amplitude=%g

", total_cycles, y.size(), max_cycle_amplitude);

ijk

i

j

_max_peaks[j] = max( max(cycles[i].peak1, cycles[i].peak2), _max_peaks[j] );

max()

min()

void Rainflow::calculate_statistics(vector<cycle> const &cycles) { unsigned i = 0; float max_cycle_amplitude = 0.; float total_cycles = 0.; for (i = 0; i < cycles.size(); i++) { max_cycle_amplitude = max(max_cycle_amplitude, cycles[i].amplitude); total_cycles += cycles[i].size; } double bin_percentages[] = {0, 2.5, 5, 10, 15, 20, 30, 40, 50, 60, 70, 80, 90, 100}; size_t bin_count = sizeof(bin_percentages)/sizeof(bin_percentages[0]); _bins.resize(bin_count); for (i = 0; i < 14; i++) { _bins[i] = bin_percentages[i] * max_cycle_amplitude / 100.; } _cycle_counts.assign(bin_count, 0.); _average_means.assign(bin_count, 0.); _max_peaks.assign(bin_count, -1.0e+20); _min_valleys.assign(bin_count, 1.0e+20); _max_amps.assign(bin_count, -1.0e+20); _average_amps.assign(bin_count, 0.); for (i = 0; i < cycles.size(); i++) { float amplitude = cycles[i].amplitude; for (unsigned j = 0; j < 13; j++) { if (amplitude >= _bins[j] && amplitude <= _bins[j + 1]) { _cycle_counts[j] += cycles[i].size; _average_means[j] += cycles[i].size * (cycles[i].peak1 + cycles[i].peak2) * 0.5; // weighted average _max_peaks[j] = max( max(cycles[i].peak1, cycles[i].peak2), _max_peaks[j] ); _min_valleys[j] = min( min(cycles[i].peak1, cycles[i].peak2), _min_valleys[j] ); _max_amps[j] = max(_max_amps[j], amplitude); _average_amps[j] += cycles[i].size * amplitude * 0.5; break; } } } for (i = 0; i < 13; i++) { if (_cycle_counts[i] > 0) { _average_means[i] /= _cycle_counts[i]; _average_amps[i] /= _cycle_counts[i]; } _max_amps[i] /= 2.; if (_cycle_counts[i] < 0.5) { _average_amps[i] = 0.; _max_amps[i] = 0.; _average_means[i] = 0.; _min_valleys[i] = 0.; _max_peaks[i] = 0.; } } printf("



Total Cycles = %g NP=%ld max_cycle_amplitude=%g

", total_cycles, y.size(), max_cycle_amplitude); }

calculate()

void Rainflow::calculate(void) { vector<float> peaks; find_peaks(&peaks); vector<Cycle> cycles; find_cycles(peaks, &cycles); calculate_statistics(cycles); }

calculate()

Odds and Ends

read_data()

print_data()

fp

pFile

y

_points

class Rainflow { private: vector _bins; vector _points; vector _cycle_counts; vector _average_means; vector _max_peaks; vector _min_valleys; vector _max_amps; vector _average_amps;

To quickly recap, we're refactoring some code I found that implements a rainflow counting algorithm. I used this code as a model to implement a real time algorithm for use in a data acquisition system, but before I could do that effectively, I needed to basically rewrite the code so that I could understand it better. I started off by making some tests and running it through an auto-formatter. Then I improved the UI so that I could run the model more quickly. Now it's time to work through the main part of the algorithm and make it more clear and understandable. Once again, you can view all of the commits at my GitHub repo to see side-by-side diffs of the changes to the code.When I first introduced the code, I identified these problem areas:The first two posts took care of about half of them, so we're left with these:Each of the remaining issues have been partially addressed as well, but only in theandmethods that have already been refactored. It's time to finish the job in the core function of this program that does all of the calculations on the data,. This function is huge, at over 260 LOC, and that alone makes it difficult to comprehend. On top of that, a bunch of essentially global variables are used with super short names likeand. We'll fix these issues along with a number of other weaknesses in the code.For starters, let's rename the method. Currently it's a noun, but method names should really be verbs. It should describe what the method is doing. Plus, it's kind of redundant with the name of the class. Since this is the core method that calculates the result, I'm going to go withNow, because this method is so long and changes need to be done to declarations as well as definitions and usages, I'm not going to show all code changes here. It's too much, and I don't want to bore you with tedious detail. I'll try to stick to the highlights. The first chunk of code in this method is:Let's walk through this a bit. The first variable declared is called. What does that mean? Looking down in theloop, we see thatis getting a list of points that meet a certain criteria. That criteria specifies that the slopes on either side of the point must be opposite signs, and the first slope must not be zero. That meansis a list of peaks with the intervening points removed. Let's renametoto make that more clear.Next, the slope variables are declared, but they're only used inside the for loop. Let's move the declarations inside the loop to remove clutter. The next line that assignsis not necessary until later in the method, so we'll move it down and deal with it later. The same is true of theassignment. Both lines move down to immediately before the first while loop.Next is. This variable is working way too hard. It's keeping track of the size of, but theobject already does that. Plus,is a member of the class, but it's only used in this chunk of code! We need to get rid of it, and useinstead. We can also get rid of the commented out code.The for loop looks okay after removing the superfluous variable, but notice the loop counter. Where is that declared? As a class member, of course. Let's move that declaration inside this method and fix up the other occurrence ofinas well.Once all of these changes have been made, this section of calculate() does one thing. It finds the peaks in the data set. We can make this more clear by pulling out the code into a separate method:Before moving on, this is a good place to test and commit these changes. Now we can get back to the next chunk of code inOnce again, we have variables tracking things that are already tracked in objects. Bothandcan be removed, and sincewas only used to print part of the summary to the console—a part that's redundant anyway—we can remove that part of the print statement, too. Then, the rest of this code can just be deleted because it doesn't actually do anything. Look at. It's zero, and it's used as the loop boundary. Nothing happens in the for loop, so I just deleted it. I also deleted the commented out code again.The next section of code:is never used anywhere else, so that gets deleted, too. Don't forget to delete the class member declarations. We're on a roll now. Here's the next chunk of code:I left out most of the code in the while loop for now so that the structure of the loop condition is more obvious, but let's start with the assignments at the top. First,is not used anywhere, just assigned to zero multiple times. Maybe it had been used at one time, but not anymore so it needs to be removed. Next,is always, so it can be replaced withto make the code clearer. Then, sum is not a great choice for what that variable is tracking. It's actually keeping a tally of the total number of cycles found, so I would rename it to. Also, it's a class member, but it's only use outside ofis as part of the summary printed to the console. I can move the summary print statement to the end ofand makea local variable.Moving on,is another variable that tracks the size of a vector, so it can be replaced with a call to the vector's. Now we get to. Along withis used to report a percentage complete for the while loop. All of my runs ran super fast, so even though it may take much longer for runs with millions of data points, I'm willing to remove this reporting for the sake of simplicity. Besides, the only case where the loop would take longer than reading in the data is when there are a huge number of unclosed cycles in the data, which is quite unlikely.That leaves us withand. Sinceis a bit more complicated, we'll skip it for now. As for, it could be named better. It's keeping track of the maximum cycle amplitude, so why don't we name itinstead. We can also make it a local variable.Now we're at the while loop. I find it strange to have it specified as an infinite loop and then only break out of it on a simple condition at the end. Let's move that condition up into the while loop conditional where it belongs and invert the logic. It's a good time to commit changes, so we should make sure the tests still pass and do a commit. I've actually been checking the tests throughout these little changes to make sure I don't make any stupid mistakes.Let's take a look inside this while loop now:There is a lot going on here, but the main thing this while loop is doing is finding cycles on a pass through the vector of peaks. I see a lot of repetition, and part of that repetition is created by keeping track ofandinside the loop. Let's try pulling these metrics out and putting them in their own loop after all of the cycles are found:We can also remove the commented out code right away, and we should really do some more renaming.should be renamed tobecause it's a vector of cycles, andandshould be renamed toandbecause they are the differences between adjacent peaks. They should all be made local, too. After doing that, I renamedin later loops tobecause the context of those loops was slightly different, and that name made more sense.Next, notice that the inner if-else statement is nearly the same in both branches. This large if-else statement can be simplified and the repetition removed by focusing the condition on the statements that differ between the two branches, like so:The next issue to tackle is thevector. Using an array just isn't very expressive, and assigning each of the different data items to different elements calls out for a more structured variable type. Let's make awith more descriptive names for each of the sub parts:Then we can use thisin place of thevector wherever it was used. With that change the while loop, along with the for loop that follows it, is nicely self-contained, and it should be made into its own method. The for loop runs through the leftover peaks that were not matched up to get full cycles, so it is the second half of the cycle finding phase of the algorithm. It was refactored along with the earlier renaming changes, so it's already cleaned up. The wrapped up method can be calledThis method is a little longer than I would like, and it could be easily broken up into two parts. But this is definitely an improvement, so let's move on. The rest of themethod involves calculating statistics on the cycles that were found. We'll crack that nut after taking a break to commit our changes.The first statistics that we calculate on the vector of cycles are the max amplitude and total cycles that we had previously pushed out of the loop that finds cycles. Since these are separate values, they could be broken up into separate loops and put into their own methods, but that's a small improvement, and we've got bigger fish to fry. We can do a quick change to use the standard max() function inside the loop, though:becomesThen we have another chunk of code that initializes the arrays that are used to store the various statistics:Again, we want to rename these variables to something more descriptive.is an array of bin boundaries, so we'll rename it. Why the underscore at the beginning of the name? Because this variable will be kept as a class member, and we want it to be obvious that it is a member and not a local variable. To do that, I normally use a leading underscore.is an array of cycle counts for the corresponding bins, so we can call it. We can rename the rest of the variables to be consistent by making them plural and snake case:, and. And let's rename the oddindex to just be. We can also get rid of the commented out code again.Now what can we do with the initialization of(previously)? My idea to clean it up was to make another array of percentages that's initialized by a literal array, make _bins a vector, and resize the vector to the calculated size of the percentage array. Then, all of the other arrays could also become vectors, and they could simply be initialized with vector method calls. The end result of all of this shuffling around is:Much nicer, and if we want to change the bins for any reason, we only need to change the literal array of percentage values. Everything else adjusts automatically to those changes. Now we can wrap up the last chunk of code inFirst things first, delete the commented out code and renametoor, as appropriate. After that, notice that this code is simply calculating some statistics on the cycles found—maximums, minimums, averages, etc. We can clean up the max and min lines with function calls:With theandfunctions, we can replace 6 lines of code with a single line. And with those refactorings, we are ready to wrap all of this statistics calculation code into a method:Again, this method is a bit longer than I would like, but in the interest of time, I'll leave it like this. It could easily be broken down into a few separate methods to simplify it further. That brings us to the end of the massivemethod, and all we're left with is this:Of course, there's a fair amount of code behind these three methods, but the flow of themethod is now quite obvious and easy to understand. It's time to commit these changes and clean up a couple of additional things.Remember way back in the last article when we left an array of file pointers that was used inandas a class member? We should fix that now. Since both uses of the file pointers are now local to methods, we can rename them toand remove the class member. We can also rename the non-descriptivemember tosince that's what it actually is, a vector of points. After removing the other members that are no longer used, we are left with these:That's much more manageable than it was before with 34(!) members. In fact, the entire program is much more manageable now that we've pared it down from 593 lines to 240 lines. It's cleaner, more comprehensible, and easier to use. Now that it's refactored, it is much easier to add new functionality to this class, which I proceeded to do with much higher efficiency in the project I was working on. This model went from barely understandable to highly useful within a matter of hours, certainly time I consider well spent.I'll admit this was a lot to take in for an example, but I wanted to show an example of refactoring as it might actually be done in the real world instead of just looking at a set of small, contrived examples. It shows how even if you're up against what looks like an absolute mess of code that you don't want to even look at, much less work with for too long, you can make quick, measured progress on cleaning it up with a methodical work flow. Get the code compiling, write some simple tests, and put it under version control right away. Then start walking through, renaming variables, extracting functions, and making use of libraries as you see fit. Obviously, larger projects will take much more effort, but they can still be tackled in chunks to make real progress. For more ideas on different refactorings, check out the classic Refactoring by Martin Fowler . You'll be happy when you rewrite code to something you can live with.