# Calculate total words per subtitle block

'total_words'

=

'text'

.

lambda

.

# for minutes without subtitle blocks at all.

# Roll the dataframe based on ROLL_MINS so that the output is less noisy; the value of every minute is now calculated

# as the mean over the last ROLL_MINS minutes

# Group the dataframe based on subtitle minute, and get sum of total_words per group

=

.

.

=

'min'

.

'total_words'

# Reindex the dataframe for a minute based index (0 to last-min)

# this is used so we'll also have a representation for minutes without any subtitle blocks

.

.

0

.

'min'

# therefore the next step is to fill those empty indices with 0 total words

.

0

# Roll the dataframe based on roll mins, making the output less noisy

.

# Solve the roll using the mean value - every minute's "total words" will now be

# the average total words of the last ROLL_MINS

.

.

0

# Use np.roll to measure the backwards change (current - previous) in total words

# and the forwards change in total words (next - current)

=

'total_words'

-

.

'total_words'

1

=

.

'total_words'

-

1

-

'total_words'

# Find maximum and minimum points, define extrema points as either one

=

>

0

&

<

0

=

<

0

&

>

0

=

|

# Create DF for extrema points, calculate distance of every extrema from the mean words

=

.

.

.

'total_words'

-

'total_words'

.

# Chunk into segments, group by the segments and get index of extrema that's farthest away from the mean

'segment'

=

.

.

/

.

=

.

'segment'

.

=

'total_words'

.

# Information for plot, x y for line and x y for each extrema dot

=

.

=

'total_words'

=

'segment_top_extrema_min'

=

.

'segment_top_extrema_min'

'total_words'