from

sklearn.datasets

import

fetch_mldata

from

sklearn.model_selection

import

train_test_split

def load_dataset(test_size=10000, val_size=10000, random_state=None):
    """Load MNIST and split it into training, validation and test sets.

    The data is fetched with ``fetch_mldata('MNIST original')`` and the
    ``'data'`` entry is reshaped to ``(n_samples, 1, 28, 28)``.  The samples
    are then divided with ``train_test_split`` into a training and a test
    portion, and the last ``val_size`` examples of the training portion are
    set aside for validation.

    NOTE(review): ``fetch_mldata`` was deprecated in scikit-learn 0.20 and
    removed in 0.22; on newer releases this loader needs porting (e.g. to
    ``fetch_openml``) -- confirm against the pinned scikit-learn version.

    Args:
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
            Defaults to 10000, the value previously hard-coded.
        val_size: Number of examples taken from the end of the training
            portion for validation.  Defaults to 10000.
        random_state: Forwarded to ``train_test_split``.  The default
            ``None`` keeps the original behaviour (a different split on
            every call); pass an int for a reproducible split.

    Returns:
        The tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``.

    Raises:
        ValueError: If ``val_size`` is negative or larger than the number
            of examples left in the training portion.
    """
    mnist = fetch_mldata('MNIST original')

    # -1 lets NumPy infer the number of samples instead of hard-coding 70000.
    data = mnist['data'].reshape((-1, 1, 28, 28))
    target = mnist['target']

    X_train, X_test, y_train, y_test = train_test_split(
        data, target, test_size=test_size, random_state=random_state)

    n_available = X_train.shape[0]
    if not 0 <= val_size <= n_available:
        raise ValueError(
            "val_size must be between 0 and %d, got %r"
            % (n_available, val_size))

    # Slice with an explicit index rather than [:-val_size] / [-val_size:],
    # which would yield an empty training set when val_size == 0.
    split = n_available - val_size
    X_train, X_val = X_train[:split], X_train[split:]
    y_train, y_val = y_train[:split], y_train[split:]

    # Callers unpack the arrays in exactly this order.
    return X_train, y_train, X_val, y_val, X_test, y_test

# Report progress, then unpack the six arrays in the order that
# load_dataset() returns them.
print("Loading data...")
(X_train, y_train,
 X_val, y_val,
 X_test, y_test) = load_dataset()

# Size of the first axis of the training array.
total_size = X_train.shape[0]