Merge remote-tracking branch 'upstream/master'

# Conflicts:
#	.travis.yml
MarcBS · Feb 27, 2019 · a271eac · a271eac
2 parents dc19431 + d48e970
commit a271eac
Show file tree

Hide file tree

Showing 23 changed files with 347 additions and 126 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -10,6 +10,8 @@ matrix:
           env: KERAS_BACKEND=tensorflow TEST_MODE=INTEGRATION_TESTS PIL=Pillow
         - python: 3.6
           env: KERAS_BACKEND=tensorflow TEST_MODE=PEP8_DOC PIL=Pillow
+        - python: 3.6
+          env: KERAS_BACKEND=tensorflow TEST_MODE=API
         - python: 2.7
           env: KERAS_BACKEND=tensorflow
         - python: 3.6
@@ -58,6 +60,9 @@ install:
   # install mkdocs
   - pip install mkdocs --progress-bar off
 
+  # install pyux
+  - pip install pyux
+
 # command to run tests
 script:
   - export MKL_THREADING_LAYER="GNU"
@@ -72,8 +77,10 @@ script:
       PYTHONPATH=$PWD:$PYTHONPATH py.test tests/integration_tests;
     elif [[ "$TEST_MODE" == "PEP8_DOC" ]]; then
       PYTHONPATH=$PWD:$PYTHONPATH py.test --pep8 -m pep8 -n0 && py.test tests/docs;
+    elif [[ "$TEST_MODE" == "API" ]]; then
+      PYTHONPATH=$PWD:$PYTHONPATH pip install git+git://www.github.com/keras-team/keras.git && python update_api.py && pip install -e .[tests] --progress-bar off && py.test tests/test_api.py;
     elif [[ "$RUN_ONLY_BACKEND_TESTS" == "1" ]]; then
       PYTHONPATH=$PWD:$PYTHONPATH py.test  tests/keras/backend/;
     else
-      PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --ignore=tests/integration_tests --ignore=tests/docs --ignore=tests/keras/legacy/layers_test.py --cov-config .coveragerc --cov=keras tests/;
+      PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --ignore=tests/integration_tests --ignore=tests/docs --ignore=tests/keras/legacy/layers_test.py --ignore=tests/test_api.py --cov-config .coveragerc --cov=keras tests/;
     fi
diff --git a/docs/autogen.py b/docs/autogen.py
@@ -92,7 +92,6 @@ def clean_module_name(name):
         name = name.replace('keras_applications', 'keras.applications')
     if name.startswith('keras_preprocessing'):
         name = name.replace('keras_preprocessing', 'keras.preprocessing')
-    assert name[:6] == 'keras.', 'Invalid module name: %s' % name
     return name
 
 

diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
@@ -85,3 +85,6 @@ nav:
   - Sentiment classification CNN-LSTM: examples/imdb_cnn_lstm.md
   - Fasttext for text classification: examples/imdb_fasttext.md
   - Sentiment classification LSTM: examples/imdb_lstm.md
+  - Sequence to sequence - training: examples/lstm_seq2seq.md
+  - Sequence to sequence - prediction: examples/lstm_seq2seq_restore.md
+  - Stateful LSTM: examples/lstm_stateful.md
diff --git a/docs/templates/applications.md b/docs/templates/applications.md
@@ -742,7 +742,7 @@ These weights are released under [the Apache License](https://github.com/tensorf
 
 
 ```python
-keras.applications.mobilenet_v2.MobileNetV2(input_shape=None, alpha=1.0, depth_multiplier=1, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000)
+keras.applications.mobilenet_v2.MobileNetV2(input_shape=None, alpha=1.0, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000)
 ```
 
 MobileNetV2 model, with weights pre-trained on ImageNet.
@@ -771,8 +771,6 @@ The default input size for this model is 224x224.
         of filters in each layer.
     - If `alpha` = 1, default number of filters from the paper
          are used at each layer.
-- depth_multiplier: depth multiplier for depthwise convolution
-      (also called the resolution multiplier)
 - include_top: whether to include the fully-connected
       layer at the top of the network.
 - weights: one of `None` (random initialization),
@@ -804,7 +802,7 @@ A Keras model instance.
 ### Raises
 
 ValueError: in case of invalid argument for `weights`,
-    or invalid input shape or invalid depth_multiplier, alpha,
+    or invalid input shape, alpha,
     rows when weights='imagenet'
 
 ### References

diff --git a/examples/lstm_seq2seq.py b/examples/lstm_seq2seq.py
@@ -1,4 +1,5 @@
-'''Sequence to sequence example in Keras (character-level).
+'''
+#Sequence to sequence example in Keras (character-level).
 
 This script demonstrates how to implement a basic character-level
 sequence-to-sequence model. We apply it to translating
@@ -7,7 +8,7 @@
 do character-level machine translation, as word-level
 models are more common in this domain.
 
-# Summary of the algorithm
+**Summary of the algorithm**
 
 - We start with input sequences from a domain (e.g. English sentences)
     and corresponding target sequences from another domain
@@ -32,21 +33,21 @@
     - Repeat until we generate the end-of-sequence character or we
         hit the character limit.
 
-# Data download
+**Data download**
 
-English to French sentence pairs.
-http://www.manythings.org/anki/fra-eng.zip
+[English to French sentence pairs.
+](http://www.manythings.org/anki/fra-eng.zip)
 
-Lots of neat sentence pairs datasets can be found at:
-http://www.manythings.org/anki/
+[Lots of neat sentence pairs datasets.
+](http://www.manythings.org/anki/)
 
-# References
+**References**
 
-- Sequence to Sequence Learning with Neural Networks
-    https://arxiv.org/abs/1409.3215
-- Learning Phrase Representations using
+- [Sequence to Sequence Learning with Neural Networks
+   ](https://arxiv.org/abs/1409.3215)
+- [Learning Phrase Representations using
     RNN Encoder-Decoder for Statistical Machine Translation
-    https://arxiv.org/abs/1406.1078
+    ](https://arxiv.org/abs/1406.1078)
 '''
 from __future__ import print_function
 

diff --git a/examples/lstm_seq2seq_restore.py b/examples/lstm_seq2seq_restore.py
@@ -1,12 +1,13 @@
-'''Restore a character-level sequence to sequence model from disk and use it
-to generate predictions.
+'''
+#Restore a character-level sequence to sequence model from disk to generate predictions.
 
-This script loads the s2s.h5 model saved by lstm_seq2seq.py and generates
-sequences from it.  It assumes that no changes have been made (for example:
-latent_dim is unchanged, and the input data and model architecture are unchanged).
+This script loads the ```s2s.h5``` model saved by [lstm_seq2seq.py
+](/examples/lstm_seq2seq/) and generates sequences from it. It assumes
+that no changes have been made (for example: ```latent_dim``` is unchanged,
+and the input data and model architecture are unchanged).
 
-See lstm_seq2seq.py for more details on the model architecture and how
-it is trained.
+See [lstm_seq2seq.py](/examples/lstm_seq2seq/) for more details on the
+model architecture and how it is trained.
 '''
 from __future__ import print_function
 

diff --git a/examples/lstm_stateful.py b/examples/lstm_stateful.py
@@ -1,38 +1,40 @@
-'''Example script showing how to use a stateful LSTM model
-and how its stateless counterpart performs.
+'''
+#How to use a stateful LSTM model, stateful vs stateless LSTM performance comparison
 
-More documentation about the Keras LSTM model can be found at
-https://keras.io/layers/recurrent/#lstm
+[More documentation about the Keras LSTM model](/layers/recurrent/#lstm)
 
 The models are trained on an input/output pair, where
 the input is a generated uniformly distributed
-random sequence of length = "input_len",
-and the output is a moving average of the input with window length = "tsteps".
-Both "input_len" and "tsteps" are defined in the "editable parameters" section.
+random sequence of length = `input_len`,
+and the output is a moving average of the input with window length = `tsteps`.
+Both `input_len` and `tsteps` are defined in the "editable parameters"
+section.
 
-A larger "tsteps" value means that the LSTM will need more memory
+A larger `tsteps` value means that the LSTM will need more memory
 to figure out the input-output relationship.
-This memory length is controlled by the "lahead" variable (more details below).
+This memory length is controlled by the `lahead` variable (more details below).
 
 The rest of the parameters are:
-- input_len: the length of the generated input sequence
-- lahead: the input sequence length that the LSTM
+
+- `input_len`: the length of the generated input sequence
+- `lahead`: the input sequence length that the LSTM
   is trained on for each output point
-- batch_size, epochs: same parameters as in the model.fit(...) function
+- `batch_size`, `epochs`: same parameters as in the `model.fit(...)`
+  function
 
-When lahead > 1, the model input is preprocessed to a "rolling window view"
-of the data, with the window length = "lahead".
-This is similar to sklearn's "view_as_windows"
-with "window_shape" being a single number
-Ref: http://scikit-image.org/docs/0.10.x/api/skimage.util.html#view-as-windows
+When `lahead > 1`, the model input is preprocessed to a "rolling window view"
+of the data, with the window length = `lahead`.
+This is similar to skimage's `view_as_windows`
+with `window_shape` [being a single number.](
+http://scikit-image.org/docs/0.10.x/api/skimage.util.html#view-as-windows)
 
-When lahead < tsteps, only the stateful LSTM converges because its
+When `lahead < tsteps`, only the stateful LSTM converges because its
 statefulness allows it to see beyond the capability that lahead
 gave it to fit the n-point average. The stateless LSTM does not have
-this capability, and hence is limited by its "lahead" parameter,
+this capability, and hence is limited by its `lahead` parameter,
 which is not sufficient to see the n-point average.
 
-When lahead >= tsteps, both the stateful and stateless LSTM converge.
+When `lahead >= tsteps`, both the stateful and stateless LSTM converge.
 '''
 from __future__ import print_function
 import numpy as np

diff --git a/examples/mnist_siamese.py b/examples/mnist_siamese.py
@@ -43,9 +43,9 @@ def contrastive_loss(y_true, y_pred):
     http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
     '''
     margin = 1
-    sqaure_pred = K.square(y_pred)
+    square_pred = K.square(y_pred)
     margin_square = K.square(K.maximum(margin - y_pred, 0))
-    return K.mean(y_true * sqaure_pred + (1 - y_true) * margin_square)
+    return K.mean(y_true * square_pred + (1 - y_true) * margin_square)
 
 
 def create_pairs(x, digit_indices):

diff --git a/keras/backend/cntk_backend.py b/keras/backend/cntk_backend.py
@@ -181,7 +181,7 @@ def variable(value, dtype=None, name=None, constraint=None):
         value = value.astype(dtype)
 
     # TODO: remove the conversion when cntk supports int32, int64
-    # https://docs.microsoft.com/en-us/python/api/cntk.variables.parameter
+    # https://www.cntk.ai/pythondocs/cntk.variables.html#cntk.variables.Parameter
     dtype = 'float32' if 'int' in str(dtype) else dtype
 
     v = C.parameter(shape=shape,
@@ -386,7 +386,7 @@ def random_binomial(shape, p=0.0, dtype=None, seed=None):
         # ensure that randomness is conditioned by the Numpy RNG
         seed = np.random.randint(10e7)
     if dtype is None:
-        dtype = np.float32
+        dtype = floatx()
     else:
         dtype = _convert_string_dtype(dtype)
 
@@ -420,14 +420,12 @@ def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None):
 
 def random_uniform_variable(shape, low, high,
                             dtype=None, name=None, seed=None):
-    if dtype is None:
-        dtype = floatx()
     if seed is None:
         # ensure that randomness is conditioned by the Numpy RNG
         seed = np.random.randint(10e3)
 
     if dtype is None:
-        dtype = np.float32
+        dtype = floatx()
     else:
         dtype = _convert_string_dtype(dtype)
 
@@ -452,13 +450,11 @@ def random_normal_variable(
         dtype=None,
         name=None,
         seed=None):
-    if dtype is None:
-        dtype = floatx()
     if seed is None:
         # ensure that randomness is conditioned by the Numpy RNG
         seed = np.random.randint(10e7)
     if dtype is None:
-        dtype = np.float32
+        dtype = floatx()
     else:
         dtype = _convert_string_dtype(dtype)
 
@@ -497,7 +493,7 @@ def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
     if seed is None:
         seed = np.random.randint(1, 10e6)
     if dtype is None:
-        dtype = np.float32
+        dtype = floatx()
     else:
         dtype = _convert_string_dtype(dtype)
 
@@ -531,11 +527,13 @@ def eye(size, dtype=None, name=None):
 
 
 def zeros_like(x, dtype=None, name=None):
-    return x * 0
+    name = name or ''
+    return C.zeros_like(x, name)
 
 
 def ones_like(x, dtype=None, name=None):
-    return zeros_like(x) + 1
+    name = name or ''
+    return C.ones_like(x, name)
 
 
 def count_params(x):
@@ -2735,7 +2733,8 @@ def backward(self, state, root_gradients):
 
 
 def reset_uids():
-    raise NotImplementedError
+    global _UID_PREFIXES
+    _UID_PREFIXES = defaultdict(int)
 
 
 def to_dense(tensor):
@@ -2762,7 +2761,8 @@ def ctc_batch_cost(y_true, y_pred, input_length, label_length):
     raise NotImplementedError
 
 
-def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
+def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1,
+               merge_repeated=False):
     raise NotImplementedError
 
 

diff --git a/keras/backend/numpy_backend.py b/keras/backend/numpy_backend.py
@@ -707,7 +707,8 @@ def one_hot(indices, num_classes):
     return to_categorical(indices, num_classes)
 
 
-def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
+def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1,
+               merge_repeated=False):
     num_samples = y_pred.shape[0]
     num_classes = y_pred.shape[-1]
     log_prob = np.zeros((num_samples, 1))

diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py
@@ -4888,7 +4888,7 @@ def ctc_batch_cost(y_true, y_pred, input_length, label_length):
 
 
 def ctc_decode(y_pred, input_length, greedy=True, beam_width=100,
-               top_paths=1):
+               top_paths=1, merge_repeated=False):
     """Decodes the output of a softmax.
 
     Can use either greedy search (also known as best path)
@@ -4899,18 +4899,20 @@ def ctc_decode(y_pred, input_length, greedy=True, beam_width=100,
             containing the prediction, or output of the softmax.
         input_length: tensor `(samples, )` containing the sequence length for
             each batch item in `y_pred`.
-        greedy: perform much faster best-path search if `true`.
+        greedy: perform much faster best-path search if `True`.
             This does not use a dictionary.
-        beam_width: if `greedy` is `false`: a beam search decoder will be used
+        beam_width: if `greedy` is `False`: a beam search decoder will be used
             with a beam of this width.
-        top_paths: if `greedy` is `false`,
+        top_paths: if `greedy` is `False`,
             how many of the most probable paths will be returned.
+        merge_repeated: if `greedy` is `False`,
+            merge repeated classes in the output beams.
 
     # Returns
         Tuple:
-            List: if `greedy` is `true`, returns a list of one element that
+            List: if `greedy` is `True`, returns a list of one element that
                 contains the decoded sequence.
-                If `false`, returns the `top_paths` most probable
+                If `False`, returns the `top_paths` most probable
                 decoded sequences.
                 Important: blank labels are returned as `-1`.
             Tensor `(top_paths, )` that contains
@@ -4927,14 +4929,11 @@ def ctc_decode(y_pred, input_length, greedy=True, beam_width=100,
         (decoded, log_prob) = ctc.ctc_beam_search_decoder(
             inputs=y_pred,
             sequence_length=input_length, beam_width=beam_width,
-            top_paths=top_paths, merge_repeated=False)
+            top_paths=top_paths, merge_repeated=merge_repeated)
 
     decoded_dense = []
     for st in decoded:
-        dense_tensor = tf.sparse_to_dense(st.indices,
-                                          st.dense_shape,
-                                          st.values,
-                                          default_value=-1)
+        dense_tensor = tf.sparse.to_dense(st, default_value=-1)
         decoded_dense.append(dense_tensor)
     return (decoded_dense, log_prob)
 

diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py
@@ -4562,7 +4562,8 @@ def ctc_label_dense_to_sparse(labels, label_lengths):
     raise NotImplementedError
 
 
-def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
+def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1,
+               merge_repeated=False):
     raise NotImplementedError
 
 

diff --git a/keras/datasets/boston_housing.py b/keras/datasets/boston_housing.py
@@ -30,9 +30,9 @@ def load_data(path='boston_housing.npz', test_split=0.2, seed=113):
         x = f['x']
         y = f['y']
 
-    np.random.seed(seed)
+    rng = np.random.RandomState(seed)
     indices = np.arange(len(x))
-    np.random.shuffle(indices)
+    rng.shuffle(indices)
     x = x[indices]
     y = y[indices]