From de5f44cad24715b97093f77ad560a7c6463ebdf2 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Thu, 20 Feb 2020 20:04:43 +0000 Subject: [PATCH 01/89] examples added + changed nd4j backend in pom.xml to run on DGX1 Signed-off-by: atuzhykov --- .../TextClassification.java | 184 ++++++++++++++++++ pom.xml | 4 +- 2 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java new file mode 100644 index 0000000000..2852771df0 --- /dev/null +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -0,0 +1,184 @@ +package org.deeplearning4j.examples.nlp.textclassification; + + + + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.FilenameUtils; +import org.deeplearning4j.api.storage.StatsStorage; +import org.deeplearning4j.iterator.BertIterator; +import org.deeplearning4j.iterator.provider.FileLabeledSentenceProvider; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.optimize.listeners.ScoreIterationListener; +import org.deeplearning4j.text.tokenization.tokenizerfactory.BertWordPieceTokenizerFactory; +import org.deeplearning4j.ui.api.UIServer; +import org.deeplearning4j.ui.stats.StatsListener; +import org.deeplearning4j.ui.storage.FileStatsStorage; +import org.nd4j.evaluation.classification.Evaluation; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.dataset.api.MultiDataSet; +import org.nd4j.linalg.dataset.api.MultiDataSetPreProcessor; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Adam; +import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.deeplearning4j.examples.utilities.DataUtilities; + + +import java.io.File; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.*; + +class TextClassifier { + + + /** + * Data URL for downloading + */ + public static final String DATA_URL = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"; + /** + * Location to save and extract the training/testing data + */ + public static final String DATA_PATH = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_w2vSentiment/"); + + public static BertIterator getBertDataSetIterator(boolean isTraining, BertWordPieceTokenizerFactory t) { + + String path = FilenameUtils.concat(DATA_PATH, (isTraining ? "aclImdb/train/" : "aclImdb/test/")); + String positiveBaseDir = FilenameUtils.concat(path, "pos"); + String negativeBaseDir = FilenameUtils.concat(path, "neg"); + Random rng = new Random(42); + + File filePositive = new File(positiveBaseDir); + File fileNegative = new File(negativeBaseDir); + + Map> reviewFilesMap = new HashMap<>(); + reviewFilesMap.put("Positive", Arrays.asList(Objects.requireNonNull(filePositive.listFiles()))); + reviewFilesMap.put("Negative", Arrays.asList(Objects.requireNonNull(fileNegative.listFiles()))); + + + BertIterator b = BertIterator.builder() + .tokenizer(t) + .lengthHandling(BertIterator.LengthHandling.FIXED_LENGTH, 16) + .minibatchSize(2) + .sentenceProvider(new FileLabeledSentenceProvider(reviewFilesMap, rng)) + .featureArrays(BertIterator.FeatureArrays.INDICES_MASK) + .vocabMap(t.getVocab()) + .task(BertIterator.Task.SEQ_CLASSIFICATION) + .build(); + + + return b; + } + + + public static void main(String[] args) throws Exception { + + + //Download and extract data + downloadData(); + + + Nd4j.getMemoryManager().setAutoGcWindow(10000); //https://deeplearning4j.org/workspaces + final int seed = 0; //Seed for reproducibility + String pathToVocab = "/home/jenkins/uncased_L-12_H-768_A-12/vocab.txt"; + BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); + + + //DataSetIterators for training and testing respectively + + + //Set up network configuration + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .seed(seed) + .updater(new Adam(1e-3)) + .l2(1e-6) + .weightInit(WeightInit.XAVIER) + .list() + .setInputType(InputType.recurrent(1)) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .layer(new GlobalPoolingLayer(PoolingType.MAX)) + .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) + .lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .build(); + + BertIterator train = getBertDataSetIterator(true, t); + BertIterator test = getBertDataSetIterator(false, t); + + MultiDataSetPreProcessor mdsPreprocessor = new MultiDataSetPreProcessor() { + @Override + public void preProcess(MultiDataSet multiDataSet) { + multiDataSet.setFeaturesMaskArray(0, multiDataSet.getFeaturesMaskArray(0).castTo(DataType.FLOAT)); + } + }; + + + train.setPreProcessor(mdsPreprocessor); + test.setPreProcessor(mdsPreprocessor); + + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + + + //Initialize the user interface backend + UIServer uiServer = UIServer.getInstance(); + + //Configure where the network information (gradients, activations, score vs. time etc) is to be stored + //Then add the StatsListener to collect this information from the network, as it trains + StatsStorage statsStorage = new FileStatsStorage(new File(System.getProperty("java.io.tmpdir"), "ui-stats-" + System.currentTimeMillis() + ".dl4j")); + int listenerFrequency = 1; + net.setListeners(new StatsListener(statsStorage, listenerFrequency), new ScoreIterationListener(50)); + //Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized + uiServer.attach(statsStorage); + + + for (int i = 1; i <= 10; i++) { + + net.fit(train); + + Evaluation eval = net.doEvaluation(test, new Evaluation[]{new Evaluation()})[0]; + System.out.println(eval.stats()); + } + + } + + public static void downloadData() throws Exception { + //Create directory if required + File directory = new File(DATA_PATH); + if (!directory.exists()) directory.mkdir(); + + //Download file: + String archizePath = DATA_PATH + "aclImdb_v1.tar.gz"; + File archiveFile = new File(archizePath); + String extractedPath = DATA_PATH + "aclImdb"; + File extractedFile = new File(extractedPath); + + if (!archiveFile.exists()) { + System.out.println("Starting data download (80MB)..."); + FileUtils.copyURLToFile(new URL(DATA_URL), archiveFile); + System.out.println("Data (.tar.gz file) downloaded to " + archiveFile.getAbsolutePath()); + //Extract tar.gz file to output directory + DataUtilities.extractTarGz(archizePath, DATA_PATH); + } else { + //Assume if archive (.tar.gz) exists, then data has already been extracted + System.out.println("Data (.tar.gz file) already exists at " + archiveFile.getAbsolutePath()); + if (!extractedFile.exists()) { + //Extract tar.gz file to output directory + DataUtilities.extractTarGz(archizePath, DATA_PATH); + } else { + System.out.println("Data (extracted) already exists at " + extractedFile.getAbsolutePath()); + } + } + } + + +} + diff --git a/pom.xml b/pom.xml index 45aee7518a..f2c38c7ee3 100644 --- a/pom.xml +++ b/pom.xml @@ -27,8 +27,8 @@ Examples of training different data sets - nd4j-native-platform - + + nd4j-cuda-10.0-platform UTF-8 bin From d65fa9a89dd871f9b48021b16d375dfcd8df85e4 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 21 Feb 2020 12:59:40 +0000 Subject: [PATCH 02/89] examples added + changed nd4j backend in pom.xml to run on DGX1 Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 2852771df0..1e5809ac21 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -36,7 +36,7 @@ import java.nio.charset.StandardCharsets; import java.util.*; -class TextClassifier { +public class TextClassification { /** From c2967df0999218828ca1a9217ef50ad55365f57f Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 21 Feb 2020 13:05:59 +0000 Subject: [PATCH 03/89] other small changes Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 1e5809ac21..80febc5752 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -66,7 +66,7 @@ public static BertIterator getBertDataSetIterator(boolean isTraining, BertWordPi BertIterator b = BertIterator.builder() .tokenizer(t) .lengthHandling(BertIterator.LengthHandling.FIXED_LENGTH, 16) - .minibatchSize(2) + .minibatchSize(32) .sentenceProvider(new FileLabeledSentenceProvider(reviewFilesMap, rng)) .featureArrays(BertIterator.FeatureArrays.INDICES_MASK) .vocabMap(t.getVocab()) @@ -85,7 +85,6 @@ public static void main(String[] args) throws Exception { downloadData(); - Nd4j.getMemoryManager().setAutoGcWindow(10000); //https://deeplearning4j.org/workspaces final int seed = 0; //Seed for reproducibility String pathToVocab = "/home/jenkins/uncased_L-12_H-768_A-12/vocab.txt"; BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); From 9e8783578a67a1bcb10962d5fcdd0964214a1293 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 21 Feb 2020 14:29:59 +0000 Subject: [PATCH 04/89] small fix to match cuda version with container Signed-off-by: atuzhykov --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f2c38c7ee3..7479a863e5 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ - nd4j-cuda-10.0-platform + nd4j-cuda-10.2-platform UTF-8 bin From 41530e1b7b7e09c1e96b1b1c97b8899bc1d22727 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 21 Feb 2020 15:22:18 +0000 Subject: [PATCH 05/89] small fix to match cuda version with container Signed-off-by: atuzhykov --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7479a863e5..f2c38c7ee3 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ - nd4j-cuda-10.2-platform + nd4j-cuda-10.0-platform UTF-8 bin From 3198021f1436d0a917d17ebb1bd7b6a297fe240b Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Sat, 22 Feb 2020 21:59:07 +0000 Subject: [PATCH 06/89] lr 1e-3 > 4e-3 (as multiplying batchsize*k, lr*sqrt(k)) l2 1e-6 > 1e-3 Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 80febc5752..e20e41d609 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -96,8 +96,8 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-3)) - .l2(1e-6) + .updater(new Adam(4e-3)) + .l2(1e-3) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) From 9a1ba54060b1e90abf5dd5cfa89f2b6b2be3404b Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Sun, 23 Feb 2020 01:43:36 +0000 Subject: [PATCH 07/89] lr 1e-3 > 4e-3 (as multiplying batchsize*k, lr*sqrt(k)) l2 1e-3 > 1e-6 Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index e20e41d609..780b7ea5a1 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -97,7 +97,7 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Adam(4e-3)) - .l2(1e-3) + .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) From 95aa6397a82a455da0f850af8c6fa1646f109436 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Mon, 24 Feb 2020 11:11:36 +0000 Subject: [PATCH 08/89] experiment0 Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 780b7ea5a1..6866e7586f 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -27,8 +27,11 @@ import org.nd4j.linalg.dataset.api.MultiDataSetPreProcessor; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Adam; +import org.nd4j.linalg.learning.config.Nadam; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.deeplearning4j.examples.utilities.DataUtilities; +import org.nd4j.linalg.schedule.ExponentialSchedule; +import org.nd4j.linalg.schedule.ScheduleType; import java.io.File; @@ -96,12 +99,13 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(4e-3)) - .l2(1e-6) + .updater(new Nadam(new ExponentialSchedule(ScheduleType.ITERATION, 4e-3, 0.99 ))) + .weightDecay(1e-5) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) From 75016265bdf7900499d703543ff48e39e0a3d694 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 15:49:47 +0000 Subject: [PATCH 09/89] experiment1 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../textclassification/TextClassification.java | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 6866e7586f..b2480f7362 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -28,10 +28,16 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Adam; import org.nd4j.linalg.learning.config.Nadam; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.deeplearning4j.examples.utilities.DataUtilities; import org.nd4j.linalg.schedule.ExponentialSchedule; import org.nd4j.linalg.schedule.ScheduleType; +import org.nd4j.linalg.schedule.SigmoidSchedule; +import org.deeplearning4j.nn.api.Updater; +import org.deeplearning4j.nn.api.Updater; + + import java.io.File; @@ -93,18 +99,20 @@ public static void main(String[] args) throws Exception { BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); - //DataSetIterators for training and testing respectively //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(new ExponentialSchedule(ScheduleType.ITERATION, 4e-3, 0.99 ))) - .weightDecay(1e-5) + .updater(new Nadam(1e-5)) + .l2(0.0001) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) + .hasBias(true).nIn(t.getVocab().size()) + .updater(new Sgd(1e-4)) + .nOut(128).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) From 091c3860cd20978c92b9916c0a6da9aa3f887a50 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 16:19:38 +0000 Subject: [PATCH 10/89] experiment2 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index b2480f7362..a51b81e2a7 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -104,18 +104,17 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-5)) + .updater(new Nadam(1e-2)) .l2(0.0001) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) .hasBias(true).nIn(t.getVocab().size()) - .updater(new Sgd(1e-4)) + .updater(new Sgd(1e-2)) .nOut(128).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From 86a65183b0d8dd6ec77a3f850b1ff16f4836f0a3 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 17:08:46 +0000 Subject: [PATCH 11/89] experiment3 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index a51b81e2a7..1ac714d6a1 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -28,6 +28,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Adam; import org.nd4j.linalg.learning.config.Nadam; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.deeplearning4j.examples.utilities.DataUtilities; @@ -104,14 +105,14 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-2)) - .l2(0.0001) + .updater(new Nadam(1e-1)) + .weightDecay(1e-4,true) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) .hasBias(true).nIn(t.getVocab().size()) - .updater(new Sgd(1e-2)) + .updater(new Nesterovs(1e-2)) .nOut(128).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) From d669d6f600f6a7ba82596e97f6760302a2ba1fed Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 17:20:50 +0000 Subject: [PATCH 12/89] experiment4 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 1ac714d6a1..dec11fa646 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,14 +105,13 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-1)) - .weightDecay(1e-4,true) + .updater(new Nadam(1e-3)) + .weightDecay(1e-5,true) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) .hasBias(true).nIn(t.getVocab().size()) - .updater(new Nesterovs(1e-2)) .nOut(128).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) From 578a18608fee7324ed6e0dc702f15cceb3f99652 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 17:26:54 +0000 Subject: [PATCH 13/89] experiment5 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index dec11fa646..5354976955 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,7 +105,7 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-3)) + .updater(new Nadam(1e-4)) .weightDecay(1e-5,true) .weightInit(WeightInit.XAVIER) .list() From fe429672addca703686c2f3c19c65008d8068d17 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 17:34:32 +0000 Subject: [PATCH 14/89] experiment6 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 5354976955..5b398860fb 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -106,7 +106,6 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Nadam(1e-4)) - .weightDecay(1e-5,true) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) From 302f7bbb86db1bfdb074879970f8792db261a78f Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 17:59:00 +0000 Subject: [PATCH 15/89] experiment7 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 5b398860fb..49f73ddd48 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -106,14 +106,16 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Nadam(1e-4)) + .weightDecay(1e-7) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) .hasBias(true).nIn(t.getVocab().size()) - .nOut(128).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .nOut(256).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From bb933c594d7f2ee9eca426f25aa18773385d42c9 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 18:22:58 +0000 Subject: [PATCH 16/89] experiment8 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 49f73ddd48..3111088d84 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,8 +105,8 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-4)) - .weightDecay(1e-7) + .updater(new Nadam(new SigmoidSchedule(ScheduleType.ITERATION,1e-4,0.5, 10))) + .weightDecay(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) From 000f7d891be9cfa096b36f169dd57bdaaf284283 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 18:32:05 +0000 Subject: [PATCH 17/89] experiment9 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 3111088d84..7c8dc63066 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,7 +105,7 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(new SigmoidSchedule(ScheduleType.ITERATION,1e-4,0.5, 10))) + .updater(new Nadam(new SigmoidSchedule(ScheduleType.ITERATION,1e-3,0.5, 10))) .weightDecay(1e-6) .weightInit(WeightInit.XAVIER) .list() From 79014f8ffe069cd386038294a3ed9993d79027a7 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 18:37:08 +0000 Subject: [PATCH 18/89] experiment9 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 7c8dc63066..1eb80f89b4 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,7 +105,7 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(new SigmoidSchedule(ScheduleType.ITERATION,1e-3,0.5, 10))) + .updater(new Nadam(new SigmoidSchedule(ScheduleType.ITERATION,1e-2,0.5, 10))) .weightDecay(1e-6) .weightInit(WeightInit.XAVIER) .list() From 4a5cffdc8e67904d794d61ceea362bf20f5d4f0b Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 18:38:50 +0000 Subject: [PATCH 19/89] experiment10 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 1eb80f89b4..59122d54f2 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,7 +105,7 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(new SigmoidSchedule(ScheduleType.ITERATION,1e-2,0.5, 10))) + .updater(new Nadam(1e-1)) .weightDecay(1e-6) .weightInit(WeightInit.XAVIER) .list() From 8ef7519ee410b11ce8ab9d8589d4e0fc4ea8ec94 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 18:45:02 +0000 Subject: [PATCH 20/89] experiment10 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 59122d54f2..bac9ec7905 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,7 +105,7 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-1)) + .updater(new Nadam(1e-2)) .weightDecay(1e-6) .weightInit(WeightInit.XAVIER) .list() From b99d59a91bddacf96817eca8a879bd8cec7ec1cb Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 22:47:13 +0000 Subject: [PATCH 21/89] experiment11 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index bac9ec7905..f78e500c42 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,17 +105,16 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-2)) .weightDecay(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) - .hasBias(true).nIn(t.getVocab().size()) - .nOut(256).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) + .layer(0, new EmbeddingSequenceLayer.Builder() + .hasBias(true).nIn(t.getVocab().size()).nOut(256).weightInit(WeightInit.ZERO) + .updater(new Sgd(1e-4)).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).updater(new Nadam(1e-4)).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).updater(new Nadam(1e-4)).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).updater(new Nadam(1e-4)).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From 1aabba130c16e4e1c0c5ecd9202d4fe422cf986e Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Wed, 26 Feb 2020 08:53:02 +0000 Subject: [PATCH 22/89] experiment12 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../textclassification/TextClassification.java | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index f78e500c42..0fe7781271 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,21 +105,24 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .weightDecay(1e-6) + .updater(new Nadam(1e-3)) + .weightDecay(1e-5) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder() - .hasBias(true).nIn(t.getVocab().size()).nOut(256).weightInit(WeightInit.ZERO) - .updater(new Sgd(1e-4)).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).updater(new Nadam(1e-4)).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).updater(new Nadam(1e-4)).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).updater(new Nadam(1e-4)).build()) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) + .hasBias(true).nIn(t.getVocab().size()) + .updater(new Sgd(1e-2)) + .nOut(128).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); + + BertIterator train = getBertDataSetIterator(true, t); BertIterator test = getBertDataSetIterator(false, t); From dc3f3b3c52e60ccb2951232034fd7e6bcb16abbe Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Wed, 26 Feb 2020 09:39:48 +0000 Subject: [PATCH 23/89] experiment12 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 0fe7781271..54d531b6fe 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,14 +105,14 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-3)) + .updater(new Adam(1e-3)) .weightDecay(1e-5) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) .hasBias(true).nIn(t.getVocab().size()) - .updater(new Sgd(1e-2)) +// .updater(new Sgd(1e-2)) .nOut(128).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) From f16d8ac9a08ed22af1b5a490c410e83a1328df58 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Wed, 26 Feb 2020 10:12:55 +0000 Subject: [PATCH 24/89] experiment12 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 54d531b6fe..5c7b7f607b 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -106,14 +106,11 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Adam(1e-3)) - .weightDecay(1e-5) + .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) - .hasBias(true).nIn(t.getVocab().size()) -// .updater(new Sgd(1e-2)) - .nOut(128).build()) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) @@ -122,7 +119,6 @@ public static void main(String[] args) throws Exception { .build(); - BertIterator train = getBertDataSetIterator(true, t); BertIterator test = getBertDataSetIterator(false, t); From 36ae8ee29b7ad429f9a5e76e3ae212c565082138 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Wed, 26 Feb 2020 11:50:25 +0000 Subject: [PATCH 25/89] experiment12 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- dl4j-examples/pom.xml | 6 ++++++ .../nlp/textclassification/TextClassification.java | 9 +++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/dl4j-examples/pom.xml b/dl4j-examples/pom.xml index 6e4dc69ed4..564ab4b3e8 100644 --- a/dl4j-examples/pom.xml +++ b/dl4j-examples/pom.xml @@ -106,6 +106,12 @@ ${dl4j.version} + + org.deeplearning4j + deeplearning4j-cuda-10.0 + 1.0.0-beta6 + + org.deeplearning4j diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 5c7b7f607b..6eedeb9acf 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -110,9 +110,10 @@ public static void main(String[] args) throws Exception { .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) + .l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(256).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -143,7 +144,7 @@ public void preProcess(MultiDataSet multiDataSet) { //Configure where the network information (gradients, activations, score vs. time etc) is to be stored //Then add the StatsListener to collect this information from the network, as it trains StatsStorage statsStorage = new FileStatsStorage(new File(System.getProperty("java.io.tmpdir"), "ui-stats-" + System.currentTimeMillis() + ".dl4j")); - int listenerFrequency = 1; + int listenerFrequency = 20; net.setListeners(new StatsListener(statsStorage, listenerFrequency), new ScoreIterationListener(50)); //Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized uiServer.attach(statsStorage); From 0de07d847d51a24259042095635f7ceaf6a5316b Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Wed, 26 Feb 2020 12:39:35 +0000 Subject: [PATCH 26/89] experiment13 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 6eedeb9acf..fb9e61f66d 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -111,7 +111,8 @@ public static void main(String[] args) throws Exception { .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) - .l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(256).build()) + .l2(0).hasBias(true).nIn(t.getVocab().size()) + .nOut(256).updater(new Sgd(1e-3)).build()) .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) From f2eece6340992e72ddfd10b450cfb8b498dd3a03 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Wed, 26 Feb 2020 16:28:32 +0000 Subject: [PATCH 27/89] experiment14 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index fb9e61f66d..beb5106aad 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -115,6 +115,8 @@ public static void main(String[] args) throws Exception { .nOut(256).updater(new Sgd(1e-3)).build()) .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) + .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From ff91a964f9d240b9489108e3cc6965ddadb9ce86 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Thu, 27 Feb 2020 22:27:48 +0000 Subject: [PATCH 28/89] baseline conf + LengthHandling.FIXED_LENGTH=256 Signed-off-by: atuzhykov --- .../TextClassification.java | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index beb5106aad..5fa67399bb 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -1,8 +1,6 @@ package org.deeplearning4j.examples.nlp.textclassification; - - import org.apache.commons.io.FileUtils; import org.apache.commons.io.FilenameUtils; import org.deeplearning4j.api.storage.StatsStorage; @@ -39,8 +37,6 @@ import org.deeplearning4j.nn.api.Updater; - - import java.io.File; import java.net.URL; import java.nio.charset.StandardCharsets; @@ -75,7 +71,7 @@ public static BertIterator getBertDataSetIterator(boolean isTraining, BertWordPi BertIterator b = BertIterator.builder() .tokenizer(t) - .lengthHandling(BertIterator.LengthHandling.FIXED_LENGTH, 16) + .lengthHandling(BertIterator.LengthHandling.FIXED_LENGTH, 256) .minibatchSize(32) .sentenceProvider(new FileLabeledSentenceProvider(reviewFilesMap, rng)) .featureArrays(BertIterator.FeatureArrays.INDICES_MASK) @@ -100,9 +96,6 @@ public static void main(String[] args) throws Exception { BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); - - - //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Adam(1e-3)) @@ -110,19 +103,14 @@ public static void main(String[] args) throws Exception { .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) - .l2(0).hasBias(true).nIn(t.getVocab().size()) - .nOut(256).updater(new Sgd(1e-3)).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) - + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); - BertIterator train = getBertDataSetIterator(true, t); BertIterator test = getBertDataSetIterator(false, t); From 2e99c55a921d0e48a0411e5eb0ad52ac1ed98b3f Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 28 Feb 2020 09:50:50 +0000 Subject: [PATCH 29/89] baselineconf+LengthHandling.FIXED_LENGTH=256+Bidirectional_lstm Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 5fa67399bb..b02c352bdd 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -11,6 +11,7 @@ import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.listeners.ScoreIterationListener; @@ -104,8 +105,8 @@ public static void main(String[] args) throws Exception { .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .layer(new Bidirectional(new LSTM.Builder().nOut(128).activation(Activation.TANH).build())) + .layer(new Bidirectional(new LSTM.Builder().nOut(128).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From e107c2cffecb7f6497a01b0c8d8a561b458bd9ee Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 28 Feb 2020 10:43:23 +0000 Subject: [PATCH 30/89] baselineconf+LengthHandling.FIXED_LENGTH=256+Bidirectional_lstm+lr1e-4 Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index b02c352bdd..edfbfc16f3 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -99,7 +99,7 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-3)) + .updater(new Adam(1e-4)) .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() From c09746d725993b26c8de852b51f0d67b094dc54b Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 28 Feb 2020 15:29:31 +0000 Subject: [PATCH 31/89] baselineconf+LengthHandling.FIXED_LENGTH=256+Bidirectional_lstm_256 Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index edfbfc16f3..ce59f39da2 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -99,14 +99,14 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-4)) + .updater(new Adam(1e-3)) .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) - .layer(new Bidirectional(new LSTM.Builder().nOut(128).activation(Activation.TANH).build())) - .layer(new Bidirectional(new LSTM.Builder().nOut(128).activation(Activation.TANH).build())) + .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) + .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -150,6 +150,8 @@ public void preProcess(MultiDataSet multiDataSet) { System.out.println(eval.stats()); } + System.out.print(net.summary()); + } public static void downloadData() throws Exception { From 2b6414a42a504a34c57da544606ceb1b6cee2909 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 28 Feb 2020 21:54:22 +0000 Subject: [PATCH 32/89] baselineconf+LengthHandling.FIXED_LENGTH=256+Bidirectional_lstm_256_lr1e-4 Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index ce59f39da2..b80fe319c5 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -99,7 +99,7 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-3)) + .updater(new Adam(1e-4)) .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() From 834c33896fe80beab1d6220447ebc737eeabd66c Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 14:10:40 +0200 Subject: [PATCH 33/89] base_conf+bidir_LSTM_256_layersize_Adam_lr1e-3_SGD_lr1e-3_for_EmbdLayer --- .../TextClassification.java | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index b80fe319c5..1fd3bc0dd3 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -6,6 +6,7 @@ import org.deeplearning4j.api.storage.StatsStorage; import org.deeplearning4j.iterator.BertIterator; import org.deeplearning4j.iterator.provider.FileLabeledSentenceProvider; +import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; @@ -96,15 +97,31 @@ public static void main(String[] args) throws Exception { String pathToVocab = "/home/jenkins/uncased_L-12_H-768_A-12/vocab.txt"; BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); +// ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() +// .seed(seed) +// .updater(new Adam(1e-4)) +// .l2(1e-6) +// .weightInit(WeightInit.XAVIER) +// .graphBuilder() +// +// .addInputs("input1", "input2") +// .addLayer("L1", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input1") +// .addLayer("L2", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input2") +// .addVertex("merge", new MergeVertex(), "L1", "L2") +// .addLayer("out", new OutputLayer.Builder().nIn(4+4).nOut(3).build(), "merge") +// .setOutputs("out") +// .build(); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-4)) + .updater(new Adam(1e-3)) .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) + .hasBias(true).nIn(t.getVocab().size()).nOut(128) + .updater(new Sgd(1e-3)).build()) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) @@ -150,6 +167,10 @@ public void preProcess(MultiDataSet multiDataSet) { System.out.println(eval.stats()); } + System.out.println("Training set evaluation"); + Evaluation eval = net.doEvaluation(train, new Evaluation[]{new Evaluation()})[0]; + System.out.println(eval.stats()); + System.out.print(net.summary()); } From d6b2644706b9a1867e2689e6e00d4bc48bc0612e Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 14:24:59 +0200 Subject: [PATCH 34/89] base_conf+bidir_LSTM_256_layersize_Adam_lr1e-3_SGD_lr1e-3_for_EmbdLayer --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 1fd3bc0dd3..929cd023fc 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -120,7 +120,7 @@ public static void main(String[] args) throws Exception { .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) - .hasBias(true).nIn(t.getVocab().size()).nOut(128) + .hasBias(true).nIn(t.getVocab().size()).nOut(256) .updater(new Sgd(1e-3)).build()) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) From 615657f8c71db62e2574b763b474759dbeae762c Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 14:35:09 +0200 Subject: [PATCH 35/89] base_conf+bidir_LSTM_256_layersize_Nadam_lr1e-3 --- .../examples/nlp/textclassification/TextClassification.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 929cd023fc..97523b8bc9 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -114,14 +114,13 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-3)) + .updater(new Nadam(1e-3)) .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) - .hasBias(true).nIn(t.getVocab().size()).nOut(256) - .updater(new Sgd(1e-3)).build()) + .hasBias(true).nIn(t.getVocab().size()).nOut(256).build()) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) From 77d6c27a73e3e905b3898addea8089280c75ba4c Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 14:59:17 +0200 Subject: [PATCH 36/89] base_conf+bidir_LSTM_256_layersize_Nadam_lr1e-3 Signed-off-by: Andrii Tuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 97523b8bc9..9963862486 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -120,7 +120,7 @@ public static void main(String[] args) throws Exception { .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) - .hasBias(true).nIn(t.getVocab().size()).nOut(256).build()) + .hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) From d4ef0453901d9c6d99d2371fcdd68953efdcefc0 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 15:30:06 +0200 Subject: [PATCH 37/89] base_conf+bidir_LSTM_256_layersize_Nadam_lr1e-3 Signed-off-by: Andrii Tuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 9963862486..f464e7a74d 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -114,7 +114,7 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-3)) + .updater(new Adam(1e-3)) .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() From 8a743054ba515f5916f07597f2eb675b6b5a3f03 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 16:56:20 +0200 Subject: [PATCH 38/89] base_conf+3x_bidir_LSTM_256_layersize_Nadam_lr1e-3 Signed-off-by: Andrii Tuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index f464e7a74d..4be91f8a41 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -123,6 +123,7 @@ public static void main(String[] args) throws Exception { .hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) + .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From 166892814954c00badf262956d85436114b1e6b3 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 17:24:54 +0200 Subject: [PATCH 39/89] base_conf+3xbidir_LSTM_256_layersize_Adam_lr1e-3_l21e-5 Signed-off-by: Andrii Tuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 4be91f8a41..94f4148f8a 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -115,7 +115,7 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Adam(1e-3)) - .l2(1e-6) + .l2(1e-5) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) From 022c4dcd060b3dfa0f45321b51fba439018fe47a Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 22:32:04 +0200 Subject: [PATCH 40/89] base_conf+3x_bidir_LSTM_256_layersize_Adam_Sheduled_lr Signed-off-by: Andrii Tuzhykov --- .../TextClassification.java | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 94f4148f8a..8cf7b7b23a 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -32,9 +32,7 @@ import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.deeplearning4j.examples.utilities.DataUtilities; -import org.nd4j.linalg.schedule.ExponentialSchedule; -import org.nd4j.linalg.schedule.ScheduleType; -import org.nd4j.linalg.schedule.SigmoidSchedule; +import org.nd4j.linalg.schedule.*; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.api.Updater; @@ -97,24 +95,17 @@ public static void main(String[] args) throws Exception { String pathToVocab = "/home/jenkins/uncased_L-12_H-768_A-12/vocab.txt"; BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); -// ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() -// .seed(seed) -// .updater(new Adam(1e-4)) -// .l2(1e-6) -// .weightInit(WeightInit.XAVIER) -// .graphBuilder() -// -// .addInputs("input1", "input2") -// .addLayer("L1", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input1") -// .addLayer("L2", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input2") -// .addVertex("merge", new MergeVertex(), "L1", "L2") -// .addLayer("out", new OutputLayer.Builder().nIn(4+4).nOut(3).build(), "merge") -// .setOutputs("out") -// .build(); + + ISchedule lrSchedule = new MapSchedule.Builder(ScheduleType.EPOCH) + .add(0, 2e-3) + .add(1, 1e-3) + .add(3, 8e-4) + .add(5, 5e-4) + .add(7, 2e-4).build(); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-3)) + .updater(new Adam(lrSchedule)) .l2(1e-5) .weightInit(WeightInit.XAVIER) .list() From c7f5393be588bbab139e0d7832fb63cb1b585fed Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Tue, 3 Mar 2020 12:56:06 +0200 Subject: [PATCH 41/89] base_conf+2x_bidir_LSTM_256_Adam_lr1e-3_lstm_dropout_075 Signed-off-by: Andrii Tuzhykov --- .../textclassification/TextClassification.java | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 8cf7b7b23a..0e760ffed8 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -96,25 +96,20 @@ public static void main(String[] args) throws Exception { BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); - ISchedule lrSchedule = new MapSchedule.Builder(ScheduleType.EPOCH) - .add(0, 2e-3) - .add(1, 1e-3) - .add(3, 8e-4) - .add(5, 5e-4) - .add(7, 2e-4).build(); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(lrSchedule)) + .updater(new Adam(1e-3)) .l2(1e-5) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) .hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) - .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) - .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) - .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) + .layer(new Bidirectional(new LSTM.Builder().nOut(256) + .dropOut(0.75).activation(Activation.TANH).build())) + .layer(new Bidirectional(new LSTM.Builder().nOut(256) + .dropOut(0.75).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From ccecf71daab2537f7fe381e0245552a9cd1a95fa Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Thu, 5 Mar 2020 01:01:08 +0200 Subject: [PATCH 42/89] prefinal examples Signed-off-by: Andrii Tuzhykov --- .../BertIteratorExample.java} | 105 +++++++++++++----- 1 file changed, 79 insertions(+), 26 deletions(-) rename dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/{textclassification/TextClassification.java => bertiteratorexample/BertIteratorExample.java} (68%) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/bertiteratorexample/BertIteratorExample.java similarity index 68% rename from dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java rename to dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/bertiteratorexample/BertIteratorExample.java index 0e760ffed8..092feed707 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/bertiteratorexample/BertIteratorExample.java @@ -1,4 +1,19 @@ -package org.deeplearning4j.examples.nlp.textclassification; +/******************************************************************************* + * Copyright (c) 2015-2020 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.examples.nlp.bertiteratorexample; import org.apache.commons.io.FileUtils; @@ -6,7 +21,6 @@ import org.deeplearning4j.api.storage.StatsStorage; import org.deeplearning4j.iterator.BertIterator; import org.deeplearning4j.iterator.provider.FileLabeledSentenceProvider; -import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; @@ -25,35 +39,48 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.dataset.api.MultiDataSet; import org.nd4j.linalg.dataset.api.MultiDataSetPreProcessor; -import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Adam; -import org.nd4j.linalg.learning.config.Nadam; -import org.nd4j.linalg.learning.config.Nesterovs; -import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.deeplearning4j.examples.utilities.DataUtilities; -import org.nd4j.linalg.schedule.*; -import org.deeplearning4j.nn.api.Updater; -import org.deeplearning4j.nn.api.Updater; - +import java.io.BufferedInputStream; import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.*; -public class TextClassification { + +/** + * @author andrewtuzhykov@gmail.com + */ + +public class BertIteratorExample { /** * Data URL for downloading */ public static final String DATA_URL = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"; + + // Bert Base Uncased Vocabulary + public static final String VOCAB_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt"; + /** * Location to save and extract the training/testing data */ public static final String DATA_PATH = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_w2vSentiment/"); + + /** + * Get BertIterator instance. + * + * @param isTraining specifies which dataset iterator we want to get: train or test. + * @param t BertWordPieceTokenizerFactory initialized with provided vocab. + * @return BertIterator with specified parameters. + */ + public static BertIterator getBertDataSetIterator(boolean isTraining, BertWordPieceTokenizerFactory t) { String path = FilenameUtils.concat(DATA_PATH, (isTraining ? "aclImdb/train/" : "aclImdb/test/")); @@ -91,10 +118,13 @@ public static void main(String[] args) throws Exception { downloadData(); - final int seed = 0; //Seed for reproducibility - String pathToVocab = "/home/jenkins/uncased_L-12_H-768_A-12/vocab.txt"; - BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); + final int seed = 0; + //Seed for reproducibility + String pathToVocab = DATA_PATH + "vocab.txt"; + // Path to vocab + // BertWordPieceTokenizerFactory initialized with given vocab + BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() @@ -103,21 +133,28 @@ public static void main(String[] args) throws Exception { .l2(1e-5) .weightInit(WeightInit.XAVIER) .list() + // matching EmbeddingSequenceLayer outputs with Bidirectional LSTM inputs .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) .hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) .layer(new Bidirectional(new LSTM.Builder().nOut(256) - .dropOut(0.75).activation(Activation.TANH).build())) + .dropOut(0.8).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256) - .dropOut(0.75).activation(Activation.TANH).build())) + .dropOut(0.8).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) - .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) + .layer(new OutputLayer.Builder().nOut(2) + .dropOut(0.97).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); + + // Getting train and test BertIterators for both: test and train, + // changing argument isTraining: true to get train and false to get test respectively BertIterator train = getBertDataSetIterator(true, t); BertIterator test = getBertDataSetIterator(false, t); + + // preprocessor for DataType matching MultiDataSetPreProcessor mdsPreprocessor = new MultiDataSetPreProcessor() { @Override public void preProcess(MultiDataSet multiDataSet) { @@ -125,11 +162,11 @@ public void preProcess(MultiDataSet multiDataSet) { } }; - + // Applying preprocessor for both: train and test datasets train.setPreProcessor(mdsPreprocessor); test.setPreProcessor(mdsPreprocessor); - + // initialize MultiLayerNetwork instance with described above configuration MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -145,7 +182,8 @@ public void preProcess(MultiDataSet multiDataSet) { uiServer.attach(statsStorage); - for (int i = 1; i <= 10; i++) { + // Setting to train net for 19 epochs (note: previous net state persist after each iteration) + for (int i = 1; i <= 19; i++) { net.fit(train); @@ -153,12 +191,6 @@ public void preProcess(MultiDataSet multiDataSet) { System.out.println(eval.stats()); } - System.out.println("Training set evaluation"); - Evaluation eval = net.doEvaluation(train, new Evaluation[]{new Evaluation()})[0]; - System.out.println(eval.stats()); - - System.out.print(net.summary()); - } public static void downloadData() throws Exception { @@ -188,6 +220,27 @@ public static void downloadData() throws Exception { System.out.println("Data (extracted) already exists at " + extractedFile.getAbsolutePath()); } } + + + String vocabPath = DATA_PATH + "vocab.txt"; + File vocabFile = new File(vocabPath); + + if (!vocabFile.exists()) { + try (BufferedInputStream inputStream = new BufferedInputStream(new URL(VOCAB_URL).openStream()); + FileOutputStream file = new FileOutputStream(DATA_PATH + "vocab.txt")) { + byte data[] = new byte[1024]; + int byteContent; + while ((byteContent = inputStream.read(data, 0, 1024)) != -1) { + file.write(data, 0, byteContent); + } + } catch (IOException e) { + // handles IO exceptions + } + + } else { + System.out.println("Vocab file already exists at " + vocabFile.getAbsolutePath()); + } + } From 51e30c371f537e2e33f151d07b8387fc849ab142 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Thu, 20 Feb 2020 20:04:43 +0000 Subject: [PATCH 43/89] examples added + changed nd4j backend in pom.xml to run on DGX1 Signed-off-by: atuzhykov --- .../TextClassification.java | 184 ++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java new file mode 100644 index 0000000000..2852771df0 --- /dev/null +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -0,0 +1,184 @@ +package org.deeplearning4j.examples.nlp.textclassification; + + + + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.FilenameUtils; +import org.deeplearning4j.api.storage.StatsStorage; +import org.deeplearning4j.iterator.BertIterator; +import org.deeplearning4j.iterator.provider.FileLabeledSentenceProvider; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.optimize.listeners.ScoreIterationListener; +import org.deeplearning4j.text.tokenization.tokenizerfactory.BertWordPieceTokenizerFactory; +import org.deeplearning4j.ui.api.UIServer; +import org.deeplearning4j.ui.stats.StatsListener; +import org.deeplearning4j.ui.storage.FileStatsStorage; +import org.nd4j.evaluation.classification.Evaluation; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.dataset.api.MultiDataSet; +import org.nd4j.linalg.dataset.api.MultiDataSetPreProcessor; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Adam; +import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.deeplearning4j.examples.utilities.DataUtilities; + + +import java.io.File; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.*; + +class TextClassifier { + + + /** + * Data URL for downloading + */ + public static final String DATA_URL = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"; + /** + * Location to save and extract the training/testing data + */ + public static final String DATA_PATH = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_w2vSentiment/"); + + public static BertIterator getBertDataSetIterator(boolean isTraining, BertWordPieceTokenizerFactory t) { + + String path = FilenameUtils.concat(DATA_PATH, (isTraining ? "aclImdb/train/" : "aclImdb/test/")); + String positiveBaseDir = FilenameUtils.concat(path, "pos"); + String negativeBaseDir = FilenameUtils.concat(path, "neg"); + Random rng = new Random(42); + + File filePositive = new File(positiveBaseDir); + File fileNegative = new File(negativeBaseDir); + + Map> reviewFilesMap = new HashMap<>(); + reviewFilesMap.put("Positive", Arrays.asList(Objects.requireNonNull(filePositive.listFiles()))); + reviewFilesMap.put("Negative", Arrays.asList(Objects.requireNonNull(fileNegative.listFiles()))); + + + BertIterator b = BertIterator.builder() + .tokenizer(t) + .lengthHandling(BertIterator.LengthHandling.FIXED_LENGTH, 16) + .minibatchSize(2) + .sentenceProvider(new FileLabeledSentenceProvider(reviewFilesMap, rng)) + .featureArrays(BertIterator.FeatureArrays.INDICES_MASK) + .vocabMap(t.getVocab()) + .task(BertIterator.Task.SEQ_CLASSIFICATION) + .build(); + + + return b; + } + + + public static void main(String[] args) throws Exception { + + + //Download and extract data + downloadData(); + + + Nd4j.getMemoryManager().setAutoGcWindow(10000); //https://deeplearning4j.org/workspaces + final int seed = 0; //Seed for reproducibility + String pathToVocab = "/home/jenkins/uncased_L-12_H-768_A-12/vocab.txt"; + BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); + + + //DataSetIterators for training and testing respectively + + + //Set up network configuration + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .seed(seed) + .updater(new Adam(1e-3)) + .l2(1e-6) + .weightInit(WeightInit.XAVIER) + .list() + .setInputType(InputType.recurrent(1)) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .layer(new GlobalPoolingLayer(PoolingType.MAX)) + .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) + .lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .build(); + + BertIterator train = getBertDataSetIterator(true, t); + BertIterator test = getBertDataSetIterator(false, t); + + MultiDataSetPreProcessor mdsPreprocessor = new MultiDataSetPreProcessor() { + @Override + public void preProcess(MultiDataSet multiDataSet) { + multiDataSet.setFeaturesMaskArray(0, multiDataSet.getFeaturesMaskArray(0).castTo(DataType.FLOAT)); + } + }; + + + train.setPreProcessor(mdsPreprocessor); + test.setPreProcessor(mdsPreprocessor); + + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + + + //Initialize the user interface backend + UIServer uiServer = UIServer.getInstance(); + + //Configure where the network information (gradients, activations, score vs. time etc) is to be stored + //Then add the StatsListener to collect this information from the network, as it trains + StatsStorage statsStorage = new FileStatsStorage(new File(System.getProperty("java.io.tmpdir"), "ui-stats-" + System.currentTimeMillis() + ".dl4j")); + int listenerFrequency = 1; + net.setListeners(new StatsListener(statsStorage, listenerFrequency), new ScoreIterationListener(50)); + //Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized + uiServer.attach(statsStorage); + + + for (int i = 1; i <= 10; i++) { + + net.fit(train); + + Evaluation eval = net.doEvaluation(test, new Evaluation[]{new Evaluation()})[0]; + System.out.println(eval.stats()); + } + + } + + public static void downloadData() throws Exception { + //Create directory if required + File directory = new File(DATA_PATH); + if (!directory.exists()) directory.mkdir(); + + //Download file: + String archizePath = DATA_PATH + "aclImdb_v1.tar.gz"; + File archiveFile = new File(archizePath); + String extractedPath = DATA_PATH + "aclImdb"; + File extractedFile = new File(extractedPath); + + if (!archiveFile.exists()) { + System.out.println("Starting data download (80MB)..."); + FileUtils.copyURLToFile(new URL(DATA_URL), archiveFile); + System.out.println("Data (.tar.gz file) downloaded to " + archiveFile.getAbsolutePath()); + //Extract tar.gz file to output directory + DataUtilities.extractTarGz(archizePath, DATA_PATH); + } else { + //Assume if archive (.tar.gz) exists, then data has already been extracted + System.out.println("Data (.tar.gz file) already exists at " + archiveFile.getAbsolutePath()); + if (!extractedFile.exists()) { + //Extract tar.gz file to output directory + DataUtilities.extractTarGz(archizePath, DATA_PATH); + } else { + System.out.println("Data (extracted) already exists at " + extractedFile.getAbsolutePath()); + } + } + } + + +} + From b42580e42f4942246bdf9272ccf830b16339c63b Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 21 Feb 2020 12:59:40 +0000 Subject: [PATCH 44/89] examples added + changed nd4j backend in pom.xml to run on DGX1 Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 2852771df0..1e5809ac21 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -36,7 +36,7 @@ import java.nio.charset.StandardCharsets; import java.util.*; -class TextClassifier { +public class TextClassification { /** From aa0c7494436f790e34b2cc4c8e0f92fec3e8b042 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 21 Feb 2020 13:05:59 +0000 Subject: [PATCH 45/89] other small changes Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 1e5809ac21..80febc5752 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -66,7 +66,7 @@ public static BertIterator getBertDataSetIterator(boolean isTraining, BertWordPi BertIterator b = BertIterator.builder() .tokenizer(t) .lengthHandling(BertIterator.LengthHandling.FIXED_LENGTH, 16) - .minibatchSize(2) + .minibatchSize(32) .sentenceProvider(new FileLabeledSentenceProvider(reviewFilesMap, rng)) .featureArrays(BertIterator.FeatureArrays.INDICES_MASK) .vocabMap(t.getVocab()) @@ -85,7 +85,6 @@ public static void main(String[] args) throws Exception { downloadData(); - Nd4j.getMemoryManager().setAutoGcWindow(10000); //https://deeplearning4j.org/workspaces final int seed = 0; //Seed for reproducibility String pathToVocab = "/home/jenkins/uncased_L-12_H-768_A-12/vocab.txt"; BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); From 5f5625e6d914acdc66d57673d28d77009d5e8bcb Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 21 Feb 2020 14:29:59 +0000 Subject: [PATCH 46/89] small fix to match cuda version with container Signed-off-by: atuzhykov --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f2c38c7ee3..7479a863e5 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ - nd4j-cuda-10.0-platform + nd4j-cuda-10.2-platform UTF-8 bin From 61ca5552a23e10cd4d9a7e5b3001c0d60d43339e Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 21 Feb 2020 15:22:18 +0000 Subject: [PATCH 47/89] small fix to match cuda version with container Signed-off-by: atuzhykov --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7479a863e5..f2c38c7ee3 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ - nd4j-cuda-10.2-platform + nd4j-cuda-10.0-platform UTF-8 bin From f6c95c20a51d03999a46f3e80598e13b9f36a5e8 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Sat, 22 Feb 2020 21:59:07 +0000 Subject: [PATCH 48/89] lr 1e-3 > 4e-3 (as multiplying batchsize*k, lr*sqrt(k)) l2 1e-6 > 1e-3 Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 80febc5752..e20e41d609 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -96,8 +96,8 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-3)) - .l2(1e-6) + .updater(new Adam(4e-3)) + .l2(1e-3) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) From a20525ad1d47eb604b61ae94745095b24e5b0282 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Sun, 23 Feb 2020 01:43:36 +0000 Subject: [PATCH 49/89] lr 1e-3 > 4e-3 (as multiplying batchsize*k, lr*sqrt(k)) l2 1e-3 > 1e-6 Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index e20e41d609..780b7ea5a1 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -97,7 +97,7 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Adam(4e-3)) - .l2(1e-3) + .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) From 6d60af52b5b1c5dca200ef98820986235b999cc3 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Mon, 24 Feb 2020 11:11:36 +0000 Subject: [PATCH 50/89] experiment0 Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 780b7ea5a1..6866e7586f 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -27,8 +27,11 @@ import org.nd4j.linalg.dataset.api.MultiDataSetPreProcessor; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Adam; +import org.nd4j.linalg.learning.config.Nadam; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.deeplearning4j.examples.utilities.DataUtilities; +import org.nd4j.linalg.schedule.ExponentialSchedule; +import org.nd4j.linalg.schedule.ScheduleType; import java.io.File; @@ -96,12 +99,13 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(4e-3)) - .l2(1e-6) + .updater(new Nadam(new ExponentialSchedule(ScheduleType.ITERATION, 4e-3, 0.99 ))) + .weightDecay(1e-5) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) From 5b1082c5a8ba2a75c8ec13fcc2dd59c94c339df9 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 15:49:47 +0000 Subject: [PATCH 51/89] experiment1 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../textclassification/TextClassification.java | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 6866e7586f..b2480f7362 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -28,10 +28,16 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Adam; import org.nd4j.linalg.learning.config.Nadam; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.deeplearning4j.examples.utilities.DataUtilities; import org.nd4j.linalg.schedule.ExponentialSchedule; import org.nd4j.linalg.schedule.ScheduleType; +import org.nd4j.linalg.schedule.SigmoidSchedule; +import org.deeplearning4j.nn.api.Updater; +import org.deeplearning4j.nn.api.Updater; + + import java.io.File; @@ -93,18 +99,20 @@ public static void main(String[] args) throws Exception { BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); - //DataSetIterators for training and testing respectively //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(new ExponentialSchedule(ScheduleType.ITERATION, 4e-3, 0.99 ))) - .weightDecay(1e-5) + .updater(new Nadam(1e-5)) + .l2(0.0001) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) + .hasBias(true).nIn(t.getVocab().size()) + .updater(new Sgd(1e-4)) + .nOut(128).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) From c0d363e5dc6ff0a0ab99fc435b2db0eeab6e8ceb Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 16:19:38 +0000 Subject: [PATCH 52/89] experiment2 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index b2480f7362..a51b81e2a7 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -104,18 +104,17 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-5)) + .updater(new Nadam(1e-2)) .l2(0.0001) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) .hasBias(true).nIn(t.getVocab().size()) - .updater(new Sgd(1e-4)) + .updater(new Sgd(1e-2)) .nOut(128).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From b92aa8f35e748f9d79e518b29e64390efc8ca98f Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 17:08:46 +0000 Subject: [PATCH 53/89] experiment3 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index a51b81e2a7..1ac714d6a1 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -28,6 +28,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Adam; import org.nd4j.linalg.learning.config.Nadam; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.deeplearning4j.examples.utilities.DataUtilities; @@ -104,14 +105,14 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-2)) - .l2(0.0001) + .updater(new Nadam(1e-1)) + .weightDecay(1e-4,true) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) .hasBias(true).nIn(t.getVocab().size()) - .updater(new Sgd(1e-2)) + .updater(new Nesterovs(1e-2)) .nOut(128).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) From 407feae328259425bd293338053fba58f8e52ed3 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 17:20:50 +0000 Subject: [PATCH 54/89] experiment4 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 1ac714d6a1..dec11fa646 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,14 +105,13 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-1)) - .weightDecay(1e-4,true) + .updater(new Nadam(1e-3)) + .weightDecay(1e-5,true) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) .hasBias(true).nIn(t.getVocab().size()) - .updater(new Nesterovs(1e-2)) .nOut(128).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) From 9adc5e45f34104098bf88df2bd77d420514c0525 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 17:26:54 +0000 Subject: [PATCH 55/89] experiment5 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index dec11fa646..5354976955 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,7 +105,7 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-3)) + .updater(new Nadam(1e-4)) .weightDecay(1e-5,true) .weightInit(WeightInit.XAVIER) .list() From 2e73eaebe412741a3badb1c05864056fb0efbb3b Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 17:34:32 +0000 Subject: [PATCH 56/89] experiment6 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 5354976955..5b398860fb 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -106,7 +106,6 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Nadam(1e-4)) - .weightDecay(1e-5,true) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) From 88ea86387b217a6a9da49fd70142a69ef95ba620 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 17:59:00 +0000 Subject: [PATCH 57/89] experiment7 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 5b398860fb..49f73ddd48 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -106,14 +106,16 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Nadam(1e-4)) + .weightDecay(1e-7) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) .hasBias(true).nIn(t.getVocab().size()) - .nOut(128).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .nOut(256).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From f4aadbef6fa71c8353cbf3aebc4dd8f650880f45 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 18:22:58 +0000 Subject: [PATCH 58/89] experiment8 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 49f73ddd48..3111088d84 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,8 +105,8 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-4)) - .weightDecay(1e-7) + .updater(new Nadam(new SigmoidSchedule(ScheduleType.ITERATION,1e-4,0.5, 10))) + .weightDecay(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) From 772d38e2532e12182e7ea40ff6d951a6c0b72f52 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 18:32:05 +0000 Subject: [PATCH 59/89] experiment9 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 3111088d84..7c8dc63066 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,7 +105,7 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(new SigmoidSchedule(ScheduleType.ITERATION,1e-4,0.5, 10))) + .updater(new Nadam(new SigmoidSchedule(ScheduleType.ITERATION,1e-3,0.5, 10))) .weightDecay(1e-6) .weightInit(WeightInit.XAVIER) .list() From 096eca42a3f28fec9ea02d23fbc4c3b4686b740b Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 18:37:08 +0000 Subject: [PATCH 60/89] experiment9 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 7c8dc63066..1eb80f89b4 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,7 +105,7 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(new SigmoidSchedule(ScheduleType.ITERATION,1e-3,0.5, 10))) + .updater(new Nadam(new SigmoidSchedule(ScheduleType.ITERATION,1e-2,0.5, 10))) .weightDecay(1e-6) .weightInit(WeightInit.XAVIER) .list() From b2f6510cc476664af9a6fc1af9ac9a9b9ba333bb Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 18:38:50 +0000 Subject: [PATCH 61/89] experiment10 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 1eb80f89b4..59122d54f2 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,7 +105,7 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(new SigmoidSchedule(ScheduleType.ITERATION,1e-2,0.5, 10))) + .updater(new Nadam(1e-1)) .weightDecay(1e-6) .weightInit(WeightInit.XAVIER) .list() From c5de18f4c3c6e55820e88d013ec3bd022ee1862c Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 18:45:02 +0000 Subject: [PATCH 62/89] experiment10 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 59122d54f2..bac9ec7905 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,7 +105,7 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-1)) + .updater(new Nadam(1e-2)) .weightDecay(1e-6) .weightInit(WeightInit.XAVIER) .list() From 145e1fd1552850b05c278edf4e6b75eeae60f727 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Tue, 25 Feb 2020 22:47:13 +0000 Subject: [PATCH 63/89] experiment11 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index bac9ec7905..f78e500c42 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,17 +105,16 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-2)) .weightDecay(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) - .hasBias(true).nIn(t.getVocab().size()) - .nOut(256).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) + .layer(0, new EmbeddingSequenceLayer.Builder() + .hasBias(true).nIn(t.getVocab().size()).nOut(256).weightInit(WeightInit.ZERO) + .updater(new Sgd(1e-4)).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).updater(new Nadam(1e-4)).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).updater(new Nadam(1e-4)).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).updater(new Nadam(1e-4)).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From 2a72f0aa62a2514fa230317b8a667e32bf584b1a Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Wed, 26 Feb 2020 08:53:02 +0000 Subject: [PATCH 64/89] experiment12 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../textclassification/TextClassification.java | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index f78e500c42..0fe7781271 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,21 +105,24 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .weightDecay(1e-6) + .updater(new Nadam(1e-3)) + .weightDecay(1e-5) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder() - .hasBias(true).nIn(t.getVocab().size()).nOut(256).weightInit(WeightInit.ZERO) - .updater(new Sgd(1e-4)).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).updater(new Nadam(1e-4)).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).updater(new Nadam(1e-4)).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).updater(new Nadam(1e-4)).build()) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) + .hasBias(true).nIn(t.getVocab().size()) + .updater(new Sgd(1e-2)) + .nOut(128).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); + + BertIterator train = getBertDataSetIterator(true, t); BertIterator test = getBertDataSetIterator(false, t); From 3f07a38f19df1f0dabb0387f49c34827842016e9 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Wed, 26 Feb 2020 09:39:48 +0000 Subject: [PATCH 65/89] experiment12 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 0fe7781271..54d531b6fe 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -105,14 +105,14 @@ public static void main(String[] args) throws Exception { //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-3)) + .updater(new Adam(1e-3)) .weightDecay(1e-5) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) .hasBias(true).nIn(t.getVocab().size()) - .updater(new Sgd(1e-2)) +// .updater(new Sgd(1e-2)) .nOut(128).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) From 6256e85f5a201bdf37bd6f417ff742c3ca918f75 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Wed, 26 Feb 2020 10:12:55 +0000 Subject: [PATCH 66/89] experiment12 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 54d531b6fe..5c7b7f607b 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -106,14 +106,11 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Adam(1e-3)) - .weightDecay(1e-5) + .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) - .hasBias(true).nIn(t.getVocab().size()) -// .updater(new Sgd(1e-2)) - .nOut(128).build()) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) @@ -122,7 +119,6 @@ public static void main(String[] args) throws Exception { .build(); - BertIterator train = getBertDataSetIterator(true, t); BertIterator test = getBertDataSetIterator(false, t); From da1458873d4dd042f2e886fd96f62db218f2c695 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Wed, 26 Feb 2020 11:50:25 +0000 Subject: [PATCH 67/89] experiment12 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 5c7b7f607b..6eedeb9acf 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -110,9 +110,10 @@ public static void main(String[] args) throws Exception { .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) + .l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(256).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -143,7 +144,7 @@ public void preProcess(MultiDataSet multiDataSet) { //Configure where the network information (gradients, activations, score vs. time etc) is to be stored //Then add the StatsListener to collect this information from the network, as it trains StatsStorage statsStorage = new FileStatsStorage(new File(System.getProperty("java.io.tmpdir"), "ui-stats-" + System.currentTimeMillis() + ".dl4j")); - int listenerFrequency = 1; + int listenerFrequency = 20; net.setListeners(new StatsListener(statsStorage, listenerFrequency), new ScoreIterationListener(50)); //Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized uiServer.attach(statsStorage); From cb26a75ec13905625e5e78031384f6add9d260f7 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Wed, 26 Feb 2020 12:39:35 +0000 Subject: [PATCH 68/89] experiment13 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 6eedeb9acf..fb9e61f66d 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -111,7 +111,8 @@ public static void main(String[] args) throws Exception { .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) - .l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(256).build()) + .l2(0).hasBias(true).nIn(t.getVocab().size()) + .nOut(256).updater(new Sgd(1e-3)).build()) .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) From f0e124155ec14dd34ba148b3d7684a0108be0677 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Wed, 26 Feb 2020 16:28:32 +0000 Subject: [PATCH 69/89] experiment14 (notes belong to commit name are here http://tiny.cc/yashkz) Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index fb9e61f66d..beb5106aad 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -115,6 +115,8 @@ public static void main(String[] args) throws Exception { .nOut(256).updater(new Sgd(1e-3)).build()) .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) + .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From c4f9d9a0fa6afa68dce32a8a3e042e59537cdf61 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Thu, 27 Feb 2020 22:27:48 +0000 Subject: [PATCH 70/89] baseline conf + LengthHandling.FIXED_LENGTH=256 Signed-off-by: atuzhykov --- .../TextClassification.java | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index beb5106aad..5fa67399bb 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -1,8 +1,6 @@ package org.deeplearning4j.examples.nlp.textclassification; - - import org.apache.commons.io.FileUtils; import org.apache.commons.io.FilenameUtils; import org.deeplearning4j.api.storage.StatsStorage; @@ -39,8 +37,6 @@ import org.deeplearning4j.nn.api.Updater; - - import java.io.File; import java.net.URL; import java.nio.charset.StandardCharsets; @@ -75,7 +71,7 @@ public static BertIterator getBertDataSetIterator(boolean isTraining, BertWordPi BertIterator b = BertIterator.builder() .tokenizer(t) - .lengthHandling(BertIterator.LengthHandling.FIXED_LENGTH, 16) + .lengthHandling(BertIterator.LengthHandling.FIXED_LENGTH, 256) .minibatchSize(32) .sentenceProvider(new FileLabeledSentenceProvider(reviewFilesMap, rng)) .featureArrays(BertIterator.FeatureArrays.INDICES_MASK) @@ -100,9 +96,6 @@ public static void main(String[] args) throws Exception { BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); - - - //Set up network configuration MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Adam(1e-3)) @@ -110,19 +103,14 @@ public static void main(String[] args) throws Exception { .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0,1)) - .l2(0).hasBias(true).nIn(t.getVocab().size()) - .nOut(256).updater(new Sgd(1e-3)).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(256).activation(Activation.TANH).build()) - + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); - BertIterator train = getBertDataSetIterator(true, t); BertIterator test = getBertDataSetIterator(false, t); From 02d47fdfd35e3850ffbd0892d6a809e2b21c7ca7 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 28 Feb 2020 09:50:50 +0000 Subject: [PATCH 71/89] baselineconf+LengthHandling.FIXED_LENGTH=256+Bidirectional_lstm Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 5fa67399bb..b02c352bdd 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -11,6 +11,7 @@ import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.listeners.ScoreIterationListener; @@ -104,8 +105,8 @@ public static void main(String[] args) throws Exception { .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nOut(128).activation(Activation.TANH).build()) + .layer(new Bidirectional(new LSTM.Builder().nOut(128).activation(Activation.TANH).build())) + .layer(new Bidirectional(new LSTM.Builder().nOut(128).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From 61b63f88434c31736690add8a3f02bef8683d255 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 28 Feb 2020 10:43:23 +0000 Subject: [PATCH 72/89] baselineconf+LengthHandling.FIXED_LENGTH=256+Bidirectional_lstm+lr1e-4 Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index b02c352bdd..edfbfc16f3 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -99,7 +99,7 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-3)) + .updater(new Adam(1e-4)) .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() From 820eda5dd4945472575d0b503a387100623fb2b5 Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 28 Feb 2020 15:29:31 +0000 Subject: [PATCH 73/89] baselineconf+LengthHandling.FIXED_LENGTH=256+Bidirectional_lstm_256 Signed-off-by: atuzhykov --- .../nlp/textclassification/TextClassification.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index edfbfc16f3..ce59f39da2 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -99,14 +99,14 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-4)) + .updater(new Adam(1e-3)) .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) - .layer(new Bidirectional(new LSTM.Builder().nOut(128).activation(Activation.TANH).build())) - .layer(new Bidirectional(new LSTM.Builder().nOut(128).activation(Activation.TANH).build())) + .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) + .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -150,6 +150,8 @@ public void preProcess(MultiDataSet multiDataSet) { System.out.println(eval.stats()); } + System.out.print(net.summary()); + } public static void downloadData() throws Exception { From 2c757c06c20a52cf1ee2453168d57e8bfeae510c Mon Sep 17 00:00:00 2001 From: atuzhykov Date: Fri, 28 Feb 2020 21:54:22 +0000 Subject: [PATCH 74/89] baselineconf+LengthHandling.FIXED_LENGTH=256+Bidirectional_lstm_256_lr1e-4 Signed-off-by: atuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index ce59f39da2..b80fe319c5 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -99,7 +99,7 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-3)) + .updater(new Adam(1e-4)) .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() From 53efeec4dab37026d2517da46d9f0bc01525de24 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 14:10:40 +0200 Subject: [PATCH 75/89] base_conf+bidir_LSTM_256_layersize_Adam_lr1e-3_SGD_lr1e-3_for_EmbdLayer --- .../TextClassification.java | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index b80fe319c5..1fd3bc0dd3 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -6,6 +6,7 @@ import org.deeplearning4j.api.storage.StatsStorage; import org.deeplearning4j.iterator.BertIterator; import org.deeplearning4j.iterator.provider.FileLabeledSentenceProvider; +import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; @@ -96,15 +97,31 @@ public static void main(String[] args) throws Exception { String pathToVocab = "/home/jenkins/uncased_L-12_H-768_A-12/vocab.txt"; BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); +// ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() +// .seed(seed) +// .updater(new Adam(1e-4)) +// .l2(1e-6) +// .weightInit(WeightInit.XAVIER) +// .graphBuilder() +// +// .addInputs("input1", "input2") +// .addLayer("L1", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input1") +// .addLayer("L2", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input2") +// .addVertex("merge", new MergeVertex(), "L1", "L2") +// .addLayer("out", new OutputLayer.Builder().nIn(4+4).nOut(3).build(), "merge") +// .setOutputs("out") +// .build(); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-4)) + .updater(new Adam(1e-3)) .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0).hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) + .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) + .hasBias(true).nIn(t.getVocab().size()).nOut(128) + .updater(new Sgd(1e-3)).build()) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) @@ -150,6 +167,10 @@ public void preProcess(MultiDataSet multiDataSet) { System.out.println(eval.stats()); } + System.out.println("Training set evaluation"); + Evaluation eval = net.doEvaluation(train, new Evaluation[]{new Evaluation()})[0]; + System.out.println(eval.stats()); + System.out.print(net.summary()); } From 1bbb9e0a007b22ea5536dbfbe85bcc9cf0d54ca5 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 14:24:59 +0200 Subject: [PATCH 76/89] base_conf+bidir_LSTM_256_layersize_Adam_lr1e-3_SGD_lr1e-3_for_EmbdLayer --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 1fd3bc0dd3..929cd023fc 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -120,7 +120,7 @@ public static void main(String[] args) throws Exception { .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) - .hasBias(true).nIn(t.getVocab().size()).nOut(128) + .hasBias(true).nIn(t.getVocab().size()).nOut(256) .updater(new Sgd(1e-3)).build()) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) From 880cd3051a7cfa68ce3210d884c2cf4ab72e0b17 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 14:35:09 +0200 Subject: [PATCH 77/89] base_conf+bidir_LSTM_256_layersize_Nadam_lr1e-3 --- .../examples/nlp/textclassification/TextClassification.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 929cd023fc..97523b8bc9 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -114,14 +114,13 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-3)) + .updater(new Nadam(1e-3)) .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) - .hasBias(true).nIn(t.getVocab().size()).nOut(256) - .updater(new Sgd(1e-3)).build()) + .hasBias(true).nIn(t.getVocab().size()).nOut(256).build()) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) From 9555d557956f5802567f9ad4c04e14c106f4b81c Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 14:59:17 +0200 Subject: [PATCH 78/89] base_conf+bidir_LSTM_256_layersize_Nadam_lr1e-3 Signed-off-by: Andrii Tuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 97523b8bc9..9963862486 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -120,7 +120,7 @@ public static void main(String[] args) throws Exception { .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) - .hasBias(true).nIn(t.getVocab().size()).nOut(256).build()) + .hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) From 8aab2fbe7919e1b0f202f3d27d3d207eff9666f6 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 15:30:06 +0200 Subject: [PATCH 79/89] base_conf+bidir_LSTM_256_layersize_Nadam_lr1e-3 Signed-off-by: Andrii Tuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 9963862486..f464e7a74d 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -114,7 +114,7 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Nadam(1e-3)) + .updater(new Adam(1e-3)) .l2(1e-6) .weightInit(WeightInit.XAVIER) .list() From 542db8615de74fe47559d739aa1413a299f4578d Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 16:56:20 +0200 Subject: [PATCH 80/89] base_conf+3x_bidir_LSTM_256_layersize_Nadam_lr1e-3 Signed-off-by: Andrii Tuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index f464e7a74d..4be91f8a41 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -123,6 +123,7 @@ public static void main(String[] args) throws Exception { .hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) + .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From 46dffc010d5c0a8245ce78af15fb45e5a755234a Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 17:24:54 +0200 Subject: [PATCH 81/89] base_conf+3xbidir_LSTM_256_layersize_Adam_lr1e-3_l21e-5 Signed-off-by: Andrii Tuzhykov --- .../examples/nlp/textclassification/TextClassification.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 4be91f8a41..94f4148f8a 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -115,7 +115,7 @@ public static void main(String[] args) throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Adam(1e-3)) - .l2(1e-6) + .l2(1e-5) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) From c5a979aed9933df5fc96eaa24d5a7dc9efaf26b3 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Mon, 2 Mar 2020 22:32:04 +0200 Subject: [PATCH 82/89] base_conf+3x_bidir_LSTM_256_layersize_Adam_Sheduled_lr Signed-off-by: Andrii Tuzhykov --- .../TextClassification.java | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 94f4148f8a..8cf7b7b23a 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -32,9 +32,7 @@ import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.deeplearning4j.examples.utilities.DataUtilities; -import org.nd4j.linalg.schedule.ExponentialSchedule; -import org.nd4j.linalg.schedule.ScheduleType; -import org.nd4j.linalg.schedule.SigmoidSchedule; +import org.nd4j.linalg.schedule.*; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.api.Updater; @@ -97,24 +95,17 @@ public static void main(String[] args) throws Exception { String pathToVocab = "/home/jenkins/uncased_L-12_H-768_A-12/vocab.txt"; BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); -// ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() -// .seed(seed) -// .updater(new Adam(1e-4)) -// .l2(1e-6) -// .weightInit(WeightInit.XAVIER) -// .graphBuilder() -// -// .addInputs("input1", "input2") -// .addLayer("L1", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input1") -// .addLayer("L2", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input2") -// .addVertex("merge", new MergeVertex(), "L1", "L2") -// .addLayer("out", new OutputLayer.Builder().nIn(4+4).nOut(3).build(), "merge") -// .setOutputs("out") -// .build(); + + ISchedule lrSchedule = new MapSchedule.Builder(ScheduleType.EPOCH) + .add(0, 2e-3) + .add(1, 1e-3) + .add(3, 8e-4) + .add(5, 5e-4) + .add(7, 2e-4).build(); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(1e-3)) + .updater(new Adam(lrSchedule)) .l2(1e-5) .weightInit(WeightInit.XAVIER) .list() From 6e390eb6b4d0f166e3ba76ca9e431689db7addda Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Tue, 3 Mar 2020 12:56:06 +0200 Subject: [PATCH 83/89] base_conf+2x_bidir_LSTM_256_Adam_lr1e-3_lstm_dropout_075 Signed-off-by: Andrii Tuzhykov --- .../textclassification/TextClassification.java | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java index 8cf7b7b23a..0e760ffed8 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java @@ -96,25 +96,20 @@ public static void main(String[] args) throws Exception { BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); - ISchedule lrSchedule = new MapSchedule.Builder(ScheduleType.EPOCH) - .add(0, 2e-3) - .add(1, 1e-3) - .add(3, 8e-4) - .add(5, 5e-4) - .add(7, 2e-4).build(); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) - .updater(new Adam(lrSchedule)) + .updater(new Adam(1e-3)) .l2(1e-5) .weightInit(WeightInit.XAVIER) .list() .setInputType(InputType.recurrent(1)) .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) .hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) - .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) - .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) - .layer(new Bidirectional(new LSTM.Builder().nOut(256).activation(Activation.TANH).build())) + .layer(new Bidirectional(new LSTM.Builder().nOut(256) + .dropOut(0.75).activation(Activation.TANH).build())) + .layer(new Bidirectional(new LSTM.Builder().nOut(256) + .dropOut(0.75).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) From 794550854af7435b5b9681dea89249faefc9a9d6 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Thu, 5 Mar 2020 01:01:08 +0200 Subject: [PATCH 84/89] prefinal examples Signed-off-by: Andrii Tuzhykov --- .../TextClassification.java | 195 ------------------ 1 file changed, 195 deletions(-) delete mode 100644 dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java deleted file mode 100644 index 0e760ffed8..0000000000 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/textclassification/TextClassification.java +++ /dev/null @@ -1,195 +0,0 @@ -package org.deeplearning4j.examples.nlp.textclassification; - - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.FilenameUtils; -import org.deeplearning4j.api.storage.StatsStorage; -import org.deeplearning4j.iterator.BertIterator; -import org.deeplearning4j.iterator.provider.FileLabeledSentenceProvider; -import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.distribution.NormalDistribution; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.*; -import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.optimize.listeners.ScoreIterationListener; -import org.deeplearning4j.text.tokenization.tokenizerfactory.BertWordPieceTokenizerFactory; -import org.deeplearning4j.ui.api.UIServer; -import org.deeplearning4j.ui.stats.StatsListener; -import org.deeplearning4j.ui.storage.FileStatsStorage; -import org.nd4j.evaluation.classification.Evaluation; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.dataset.api.MultiDataSet; -import org.nd4j.linalg.dataset.api.MultiDataSetPreProcessor; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.learning.config.Adam; -import org.nd4j.linalg.learning.config.Nadam; -import org.nd4j.linalg.learning.config.Nesterovs; -import org.nd4j.linalg.learning.config.Sgd; -import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.deeplearning4j.examples.utilities.DataUtilities; -import org.nd4j.linalg.schedule.*; -import org.deeplearning4j.nn.api.Updater; -import org.deeplearning4j.nn.api.Updater; - - -import java.io.File; -import java.net.URL; -import java.nio.charset.StandardCharsets; -import java.util.*; - -public class TextClassification { - - - /** - * Data URL for downloading - */ - public static final String DATA_URL = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"; - /** - * Location to save and extract the training/testing data - */ - public static final String DATA_PATH = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_w2vSentiment/"); - - public static BertIterator getBertDataSetIterator(boolean isTraining, BertWordPieceTokenizerFactory t) { - - String path = FilenameUtils.concat(DATA_PATH, (isTraining ? "aclImdb/train/" : "aclImdb/test/")); - String positiveBaseDir = FilenameUtils.concat(path, "pos"); - String negativeBaseDir = FilenameUtils.concat(path, "neg"); - Random rng = new Random(42); - - File filePositive = new File(positiveBaseDir); - File fileNegative = new File(negativeBaseDir); - - Map> reviewFilesMap = new HashMap<>(); - reviewFilesMap.put("Positive", Arrays.asList(Objects.requireNonNull(filePositive.listFiles()))); - reviewFilesMap.put("Negative", Arrays.asList(Objects.requireNonNull(fileNegative.listFiles()))); - - - BertIterator b = BertIterator.builder() - .tokenizer(t) - .lengthHandling(BertIterator.LengthHandling.FIXED_LENGTH, 256) - .minibatchSize(32) - .sentenceProvider(new FileLabeledSentenceProvider(reviewFilesMap, rng)) - .featureArrays(BertIterator.FeatureArrays.INDICES_MASK) - .vocabMap(t.getVocab()) - .task(BertIterator.Task.SEQ_CLASSIFICATION) - .build(); - - - return b; - } - - - public static void main(String[] args) throws Exception { - - - //Download and extract data - downloadData(); - - - final int seed = 0; //Seed for reproducibility - String pathToVocab = "/home/jenkins/uncased_L-12_H-768_A-12/vocab.txt"; - BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); - - - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .seed(seed) - .updater(new Adam(1e-3)) - .l2(1e-5) - .weightInit(WeightInit.XAVIER) - .list() - .setInputType(InputType.recurrent(1)) - .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) - .hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) - .layer(new Bidirectional(new LSTM.Builder().nOut(256) - .dropOut(0.75).activation(Activation.TANH).build())) - .layer(new Bidirectional(new LSTM.Builder().nOut(256) - .dropOut(0.75).activation(Activation.TANH).build())) - .layer(new GlobalPoolingLayer(PoolingType.MAX)) - .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) - .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .build(); - - BertIterator train = getBertDataSetIterator(true, t); - BertIterator test = getBertDataSetIterator(false, t); - - MultiDataSetPreProcessor mdsPreprocessor = new MultiDataSetPreProcessor() { - @Override - public void preProcess(MultiDataSet multiDataSet) { - multiDataSet.setFeaturesMaskArray(0, multiDataSet.getFeaturesMaskArray(0).castTo(DataType.FLOAT)); - } - }; - - - train.setPreProcessor(mdsPreprocessor); - test.setPreProcessor(mdsPreprocessor); - - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - - - //Initialize the user interface backend - UIServer uiServer = UIServer.getInstance(); - - //Configure where the network information (gradients, activations, score vs. time etc) is to be stored - //Then add the StatsListener to collect this information from the network, as it trains - StatsStorage statsStorage = new FileStatsStorage(new File(System.getProperty("java.io.tmpdir"), "ui-stats-" + System.currentTimeMillis() + ".dl4j")); - int listenerFrequency = 20; - net.setListeners(new StatsListener(statsStorage, listenerFrequency), new ScoreIterationListener(50)); - //Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized - uiServer.attach(statsStorage); - - - for (int i = 1; i <= 10; i++) { - - net.fit(train); - - Evaluation eval = net.doEvaluation(test, new Evaluation[]{new Evaluation()})[0]; - System.out.println(eval.stats()); - } - - System.out.println("Training set evaluation"); - Evaluation eval = net.doEvaluation(train, new Evaluation[]{new Evaluation()})[0]; - System.out.println(eval.stats()); - - System.out.print(net.summary()); - - } - - public static void downloadData() throws Exception { - //Create directory if required - File directory = new File(DATA_PATH); - if (!directory.exists()) directory.mkdir(); - - //Download file: - String archizePath = DATA_PATH + "aclImdb_v1.tar.gz"; - File archiveFile = new File(archizePath); - String extractedPath = DATA_PATH + "aclImdb"; - File extractedFile = new File(extractedPath); - - if (!archiveFile.exists()) { - System.out.println("Starting data download (80MB)..."); - FileUtils.copyURLToFile(new URL(DATA_URL), archiveFile); - System.out.println("Data (.tar.gz file) downloaded to " + archiveFile.getAbsolutePath()); - //Extract tar.gz file to output directory - DataUtilities.extractTarGz(archizePath, DATA_PATH); - } else { - //Assume if archive (.tar.gz) exists, then data has already been extracted - System.out.println("Data (.tar.gz file) already exists at " + archiveFile.getAbsolutePath()); - if (!extractedFile.exists()) { - //Extract tar.gz file to output directory - DataUtilities.extractTarGz(archizePath, DATA_PATH); - } else { - System.out.println("Data (extracted) already exists at " + extractedFile.getAbsolutePath()); - } - } - } - - -} - From 74162fff7b861346a51f355ea5c1979155417b55 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Thu, 5 Mar 2020 01:13:58 +0200 Subject: [PATCH 85/89] prefinal Signed-off-by: Andrii Tuzhykov --- .../nlp/bertiteratorexample/BertIteratorExample.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/bertiteratorexample/BertIteratorExample.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/bertiteratorexample/BertIteratorExample.java index 092feed707..332a5cb4e3 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/bertiteratorexample/BertIteratorExample.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/bertiteratorexample/BertIteratorExample.java @@ -135,13 +135,17 @@ public static void main(String[] args) throws Exception { .list() // matching EmbeddingSequenceLayer outputs with Bidirectional LSTM inputs .setInputType(InputType.recurrent(1)) +// // initialized weights with normal distribution, amount of inputs according to vocab size and off L2 for this layer .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) .hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) +// // two Bidirectional LSTM layers in a row with dropout and tanh as activation function .layer(new Bidirectional(new LSTM.Builder().nOut(256) .dropOut(0.8).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256) .dropOut(0.8).activation(Activation.TANH).build())) .layer(new GlobalPoolingLayer(PoolingType.MAX)) + // defining last layer with 2 outputs (2 classes - positive and negative), + // small dropout to avoid overfitting and MCXENT loss function .layer(new OutputLayer.Builder().nOut(2) .dropOut(0.97).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -187,6 +191,8 @@ public void preProcess(MultiDataSet multiDataSet) { net.fit(train); + + // Get and print accuracy, precision, recall & F1 and confusion matrix Evaluation eval = net.doEvaluation(test, new Evaluation[]{new Evaluation()})[0]; System.out.println(eval.stats()); } @@ -222,6 +228,7 @@ public static void downloadData() throws Exception { } + // Download Bert Base Uncased Vocab String vocabPath = DATA_PATH + "vocab.txt"; File vocabFile = new File(vocabPath); @@ -234,7 +241,7 @@ public static void downloadData() throws Exception { file.write(data, 0, byteContent); } } catch (IOException e) { - // handles IO exceptions + System.out.println("Something went wrong getting Bert Base Vocabulary"); } } else { From 13a2392ca70945b17713eb73c5c11319f38e1998 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Thu, 5 Mar 2020 18:10:17 +0200 Subject: [PATCH 86/89] changed package and class name, added trained model URL Signed-off-by: Andrii Tuzhykov --- .../SentencePieceRNNExample.java} | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) rename dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/{bertiteratorexample/BertIteratorExample.java => sentencepiecernnexample/SentencePieceRNNExample.java} (97%) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/bertiteratorexample/BertIteratorExample.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java similarity index 97% rename from dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/bertiteratorexample/BertIteratorExample.java rename to dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java index 332a5cb4e3..eec6cd37c9 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/bertiteratorexample/BertIteratorExample.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java @@ -13,7 +13,7 @@ * * SPDX-License-Identifier: Apache-2.0 ******************************************************************************/ -package org.deeplearning4j.examples.nlp.bertiteratorexample; +package org.deeplearning4j.examples.nlp.sentencepiecernnexample; import org.apache.commons.io.FileUtils; @@ -56,7 +56,13 @@ * @author andrewtuzhykov@gmail.com */ -public class BertIteratorExample { + /** + * NOTE: You may download already trained defined below model for your own inference + * https://dl4jdata.blob.core.windows.net/dl4j-examples/models/sentencepiece_rnn_example_model.zip + */ + + +public class SentencePieceRNNExample { /** @@ -191,7 +197,6 @@ public void preProcess(MultiDataSet multiDataSet) { net.fit(train); - // Get and print accuracy, precision, recall & F1 and confusion matrix Evaluation eval = net.doEvaluation(test, new Evaluation[]{new Evaluation()})[0]; System.out.println(eval.stats()); From 5b1b7108ade98d91ffb0c9cc5d1f11f1a6b9448b Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Fri, 6 Mar 2020 17:57:37 +0200 Subject: [PATCH 87/89] fixed required changes Signed-off-by: Andrii Tuzhykov --- dl4j-examples/pom.xml | 6 - .../SentencePieceRNNExample.java | 138 ++++++++++-------- pom.xml | 4 +- 3 files changed, 81 insertions(+), 67 deletions(-) diff --git a/dl4j-examples/pom.xml b/dl4j-examples/pom.xml index 564ab4b3e8..6e4dc69ed4 100644 --- a/dl4j-examples/pom.xml +++ b/dl4j-examples/pom.xml @@ -106,12 +106,6 @@ ${dl4j.version} - - org.deeplearning4j - deeplearning4j-cuda-10.0 - 1.0.0-beta6 - - org.deeplearning4j diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java index eec6cd37c9..59c1049827 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2015-2020 Skymind, Inc. + * Copyright (c) Copyright (c) 2020 Konduit K.K. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -53,15 +53,41 @@ /** - * @author andrewtuzhykov@gmail.com + * Example: Given a movie review (raw text), classify that movie review as either positive or negative based on the words it contains. + * This is done by combining BertIterator and a current neural network model: embedding sequence layer with nIn = vocabulary size, + * two bidirectional LSTM layers, followed by global pooling layer and output with nOu = 2 (2 classes: positive and negative reviews). + * As far model is predisposed to overfitting we also add l2 regularization and dropout for certain layers. + * To prepare reviews we use BertIterator, which is MultiDataSetIterator for training BERT (Transformer) models. + * We congigure BertIterator for supervised sequence classification: + * 0. As tokenizer we use BertWordPieceTokenizerFactory with provided BERT BASE UNCASED vocabulary. + * 1. We handle length of sequence to fixed - trim longer sequences and pad shorter to 256 words. + * 2. Sentence provider get as a reviewFilesMap, connstructed from dataset, described below. + * 3. FeatureArrays configures what arrays should be included: INDICES_MASK means + * indices array and mask array only, no segment ID array; returns 1 feature array, 1 feature mask array (plus labels). + * 4. As task we specify BertIterator.Task.SEQ_CLASSIFICATION, which means sequence clasification. + * Training data is the "Large Movie Review Dataset" from http://ai.stanford.edu/~amaas/data/sentiment/ + * This data set contains 25,000 training reviews + 25,000 testing reviews + *

+ * Process: + * 0. Automatic on first run of example: Download data (movie reviews) + extract and download BERT-BASE UNCASED vocabulary file. + * 1. BertWordPieceTokenizerFactory initializing with provided vocab. + * 2. Configuring MiltiLayerNetwork. + * 3. Setting of BertIterator and getting train and test data with followed by preprocessor. + * 4. Train network + *

+ * With the current configuration, gives approx. 86% accuracy after 19 epochs. Better performance may be possible with + * additional tuning. + *

+ * NOTE: You may download already trained defined below model for your own inference + * https://dl4jdata.blob.core.windows.net/dl4j-examples/models/sentencepiece_rnn_example_model.zip + *

+ * Recommended papers: + * 0. SentencePiece: A simple and language independent subword tokenizer and detokenizer for Neural Text Processing + * https://arxiv.org/abs/1808.06226 + * 1. Attention Is All You Need + * https://arxiv.org/abs/1706.03762 + * @author Andrii Tuzhykov */ - - /** - * NOTE: You may download already trained defined below model for your own inference - * https://dl4jdata.blob.core.windows.net/dl4j-examples/models/sentencepiece_rnn_example_model.zip - */ - - public class SentencePieceRNNExample { @@ -69,8 +95,9 @@ public class SentencePieceRNNExample { * Data URL for downloading */ public static final String DATA_URL = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"; - - // Bert Base Uncased Vocabulary + /** + * Bert Base Uncased Vocabulary URL + */ public static final String VOCAB_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt"; /** @@ -79,47 +106,8 @@ public class SentencePieceRNNExample { public static final String DATA_PATH = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_w2vSentiment/"); - /** - * Get BertIterator instance. - * - * @param isTraining specifies which dataset iterator we want to get: train or test. - * @param t BertWordPieceTokenizerFactory initialized with provided vocab. - * @return BertIterator with specified parameters. - */ - - public static BertIterator getBertDataSetIterator(boolean isTraining, BertWordPieceTokenizerFactory t) { - - String path = FilenameUtils.concat(DATA_PATH, (isTraining ? "aclImdb/train/" : "aclImdb/test/")); - String positiveBaseDir = FilenameUtils.concat(path, "pos"); - String negativeBaseDir = FilenameUtils.concat(path, "neg"); - Random rng = new Random(42); - - File filePositive = new File(positiveBaseDir); - File fileNegative = new File(negativeBaseDir); - - Map> reviewFilesMap = new HashMap<>(); - reviewFilesMap.put("Positive", Arrays.asList(Objects.requireNonNull(filePositive.listFiles()))); - reviewFilesMap.put("Negative", Arrays.asList(Objects.requireNonNull(fileNegative.listFiles()))); - - - BertIterator b = BertIterator.builder() - .tokenizer(t) - .lengthHandling(BertIterator.LengthHandling.FIXED_LENGTH, 256) - .minibatchSize(32) - .sentenceProvider(new FileLabeledSentenceProvider(reviewFilesMap, rng)) - .featureArrays(BertIterator.FeatureArrays.INDICES_MASK) - .vocabMap(t.getVocab()) - .task(BertIterator.Task.SEQ_CLASSIFICATION) - .build(); - - - return b; - } - - public static void main(String[] args) throws Exception { - //Download and extract data downloadData(); @@ -132,7 +120,6 @@ public static void main(String[] args) throws Exception { // BertWordPieceTokenizerFactory initialized with given vocab BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(new File(pathToVocab), true, true, StandardCharsets.UTF_8); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .updater(new Adam(1e-3)) @@ -141,10 +128,10 @@ public static void main(String[] args) throws Exception { .list() // matching EmbeddingSequenceLayer outputs with Bidirectional LSTM inputs .setInputType(InputType.recurrent(1)) -// // initialized weights with normal distribution, amount of inputs according to vocab size and off L2 for this layer + // initialized weights with normal distribution, amount of inputs according to vocab size and off L2 for this layer .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) .hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) -// // two Bidirectional LSTM layers in a row with dropout and tanh as activation function + // two Bidirectional LSTM layers in a row with dropout and tanh as activation function .layer(new Bidirectional(new LSTM.Builder().nOut(256) .dropOut(0.8).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256) @@ -157,14 +144,12 @@ public static void main(String[] args) throws Exception { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); - // Getting train and test BertIterators for both: test and train, // changing argument isTraining: true to get train and false to get test respectively BertIterator train = getBertDataSetIterator(true, t); BertIterator test = getBertDataSetIterator(false, t); - - // preprocessor for DataType matching + // Preprocessor for DataType matching; can be removed after 1.0.0-beta7 release. MultiDataSetPreProcessor mdsPreprocessor = new MultiDataSetPreProcessor() { @Override public void preProcess(MultiDataSet multiDataSet) { @@ -192,18 +177,55 @@ public void preProcess(MultiDataSet multiDataSet) { uiServer.attach(statsStorage); - // Setting to train net for 19 epochs (note: previous net state persist after each iteration) + // Setting to train net for 19 epochs (note: previous net state persist after each epoch (i.e. cycle iteration)) for (int i = 1; i <= 19; i++) { net.fit(train); // Get and print accuracy, precision, recall & F1 and confusion matrix Evaluation eval = net.doEvaluation(test, new Evaluation[]{new Evaluation()})[0]; + System.out.println("===== Evaluation at training iteration " + i + " ====="); System.out.println(eval.stats()); } } + /** + * Get BertIterator instance. + * + * @param isTraining specifies which dataset iterator we want to get: train or test. + * @param t BertWordPieceTokenizerFactory initialized with provided vocab. + * @return BertIterator with specified parameters. + */ + public static BertIterator getBertDataSetIterator(boolean isTraining, BertWordPieceTokenizerFactory t) { + + String path = FilenameUtils.concat(DATA_PATH, (isTraining ? "aclImdb/train/" : "aclImdb/test/")); + String positiveBaseDir = FilenameUtils.concat(path, "pos"); + String negativeBaseDir = FilenameUtils.concat(path, "neg"); + Random rng = new Random(42); + + File filePositive = new File(positiveBaseDir); + File fileNegative = new File(negativeBaseDir); + + Map> reviewFilesMap = new HashMap<>(); + reviewFilesMap.put("Positive", Arrays.asList(Objects.requireNonNull(filePositive.listFiles()))); + reviewFilesMap.put("Negative", Arrays.asList(Objects.requireNonNull(fileNegative.listFiles()))); + + + BertIterator b = BertIterator.builder() + .tokenizer(t) + .lengthHandling(BertIterator.LengthHandling.FIXED_LENGTH, 256) + .minibatchSize(32) + .sentenceProvider(new FileLabeledSentenceProvider(reviewFilesMap, rng)) + .featureArrays(BertIterator.FeatureArrays.INDICES_MASK) + .vocabMap(t.getVocab()) + .task(BertIterator.Task.SEQ_CLASSIFICATION) + .build(); + + + return b; + } + public static void downloadData() throws Exception { //Create directory if required File directory = new File(DATA_PATH); @@ -254,7 +276,5 @@ public static void downloadData() throws Exception { } } - - } diff --git a/pom.xml b/pom.xml index f2c38c7ee3..57ed70a4a7 100644 --- a/pom.xml +++ b/pom.xml @@ -27,8 +27,8 @@ Examples of training different data sets - - nd4j-cuda-10.0-platform + nd4j-native-platform + UTF-8 bin From a22956d58da3f6a862d054cd300fecf1d2c2f358 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Wed, 11 Mar 2020 12:00:58 +0200 Subject: [PATCH 88/89] fixed new round of required changes Signed-off-by: Andrii Tuzhykov --- .../SentencePieceRNNExample.java | 15 ++++++++++----- pom.xml | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java index 59c1049827..ed6598603f 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java @@ -59,7 +59,7 @@ * As far model is predisposed to overfitting we also add l2 regularization and dropout for certain layers. * To prepare reviews we use BertIterator, which is MultiDataSetIterator for training BERT (Transformer) models. * We congigure BertIterator for supervised sequence classification: - * 0. As tokenizer we use BertWordPieceTokenizerFactory with provided BERT BASE UNCASED vocabulary. + * 0. BertIterator and BertWordPieceTokenizer implement the Word Piece sub-word tokenization algorithm, with a vocabulary size of 30522 tokens. * 1. We handle length of sequence to fixed - trim longer sequences and pad shorter to 256 words. * 2. Sentence provider get as a reviewFilesMap, connstructed from dataset, described below. * 3. FeatureArrays configures what arrays should be included: INDICES_MASK means @@ -86,6 +86,7 @@ * https://arxiv.org/abs/1808.06226 * 1. Attention Is All You Need * https://arxiv.org/abs/1706.03762 + * * @author Andrii Tuzhykov */ public class SentencePieceRNNExample { @@ -128,10 +129,10 @@ public static void main(String[] args) throws Exception { .list() // matching EmbeddingSequenceLayer outputs with Bidirectional LSTM inputs .setInputType(InputType.recurrent(1)) - // initialized weights with normal distribution, amount of inputs according to vocab size and off L2 for this layer + // initialized weights with normal distribution, amount of inputs according to vocab size and off L2 for this layer .layer(0, new EmbeddingSequenceLayer.Builder().weightInit(new NormalDistribution(0, 1)).l2(0) .hasBias(true).nIn(t.getVocab().size()).nOut(128).build()) - // two Bidirectional LSTM layers in a row with dropout and tanh as activation function + // two Bidirectional LSTM layers in a row with dropout and tanh as activation function .layer(new Bidirectional(new LSTM.Builder().nOut(256) .dropOut(0.8).activation(Activation.TANH).build())) .layer(new Bidirectional(new LSTM.Builder().nOut(256) @@ -165,7 +166,8 @@ public void preProcess(MultiDataSet multiDataSet) { MultiLayerNetwork net = new MultiLayerNetwork(conf); - //Initialize the user interface backend + /* + //Uncomment this section to run the example with the user interface UIServer uiServer = UIServer.getInstance(); //Configure where the network information (gradients, activations, score vs. time etc) is to be stored @@ -175,6 +177,9 @@ public void preProcess(MultiDataSet multiDataSet) { net.setListeners(new StatsListener(statsStorage, listenerFrequency), new ScoreIterationListener(50)); //Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized uiServer.attach(statsStorage); + */ + + net.setListeners(new ScoreIterationListener(50)); // Setting to train net for 19 epochs (note: previous net state persist after each epoch (i.e. cycle iteration)) @@ -183,7 +188,7 @@ public void preProcess(MultiDataSet multiDataSet) { net.fit(train); // Get and print accuracy, precision, recall & F1 and confusion matrix - Evaluation eval = net.doEvaluation(test, new Evaluation[]{new Evaluation()})[0]; + Evaluation eval = net.evaluate(test); System.out.println("===== Evaluation at training iteration " + i + " ====="); System.out.println(eval.stats()); } diff --git a/pom.xml b/pom.xml index 57ed70a4a7..45aee7518a 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ nd4j-native-platform - + UTF-8 bin From 5e7df4f9b3e4613c7d84da6e4545409a469494e6 Mon Sep 17 00:00:00 2001 From: Andrii Tuzhykov Date: Wed, 11 Mar 2020 12:17:42 +0200 Subject: [PATCH 89/89] small issue belong to match BertIterator and DataSetIterator in Evaluation class Signed-off-by: Andrii Tuzhykov --- .../nlp/sentencepiecernnexample/SentencePieceRNNExample.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java index ed6598603f..5326c3454c 100644 --- a/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java +++ b/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sentencepiecernnexample/SentencePieceRNNExample.java @@ -188,7 +188,7 @@ public void preProcess(MultiDataSet multiDataSet) { net.fit(train); // Get and print accuracy, precision, recall & F1 and confusion matrix - Evaluation eval = net.evaluate(test); + Evaluation eval = net.doEvaluation(test, new Evaluation[]{new Evaluation()})[0]; System.out.println("===== Evaluation at training iteration " + i + " ====="); System.out.println(eval.stats()); }