diff --git a/Datasets/CMakeLists.txt b/Datasets/CMakeLists.txt index 3f1b519cb7d..6323663e502 100644 --- a/Datasets/CMakeLists.txt +++ b/Datasets/CMakeLists.txt @@ -25,7 +25,6 @@ add_library(Datasets ImageSegmentationDataset.swift OxfordIIITPets/OxfordIIITPets.swift) target_link_libraries(Datasets PUBLIC - Batcher ModelSupport) set_target_properties(Datasets PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_Swift_MODULE_DIRECTORY}) diff --git a/Datasets/COCO/COCO.swift b/Datasets/COCO/COCO.swift index ff1fe9c6182..82dd0ffd262 100644 --- a/Datasets/COCO/COCO.swift +++ b/Datasets/COCO/COCO.swift @@ -1,3 +1,18 @@ +// Copyright 2020 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + import Foundation // Code below is ported from https://github.com/cocometadata/cocoapi diff --git a/Datasets/COCO/COCODataset.swift b/Datasets/COCO/COCODataset.swift index 268506c49c7..91516385a27 100644 --- a/Datasets/COCO/COCODataset.swift +++ b/Datasets/COCO/COCODataset.swift @@ -1,52 +1,102 @@ -import Batcher +// Copyright 2020 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import Foundation +import TensorFlow + +public struct COCODataset { + /// Type of the collection of non-collated batches. + public typealias Batches = Slices>> + /// The type of the training data, represented as a sequence of epochs, which + /// are collections of batches. + public typealias Training = LazyMapSequence< + TrainingEpochs<[ObjectDetectionExample], Entropy>, + LazyMapSequence + > + /// The type of the validation data, represented as a collection of batches. + public typealias Validation = LazyMapSequence, [ObjectDetectionExample]> + /// The training epochs. + public let training: Training + /// The validation batches. + public let validation: Validation + + /// Creates an instance with `batchSize` on `device` from the given COCO metadata. + /// + /// - Parameters: + /// - training: The COCO metadata for the training data. + /// - validation: The COCO metadata for the validation data. + /// - includeMasks: Whether to include the segmentation masks when loading the dataset. + /// - batchSize: Number of images provided per batch. + /// - entropy: A source of randomness used to shuffle sample ordering. It + /// will be stored in `self`, so if it is only pseudorandom and has value + /// semantics, the sequence of epochs is deterministic and not dependent + /// on other operations. + /// - device: The Device on which resulting Tensors from this dataset will be placed, as well + /// as where the latter stages of any conversion calculations will be performed. 
+ public init( + training: COCO, validation: COCO, includeMasks: Bool, batchSize: Int, + entropy: Entropy, device: Device, + transform: @escaping (ObjectDetectionExample) -> [ObjectDetectionExample] + ) { + let trainingSamples = loadCOCOExamples( + from: training, + includeMasks: includeMasks, + batchSize: batchSize) -public struct COCODataset: ObjectDetectionDataset { - public typealias SourceDataSet = [ObjectDetectionExample] - public let trainingExamples: SourceDataSet - public let training: Batcher - public let testExamples: SourceDataSet - public let test: Batcher + self.training = TrainingEpochs(samples: trainingSamples, batchSize: batchSize, entropy: entropy) + .lazy.map { (batches: Batches) -> LazyMapSequence in + return batches.lazy.map { + makeBatch(samples: $0, device: device, transform: transform) + } + } + + let validationSamples = loadCOCOExamples( + from: validation, + includeMasks: includeMasks, + batchSize: batchSize) - public init( - training: COCO, test: COCO, - includeMasks: Bool, batchSize: Int, numWorkers: Int - ) { - self.trainingExamples = - loadCOCOExamples( - from: training, - includeMasks: includeMasks, - batchSize: batchSize, - numWorkers: numWorkers) - self.training = - Batcher( - on: trainingExamples, - batchSize: batchSize, - numWorkers: numWorkers, - shuffle: true) - self.testExamples = - loadCOCOExamples( - from: test, - includeMasks: includeMasks, - batchSize: batchSize, - numWorkers: numWorkers) - self.test = - Batcher( - on: testExamples, - batchSize: batchSize, - numWorkers: numWorkers, - shuffle: false) + self.validation = validationSamples.inBatches(of: batchSize).lazy.map { + makeBatch(samples: $0, device: device, transform: transform) } + } + + public static func identity(_ example: ObjectDetectionExample) -> [ObjectDetectionExample] { + return [example] + } } -func loadCOCOExamples(from coco: COCO, includeMasks: Bool, batchSize: Int, numWorkers: Int) +extension COCODataset: ObjectDetectionData where Entropy == 
SystemRandomNumberGenerator { + /// Creates an instance with `batchSize`, using the SystemRandomNumberGenerator. + public init( + training: COCO, validation: COCO, includeMasks: Bool, batchSize: Int, + on device: Device = Device.default, + transform: @escaping (ObjectDetectionExample) -> [ObjectDetectionExample] = COCODataset.identity + ) { + self.init( + training: training, validation: validation, includeMasks: includeMasks, batchSize: batchSize, + entropy: SystemRandomNumberGenerator(), device: device, transform: transform) + } +} + + +func loadCOCOExamples(from coco: COCO, includeMasks: Bool, batchSize: Int) -> [ObjectDetectionExample] { let images = coco.metadata["images"] as! [COCO.Image] let batchCount: Int = images.count / batchSize + 1 - let n = min(numWorkers, batchCount) let batches = Array(0.. Objec } return ObjectDetectionExample(image: img, objects: objects) } + +fileprivate func makeBatch( + samples: BatchSamples, device: Device, + transform: (ObjectDetectionExample) -> [ObjectDetectionExample] +) -> [ObjectDetectionExample] where BatchSamples.Element == ObjectDetectionExample { + return samples.reduce([]) { + $0 + transform($1) + } +} diff --git a/Datasets/COCO/COCOVariant.swift b/Datasets/COCO/COCOVariant.swift index 949ba8dcd1b..1850e74a98a 100644 --- a/Datasets/COCO/COCOVariant.swift +++ b/Datasets/COCO/COCOVariant.swift @@ -1,3 +1,17 @@ +// Copyright 2020 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + import Foundation import ModelSupport diff --git a/Datasets/LanguageModelDataset.swift b/Datasets/LanguageModelDataset.swift index 2963382c152..db970aa5567 100644 --- a/Datasets/LanguageModelDataset.swift +++ b/Datasets/LanguageModelDataset.swift @@ -1,3 +1,17 @@ +// Copyright 2020 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import TensorFlow /// A dataset suitable for language modeling. diff --git a/Datasets/ObjectDetectionDataset.swift b/Datasets/ObjectDetectionDataset.swift index 097653b47e6..289f947d5f6 100644 --- a/Datasets/ObjectDetectionDataset.swift +++ b/Datasets/ObjectDetectionDataset.swift @@ -1,4 +1,17 @@ -import Batcher +// Copyright 2020 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ import Foundation import ModelSupport import TensorFlow @@ -52,7 +65,7 @@ public struct LabeledObject { } } -public struct ObjectDetectionExample: _Collatable, KeyPathIterable { +public struct ObjectDetectionExample: KeyPathIterable { public let image: LazyImage public let objects: [LabeledObject] @@ -62,10 +75,28 @@ public struct ObjectDetectionExample: _Collatable, KeyPathIterable { } } -public protocol ObjectDetectionDataset { - associatedtype SourceDataSet: Collection - where SourceDataSet.Element == ObjectDetectionExample, SourceDataSet.Index == Int +/// Types whose elements represent an object detection dataset (with both +/// training and validation data). +public protocol ObjectDetectionData { + /// The type of the training data, represented as a sequence of epochs, which + /// are collections of batches. + associatedtype Training: Sequence + where Training.Element: Collection, Training.Element.Element == [ObjectDetectionExample] + /// The type of the validation data, represented as a collection of batches. + associatedtype Validation: Collection where Validation.Element == [ObjectDetectionExample] + /// Creates an instance from a given `batchSize`. + init( + training: COCO, validation: COCO, includeMasks: Bool, batchSize: Int, on device: Device, + transform: @escaping (ObjectDetectionExample) -> [ObjectDetectionExample]) + /// The `training` epochs. + var training: Training { get } + /// The `validation` batches. + var validation: Validation { get } - var training: Batcher { get } - var test: Batcher { get } + // The following is probably going to be necessary since we can't extract that + // information from `Epochs` or `Batches`. + /// The number of samples in the `training` set. + //var trainingSampleCount: Int {get} + /// The number of samples in the `validation` set. 
+ //var validationSampleCount: Int {get} } diff --git a/Datasets/TensorPair.swift b/Datasets/TensorPair.swift index 64143837eb0..252d5e424b2 100644 --- a/Datasets/TensorPair.swift +++ b/Datasets/TensorPair.swift @@ -13,7 +13,6 @@ // limitations under the License. import TensorFlow -import Batcher /// A generic tuple of two tensors `Tensor`. /// @@ -21,7 +20,7 @@ import Batcher /// `Collatable`. You can use it for most basic datasets with one tensor of inputs and one tensor of /// labels but you should write your own struct for more complex tasks (or if you want more descriptive /// names). -public struct TensorPair: _Collatable, KeyPathIterable { +public struct TensorPair: KeyPathIterable { public var first: Tensor public var second: Tensor @@ -30,4 +29,4 @@ public struct TensorPair: _Collatabl self.first = first self.second = second } -} \ No newline at end of file +} diff --git a/Package.swift b/Package.swift index 9a33a883625..9f99567e231 100644 --- a/Package.swift +++ b/Package.swift @@ -25,7 +25,7 @@ let package = Package( ], targets: [ .target(name: "Batcher", path: "Batcher"), - .target(name: "Datasets", dependencies: ["ModelSupport", "Batcher"], path: "Datasets"), + .target(name: "Datasets", dependencies: ["ModelSupport"], path: "Datasets"), .target(name: "STBImage", path: "Support/STBImage"), .target( name: "ModelSupport", dependencies: ["SwiftProtobuf", "STBImage"], path: "Support", @@ -117,7 +117,7 @@ let package = Package( ), .target( name: "pix2pix", - dependencies: ["Batcher", "ArgumentParser", "ModelSupport", "Datasets"], + dependencies: ["ArgumentParser", "ModelSupport", "Datasets"], path: "pix2pix" ), .target( diff --git a/Tests/DatasetsTests/COCO/COCODatasetTests.swift b/Tests/DatasetsTests/COCO/COCODatasetTests.swift index 9a07364f299..06ab73595b5 100644 --- a/Tests/DatasetsTests/COCO/COCODatasetTests.swift +++ b/Tests/DatasetsTests/COCO/COCODatasetTests.swift @@ -9,10 +9,16 @@ final class COCODatasetTests: XCTestCase { // to avoid fetching the 
full training data during CI runs. let dataset = COCODataset( training: COCOVariant.loadVal(), - test: COCOVariant.loadTest(), - includeMasks: false, batchSize: 32, numWorkers: 8) - verify(dataset.trainingExamples) - verify(dataset.testExamples) + validation: COCOVariant.loadTest(), + includeMasks: false, batchSize: 32) + + for epochBatches in dataset.training.prefix(1) { + let batch = epochBatches.first! + XCTAssertTrue(batch[0].image.width != 0) + } + + let validationBatch = dataset.validation.first! + XCTAssertTrue(validationBatch[0].image.width != 0) } func testExamplesIncludingMasks() { @@ -20,15 +26,16 @@ final class COCODatasetTests: XCTestCase { // to avoid fetching the full training data during CI runs. let dataset = COCODataset( training: COCOVariant.loadVal(), - test: COCOVariant.loadTest(), - includeMasks: true, batchSize: 32, numWorkers: 8) - verify(dataset.trainingExamples) - verify(dataset.testExamples) - } + validation: COCOVariant.loadTest(), + includeMasks: true, batchSize: 32) + + for epochBatches in dataset.training.prefix(1) { + let batch = epochBatches.first! + XCTAssertTrue(batch[0].image.width != 0) + } - func verify(_ examples: [ObjectDetectionExample]) { - XCTAssertTrue(examples.count > 0) - XCTAssertTrue(examples[0].image.width != 0) + let validationBatch = dataset.validation.first! + XCTAssertTrue(validationBatch[0].image.width != 0) } static var allTests = [