From 72a1f14c1d6210d279dd0d20260bfdd1355198af Mon Sep 17 00:00:00 2001 From: jisha Date: Wed, 20 May 2020 21:47:56 -0700 Subject: [PATCH] Adding support for model auto-publish for KabootarJob --- CONTRIBUTORS.md | 2 + VERSIONS.md | 3 + gradle.properties | 2 +- .../expectedJobs/jobs1/jobs1_job27.job | 7 ++ .../resources/gradle/positive/jobs1.gradle | 7 ++ .../checker/RequiredFieldsChecker.groovy | 15 +++ .../gradle/hadoopdsl/job/KabootarJob.groovy | 105 ++++++++++++++++++ 7 files changed, 140 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 50155d0..653182d 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -22,6 +22,8 @@ the LinkedIn Gradle DSL for Apache Hadoop was influenced by Vaughan. ### Contributors +The following were contributed by Jin Sha. Thanks, Jin! +* `Added support for model auto-publish for KabootarJob` The following were contributed by Arpan Agrawal. Thanks, Arpan! * `Added AutoTunePigLiJob job type support ` diff --git a/VERSIONS.md b/VERSIONS.md index 0be4812..171c641 100644 --- a/VERSIONS.md +++ b/VERSIONS.md @@ -17,6 +17,9 @@ the License. Note that the LinkedIn build system occasionally requires that we skip a version bump, so you will see a few skipped version numbers in the list below. +0.15.22 +* Adding support for model auto-publish for KabootarJob + 0.15.16 * Adding KubernetesJob job type support. 
diff --git a/gradle.properties b/gradle.properties index 99a6281..cc2c13e 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,2 +1,2 @@ org.gradle.daemon=true -version=0.15.21 +version=0.15.22 diff --git a/hadoop-plugin/src/integTest/resources/expectedJobs/jobs1/jobs1_job27.job b/hadoop-plugin/src/integTest/resources/expectedJobs/jobs1/jobs1_job27.job index 45d05cc..1d1b6b9 100644 --- a/hadoop-plugin/src/integTest/resources/expectedJobs/jobs1/jobs1_job27.job +++ b/hadoop-plugin/src/integTest/resources/expectedJobs/jobs1/jobs1_job27.job @@ -2,6 +2,13 @@ type=KabootarJob dependencies=jobs1_job26 ai.project.group=AIFoundationOther +auto.publish.enabled=true +auto.publish.model.contains.confidential.data=false +auto.publish.model.contains.pii.data=false +auto.publish.model.deployment.group.name=myDeploymentGroup +auto.publish.model.name=myFirstModel +auto.publish.version.update.type=PATCH +enable.quasar.model.bundle=true framework=PHOTON_CONNECT initial.import=initial/import/File model.supplementary.data.path=/user/testmodelregsvc/data diff --git a/hadoop-plugin/src/integTest/resources/gradle/positive/jobs1.gradle b/hadoop-plugin/src/integTest/resources/gradle/positive/jobs1.gradle index bf20725..2f84db2 100644 --- a/hadoop-plugin/src/integTest/resources/gradle/positive/jobs1.gradle +++ b/hadoop-plugin/src/integTest/resources/gradle/positive/jobs1.gradle @@ -451,6 +451,13 @@ hadoop { usesOrigin 'FELLOWSHIP' // Optional usesFramework 'PHOTON_CONNECT' // Optional usesModelSupplementaryDataLocation '/user/testmodelregsvc/data' // Optional + usesEnableQuasarModelBundle true // Optional + usesEnableAutoPublish true // Optional + usesAutoPublishModelName 'myFirstModel' // Required if usesEnableAutoPublish is true + usesAutoPublishModelDeploymentGroupName 'myDeploymentGroup' // Required if usesEnableAutoPublish is true + usesAutoPublishVersionUpdateType 'PATCH' // Optional + usesAutoPublishModelContainsPiiData false // Required if usesEnableAutoPublish is true + 
usesAutoPublishModelContainsConfidentialData false // Required if usesEnableAutoPublish is true depends 'job26' } diff --git a/hadoop-plugin/src/main/groovy/com/linkedin/gradle/hadoopdsl/checker/RequiredFieldsChecker.groovy b/hadoop-plugin/src/main/groovy/com/linkedin/gradle/hadoopdsl/checker/RequiredFieldsChecker.groovy index 6a42b34..113b17e 100644 --- a/hadoop-plugin/src/main/groovy/com/linkedin/gradle/hadoopdsl/checker/RequiredFieldsChecker.groovy +++ b/hadoop-plugin/src/main/groovy/com/linkedin/gradle/hadoopdsl/checker/RequiredFieldsChecker.groovy @@ -265,6 +265,11 @@ class RequiredFieldsChecker extends BaseStaticChecker { boolean emptyAiProjectGroup = job.aiProjectGroup == null || job.aiProjectGroup.isEmpty(); boolean emptyWormholeNamespace = job.wormholeNamespace == null || job.wormholeNamespace.isEmpty(); boolean emptyInitialImport = job.initialImport == null || job.initialImport.isEmpty(); + boolean emptyAutoPublishModelName = job.autoPublishModelName == null || job.autoPublishModelName.isEmpty(); + boolean emptyAutoPublishModelDeploymentGroupName = job.autoPublishModelDeploymentGroupName == null || job.autoPublishModelDeploymentGroupName.isEmpty(); + boolean emptyAutoPublishModelContainsPiiData = job.autoPublishModelContainsPiiData == null; + boolean emptyAutoPublishModelContainsConfidentialData = job.autoPublishModelContainsConfidentialData == null; + if (emptyTrainedModelLocation || emptyTrainingName || emptyAiProjectGroup || emptyWormholeNamespace || emptyInitialImport) { project.logger.lifecycle( @@ -273,6 +278,16 @@ class RequiredFieldsChecker extends BaseStaticChecker { "Please see the job documentation for more details."); foundError = true; } + + if (job.enableAutoPublish && (emptyAutoPublishModelName || emptyAutoPublishModelDeploymentGroupName || + emptyAutoPublishModelContainsPiiData || emptyAutoPublishModelContainsConfidentialData)) { + project.logger.lifecycle( + "RequiredFieldsChecker ERROR: KabootarJob ${job.name} must set 
autoPublishModelName, " + + "autoPublishModelDeploymentGroupName, autoPublishModelContainsPiiData and " + + "autoPublishModelContainsConfidentialData when auto-publish is enabled. " + + "Please see the job documentation for more details."); + foundError = true; + } } diff --git a/hadoop-plugin/src/main/groovy/com/linkedin/gradle/hadoopdsl/job/KabootarJob.groovy b/hadoop-plugin/src/main/groovy/com/linkedin/gradle/hadoopdsl/job/KabootarJob.groovy index 806c210..04af03a 100644 --- a/hadoop-plugin/src/main/groovy/com/linkedin/gradle/hadoopdsl/job/KabootarJob.groovy +++ b/hadoop-plugin/src/main/groovy/com/linkedin/gradle/hadoopdsl/job/KabootarJob.groovy @@ -35,6 +35,13 @@ import com.linkedin.gradle.hadoopdsl.HadoopDslMethod; * usesOrigin 'FELLOWSHIP' // Optional * usesFramework 'PHOTON_CONNECT' // Optional * usesModelSupplementaryDataLocation '/user/testmodelregsvc/trained-models-supplementary-data' // Optional + * usesEnableQuasarModelBundle // Optional + * usesEnableAutoPublish // Optional + * usesAutoPublishModelName // Required if usesEnableAutoPublish is true + * usesAutoPublishModelDeploymentGroupName // Required if usesEnableAutoPublish is true + * usesAutoPublishVersionUpdateType // Optional + * usesAutoPublishModelContainsPiiData // Required if usesEnableAutoPublish is true + * usesAutoPublishModelContainsConfidentialData // Required if usesEnableAutoPublish is true *}* */ class KabootarJob extends HadoopJavaJob { // Required @@ -49,6 +56,14 @@ class KabootarJob extends HadoopJavaJob { String origin; String framework; String modelSupplementaryDataLocation; + Boolean enableQuasarModelBundle; + Boolean enableAutoPublish; + String autoPublishModelName; //required if enableAutoPublish is true + String autoPublishModelDeploymentGroupName; //required if enableAutoPublish is true + String autoPublishVersionUpdateType; + Boolean autoPublishModelContainsPiiData; //required if enableAutoPublish is true + Boolean autoPublishModelContainsConfidentialData; //required if 
enableAutoPublish is true + + /** + * Constructor for KabootarJob. @@ -86,6 +101,13 @@ class KabootarJob extends HadoopJavaJob { cloneJob.origin = origin; cloneJob.framework = framework; cloneJob.modelSupplementaryDataLocation = modelSupplementaryDataLocation; + cloneJob.enableQuasarModelBundle = enableQuasarModelBundle; + cloneJob.enableAutoPublish = enableAutoPublish; + cloneJob.autoPublishModelName = autoPublishModelName; + cloneJob.autoPublishModelDeploymentGroupName = autoPublishModelDeploymentGroupName; + cloneJob.autoPublishVersionUpdateType = autoPublishVersionUpdateType; + cloneJob.autoPublishModelContainsPiiData = autoPublishModelContainsPiiData; + cloneJob.autoPublishModelContainsConfidentialData = autoPublishModelContainsConfidentialData; return ((KabootarJob) super.clone(cloneJob)); } @@ -198,4 +220,87 @@ class KabootarJob extends HadoopJavaJob { this.modelSupplementaryDataLocation = modelSupplementaryDataLocation; setJobProperty("model.supplementary.data.path", modelSupplementaryDataLocation); } + + /** + * DSL usesEnableQuasarModelBundle method causes enable.quasar.model.bundle to be set in the job file. + * + * @param enableQuasarModelBundle - Flag that enables quasar model bundle format for models produced by Kabootar + */ + @HadoopDslMethod + void usesEnableQuasarModelBundle(Boolean enableQuasarModelBundle) { + this.enableQuasarModelBundle = enableQuasarModelBundle; + setJobProperty("enable.quasar.model.bundle", enableQuasarModelBundle); + } + + /** + * DSL usesEnableAutoPublish method causes auto.publish.enabled to be set in the job file. + * + * @param enableAutoPublish - Flag that controls whether Kabootar should auto-publish the trained model + */ + @HadoopDslMethod + void usesEnableAutoPublish(Boolean enableAutoPublish) { + this.enableAutoPublish = enableAutoPublish; + setJobProperty("auto.publish.enabled", enableAutoPublish); + } + + /** + * DSL usesAutoPublishModelName method causes auto.publish.model.name to be set in the job file. 
+ * + * @param autoPublishModelName - Intended auto-published model name. + */ + @HadoopDslMethod + void usesAutoPublishModelName(String autoPublishModelName) { + this.autoPublishModelName = autoPublishModelName; + setJobProperty("auto.publish.model.name", autoPublishModelName); + } + + /** + * DSL usesAutoPublishModelDeploymentGroupName method causes auto.publish.model.deployment.group.name to be set in the + * job file. + * + * @param autoPublishModelDeploymentGroupName - Intended model deployment group name that the trained model will be + * auto-published into. This model deployment group must exist and the publisher needs to have DEVELOPMENT_TEAM role + * for the group. + */ + @HadoopDslMethod + void usesAutoPublishModelDeploymentGroupName(String autoPublishModelDeploymentGroupName) { + this.autoPublishModelDeploymentGroupName = autoPublishModelDeploymentGroupName; + setJobProperty("auto.publish.model.deployment.group.name", autoPublishModelDeploymentGroupName); + } + + /** + * DSL usesAutoPublishVersionUpdateType method causes auto.publish.version.update.type to be set in the job file. + * + * @param autoPublishVersionUpdateType - auto-publish model version update type. Can be patch, minor or major. + */ + @HadoopDslMethod + void usesAutoPublishVersionUpdateType(String autoPublishVersionUpdateType) { + this.autoPublishVersionUpdateType = autoPublishVersionUpdateType; + setJobProperty("auto.publish.version.update.type", autoPublishVersionUpdateType); + } + + /** + * DSL usesAutoPublishModelContainsPiiData method causes auto.publish.model.contains.pii.data to be set in the job + * file. + * + * @param autoPublishModelContainsPiiData - Indicates whether the trained model to be auto-published contains PII data. 
+ */ + @HadoopDslMethod + void usesAutoPublishModelContainsPiiData(Boolean autoPublishModelContainsPiiData) { + this.autoPublishModelContainsPiiData = autoPublishModelContainsPiiData; + setJobProperty("auto.publish.model.contains.pii.data", autoPublishModelContainsPiiData); + } + + /** + * DSL usesAutoPublishModelContainsConfidentialData method causes auto.publish.model.contains.confidential.data to + * be set in the job file. + * + * @param autoPublishModelContainsConfidentialData - Indicates whether the trained model to be auto-published contains + * confidential data. + */ + @HadoopDslMethod + void usesAutoPublishModelContainsConfidentialData(Boolean autoPublishModelContainsConfidentialData) { + this.autoPublishModelContainsConfidentialData = autoPublishModelContainsConfidentialData; + setJobProperty("auto.publish.model.contains.confidential.data", autoPublishModelContainsConfidentialData); + } }