Skip to content

Adding support for model auto-publish for KabootarJob #268

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ the LinkedIn Gradle DSL for Apache Hadoop was influenced by
Vaughan.

### Contributors
The following were contributed by Jin Sha. Thanks, Jin!
* `Added support for model auto-publish for KabootarJob`

The following were contributed by Arpan Agrawal. Thanks, Arpan!
* `Added AutoTunePigLiJob job type support `
Expand Down
3 changes: 3 additions & 0 deletions VERSIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ the License.
Note that the LinkedIn build system occasionally requires that we skip a
version bump, so you will see a few skipped version numbers in the list below.

0.15.22
* Adding support for model auto-publish for KabootarJob

0.15.16
* Adding KubernetesJob job type support.

Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
org.gradle.daemon=true
version=0.15.21
version=0.15.22
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
type=KabootarJob
dependencies=jobs1_job26
ai.project.group=AIFoundationOther
auto.publish.enabled=true
auto.publish.model.contains.confidential.data=false
auto.publish.model.contains.pii.data=false
auto.publish.model.deployment.group.name=myDeploymentGroup
auto.publish.model.name=myFirstModel
auto.publish.version.update.type=PATCH
enable.quasar.model.bundle=true
framework=PHOTON_CONNECT
initial.import=initial/import/File
model.supplementary.data.path=/user/testmodelregsvc/data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,13 @@ hadoop {
usesOrigin 'FELLOWSHIP' // Optional
usesFramework 'PHOTON_CONNECT' // Optional
usesModelSupplementaryDataLocation '/user/testmodelregsvc/data' // Optional
usesEnableQuasarModelBundle true // Optional
usesEnableAutoPublish true // Optional
usesAutoPublishModelName 'myFirstModel' // Required if usesEnableAutoPublish is true
usesAutoPublishModelDeploymentGroupName 'myDeploymentGroup' // Required if usesEnableAutoPublish is true
usesAutoPublishVersionUpdateType 'PATCH' // Optional
usesAutoPublishModelContainsPiiData false // Required if usesEnableAutoPublish is true
usesAutoPublishModelContainsConfidentialData false // Required if usesEnableAutoPublish is true

depends 'job26'
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,11 @@ class RequiredFieldsChecker extends BaseStaticChecker {
boolean emptyAiProjectGroup = job.aiProjectGroup == null || job.aiProjectGroup.isEmpty();
boolean emptyWormholeNamespace = job.wormholeNamespace == null || job.wormholeNamespace.isEmpty();
boolean emptyInitialImport = job.initialImport == null || job.initialImport.isEmpty();
// Auto-publish settings. These flags are only consulted when job.enableAutoPublish is true.
boolean emptyAutoPublishModelName = job.autoPublishModelName == null || job.autoPublishModelName.isEmpty();
boolean emptyAutoPublishModelDeploymentGroupName = job.autoPublishModelDeploymentGroupName == null || job.autoPublishModelDeploymentGroupName.isEmpty();
// The two Boolean settings count as "set" whenever they are non-null, so an explicit false is accepted.
boolean emptyAutoPublishModelContainsPiiData = job.autoPublishModelContainsPiiData == null;
// This must be a comparison (==): an assignment would null out the job's value and always evaluate to false.
boolean emptyAutoPublishModelContainsConfidentialData = job.autoPublishModelContainsConfidentialData == null;


if (emptyTrainedModelLocation || emptyTrainingName || emptyAiProjectGroup || emptyWormholeNamespace || emptyInitialImport) {
project.logger.lifecycle(
Expand All @@ -273,6 +278,16 @@ class RequiredFieldsChecker extends BaseStaticChecker {
"Please see the job documentation for more details.");
foundError = true;
}

// When auto-publish is enabled, the model name, the deployment group name and both
// data-classification flags must be set. Any missing one is reported as a single error.
if (job.enableAutoPublish && (emptyAutoPublishModelName || emptyAutoPublishModelDeploymentGroupName ||
    emptyAutoPublishModelContainsPiiData || emptyAutoPublishModelContainsConfidentialData)) {
  project.logger.lifecycle(
      "RequiredFieldsChecker ERROR: KabootarJob ${job.name} must set autoPublishModelName, " +
      "autoPublishModelDeploymentGroupName, autoPublishModelContainsPiiData and " +
      "autoPublishModelContainsConfidentialData when auto-publish is enabled. " +
      "Please see the job documentation for more details.");
  foundError = true;
}
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ import com.linkedin.gradle.hadoopdsl.HadoopDslMethod;
* usesOrigin 'FELLOWSHIP' // Optional
* usesFramework 'PHOTON_CONNECT' // Optional
* usesModelSupplementaryDataLocation '/user/testmodelregsvc/trained-models-supplementary-data' // Optional
* usesEnableQuasarModelBundle true // Optional
* usesEnableAutoPublish true // Optional
* usesAutoPublishModelName 'myFirstModel' // Required if usesEnableAutoPublish is true
* usesAutoPublishModelDeploymentGroupName 'myDeploymentGroup' // Required if usesEnableAutoPublish is true
* usesAutoPublishVersionUpdateType 'PATCH' // Optional
* usesAutoPublishModelContainsPiiData false // Required if usesEnableAutoPublish is true
* usesAutoPublishModelContainsConfidentialData false // Required if usesEnableAutoPublish is true
*}* </pre>*/
class KabootarJob extends HadoopJavaJob {
// Required
Expand All @@ -49,6 +56,14 @@ class KabootarJob extends HadoopJavaJob {
String origin;
String framework;
String modelSupplementaryDataLocation;
// Set by usesEnableQuasarModelBundle; written to the enable.quasar.model.bundle job property.
Boolean enableQuasarModelBundle;
// Set by usesEnableAutoPublish; written to the auto.publish.enabled job property.
Boolean enableAutoPublish;
String autoPublishModelName; // Required if enableAutoPublish is true
String autoPublishModelDeploymentGroupName; // Required if enableAutoPublish is true
// Set by usesAutoPublishVersionUpdateType; written to the auto.publish.version.update.type job property.
String autoPublishVersionUpdateType;
Boolean autoPublishModelContainsPiiData; // Required if enableAutoPublish is true
Boolean autoPublishModelContainsConfidentialData; // Required if enableAutoPublish is true


/**
* Constructor for KabootarJob.
Expand Down Expand Up @@ -86,6 +101,13 @@ class KabootarJob extends HadoopJavaJob {
cloneJob.origin = origin;
cloneJob.framework = framework;
cloneJob.modelSupplementaryDataLocation = modelSupplementaryDataLocation;
cloneJob.enableQuasarModelBundle = enableQuasarModelBundle;
cloneJob.enableAutoPublish = enableAutoPublish;
cloneJob.autoPublishModelName = autoPublishModelName;
cloneJob.autoPublishModelDeploymentGroupName = autoPublishModelDeploymentGroupName;
cloneJob.autoPublishVersionUpdateType = autoPublishVersionUpdateType;
cloneJob.autoPublishModelContainsPiiData = autoPublishModelContainsPiiData;
cloneJob.autoPublishModelContainsConfidentialData = autoPublishModelContainsConfidentialData;
return ((KabootarJob) super.clone(cloneJob));
}

Expand Down Expand Up @@ -198,4 +220,87 @@ class KabootarJob extends HadoopJavaJob {
this.modelSupplementaryDataLocation = modelSupplementaryDataLocation;
setJobProperty("model.supplementary.data.path", modelSupplementaryDataLocation);
}

/**
* DSL usesEnableQuasarModelBundle method causes enable.quasar.model.bundle to be set in the job file.
* The value is also stored in the enableQuasarModelBundle field of this job.
*
* @param enableQuasarModelBundle - Flag that enables quasar model bundle format for models produced by Kabootar
*/
@HadoopDslMethod
void usesEnableQuasarModelBundle(Boolean enableQuasarModelBundle) {
this.enableQuasarModelBundle = enableQuasarModelBundle;
setJobProperty("enable.quasar.model.bundle", enableQuasarModelBundle);
}

/**
* DSL usesEnableAutoPublish method causes auto.publish.enabled to be set in the job file.
* The value is also stored in the enableAutoPublish field of this job. When it is true, the
* auto-publish model name, deployment group name, PII flag and confidential-data flag are
* documented as required.
*
* @param enableAutoPublish - Flag that controls whether Kabootar should auto-publish the trained model
*/
@HadoopDslMethod
void usesEnableAutoPublish(Boolean enableAutoPublish) {
this.enableAutoPublish = enableAutoPublish;
setJobProperty("auto.publish.enabled", enableAutoPublish);
}

/**
* DSL usesAutoPublishModelName method causes auto.publish.model.name to be set in the job file.
* The value is also stored in the autoPublishModelName field of this job.
* Required if usesEnableAutoPublish is true.
*
* @param autoPublishModelName - Intended auto-published model name.
*/
@HadoopDslMethod
void usesAutoPublishModelName(String autoPublishModelName) {
this.autoPublishModelName = autoPublishModelName;
setJobProperty("auto.publish.model.name", autoPublishModelName);
}

/**
* DSL usesAutoPublishModelDeploymentGroupName method causes auto.publish.model.deployment.group.name to be set in the
* job file. The value is also stored in the autoPublishModelDeploymentGroupName field of this job.
* Required if usesEnableAutoPublish is true.
*
* @param autoPublishModelDeploymentGroupName - Intended model deployment group name that the trained model will be
* auto-published into. This model deployment group must exist and the publisher needs to have DEVELOPMENT_TEAM role
* for the group.
*/
@HadoopDslMethod
void usesAutoPublishModelDeploymentGroupName(String autoPublishModelDeploymentGroupName) {
this.autoPublishModelDeploymentGroupName = autoPublishModelDeploymentGroupName;
setJobProperty("auto.publish.model.deployment.group.name", autoPublishModelDeploymentGroupName);
}

/**
* DSL usesAutoPublishVersionUpdateType method causes auto.publish.version.update.type to be set in the job file.
* The value is also stored in the autoPublishVersionUpdateType field of this job. Optional.
*
* @param autoPublishVersionUpdateType - auto-publish model version update type. Can be patch, minor or major.
* NOTE(review): the DSL usage examples pass the upper-case form 'PATCH'; confirm which casing is accepted.
*/
@HadoopDslMethod
void usesAutoPublishVersionUpdateType(String autoPublishVersionUpdateType) {
this.autoPublishVersionUpdateType = autoPublishVersionUpdateType;
setJobProperty("auto.publish.version.update.type", autoPublishVersionUpdateType);
}

/**
* DSL usesAutoPublishModelContainsPiiData method causes auto.publish.model.contains.pii.data to be set in the job
* file. The value is also stored in the autoPublishModelContainsPiiData field of this job.
* Required if usesEnableAutoPublish is true.
*
* @param autoPublishModelContainsPiiData - Indicates whether the trained model to be auto-published contains PII
* data.
*/
@HadoopDslMethod
void usesAutoPublishModelContainsPiiData(Boolean autoPublishModelContainsPiiData) {
this.autoPublishModelContainsPiiData = autoPublishModelContainsPiiData;
setJobProperty("auto.publish.model.contains.pii.data", autoPublishModelContainsPiiData);
}

/**
* DSL usesAutoPublishModelContainsConfidentialData method causes auto.publish.model.contains.confidential.data to
* be set in the job file. The value is also stored in the autoPublishModelContainsConfidentialData field of this
* job. Required if usesEnableAutoPublish is true.
*
* @param autoPublishModelContainsConfidentialData - Indicates whether the trained model to be auto-published
* contains confidential data.
*/
@HadoopDslMethod
void usesAutoPublishModelContainsConfidentialData(Boolean autoPublishModelContainsConfidentialData) {
this.autoPublishModelContainsConfidentialData = autoPublishModelContainsConfidentialData;
setJobProperty("auto.publish.model.contains.confidential.data", autoPublishModelContainsConfidentialData);
}
}