Skip to content

Commit a09c01c

Browse files
authored
Merge pull request #268 from js2839/master
Adding support for model auto-publish for KabootarJob
2 parents 1b04ee4 + 72a1f14 commit a09c01c

File tree

7 files changed

+140
-1
lines changed

7 files changed

+140
-1
lines changed

CONTRIBUTORS.md

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ the LinkedIn Gradle DSL for Apache Hadoop was influenced by
2222
Vaughan.
2323

2424
### Contributors
25+
The following were contributed by Jin Sha. Thanks, Jin!
26+
* `Added support for model auto-publish for KabootarJob`
2527

2628
The following were contributed by Arpan Agrawal. Thanks, Arpan!
2729
* `Added AutoTunePigLiJob job type support `

VERSIONS.md

+3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ the License.
1717
Note that the LinkedIn build system occasionally requires that we skip a
1818
version bump, so you will see a few skipped version numbers in the list below.
1919

20+
0.15.22
21+
* Adding support for model auto-publish for KabootarJob
22+
2023
0.15.16
2124
* Adding KubernetesJob job type support.
2225

gradle.properties

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
org.gradle.daemon=true
2-
version=0.15.21
2+
version=0.15.22

hadoop-plugin/src/integTest/resources/expectedJobs/jobs1/jobs1_job27.job

+7
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22
type=KabootarJob
33
dependencies=jobs1_job26
44
ai.project.group=AIFoundationOther
5+
auto.publish.enabled=true
6+
auto.publish.model.contains.confidential.data=false
7+
auto.publish.model.contains.pii.data=false
8+
auto.publish.model.deployment.group.name=myDeploymentGroup
9+
auto.publish.model.name=myFirstModel
10+
auto.publish.version.update.type=PATCH
11+
enable.quasar.model.bundle=true
512
framework=PHOTON_CONNECT
613
initial.import=initial/import/File
714
model.supplementary.data.path=/user/testmodelregsvc/data

hadoop-plugin/src/integTest/resources/gradle/positive/jobs1.gradle

+7
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,13 @@ hadoop {
451451
usesOrigin 'FELLOWSHIP' // Optional
452452
usesFramework 'PHOTON_CONNECT' // Optional
453453
usesModelSupplementaryDataLocation '/user/testmodelregsvc/data' // Optional
454+
usesEnableQuasarModelBundle true // Optional
455+
usesEnableAutoPublish true // Optional
456+
usesAutoPublishModelName 'myFirstModel' // Required if usesEnableAutoPublish is true
457+
usesAutoPublishModelDeploymentGroupName 'myDeploymentGroup' // Required if usesEnableAutoPublish is true
458+
usesAutoPublishVersionUpdateType 'PATCH' // Optional
459+
usesAutoPublishModelContainsPiiData false // Required if usesEnableAutoPublish is true
460+
usesAutoPublishModelContainsConfidentialData false // Required if usesEnableAutoPublish is true
454461

455462
depends 'job26'
456463
}

hadoop-plugin/src/main/groovy/com/linkedin/gradle/hadoopdsl/checker/RequiredFieldsChecker.groovy

+15
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ class RequiredFieldsChecker extends BaseStaticChecker {
265265
boolean emptyAiProjectGroup = job.aiProjectGroup == null || job.aiProjectGroup.isEmpty();
266266
boolean emptyWormholeNamespace = job.wormholeNamespace == null || job.wormholeNamespace.isEmpty();
267267
boolean emptyInitialImport = job.initialImport == null || job.initialImport.isEmpty();
268+
boolean emptyAutoPublishModelName = job.autoPublishModelName == null || job.autoPublishModelName.isEmpty();
269+
boolean emptyAutoPublishModelDeploymentGroupName = job.autoPublishModelDeploymentGroupName == null || job.autoPublishModelDeploymentGroupName.isEmpty();
270+
boolean emptyAutoPublishModelContainsPiiData = job.autoPublishModelContainsPiiData == null;
271+
boolean emptyAutoPublishModelContainsConfidentialData = job.autoPublishModelContainsConfidentialData == null; // compare, do not assign: a single '=' would null out the job field
272+
268273

269274
if (emptyTrainedModelLocation || emptyTrainingName || emptyAiProjectGroup || emptyWormholeNamespace || emptyInitialImport) {
270275
project.logger.lifecycle(
@@ -273,6 +278,16 @@ class RequiredFieldsChecker extends BaseStaticChecker {
273278
"Please see the job documentation for more details.");
274279
foundError = true;
275280
}
281+
282+
if (job.enableAutoPublish && (emptyAutoPublishModelName || emptyAutoPublishModelDeploymentGroupName ||
283+
emptyAutoPublishModelContainsPiiData || emptyAutoPublishModelContainsConfidentialData)) {
284+
project.logger.lifecycle(
285+
"RequiredFieldsChecker ERROR: KabootarJob ${job.name} must set autoPublishModelName, " +
286+
"autoPublishModelDeploymentGroupName, autoPublishModelContainsPiiData and " +
287+
"autoPublishModelContainsConfidentialData when auto-publish is enabled. " +
288+
"Please see the job documentation for more details.");
289+
foundError = true;
290+
}
276291
}
277292

278293

hadoop-plugin/src/main/groovy/com/linkedin/gradle/hadoopdsl/job/KabootarJob.groovy

+105
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@ import com.linkedin.gradle.hadoopdsl.HadoopDslMethod;
3535
* usesOrigin 'FELLOWSHIP' // Optional
3636
* usesFramework 'PHOTON_CONNECT' // Optional
3737
* usesModelSupplementaryDataLocation '/user/testmodelregsvc/trained-models-supplementary-data' // Optional
38+
* usesEnableQuasarModelBundle // Optional
39+
* usesEnableAutoPublish // Optional
40+
* usesAutoPublishModelName // Required if usesEnableAutoPublish is true
41+
* usesAutoPublishModelDeploymentGroupName // Required if usesEnableAutoPublish is true
42+
* usesAutoPublishVersionUpdateType // Optional
43+
* usesAutoPublishModelContainsPiiData // Required if usesEnableAutoPublish is true
44+
* usesAutoPublishModelContainsConfidentialData // Required if usesEnableAutoPublish is true
3845
*}* </pre>*/
3946
class KabootarJob extends HadoopJavaJob {
4047
// Required
@@ -49,6 +56,14 @@ class KabootarJob extends HadoopJavaJob {
4956
String origin;
5057
String framework;
5158
String modelSupplementaryDataLocation;
59+
Boolean enableQuasarModelBundle;
60+
Boolean enableAutoPublish;
61+
String autoPublishModelName; //required if enableAutoPublish is true
62+
String autoPublishModelDeploymentGroupName; //required if enableAutoPublish is true
63+
String autoPublishVersionUpdateType;
64+
Boolean autoPublishModelContainsPiiData; //required if enableAutoPublish is true
65+
Boolean autoPublishModelContainsConfidentialData; //required if enableAutoPublish is true
66+
5267

5368
/**
5469
* Constructor for KabootarJob.
@@ -86,6 +101,13 @@ class KabootarJob extends HadoopJavaJob {
86101
cloneJob.origin = origin;
87102
cloneJob.framework = framework;
88103
cloneJob.modelSupplementaryDataLocation = modelSupplementaryDataLocation;
104+
cloneJob.enableQuasarModelBundle = enableQuasarModelBundle;
105+
cloneJob.enableAutoPublish = enableAutoPublish;
106+
cloneJob.autoPublishModelName = autoPublishModelName;
107+
cloneJob.autoPublishModelDeploymentGroupName = autoPublishModelDeploymentGroupName;
108+
cloneJob.autoPublishVersionUpdateType = autoPublishVersionUpdateType;
109+
cloneJob.autoPublishModelContainsPiiData = autoPublishModelContainsPiiData;
110+
cloneJob.autoPublishModelContainsConfidentialData = autoPublishModelContainsConfidentialData;
89111
return ((KabootarJob) super.clone(cloneJob));
90112
}
91113

@@ -198,4 +220,87 @@ class KabootarJob extends HadoopJavaJob {
198220
this.modelSupplementaryDataLocation = modelSupplementaryDataLocation;
199221
setJobProperty("model.supplementary.data.path", modelSupplementaryDataLocation);
200222
}
223+
224+
/**
225+
* DSL usesEnableQuasarModelBundle method causes enable.quasar.model.bundle to be set in the job file.
226+
*
227+
* @param enableQuasarModelBundle - Flag that enables quasar model bundle format for models produced by Kabootar
228+
*/
229+
@HadoopDslMethod
230+
void usesEnableQuasarModelBundle(Boolean enableQuasarModelBundle) {
231+
this.enableQuasarModelBundle = enableQuasarModelBundle;
232+
setJobProperty("enable.quasar.model.bundle", enableQuasarModelBundle);
233+
}
234+
235+
/**
236+
* DSL usesEnableAutoPublish method causes auto.publish.enabled to be set in the job file.
237+
*
238+
* @param enableAutoPublish - Flag that controls whether Kabootar should auto-publish the trained model
239+
*/
240+
@HadoopDslMethod
241+
void usesEnableAutoPublish(Boolean enableAutoPublish) {
242+
this.enableAutoPublish = enableAutoPublish;
243+
setJobProperty("auto.publish.enabled", enableAutoPublish);
244+
}
245+
246+
/**
247+
* DSL usesAutoPublishModelName method causes auto.publish.model.name to be set in the job file.
248+
*
249+
* @param autoPublishModelName - Intended auto-published model name.
250+
*/
251+
@HadoopDslMethod
252+
void usesAutoPublishModelName(String autoPublishModelName) {
253+
this.autoPublishModelName = autoPublishModelName;
254+
setJobProperty("auto.publish.model.name", autoPublishModelName);
255+
}
256+
257+
/**
258+
* DSL usesAutoPublishModelDeploymentGroupName method causes auto.publish.model.deployment.group.name to be set in the
259+
* job file.
260+
*
261+
* @param autoPublishModelDeploymentGroupName - Intended model deployment group name that the trained model will be
262+
* auto-published into. This model deployment group must exist and the publisher needs to have DEVELOPMENT_TEAM role
263+
* for the group.
264+
*/
265+
@HadoopDslMethod
266+
void usesAutoPublishModelDeploymentGroupName(String autoPublishModelDeploymentGroupName) {
267+
this.autoPublishModelDeploymentGroupName = autoPublishModelDeploymentGroupName;
268+
setJobProperty("auto.publish.model.deployment.group.name", autoPublishModelDeploymentGroupName);
269+
}
270+
271+
/**
272+
* DSL usesAutoPublishVersionUpdateType method causes auto.publish.version.update.type to be set in the job file.
273+
*
274+
* @param autoPublishVersionUpdateType - auto-publish model version update type. Can be patch, minor or major.
275+
*/
276+
@HadoopDslMethod
277+
void usesAutoPublishVersionUpdateType(String autoPublishVersionUpdateType) {
278+
this.autoPublishVersionUpdateType = autoPublishVersionUpdateType;
279+
setJobProperty("auto.publish.version.update.type", autoPublishVersionUpdateType);
280+
}
281+
282+
/**
283+
* DSL usesAutoPublishModelContainsPiiData method causes auto.publish.model.contains.pii.data to be set in the job
284+
* file.
285+
*
286+
* @param autoPublishModelContainsPiiData - Indicates whether the trained model to be auto-published contains PII data.
287+
*/
288+
@HadoopDslMethod
289+
void usesAutoPublishModelContainsPiiData(Boolean autoPublishModelContainsPiiData) {
290+
this.autoPublishModelContainsPiiData = autoPublishModelContainsPiiData;
291+
setJobProperty("auto.publish.model.contains.pii.data", autoPublishModelContainsPiiData);
292+
}
293+
294+
/**
295+
* DSL usesAutoPublishModelContainsConfidentialData method causes auto.publish.model.contains.confidential.data to
296+
* be set in the job file.
297+
*
298+
* @param autoPublishModelContainsConfidentialData - Indicates whether the trained model to be auto-published contains
299+
* confidential data.
300+
*/
301+
@HadoopDslMethod
302+
void usesAutoPublishModelContainsConfidentialData(Boolean autoPublishModelContainsConfidentialData) {
303+
this.autoPublishModelContainsConfidentialData = autoPublishModelContainsConfidentialData;
304+
setJobProperty("auto.publish.model.contains.confidential.data", autoPublishModelContainsConfidentialData);
305+
}
201306
}

0 commit comments

Comments
 (0)