diff --git a/samples/feature_group/README.md b/samples/feature_group/README.md new file mode 100644 index 00000000..600bbfcd --- /dev/null +++ b/samples/feature_group/README.md @@ -0,0 +1,62 @@ +# Feature Group Sample + +This sample demonstrates how to create a feature group using the AWS Controllers for Kubernetes (ACK) service controller for Amazon SageMaker. + +Inspiration for this sample was taken from the notebook on [Fraud Detection with Amazon SageMaker FeatureStore](https://sagemaker-examples.readthedocs.io/en/latest/sagemaker-featurestore/sagemaker_featurestore_fraud_detection_python_sdk.html). + +## Prerequisites + +This sample assumes that you have completed the [common prerequisites](https://github.com/aws-controllers-k8s/sagemaker-controller/blob/main/samples/README.md). + +### Create an S3 bucket: + +Since we are using the offline store in this example, you need to set up an S3 bucket. [Here are directions](https://docs.aws.amazon.com/AmazonS3/latest/userguide/create-bucket-overview.html) to set up your S3 bucket through the S3 Console, AWS SDK, or AWS CLI. + +### Updating the Feature Group Specification: + +In the `my-feature-group.yaml` file, replace the placeholder values with those associated with your account and feature group. 
#!/usr/bin/env python3
"""Ingest the rows of a CSV file into a SageMaker feature group.

The first line of the CSV file is treated as the header; every following
row is converted into one record whose feature names are the header
fields, and is sent to the feature group with ``put_record``.

Usage:
    python3 data_ingestion.py -i my-sample-data.csv -fg my-feature-group
"""

import argparse
import csv


def parse_args(argv=None):
    """Parse the command-line arguments.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The parsed namespace with ``input_file`` and ``feature_group_name``.

    Raises:
        SystemExit: If a required argument is missing (argparse prints the
            usage message and exits with status 2).
    """
    parser = argparse.ArgumentParser(
        description="Write the rows of a csv file to a feature group.")
    # Both options are mandatory: without them the script would fail later
    # with an unrelated-looking error (e.g. open(None)).
    parser.add_argument(
        "-i", "--input_file", required=True,
        help="Path to a csv file containing data for ingestion.")
    parser.add_argument(
        "-fg", "--feature_group_name", required=True,
        help="Name of the feature group to write data to.")
    return parser.parse_args(argv)


def build_record(row):
    """Convert one ``csv.DictReader`` row into a ``put_record`` record.

    Args:
        row: Mapping of feature name to the value read from the CSV row.

    Returns:
        A list of ``{'FeatureName': ..., 'ValueAsString': ...}`` dicts, one
        per feature that has a value in this row.

    Raises:
        ValueError: If the row has more fields than the header
            (``DictReader`` stores the surplus under the key ``None``).
    """
    if None in row:
        raise ValueError(
            "row has more fields than the header: {!r}".format(row[None]))
    # DictReader fills fields missing from a short row with None; such a
    # feature is left out of the record instead of being sent as None.
    return [
        {'FeatureName': feature_name, 'ValueAsString': value}
        for feature_name, value in row.items()
        if value is not None
    ]


def read_records(file_handle):
    """Yield one record per data row of an open CSV source.

    Args:
        file_handle: Iterable of CSV lines (for example an open text file);
            the first line is the header.

    Yields:
        The record built by ``build_record`` for each row.

    Raises:
        ValueError: If a row cannot be converted; the message is prefixed
            with the line number reported by the CSV reader.
    """
    reader = csv.DictReader(file_handle, skipinitialspace=True)
    for row in reader:
        try:
            yield build_record(row)
        except ValueError as err:
            raise ValueError(
                "line {}: {}".format(reader.line_num, err)) from err


def main(argv=None):
    """Parse the arguments and write every CSV row to the feature group."""
    # Validate the command line first so that --help and usage errors do
    # not depend on the AWS SDK or on AWS configuration.
    args = parse_args(argv)

    # Imported here so the helpers above stay importable without boto3.
    import boto3

    sagemaker_featurestore_runtime_client = boto3.Session().client(
        service_name="sagemaker-featurestore-runtime")

    # Write records from the csv file to the feature group.
    # newline="" is what the csv module expects of files it reads.
    with open(args.input_file, newline="", encoding="utf-8") as file_handle:
        for record in read_records(file_handle):
            sagemaker_featurestore_runtime_client.put_record(
                FeatureGroupName=args.feature_group_name,
                Record=record)


if __name__ == "__main__":
    main()