Skip to content

Commit 7961400

Browse files
committed
refactor: code aws s3 bucket for remote storage of custom configs
1 parent 5cdc0bf commit 7961400

File tree

11 files changed

+100
-63
lines changed

11 files changed

+100
-63
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ PIP := $(PYTHON) -m pip
1414
ifneq ("$(wildcard .env)","")
1515
include .env
1616
else
17-
$(shell echo -e "OPENAI_API_ORGANIZATION=PLEASE-ADD-ME\nOPENAI_API_KEY=PLEASE-ADD-ME\nPINECONE_API_KEY=PLEASE-ADD-ME\nPINECONE_ENVIRONMENT=gcp-starter\nGOOGLE_MAPS_API_KEY=PLEASE-ADD-ME\nDEBUG_MODE=True\SETTINGS_AWS_S3_BUCKET=PLEASE-ADD-ME\n" >> .env)
17+
$(shell echo -e "OPENAI_API_ORGANIZATION=PLEASE-ADD-ME\nOPENAI_API_KEY=PLEASE-ADD-ME\nPINECONE_API_KEY=PLEASE-ADD-ME\nPINECONE_ENVIRONMENT=gcp-starter\nGOOGLE_MAPS_API_KEY=PLEASE-ADD-ME\nDEBUG_MODE=True\n" >> .env)
1818
endif
1919

2020
.PHONY: analyze pre-commit api-init api-activate api-lint api-clean api-test client-init client-lint client-update client-run client-build client-release

api/terraform/python/openai_api/common/aws.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,13 @@ def domain_exists(self) -> bool:
158158

159159
def get_bucket_by_prefix(self, bucket_prefix) -> str:
160160
"""Return the bucket name given the bucket prefix."""
161-
for bucket in settings.aws_s3_client.buckets.all():
162-
if bucket.name.startswith(bucket_prefix):
163-
return f"arn:aws:s3:::{bucket.name}"
161+
try:
162+
for bucket in settings.aws_s3_client.list_buckets()["Buckets"]:
163+
if bucket["Name"].startswith(bucket_prefix):
164+
return f"arn:aws:s3:::{bucket['Name']}"
165+
except TypeError:
166+
# TypeError: startswith first arg must be str or a tuple of str, not NoneType
167+
pass
164168
return None
165169

166170
def bucket_exists(self, bucket_prefix) -> bool:

api/terraform/python/openai_api/common/conf.py

+17-16
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,9 @@ class Services:
9898
AWS_IAM = ("iam", True)
9999
AWS_LAMBDA = ("lambda", True)
100100
AWS_ROUTE53 = ("route53", True)
101+
AWS_S3 = ("s3", True)
101102

102103
# disabled
103-
AWS_S3 = ("s3", False)
104104
AWS_SNS = ("sns", False)
105105
AWS_SQS = ("sqs", False)
106106
AWS_SES = ("ses", False)
@@ -161,16 +161,15 @@ class SettingsDefaults:
161161
AWS_REGION = TFVARS.get("aws_region", "us-east-1")
162162

163163
# aws api gateway defaults
164-
AWS_APIGATEWAY_CREATE_CUSTOM_DOMAIN = TFVARS.get("aws_apigateway_create_custom_domaim", False)
165-
AWS_APIGATEWAY_ROOT_DOMAIN = TFVARS.get("aws_apigateway_root_domain", None)
164+
AWS_APIGATEWAY_CREATE_CUSTOM_DOMAIN = TFVARS.get("create_custom_domain", False)
165+
AWS_APIGATEWAY_ROOT_DOMAIN = TFVARS.get("root_domain", None)
166166
AWS_APIGATEWAY_READ_TIMEOUT: int = TFVARS.get("aws_apigateway_read_timeout", 70)
167167
AWS_APIGATEWAY_CONNECT_TIMEOUT: int = TFVARS.get("aws_apigateway_connect_timeout", 70)
168168
AWS_APIGATEWAY_MAX_ATTEMPTS: int = TFVARS.get("aws_apigateway_max_attempts", 10)
169169

170170
GOOGLE_MAPS_API_KEY: str = TFVARS.get("google_maps_api_key", None)
171171

172172
LANGCHAIN_MEMORY_KEY = "chat_history"
173-
SETTINGS_AWS_S3_BUCKET: str = None
174173
OPENAI_API_ORGANIZATION: str = None
175174
OPENAI_API_KEY = SecretStr(None)
176175
OPENAI_ENDPOINT_IMAGE_N = 4
@@ -358,7 +357,6 @@ def __init__(self, **data: Any): # noqa: C901
358357
env="GOOGLE_MAPS_API_KEY",
359358
)
360359
langchain_memory_key: Optional[str] = Field(SettingsDefaults.LANGCHAIN_MEMORY_KEY, env="LANGCHAIN_MEMORY_KEY")
361-
settings_aws_bucket: Optional[str] = Field(SettingsDefaults.SETTINGS_AWS_S3_BUCKET, env="SETTINGS_AWS_S3_BUCKET")
362360
openai_api_organization: Optional[str] = Field(
363361
SettingsDefaults.OPENAI_API_ORGANIZATION, env="OPENAI_API_ORGANIZATION"
364362
)
@@ -466,9 +464,12 @@ def aws_dynamodb_client(self):
466464
return self._aws_dynamodb_client
467465

468466
@property
469-
def aws_s3_bucket_name(self) -> str:
470-
"""Return the S3 bucket name."""
471-
return self.aws_account_id + "-" + self.shared_resource_identifier
467+
def aws_s3_client(self):
468+
"""S3 client"""
469+
Services.raise_error_on_disabled(Services.AWS_S3)
470+
if not self._aws_s3_client:
471+
self._aws_s3_client = self.aws_session.client("s3")
472+
return self._aws_s3_client
472473

473474
@property
474475
def aws_apigateway_name(self) -> str:
@@ -488,6 +489,13 @@ def aws_apigateway_domain_name(self) -> str:
488489
return f"{api_id}.execute-api.{settings.aws_region}.amazonaws.com"
489490
return None
490491

492+
@property
493+
def aws_s3_bucket_name(self) -> str:
494+
"""Return the S3 bucket name."""
495+
if self.shared_resource_identifier and self.aws_apigateway_root_domain:
496+
return "api." + self.shared_resource_identifier + "." + self.aws_apigateway_root_domain
497+
return None
498+
491499
@property
492500
def is_using_dotenv_file(self) -> bool:
493501
"""Is the dotenv file being used?"""
@@ -566,7 +574,7 @@ def get_installed_packages():
566574
"google_maps_api_key": self.google_maps_api_key,
567575
},
568576
"openai_api": {
569-
"settings_aws_bucket": self.settings_aws_bucket,
577+
"aws_s3_bucket_name": self.aws_s3_bucket_name,
570578
"langchain_memory_key": self.langchain_memory_key,
571579
"openai_endpoint_image_n": self.openai_endpoint_image_n,
572580
"openai_endpoint_image_size": self.openai_endpoint_image_size,
@@ -691,13 +699,6 @@ def check_langchain_memory_key(cls, v) -> str:
691699
return SettingsDefaults.LANGCHAIN_MEMORY_KEY
692700
return v
693701

694-
@field_validator("settings_aws_bucket")
695-
def check_lambda_openai_function_config_url(cls, v) -> str:
696-
"""Check settings_aws_bucket"""
697-
if v in [None, ""]:
698-
return SettingsDefaults.SETTINGS_AWS_S3_BUCKET
699-
return v
700-
701702
@field_validator("openai_api_organization")
702703
def check_openai_api_organization(cls, v) -> str:
703704
"""Check openai_api_organization"""

api/terraform/python/openai_api/common/tests/test_configuration.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -306,8 +306,8 @@ def test_settings_dynamodb(self):
306306
def test_settings_aws_s3_bucket_name(self):
307307
"""Test that the S3 bucket name is valid."""
308308
mock_settings = Settings(init_info="test_settings_aws_s3_bucket_name()")
309-
self.assertIsNotNone(mock_settings.aws_s3_bucket_name)
310-
self.assertTrue(mock_settings.aws_s3_bucket_name.startswith(mock_settings.aws_account_id))
309+
if mock_settings.aws_apigateway_create_custom_domaim:
310+
self.assertIsNotNone(mock_settings.aws_s3_bucket_name)
311311

312312
def test_settings_aws_apigateway_domain_name(self):
313313
"""Test that the API Gateway domain name is valid."""

api/terraform/python/openai_api/lambda_openai_function/refers_to.py renamed to api/terraform/python/openai_api/lambda_openai_function/custom_config.py

+31-35
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ class CustomConfigs:
263263

264264
_custom_configs: list[CustomConfig] = None
265265
_aws_bucket_name: str = None
266-
_aws_bucket_path: str = "/aws_openai/lambda_openai_function/custom_configs/"
266+
_aws_bucket_path: str = "aws_openai/lambda_openai_function/custom_configs/"
267267
_aws_bucket_path_validated: bool = False
268268

269269
def __init__(self, config_path: str = None, aws_s3_bucket_name: str = None):
@@ -285,21 +285,14 @@ def __init__(self, config_path: str = None, aws_s3_bucket_name: str = None):
285285
s3 = settings.aws_session.resource("s3")
286286
bucket = s3.Bucket(self._aws_bucket_name)
287287

288-
for obj in bucket.objects.filter(Prefix=self._aws_bucket_path):
289-
i += 1
290-
file_content = obj.get()["Body"].read().decode("utf-8")
291-
config_json = yaml.safe_load(file_content)
292-
custom_config = CustomConfig(config_json=config_json, index=i)
293-
self._custom_configs.append(custom_config)
294-
295-
def list_yaml_files(bucket_name):
296-
"""List all the YAML files in the AWS S3 bucket"""
297-
s3 = settings.aws_session.resource("s3")
298-
bucket = s3.Bucket(bucket_name)
299-
300-
for obj in bucket.objects.all():
301-
if obj.key.endswith(".yaml") or obj.key.endswith(".yml"):
302-
print("Found YAML file:", obj.key)
288+
for obj in bucket.objects.filter(Prefix=self.aws_bucket_path):
289+
if obj.key.endswith(".yaml") or obj.key.endswith(".yml"):
290+
i += 1
291+
file_content = obj.get()["Body"].read().decode("utf-8")
292+
config_json = yaml.safe_load(file_content)
293+
if config_json:
294+
custom_config = CustomConfig(config_json=config_json, index=i)
295+
self._custom_configs.append(custom_config)
303296

304297
@property
305298
def valid_configs(self) -> list[CustomConfig]:
@@ -311,38 +304,36 @@ def invalid_configs(self) -> list[CustomConfig]:
311304
"""Return a list of invalid configs"""
312305
return [config for config in self._custom_configs if not config.is_valid]
313306

307+
@property
308+
def aws_bucket_path(self) -> str:
309+
"""Return the remote host"""
310+
return self._aws_bucket_path
311+
314312
@property
315313
def aws_bucket_path_validated(self) -> bool:
316314
"""Return True if the remote host is valid"""
317315
return self._aws_bucket_path_validated
318316

319-
@property
320-
def aws_bucket_full_path(self) -> str:
321-
"""Return the remote host"""
322-
if self.aws_bucket_path_validated:
323-
return self._aws_bucket_name + self._aws_bucket_path
324-
return None
325-
326317
def verify_bucket(self, bucket_name: str):
327318
"""Verify that the remote host is valid"""
319+
if not bucket_name:
320+
return
321+
328322
s3 = settings.aws_session.resource("s3")
329-
bucket = s3.Bucket(bucket_name)
330-
folder_path = self._aws_bucket_path
331323
try:
332324
# Check if bucket exists
333325
s3.meta.client.head_bucket(Bucket=bucket_name)
334326
# pylint: disable=broad-exception-caught
335-
except Exception:
327+
except Exception as e:
328+
log.warning("Bucket %s does not exist: %s", bucket_name, e)
336329
return
337330

338-
try:
339-
# Create any missing folders
340-
if not any(s3_object.key.startswith(folder_path) for s3_object in bucket.objects.all()):
341-
s3.Object(bucket_name, folder_path).put()
342-
self._aws_bucket_path_validated = True
343-
# pylint: disable=broad-exception-caught
344-
except Exception:
345-
pass
331+
# Create any missing folders
332+
bucket = s3.Bucket(bucket_name)
333+
if not any(s3_object.key.startswith(self.aws_bucket_path) for s3_object in bucket.objects.all()):
334+
print(f"Creating folder {self.aws_bucket_path} in bucket {bucket_name}")
335+
s3.Object(bucket_name, self.aws_bucket_path).put()
336+
self._aws_bucket_path_validated = True
346337

347338
def to_json(self) -> json:
348339
"""Return the _custom_configs list as a JSON object"""
@@ -370,4 +361,9 @@ def custom_configs(self) -> CustomConfigs:
370361
return self._custom_configs
371362

372363

373-
config = SingletonCustomConfigs().custom_configs.valid_configs
364+
_custom_configs = SingletonCustomConfigs().custom_configs
365+
config = _custom_configs.valid_configs
366+
if len(_custom_configs.invalid_configs) > 0:
367+
# invalid_configs is a list of CustomConfig objects, so read file_name from each element
# rather than from the list itself (assumes CustomConfig exposes file_name, as the original line implied).
invalid_configurations = [str(cfg.file_name) for cfg in _custom_configs.invalid_configs]
368+
invalid_configurations = ", ".join(invalid_configurations)
369+
log.error("Invalid custom config files: %s", invalid_configurations)

api/terraform/python/openai_api/lambda_openai_function/function_refers_to.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
import json
77

88
from openai_api.common.const import PYTHON_ROOT
9+
from openai_api.lambda_openai_function.custom_config import CustomConfig
10+
from openai_api.lambda_openai_function.custom_config import config as refers_to_config
911
from openai_api.lambda_openai_function.natural_language_processing import does_refer_to
10-
from openai_api.lambda_openai_function.refers_to import CustomConfig
11-
from openai_api.lambda_openai_function.refers_to import config as refers_to_config
1212

1313

1414
def search_terms_are_in_messages(messages: list, search_terms: list = None, search_pairs: list = None) -> bool:

api/terraform/python/openai_api/lambda_openai_function/lambda_handler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
validate_completion_request,
3939
validate_item,
4040
)
41+
from openai_api.lambda_openai_function.custom_config import config as refers_to_config
4142
from openai_api.lambda_openai_function.function_refers_to import (
4243
customized_prompt,
4344
get_additional_info,
@@ -50,7 +51,6 @@
5051
get_current_weather,
5152
weather_tool_factory,
5253
)
53-
from openai_api.lambda_openai_function.refers_to import config as refers_to_config
5454

5555

5656
openai.organization = settings.openai_api_organization

api/terraform/python/openai_api/lambda_openai_function/tests/test_refers_to.py renamed to api/terraform/python/openai_api/lambda_openai_function/tests/test_custom_config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222

2323
# pylint: disable=no-name-in-module
24-
from openai_api.lambda_openai_function.refers_to import (
24+
from openai_api.lambda_openai_function.custom_config import (
2525
AdditionalInformation,
2626
CustomConfig,
2727
SearchTerms,

api/terraform/python/openai_api/lambda_openai_function/tests/test_lambda_openai_refers_to.py renamed to api/terraform/python/openai_api/lambda_openai_function/tests/test_lambda_openai_custom_config.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@
2020
sys.path.append(PYTHON_ROOT) # noqa: E402
2121

2222

23+
from openai_api.lambda_openai_function.custom_config import CustomConfig
24+
2325
# pylint: disable=no-name-in-module
2426
from openai_api.lambda_openai_function.function_refers_to import (
2527
get_additional_info,
2628
info_tool_factory,
2729
)
28-
from openai_api.lambda_openai_function.refers_to import CustomConfig
2930
from openai_api.lambda_openai_function.tests.test_setup import get_test_file_path
3031

3132

api/terraform/python/openai_api/lambda_openai_function/tests/test_lambda_openai_function.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@
1818
sys.path.append(PYTHON_ROOT) # noqa: E402
1919

2020

21+
from openai_api.lambda_openai_function.custom_config import CustomConfig
2122
from openai_api.lambda_openai_function.lambda_handler import ( # noqa: E402; handler,
2223
handler,
2324
search_terms_are_in_messages,
2425
)
2526
from openai_api.lambda_openai_function.natural_language_processing import does_refer_to
26-
from openai_api.lambda_openai_function.refers_to import CustomConfig
2727
from openai_api.lambda_openai_function.tests.test_setup import (
2828
get_test_file,
2929
get_test_file_path,

api/terraform/s3.tf

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
2+
locals {
3+
s3_bucket_name = "api.${var.shared_resource_identifier}.${var.root_domain}"
4+
}
5+
6+
resource "aws_s3_bucket" "openai" {
7+
bucket = local.s3_bucket_name
8+
tags = var.tags
9+
}
10+
11+
data "aws_iam_policy_document" "s3_bucket_policy" {
12+
statement {
13+
actions = [
14+
"s3:PutObject",
15+
"s3:GetObject",
16+
"s3:ListBucket",
17+
"s3:DeleteObject",
18+
]
19+
20+
principals {
21+
type = "AWS"
22+
identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:user/mcdaniel"]
23+
}
24+
25+
resources = [
26+
"${aws_s3_bucket.openai.arn}/*",
27+
"${aws_s3_bucket.openai.arn}"
28+
]
29+
}
30+
}
31+
32+
resource "aws_s3_bucket_policy" "openai_bucket_policy" {
33+
bucket = aws_s3_bucket.openai.id
34+
policy = data.aws_iam_policy_document.s3_bucket_policy.json
35+
}

0 commit comments

Comments
 (0)