Stream Django SQL queries and add flag to toggle their streaming #111

Merged · 7 commits · Jan 8, 2019
Changes from 3 commits
12 changes: 8 additions & 4 deletions CHANGELOG.rst
@@ -2,6 +2,10 @@
CHANGELOG
=========

unreleased
==========
* feature: Stream dbapi2 SQL queries and add flag to toggle their streaming

2.2.0
=====
* feature: Added context managers on segment/subsegment capture. `PR97 <https://github.com/aws/aws-xray-sdk-python/pull/97>`_.
@@ -32,11 +36,11 @@ CHANGELOG
* **Breaking**: The original sampling modules for local defined rules are moved from `models.sampling` to `models.sampling.local`.
* **Breaking**: The default behavior of `patch_all` changed to selectively patches libraries to avoid double patching. You can use `patch_all(double_patch=True)` to force it to patch ALL supported libraries. See more details on `ISSUE63 <https://github.com/aws/aws-xray-sdk-python/issues/63>`_
* **Breaking**: The latest `botocore` that has new X-Ray service API `GetSamplingRules` and `GetSamplingTargets` are required.
* **Breaking**: Version 2.x doesn't support pynamodb and aiobotocore as it requires botocore >= 1.11.3 which isn’t currently supported by the pynamodb and aiobotocore libraries. Please continue to use version 1.x if you’re using pynamodb or aiobotocore until those have been updated to use botocore >= 1.11.3.
* feature: Environment variable `AWS_XRAY_DAEMON_ADDRESS` now takes an additional notation in `tcp:127.0.0.1:2000 udp:127.0.0.2:2001` to set TCP and UDP destination separately. By default it assumes a X-Ray daemon listening to both UDP and TCP traffic on `127.0.0.1:2000`.
* feature: Added MongoDB python client support. `PR65 <https://github.com/aws/aws-xray-sdk-python/pull/65>`_.
* bugfix: Support binding connection in sqlalchemy as well as engine. `PR78 <https://github.com/aws/aws-xray-sdk-python/pull/78>`_.
* bugfix: Flask middleware safe request teardown. `ISSUE75 <https://github.com/aws/aws-xray-sdk-python/issues/75>`_.


1.1.2
@@ -68,7 +72,7 @@ CHANGELOG
* bugfix: Fixed an issue where arbitrary fields in trace header being dropped when calling downstream.
* bugfix: Fixed a compatibility issue between botocore and httplib patcher. `ISSUE48 <https://github.com/aws/aws-xray-sdk-python/issues/48>`_.
* bugfix: Fixed a typo in sqlalchemy decorators. `PR50 <https://github.com/aws/aws-xray-sdk-python/pull/50>`_.
* Updated `README` with more usage examples.

0.97
====
21 changes: 20 additions & 1 deletion README.md
@@ -251,6 +251,20 @@ with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
pass
```

### Trace SQL queries
By default, if no other value is provided to `.configure()`, SQL trace streaming is enabled
for all the supported DB engines. Those currently are:
- Any engine attached to the Django ORM.
- Any engine attached to SQLAlchemy.
- SQLite3.

The behaviour can be toggled by passing the appropriate `stream_sql` value, for example:
```python
from aws_xray_sdk.core import xray_recorder

xray_recorder.configure(service='fallback_name', stream_sql=True)
```

### Patch third-party libraries

```python
@@ -260,7 +274,8 @@ libs_to_patch = ('boto3', 'mysql', 'requests')
patch(libs_to_patch)
```

### Add Django middleware
### Django
#### Add middleware

In Django's settings.py, use the following.

@@ -275,6 +290,10 @@ MIDDLEWARE = [
# ... other middlewares
]
```
#### SQL tracing
If Django's ORM is patched, either by setting `AUTO_INSTRUMENT = True` in your settings file
or by explicitly calling `patch_db()`, SQL query trace streaming can be enabled or disabled
by updating the `STREAM_SQL` variable in your settings file.
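For illustration, a minimal settings sketch (the `XRAY_RECORDER` settings namespace appears later in this diff; the exact values below are assumptions, not part of this PR):
```python
# settings.py (sketch, not verbatim from the PR)
XRAY_RECORDER = {
    'AUTO_INSTRUMENT': True,  # patch Django's ORM automatically
    'STREAM_SQL': True,       # stream SQL query text into SQL subsegments
    # ... other X-Ray recorder settings
}
```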

### Add Flask middleware

15 changes: 14 additions & 1 deletion aws_xray_sdk/core/recorder.py
@@ -72,6 +72,7 @@ def __init__(self):
self._dynamic_naming = None
self._aws_metadata = copy.deepcopy(XRAY_META)
self._origin = None
self._stream_sql = False

if type(self.sampler).__name__ == 'DefaultSampler':
self.sampler.load_settings(DaemonConfig(), self.context)
@@ -81,7 +82,8 @@ def configure(self, sampling=None, plugins=None,
daemon_address=None, service=None,
context=None, emitter=None, streaming=None,
dynamic_naming=None, streaming_threshold=None,
max_trace_back=None, sampler=None):
max_trace_back=None, sampler=None,
stream_sql=True):
"""Configure global X-Ray recorder.

Configure needs to run before patching third party libraries
@@ -130,6 +132,7 @@ class to have your own implementation of the streaming process.
maximum number of subsegments within a segment.
:param int max_trace_back: The maximum number of stack traces recorded
by auto-capture. Lower this if a single document becomes too large.
:param bool stream_sql: Whether SQL query texts should be streamed.

Environment variables AWS_XRAY_DAEMON_ADDRESS, AWS_XRAY_CONTEXT_MISSING
and AWS_XRAY_TRACING_NAME respectively overrides arguments
@@ -159,6 +162,8 @@ class to have your own implementation of the streaming process.
self.streaming_threshold = streaming_threshold
if max_trace_back:
self.max_trace_back = max_trace_back
if stream_sql is not None:
self.stream_sql = stream_sql

if plugins:
plugin_modules = get_plugin_modules(plugins)
@@ -548,3 +553,11 @@ def max_trace_back(self):
@max_trace_back.setter
def max_trace_back(self, value):
self._max_trace_back = value

@property
def stream_sql(self):
return self._stream_sql

@stream_sql.setter
def stream_sql(self, value):
self._stream_sql = value
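
A minimal usage sketch of the new recorder option (the service name below is illustrative; `configure()` and the `stream_sql` property are as added in this diff):
```python
from aws_xray_sdk.core import xray_recorder

# stream_sql defaults to True in configure() at this revision of the PR
xray_recorder.configure(service='my_service', stream_sql=False)

# The patchers read the property at query time, so it can also be toggled directly.
assert xray_recorder.stream_sql is False
xray_recorder.stream_sql = True
```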
10 changes: 6 additions & 4 deletions aws_xray_sdk/ext/dbapi2.py
@@ -43,23 +43,23 @@ def __enter__(self):
@xray_recorder.capture()
def execute(self, query, *args, **kwargs):

add_sql_meta(self._xray_meta)
add_sql_meta(self._xray_meta, query)
return self.__wrapped__.execute(query, *args, **kwargs)

@xray_recorder.capture()
def executemany(self, query, *args, **kwargs):

add_sql_meta(self._xray_meta)
add_sql_meta(self._xray_meta, query)
return self.__wrapped__.executemany(query, *args, **kwargs)

@xray_recorder.capture()
def callproc(self, proc, args):

add_sql_meta(self._xray_meta)
add_sql_meta(self._xray_meta, proc)
return self.__wrapped__.callproc(proc, args)


def add_sql_meta(meta):
def add_sql_meta(meta, query):

subsegment = xray_recorder.current_subsegment()

Expand All @@ -72,5 +72,7 @@ def add_sql_meta(meta):
sql_meta = copy.copy(meta)
if sql_meta.get('name', None):
del sql_meta['name']
if xray_recorder.stream_sql:
sql_meta['sanitized_query'] = query
subsegment.set_sql(sql_meta)
subsegment.namespace = 'remote'
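
To make the effect concrete, a rough sketch against a patched sqlite3 connection (segment and table names are hypothetical; note that, per the review conversation further down, the dbapi2-level change was later narrowed to Django):
```python
import sqlite3

from aws_xray_sdk.core import patch, xray_recorder

patch(('sqlite3',))  # sqlite3 is one of the dbapi2-based engines listed in the README above
xray_recorder.configure(service='sql-demo', sampling=False, stream_sql=True)

xray_recorder.begin_segment('sql-demo')
conn = sqlite3.connect(':memory:')
cur = conn.cursor()
cur.execute('CREATE TABLE users (id INTEGER, name TEXT)')
cur.execute('SELECT name FROM users WHERE id = ?', (1,))

# With stream_sql=True, each execute() subsegment's sql metadata carries the
# query text under 'sanitized_query'; with stream_sql=False the key is omitted.
xray_recorder.end_segment()
```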
1 change: 1 addition & 0 deletions aws_xray_sdk/ext/django/apps.py
@@ -36,6 +36,7 @@ def ready(self):
dynamic_naming=settings.DYNAMIC_NAMING,
streaming_threshold=settings.STREAMING_THRESHOLD,
max_trace_back=settings.MAX_TRACE_BACK,
stream_sql=settings.STREAM_SQL,
)

# if turned on subsegment will be generated on
1 change: 1 addition & 0 deletions aws_xray_sdk/ext/django/conf.py
@@ -14,6 +14,7 @@
'DYNAMIC_NAMING': None,
'STREAMING_THRESHOLD': None,
'MAX_TRACE_BACK': None,
'STREAM_SQL': False,
Contributor

Sorry for the late response. If by default the SQL should be captured, should this be set to True?

Contributor Author

No worries, thanks for your comment. I meant it is enabled by default as this is the current behaviour for SQLAlchemy, so I wanted to keep it. But for the rest (in this case Django), I have set it off by default, as it is the behaviour previous to this PR. Hope this makes sense.

Contributor

Hi, I would not consider this enhancement to be a concern. We have some other enhancements that added more data and haven't seen any issues.

The SQLAlchemy query capture was also submitted as a PR. In fact the SDK means to have query capture as the default behavior; it just has a different development and review cycle due to security concerns. I would suggest keeping the query capture behavior consistent, which is to have the parameterized query ready whenever possible. Thoughts?

Contributor Author (@hasier, Dec 27, 2018)

Regarding the default behaviour, I agree, I'll change it so that capture is enabled by default.

Regarding the parametrised queries, afaik there is no way to ensure that the incoming query is a parametrised one... DBAPI2 (PEP 249) recommends this behaviour, but then it is up to the implementation to decide how the queries and parameters are formatted.
Is this the concern you have? Would it be better to attempt the patch in a different place? The only one I can think of is to attempt to patch the Django ORM (I haven't tried, so I don't know how feasible it is). I mention the Django ORM as, for example, psycopg2 also allows different Cursor classes to be used with its connections, which might lead to the same situation again.
But it is also true that this X-Ray SDK is the one that controls which drivers are patched, as the patch needs to be included in a module here. As far as I can see, those that implement DBAPI2 and are included here for now are just the Django ORM, SQLAlchemy and SQLite, so I guess it should not be that big a concern?

Contributor

Yes. That's another thing I'm going to mention besides the default config value. For SQLAlchemy the patcher actually works on the implementation level and there are tests against parameterized queries. https://github.com/aws/aws-xray-sdk-python/blob/master/tests/ext/sqlalchemy/test_query.py.

DBAPI-level query capture probably will not always work, but if it does work for the current patchers (Django or SQLite) I'm OK to move forward with an internal security review. If not, I would suggest having the toggle function just for SQLAlchemy and everything else in separate PRs for each actual patcher.

You can see the SQLAlchemy query capture PR here: #34

Contributor Author

I removed the patch for DBAPI2 and just did so for Django. Please let me know if it makes sense now.

}

XRAY_NAMESPACE = 'XRAY_RECORDER'
3 changes: 2 additions & 1 deletion aws_xray_sdk/ext/sqlalchemy/util/decorators.py
@@ -47,7 +47,8 @@ def wrapper(*args, **kw):
if isinstance(arg, XRayQuery):
try:
sql = parse_bind(arg.session.bind)
sql['sanitized_query'] = str(arg)
if xray_recorder.stream_sql:
sql['sanitized_query'] = str(arg)
except Exception:
sql = None
if sql is not None:
13 changes: 9 additions & 4 deletions tests/ext/flask_sqlalchemy/test_query.py
@@ -22,10 +22,15 @@ class User(db.Model):
password = db.Column(db.String(255), nullable=False)


@pytest.fixture()
def session():
@pytest.fixture(
params=[
False,
True,
],
)
def session(request):
"""Test Fixture to Create DataBase Tables and start a trace segment"""
xray_recorder.configure(service='test', sampling=False, context=Context())
xray_recorder.configure(service='test', sampling=False, context=Context(), stream_sql=request.param)
xray_recorder.clear_trace_entities()
xray_recorder.begin_segment('SQLAlchemyTest')
db.create_all()
@@ -41,8 +46,8 @@ def test_all(capsys, session):
User.query.all()
subsegment = find_subsegment_by_annotation(xray_recorder.current_segment(), 'sqlalchemy', 'sqlalchemy.orm.query.all')
assert subsegment['annotations']['sqlalchemy'] == 'sqlalchemy.orm.query.all'
assert subsegment['sql']['sanitized_query']
assert subsegment['sql']['url']
assert bool(subsegment['sql'].get('sanitized_query', None)) is xray_recorder.stream_sql


def test_add(capsys, session):
Expand Down
25 changes: 22 additions & 3 deletions tests/ext/psycopg2/test_psycopg2.py
@@ -12,20 +12,34 @@
patch(('psycopg2',))


@pytest.fixture(autouse=True)
def construct_ctx():
@pytest.fixture(
autouse=True,
params=[
False,
True,
],
)
def construct_ctx(request):
"""
Clean up context storage on each test run and begin a segment
so that later subsegment can be attached. After each test run
it cleans up context storage again.
"""
xray_recorder.configure(service='test', sampling=False, context=Context())
xray_recorder.configure(service='test', sampling=False, context=Context(), stream_sql=request.param)
xray_recorder.clear_trace_entities()
xray_recorder.begin_segment('name')
yield
xray_recorder.clear_trace_entities()


def _assert_query(sql_meta, query):
if xray_recorder.stream_sql:
assert 'sanitized_query' in sql_meta
assert sql_meta['sanitized_query'] == query
else:
assert 'sanitized_query' not in sql_meta


def test_execute_dsn_kwargs():
q = 'SELECT 1'
with testing.postgresql.Postgresql() as postgresql:
@@ -46,6 +60,7 @@ def test_execute_dsn_kwargs():
assert sql['user'] == dsn['user']
assert sql['url'] == url
assert sql['database_version']
_assert_query(sql, q)


def test_execute_dsn_kwargs_alt_dbname():
@@ -72,6 +87,7 @@ def test_execute_dsn_kwargs_alt_dbname():
assert sql['user'] == dsn['user']
assert sql['url'] == url
assert sql['database_version']
_assert_query(sql, q)


def test_execute_dsn_string():
@@ -94,6 +110,7 @@ def test_execute_dsn_string():
assert sql['user'] == dsn['user']
assert sql['url'] == url
assert sql['database_version']
_assert_query(sql, q)


def test_execute_in_pool():
@@ -117,6 +134,7 @@ def test_execute_in_pool():
assert sql['user'] == dsn['user']
assert sql['url'] == url
assert sql['database_version']
_assert_query(sql, q)


def test_execute_bad_query():
@@ -145,6 +163,7 @@ def test_execute_bad_query():

exception = subsegment.cause['exceptions'][0]
assert exception.type == 'ProgrammingError'
_assert_query(sql, q)


def test_register_extensions():