Skip to content

Commit 2b0e377

Browse files
authored
5087 libpq pool leak (hasura#5089)
Shrink libpq buffers to 1MB before returning connection to pool. Closes hasura#5087 See: hasura/pg-client-hs#19 Also related: hasura#3388 hasura#4077
1 parent bc234b0 commit 2b0e377

File tree

6 files changed

+58
-24
lines changed

6 files changed

+58
-24
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
(Add entries here in the order of: server, console, cli, docs, others)
88

9+
- server: add new `--conn-lifetime` and `HASURA_GRAPHQL_PG_CONN_LIFETIME` options for expiring connections after some amount of active time (#5087)
10+
- server: shrink libpq connection request/response buffers back to 1MB if they grow beyond 2MB, fixing leak-like behavior on active servers (#5087)
911
- docs: add note for managed databases in postgres requirements (close #1677, #3783) (#5228)
1012

1113

scripts/dev.sh

+24-19
Original file line numberDiff line numberDiff line change
@@ -142,16 +142,19 @@ fi
142142
# export for psql, etc.
143143
export PGPASSWORD=postgres
144144

145-
DB_URL="postgres://postgres:$PGPASSWORD@127.0.0.1:$PG_PORT/postgres"
145+
# The URL for the postgres server we might launch
146+
CONTAINER_DB_URL="postgres://postgres:$PGPASSWORD@127.0.0.1:$PG_PORT/postgres"
147+
# ... but we might like to use a different PG instance when just launching graphql-engine:
148+
HASURA_GRAPHQL_DATABASE_URL=${HASURA_GRAPHQL_DATABASE_URL-$CONTAINER_DB_URL}
146149

147150
PG_CONTAINER_NAME="hasura-dev-postgres-$PG_PORT"
148151

149-
# We can remove psql as a dependency:
150-
DOCKER_PSQL="docker exec -u postgres -it $PG_CONTAINER_NAME psql -p $PG_PORT"
152+
# We can remove psql as a dependency by using it from the (running) PG container:
153+
DOCKER_PSQL="docker exec -u postgres -it $PG_CONTAINER_NAME psql $HASURA_GRAPHQL_DATABASE_URL"
151154

152-
function wait_docker_postgres {
155+
function wait_postgres {
153156
echo -n "Waiting for postgres to come up"
154-
until $DOCKER_PSQL postgres -c '\l' &>/dev/null; do
157+
until ( $DOCKER_PSQL -c '\l' || psql $HASURA_GRAPHQL_DATABASE_URL -c '\l') &>/dev/null; do
155158
echo -n '.' && sleep 0.2
156159
done
157160
echo " Ok"
@@ -202,26 +205,28 @@ if [ "$MODE" = "graphql-engine" ]; then
202205
}
203206
trap cleanup EXIT
204207

205-
208+
export HASURA_GRAPHQL_DATABASE_URL # Defined above
206209
export HASURA_GRAPHQL_SERVER_PORT=${HASURA_GRAPHQL_SERVER_PORT-8181}
207210

208-
echo_pretty "We will connect to postgres container '$PG_CONTAINER_NAME'"
209-
echo_pretty "If you haven't yet, please launch a postgres container in a separate terminal with:"
211+
echo_pretty "We will connect to postgres at '$HASURA_GRAPHQL_DATABASE_URL'"
212+
echo_pretty "If you haven't overridden HASURA_GRAPHQL_DATABASE_URL, you can"
213+
echo_pretty "launch a fresh postgres container for us to connect to, in a"
214+
echo_pretty "separate terminal with:"
210215
echo_pretty " $ $0 postgres"
211-
echo_pretty "or press CTRL-C and invoke graphql-engine manually"
212216
echo_pretty ""
213217

214-
RUN_INVOCATION=(cabal new-run --project-file=cabal.project.dev-sh --RTS -- exe:graphql-engine +RTS -N -T -RTS
215-
--database-url="$DB_URL" serve
216-
--enable-console --console-assets-dir "$PROJECT_ROOT/console/static/dist")
218+
RUN_INVOCATION=(cabal new-run --project-file=cabal.project.dev-sh --RTS --
219+
exe:graphql-engine +RTS -N -T -s -RTS serve
220+
--enable-console --console-assets-dir "$PROJECT_ROOT/console/static/dist"
221+
)
217222

218223
echo_pretty 'About to do:'
219224
echo_pretty ' $ cabal new-build --project-file=cabal.project.dev-sh exe:graphql-engine'
220225
echo_pretty " $ ${RUN_INVOCATION[*]}"
221226
echo_pretty ''
222227

223228
cabal new-build --project-file=cabal.project.dev-sh exe:graphql-engine
224-
wait_docker_postgres
229+
wait_postgres
225230

226231
# Print helpful info after startup logs so it's visible:
227232
{
@@ -337,7 +342,7 @@ EOL
337342

338343
if [ "$MODE" = "postgres" ]; then
339344
launch_postgres_container
340-
wait_docker_postgres
345+
wait_postgres
341346
echo_pretty "Postgres logs will start to show up in realtime here. Press CTRL-C to exit and "
342347
echo_pretty "shutdown this container."
343348
echo_pretty ""
@@ -347,7 +352,7 @@ if [ "$MODE" = "postgres" ]; then
347352
echo_pretty " $ PGPASSWORD="$PGPASSWORD" psql -h 127.0.0.1 -p "$PG_PORT" postgres -U postgres"
348353
echo_pretty ""
349354
echo_pretty "Here is the database URL:"
350-
echo_pretty " $DB_URL"
355+
echo_pretty " $CONTAINER_DB_URL"
351356
echo_pretty ""
352357
echo_pretty "If you want to launch a 'graphql-engine' that works with this database:"
353358
echo_pretty " $ $0 graphql-engine"
@@ -375,19 +380,19 @@ elif [ "$MODE" = "test" ]; then
375380
# rebuilding twice... ugh
376381
cabal new-build --project-file=cabal.project.dev-sh exe:graphql-engine test:graphql-engine-tests
377382
launch_postgres_container
378-
wait_docker_postgres
383+
wait_postgres
379384

380385
# These also depend on a running DB:
381386
if [ "$RUN_UNIT_TESTS" = true ]; then
382387
echo_pretty "Running Haskell test suite"
383-
HASURA_GRAPHQL_DATABASE_URL="$DB_URL" cabal new-run --project-file=cabal.project.dev-sh -- test:graphql-engine-tests
388+
HASURA_GRAPHQL_DATABASE_URL="$CONTAINER_DB_URL" cabal new-run --project-file=cabal.project.dev-sh -- test:graphql-engine-tests
384389
fi
385390

386391
if [ "$RUN_INTEGRATION_TESTS" = true ]; then
387392
GRAPHQL_ENGINE_TEST_LOG=/tmp/hasura-dev-test-engine.log
388393
echo_pretty "Starting graphql-engine, logging to $GRAPHQL_ENGINE_TEST_LOG"
389394
export HASURA_GRAPHQL_SERVER_PORT=8088
390-
cabal new-run --project-file=cabal.project.dev-sh -- exe:graphql-engine --database-url="$DB_URL" serve --stringify-numeric-types \
395+
cabal new-run --project-file=cabal.project.dev-sh -- exe:graphql-engine --database-url="$CONTAINER_DB_URL" serve --stringify-numeric-types \
391396
--enable-console --console-assets-dir ../console/static/dist \
392397
&> "$GRAPHQL_ENGINE_TEST_LOG" & GRAPHQL_ENGINE_PID=$!
393398

@@ -449,7 +454,7 @@ elif [ "$MODE" = "test" ]; then
449454

450455

451456
# TODO MAYBE: fix deprecation warnings, make them an error
452-
if pytest -W ignore::DeprecationWarning --hge-urls http://127.0.0.1:$HASURA_GRAPHQL_SERVER_PORT --pg-urls "$DB_URL" $PYTEST_ARGS; then
457+
if pytest -W ignore::DeprecationWarning --hge-urls http://127.0.0.1:$HASURA_GRAPHQL_SERVER_PORT --pg-urls "$CONTAINER_DB_URL" $PYTEST_ARGS; then
453458
PASSED=true
454459
else
455460
PASSED=false

server/cabal.project

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ package graphql-engine
3636
source-repository-package
3737
type: git
3838
location: https://github.com/hasura/pg-client-hs.git
39-
tag: 70a849d09bea9461e72c5a5bbde06df65aab61c0
39+
tag: cbfc69b935d19dc40be8cdcc73a70b81cf511d34
4040

4141
source-repository-package
4242
type: git

server/graphql-engine.cabal

+2
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ common common-exe
4242
ghc-options:
4343
-threaded -rtsopts
4444
-- Re. `-I2` see #2565
45+
-- TODO try the new: -Iw flag:
46+
-- http://downloads.haskell.org/~ghc/latest/docs/html/users_guide/runtime_control.html#rts-flag--Iw%20%E2%9F%A8seconds%E2%9F%A9
4547
--
4648
-- `-qn2` limits the parallel GC to at most 2 capabilities. This came up in #3354/#3394, as the
4749
-- parallel GC was causing significant performance overhead. Folks in #ghc on freenode advised

server/src-lib/Hasura/Server/Init.hs

+20-4
Original file line numberDiff line numberDiff line change
@@ -175,14 +175,15 @@ mkServeOptions rso = do
175175
#else
176176
defaultAPIs = [METADATA,GRAPHQL,PGDUMP,CONFIG]
177177
#endif
178-
mkConnParams (RawConnParams s c i p) = do
178+
mkConnParams (RawConnParams s c i cl p) = do
179179
stripes <- fromMaybe 1 <$> withEnv s (fst pgStripesEnv)
180180
-- Note: by Little's Law we can expect e.g. (with 50 max connections) a
181181
-- hard throughput cap at 1000RPS when db queries take 50ms on average:
182182
conns <- fromMaybe 50 <$> withEnv c (fst pgConnsEnv)
183183
iTime <- fromMaybe 180 <$> withEnv i (fst pgTimeoutEnv)
184+
connLifetime <- withEnv cl (fst pgConnLifetimeEnv)
184185
allowPrepare <- fromMaybe True <$> withEnv p (fst pgUsePrepareEnv)
185-
return $ Q.ConnParams stripes conns iTime allowPrepare
186+
return $ Q.ConnParams stripes conns iTime allowPrepare connLifetime
186187

187188
mkAuthHook (AuthHookG mUrl mType) = do
188189
mUrlEnv <- withEnv mUrl $ fst authHookEnv
@@ -359,6 +360,13 @@ pgTimeoutEnv =
359360
, "Each connection's idle time before it is closed (default: 180 sec)"
360361
)
361362

363+
pgConnLifetimeEnv :: (String, String)
364+
pgConnLifetimeEnv =
365+
( "HASURA_GRAPHQL_PG_CONN_LIFETIME"
366+
, "Time from connection creation after which the connection should be destroyed and a new one "
367+
<> "created. (default: none)"
368+
)
369+
362370
pgUsePrepareEnv :: (String, String)
363371
pgUsePrepareEnv =
364372
( "HASURA_GRAPHQL_USE_PREPARED_STATEMENTS"
@@ -574,7 +582,7 @@ parseTxIsolation = optional $
574582

575583
parseConnParams :: Parser RawConnParams
576584
parseConnParams =
577-
RawConnParams <$> stripes <*> conns <*> timeout <*> allowPrepare
585+
RawConnParams <$> stripes <*> conns <*> idleTimeout <*> connLifetime <*> allowPrepare
578586
where
579587
stripes = optional $
580588
option auto
@@ -592,12 +600,20 @@ parseConnParams =
592600
help (snd pgConnsEnv)
593601
)
594602

595-
timeout = optional $
603+
idleTimeout = optional $
596604
option auto
597605
( long "timeout" <>
598606
metavar "<SECONDS>" <>
599607
help (snd pgTimeoutEnv)
600608
)
609+
610+
connLifetime = fmap (fmap fromRational) $ optional $
611+
option auto
612+
( long "conn-lifetime" <>
613+
metavar "<SECONDS>" <>
614+
help (snd pgConnLifetimeEnv)
615+
)
616+
601617
allowPrepare = optional $
602618
option (eitherReader parseStringAsBool)
603619
( long "use-prepared-statements" <>

server/src-lib/Hasura/Server/Init/Config.hs

+9
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import qualified Data.Text as T
99
import qualified Database.PG.Query as Q
1010

1111
import Data.Char (toLower)
12+
import Data.Time
1213
import Network.Wai.Handler.Warp (HostPreference)
1314

1415
import qualified Hasura.Cache as Cache
@@ -26,6 +27,9 @@ data RawConnParams
2627
{ rcpStripes :: !(Maybe Int)
2728
, rcpConns :: !(Maybe Int)
2829
, rcpIdleTime :: !(Maybe Int)
30+
, rcpConnLifetime :: !(Maybe NominalDiffTime)
31+
-- ^ Time from connection creation after which to destroy a connection and
32+
-- choose a different/new one.
2933
, rcpAllowPrepare :: !(Maybe Bool)
3034
} deriving (Show, Eq)
3135

@@ -202,6 +206,11 @@ readJson = J.eitherDecodeStrict . txtToBs . T.pack
202206
class FromEnv a where
203207
fromEnv :: String -> Either String a
204208

209+
-- Deserialize from seconds, in the usual way
210+
instance FromEnv NominalDiffTime where
211+
fromEnv s = maybe (Left "could not parse as a Double") (Right . realToFrac) $
212+
(readMaybe s :: Maybe Double)
213+
205214
instance FromEnv String where
206215
fromEnv = Right
207216

0 commit comments

Comments
 (0)