From 21010959f304ee0356a89900873febcac936bb3c Mon Sep 17 00:00:00 2001 From: Alex Hoppen Date: Fri, 6 Dec 2024 16:37:33 -0800 Subject: [PATCH] Read stdout from sourcekit-lsp in binary mode instead of text mode `self.process.stdout.read` reads a number of Unicode characters, but the `Content-Length` specifies the number of bytes. This started causing deterministic issues since https://github.com/swiftlang/sourcekit-lsp/pull/1861 is logging messages from package loading, which contain emojis. Switch `stdin` and `stdout` to binary mode and manually encode/decode them as UTF-8 when needed. --- test-sourcekit-lsp/test-sourcekit-lsp.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/test-sourcekit-lsp/test-sourcekit-lsp.py b/test-sourcekit-lsp/test-sourcekit-lsp.py index 969388d..51c1d26 100644 --- a/test-sourcekit-lsp/test-sourcekit-lsp.py +++ b/test-sourcekit-lsp/test-sourcekit-lsp.py @@ -30,8 +30,7 @@ def __init__(self, server_path: str): self.process = subprocess.Popen( [server_path], stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - encoding="utf-8", + stdout=subprocess.PIPE ) def send_data(self, dict: Dict[str, object]): @@ -40,8 +39,8 @@ def send_data(self, dict: Dict[str, object]): """ assert self.process.stdin body = json.dumps(dict) - data = "Content-Length: {}\r\n\r\n{}".format(len(body), body) - self.process.stdin.write(data) + data = f"Content-Length: {len(body)}\r\n\r\n{body}" + self.process.stdin.write(data.encode('utf-8')) self.process.stdin.flush() def read_message_from_lsp_server(self) -> str: @@ -51,17 +50,16 @@ def read_message_from_lsp_server(self) -> str: """ assert self.process.stdout # Read Content-Length: 123\r\n - # Note: Even though the Content-Length header ends with \r\n, `readline` returns it with a single \n. 
- header = self.process.stdout.readline() - match = re.match(r"Content-Length: ([0-9]+)\n$", header) + header = self.process.stdout.readline().decode('utf-8') + match = re.match(r"Content-Length: ([0-9]+)\r\n$", header) assert match, f"Expected Content-Length header, got '{header}'" # The Content-Length header is followed by an empty line - empty_line = self.process.stdout.readline() - assert empty_line == "\n", f"Expected empty line, got '{empty_line}'" + empty_line = self.process.stdout.readline().decode('utf-8') + assert empty_line == "\r\n", f"Expected empty line, got '{empty_line}'" # Read the actual response - return self.process.stdout.read(int(match.group(1))) + return self.process.stdout.read(int(match.group(1))).decode('utf-8') def read_request_reply_from_lsp_server(self, request_id: int) -> str: """ @@ -71,7 +69,7 @@ def read_request_reply_from_lsp_server(self, request_id: int) -> str: message = self.read_message_from_lsp_server() message_obj = json.loads(message) if "result" not in message_obj: - # We received a message that wasn't the request reply. + # We received a message that wasn't the request reply. # Log it, ignore it and wait for the next message. print("Received message") print(message)