Skip to content

Commit 36d5019

Browse files
Fix stdio encoding issue: Enforce explicit UTF-8 for correct Unicode handling (#73)
* fix: Enforce UTF-8 encoding for stdio transport This change replaces the default system encoding with an explicit UTF8Encoding (without BOM) for both client and server transports. This ensures proper handling of Unicode characters, including Chinese characters and emoji. - Use UTF8Encoding explicitly for StreamReader and StreamWriter. - Add tests for Chinese characters ("上下文伺服器") and emoji (🔍🚀👍) to confirm the fix. Fixes #35. * Remove relaxed JSON encoder * Address PR feedback --------- Co-authored-by: Stephen Toub <[email protected]>
1 parent 96ba2fa commit 36d5019

File tree

5 files changed

+285
-105
lines changed

5 files changed

+285
-105
lines changed

Diff for: src/ModelContextProtocol/Logging/Log.cs

+32-1
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ internal static partial class Log
114114
internal static partial void TransportNotConnected(this ILogger logger, string endpointName);
115115

116116
[LoggerMessage(Level = LogLevel.Information, Message = "Transport sending message for {endpointName} with ID {messageId}, JSON {json}")]
117-
internal static partial void TransportSendingMessage(this ILogger logger, string endpointName, string messageId, string json);
117+
internal static partial void TransportSendingMessage(this ILogger logger, string endpointName, string messageId, string? json = null);
118118

119119
[LoggerMessage(Level = LogLevel.Information, Message = "Transport message sent for {endpointName} with ID {messageId}")]
120120
internal static partial void TransportSentMessage(this ILogger logger, string endpointName, string messageId);
@@ -347,4 +347,35 @@ public static partial void SSETransportPostNotAccepted(
347347
string endpointName,
348348
string messageId,
349349
string responseContent);
350+
351+
/// <summary>
352+
/// Logs the byte representation of a message in UTF-8 encoding.
353+
/// </summary>
354+
/// <param name="logger">The logger to use.</param>
355+
/// <param name="endpointName">The name of the endpoint.</param>
356+
/// <param name="byteRepresentation">The byte representation as a hex string.</param>
357+
[LoggerMessage(EventId = 39000, Level = LogLevel.Trace, Message = "Transport {EndpointName}: Message bytes (UTF-8): {ByteRepresentation}")]
358+
private static partial void TransportMessageBytes(this ILogger logger, string endpointName, string byteRepresentation);
359+
360+
/// <summary>
361+
/// Logs the byte representation of a message for diagnostic purposes.
362+
/// This is useful for diagnosing encoding issues with non-ASCII characters.
363+
/// </summary>
364+
/// <param name="logger">The logger to use.</param>
365+
/// <param name="endpointName">The name of the endpoint.</param>
366+
/// <param name="message">The message to log bytes for.</param>
367+
internal static void TransportMessageBytesUtf8(this ILogger logger, string endpointName, string message)
368+
{
369+
if (logger.IsEnabled(LogLevel.Trace))
370+
{
371+
var bytes = System.Text.Encoding.UTF8.GetBytes(message);
372+
var byteRepresentation =
373+
#if NET
374+
Convert.ToHexString(bytes);
375+
#else
376+
BitConverter.ToString(bytes).Replace("-", " ");
377+
#endif
378+
logger.TransportMessageBytes(endpointName, byteRepresentation);
379+
}
380+
}
350381
}

Diff for: src/ModelContextProtocol/Protocol/Transport/StdioClientTransport.cs

+57-27
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
using System.Diagnostics;
2-
using System.Text.Json;
1+
using Microsoft.Extensions.Logging;
2+
using Microsoft.Extensions.Logging.Abstractions;
33
using ModelContextProtocol.Configuration;
44
using ModelContextProtocol.Logging;
55
using ModelContextProtocol.Protocol.Messages;
66
using ModelContextProtocol.Utils;
77
using ModelContextProtocol.Utils.Json;
8-
using Microsoft.Extensions.Logging;
9-
using Microsoft.Extensions.Logging.Abstractions;
8+
using System.Diagnostics;
9+
using System.Text;
10+
using System.Text.Json;
1011

1112
namespace ModelContextProtocol.Protocol.Transport;
1213

@@ -59,6 +60,8 @@ public async Task ConnectAsync(CancellationToken cancellationToken = default)
5960

6061
_shutdownCts = new CancellationTokenSource();
6162

63+
UTF8Encoding noBomUTF8 = new(encoderShouldEmitUTF8Identifier: false);
64+
6265
var startInfo = new ProcessStartInfo
6366
{
6467
FileName = _options.Command,
@@ -68,6 +71,11 @@ public async Task ConnectAsync(CancellationToken cancellationToken = default)
6871
UseShellExecute = false,
6972
CreateNoWindow = true,
7073
WorkingDirectory = _options.WorkingDirectory ?? Environment.CurrentDirectory,
74+
StandardOutputEncoding = noBomUTF8,
75+
StandardErrorEncoding = noBomUTF8,
76+
#if NET
77+
StandardInputEncoding = noBomUTF8,
78+
#endif
7179
};
7280

7381
if (!string.IsNullOrWhiteSpace(_options.Arguments))
@@ -92,13 +100,35 @@ public async Task ConnectAsync(CancellationToken cancellationToken = default)
92100
// Set up error logging
93101
_process.ErrorDataReceived += (sender, args) => _logger.TransportError(EndpointName, args.Data ?? "(no data)");
94102

95-
if (!_process.Start())
103+
// We need both stdin and stdout to use a no-BOM UTF-8 encoding. On .NET Core,
104+
// we can use ProcessStartInfo.StandardOutputEncoding/StandardInputEncoding, but
105+
// StandardInputEncoding doesn't exist on .NET Framework; instead, it always picks
106+
// up the encoding from Console.InputEncoding. As such, when not targeting .NET Core,
107+
// we temporarily change Console.InputEncoding to no-BOM UTF-8 around the Process.Start
108+
// call, to ensure it picks up the correct encoding.
109+
#if NET
110+
_processStarted = _process.Start();
111+
#else
112+
Encoding originalInputEncoding = Console.InputEncoding;
113+
try
114+
{
115+
Console.InputEncoding = noBomUTF8;
116+
_processStarted = _process.Start();
117+
}
118+
finally
119+
{
120+
Console.InputEncoding = originalInputEncoding;
121+
}
122+
#endif
123+
124+
if (!_processStarted)
96125
{
97126
_logger.TransportProcessStartFailed(EndpointName);
98127
throw new McpTransportException("Failed to start MCP server process");
99128
}
129+
100130
_logger.TransportProcessStarted(EndpointName, _process.Id);
101-
_processStarted = true;
131+
102132
_process.BeginErrorReadLine();
103133

104134
// Start reading messages in the background
@@ -134,9 +164,10 @@ public override async Task SendMessageAsync(IJsonRpcMessage message, Cancellatio
134164
{
135165
var json = JsonSerializer.Serialize(message, _jsonOptions.GetTypeInfo<IJsonRpcMessage>());
136166
_logger.TransportSendingMessage(EndpointName, id, json);
167+
_logger.TransportMessageBytesUtf8(EndpointName, json);
137168

138-
// Write the message followed by a newline
139-
await _process!.StandardInput.WriteLineAsync(json.AsMemory(), cancellationToken).ConfigureAwait(false);
169+
// Write the message followed by a newline using our UTF-8 writer
170+
await _process!.StandardInput.WriteLineAsync(json).ConfigureAwait(false);
140171
await _process.StandardInput.FlushAsync(cancellationToken).ConfigureAwait(false);
141172

142173
_logger.TransportSentMessage(EndpointName, id);
@@ -161,12 +192,10 @@ private async Task ReadMessagesAsync(CancellationToken cancellationToken)
161192
{
162193
_logger.TransportEnteringReadMessagesLoop(EndpointName);
163194

164-
using var reader = _process!.StandardOutput;
165-
166-
while (!cancellationToken.IsCancellationRequested && !_process.HasExited)
195+
while (!cancellationToken.IsCancellationRequested && !_process!.HasExited)
167196
{
168197
_logger.TransportWaitingForMessage(EndpointName);
169-
var line = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false);
198+
var line = await _process.StandardOutput.ReadLineAsync(cancellationToken).ConfigureAwait(false);
170199
if (line == null)
171200
{
172201
_logger.TransportEndOfStream(EndpointName);
@@ -179,6 +208,7 @@ private async Task ReadMessagesAsync(CancellationToken cancellationToken)
179208
}
180209

181210
_logger.TransportReceivedMessage(EndpointName, line);
211+
_logger.TransportMessageBytesUtf8(EndpointName, line);
182212

183213
await ProcessMessageAsync(line, cancellationToken).ConfigureAwait(false);
184214
}
@@ -230,28 +260,27 @@ private async Task ProcessMessageAsync(string line, CancellationToken cancellati
230260
private async Task CleanupAsync(CancellationToken cancellationToken)
231261
{
232262
_logger.TransportCleaningUp(EndpointName);
233-
if (_process != null && _processStarted && !_process.HasExited)
263+
264+
if (_process is Process process && _processStarted && !process.HasExited)
234265
{
235266
try
236267
{
237-
// Try to close stdin to signal the process to exit
238-
_logger.TransportClosingStdin(EndpointName);
239-
_process.StandardInput.Close();
240-
241268
// Wait for the process to exit
242269
_logger.TransportWaitingForShutdown(EndpointName);
243270

244271
// Kill the while process tree because the process may spawn child processes
245272
// and Node.js does not kill its children when it exits properly
246-
_process.KillTree(_options.ShutdownTimeout);
273+
process.KillTree(_options.ShutdownTimeout);
247274
}
248275
catch (Exception ex)
249276
{
250277
_logger.TransportShutdownFailed(EndpointName, ex);
251278
}
252-
253-
_process.Dispose();
254-
_process = null;
279+
finally
280+
{
281+
process.Dispose();
282+
_process = null;
283+
}
255284
}
256285

257286
if (_shutdownCts is { } shutdownCts)
@@ -261,29 +290,30 @@ private async Task CleanupAsync(CancellationToken cancellationToken)
261290
_shutdownCts = null;
262291
}
263292

264-
if (_readTask != null)
293+
if (_readTask is Task readTask)
265294
{
266295
try
267296
{
268297
_logger.TransportWaitingForReadTask(EndpointName);
269-
await _readTask.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
298+
await readTask.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
270299
}
271300
catch (TimeoutException)
272301
{
273302
_logger.TransportCleanupReadTaskTimeout(EndpointName);
274-
// Continue with cleanup
275303
}
276304
catch (OperationCanceledException)
277305
{
278306
_logger.TransportCleanupReadTaskCancelled(EndpointName);
279-
// Ignore cancellation
280307
}
281308
catch (Exception ex)
282309
{
283310
_logger.TransportCleanupReadTaskFailed(EndpointName, ex);
284311
}
285-
_readTask = null;
286-
_logger.TransportReadTaskCleanedUp(EndpointName);
312+
finally
313+
{
314+
_logger.TransportReadTaskCleanedUp(EndpointName);
315+
_readTask = null;
316+
}
287317
}
288318

289319
SetConnected(false);

Diff for: src/ModelContextProtocol/Protocol/Transport/StdioServerTransport.cs

+63-18
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
using System.Text.Json;
1+
using Microsoft.Extensions.Logging;
2+
using Microsoft.Extensions.Logging.Abstractions;
3+
using Microsoft.Extensions.Options;
24
using ModelContextProtocol.Logging;
35
using ModelContextProtocol.Protocol.Messages;
46
using ModelContextProtocol.Server;
57
using ModelContextProtocol.Utils;
68
using ModelContextProtocol.Utils.Json;
7-
using Microsoft.Extensions.Logging;
8-
using Microsoft.Extensions.Logging.Abstractions;
9-
using Microsoft.Extensions.Options;
9+
using System.Text;
10+
using System.Text.Json;
1011

1112
namespace ModelContextProtocol.Protocol.Transport;
1213

@@ -15,12 +16,14 @@ namespace ModelContextProtocol.Protocol.Transport;
1516
/// </summary>
1617
public sealed class StdioServerTransport : TransportBase, IServerTransport
1718
{
19+
private static readonly byte[] s_newlineBytes = "\n"u8.ToArray();
20+
1821
private readonly string _serverName;
1922
private readonly ILogger _logger;
2023

2124
private readonly JsonSerializerOptions _jsonOptions = McpJsonUtilities.DefaultOptions;
22-
private readonly TextReader _stdin = Console.In;
23-
private readonly TextWriter _stdout = Console.Out;
25+
private readonly TextReader _stdInReader;
26+
private readonly Stream _stdOutStream;
2427

2528
private Task? _readTask;
2629
private CancellationTokenSource? _shutdownCts;
@@ -83,16 +86,50 @@ public StdioServerTransport(string serverName, ILoggerFactory? loggerFactory = n
8386

8487
_serverName = serverName;
8588
_logger = (ILogger?)loggerFactory?.CreateLogger<StdioClientTransport>() ?? NullLogger.Instance;
89+
90+
// Get raw console streams and wrap them with UTF-8 encoding
91+
_stdInReader = new StreamReader(Console.OpenStandardInput(), Encoding.UTF8);
92+
_stdOutStream = new BufferedStream(Console.OpenStandardOutput());
93+
}
94+
95+
/// <summary>
96+
/// Initializes a new instance of the <see cref="StdioServerTransport"/> class with explicit input/output streams.
97+
/// </summary>
98+
/// <param name="serverName">The name of the server.</param>
99+
/// <param name="stdinStream">The input TextReader to use.</param>
100+
/// <param name="stdoutStream">The output TextWriter to use.</param>
101+
/// <param name="loggerFactory">Optional logger factory used for logging employed by the transport.</param>
102+
/// <exception cref="ArgumentNullException"><paramref name="serverName"/> is <see langword="null"/>.</exception>
103+
/// <remarks>
104+
/// <para>
105+
/// This constructor is useful for testing scenarios where you want to redirect input/output.
106+
/// </para>
107+
/// </remarks>
108+
public StdioServerTransport(string serverName, Stream stdinStream, Stream stdoutStream, ILoggerFactory? loggerFactory = null)
109+
: base(loggerFactory)
110+
{
111+
Throw.IfNull(serverName);
112+
Throw.IfNull(stdinStream);
113+
Throw.IfNull(stdoutStream);
114+
115+
_serverName = serverName;
116+
_logger = (ILogger?)loggerFactory?.CreateLogger<StdioClientTransport>() ?? NullLogger.Instance;
117+
118+
_stdInReader = new StreamReader(stdinStream, Encoding.UTF8);
119+
_stdOutStream = stdoutStream;
86120
}
87121

88122
/// <inheritdoc/>
89123
public Task StartListeningAsync(CancellationToken cancellationToken = default)
90124
{
125+
_logger.LogDebug("Starting StdioServerTransport listener for {EndpointName}", EndpointName);
126+
91127
_shutdownCts = new CancellationTokenSource();
92128

93129
_readTask = Task.Run(async () => await ReadMessagesAsync(_shutdownCts.Token).ConfigureAwait(false), CancellationToken.None);
94130

95131
SetConnected(true);
132+
_logger.LogDebug("StdioServerTransport now connected for {EndpointName}", EndpointName);
96133

97134
return Task.CompletedTask;
98135
}
@@ -114,11 +151,11 @@ public override async Task SendMessageAsync(IJsonRpcMessage message, Cancellatio
114151

115152
try
116153
{
117-
var json = JsonSerializer.Serialize(message, _jsonOptions.GetTypeInfo<IJsonRpcMessage>());
118-
_logger.TransportSendingMessage(EndpointName, id, json);
154+
_logger.TransportSendingMessage(EndpointName, id);
119155

120-
await _stdout.WriteLineAsync(json.AsMemory(), cancellationToken).ConfigureAwait(false);
121-
await _stdout.FlushAsync(cancellationToken).ConfigureAwait(false);
156+
await JsonSerializer.SerializeAsync(_stdOutStream, message, _jsonOptions.GetTypeInfo<IJsonRpcMessage>(), cancellationToken).ConfigureAwait(false);
157+
await _stdOutStream.WriteAsync(s_newlineBytes, cancellationToken).ConfigureAwait(false);
158+
await _stdOutStream.FlushAsync(cancellationToken).ConfigureAwait(false);;
122159

123160
_logger.TransportSentMessage(EndpointName, id);
124161
}
@@ -146,7 +183,7 @@ private async Task ReadMessagesAsync(CancellationToken cancellationToken)
146183
{
147184
_logger.TransportWaitingForMessage(EndpointName);
148185

149-
var reader = _stdin;
186+
var reader = _stdInReader;
150187
var line = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false);
151188
if (line == null)
152189
{
@@ -160,6 +197,7 @@ private async Task ReadMessagesAsync(CancellationToken cancellationToken)
160197
}
161198

162199
_logger.TransportReceivedMessage(EndpointName, line);
200+
_logger.TransportMessageBytesUtf8(EndpointName, line);
163201

164202
try
165203
{
@@ -207,19 +245,20 @@ private async Task CleanupAsync(CancellationToken cancellationToken)
207245
{
208246
_logger.TransportCleaningUp(EndpointName);
209247

210-
if (_shutdownCts != null)
248+
if (_shutdownCts is { } shutdownCts)
211249
{
212-
await _shutdownCts.CancelAsync().ConfigureAwait(false);
213-
_shutdownCts.Dispose();
250+
await shutdownCts.CancelAsync().ConfigureAwait(false);
251+
shutdownCts.Dispose();
252+
214253
_shutdownCts = null;
215254
}
216255

217-
if (_readTask != null)
256+
if (_readTask is { } readTask)
218257
{
219258
try
220259
{
221260
_logger.TransportWaitingForReadTask(EndpointName);
222-
await _readTask.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
261+
await readTask.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
223262
}
224263
catch (TimeoutException)
225264
{
@@ -235,10 +274,16 @@ private async Task CleanupAsync(CancellationToken cancellationToken)
235274
{
236275
_logger.TransportCleanupReadTaskFailed(EndpointName, ex);
237276
}
238-
_readTask = null;
239-
_logger.TransportReadTaskCleanedUp(EndpointName);
277+
finally
278+
{
279+
_logger.TransportReadTaskCleanedUp(EndpointName);
280+
_readTask = null;
281+
}
240282
}
241283

284+
_stdInReader?.Dispose();
285+
_stdOutStream?.Dispose();
286+
242287
SetConnected(false);
243288
_logger.TransportCleanedUp(EndpointName);
244289
}

0 commit comments

Comments
 (0)