Skip to content

Add AsyncBytes, AsyncLineSequence, AsyncCharacterSequence, and AsyncUnicodeScalarSequence #3036

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
24 changes: 24 additions & 0 deletions Foundation.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,12 @@
61E011821C1B599A000037DD /* CFMachPort.c in Sources */ = {isa = PBXBuildFile; fileRef = 5B5D88D01BBC9AAC00234F36 /* CFMachPort.c */; };
63DCE9D21EAA430100E9CB02 /* ISO8601DateFormatter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 63DCE9D11EAA430100E9CB02 /* ISO8601DateFormatter.swift */; };
63DCE9D41EAA432400E9CB02 /* TestISO8601DateFormatter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 63DCE9D31EAA432400E9CB02 /* TestISO8601DateFormatter.swift */; };
63FAA81926C3398400EE3DAD /* AsyncUnicodeScalarSequence.swift in Sources */ = {isa = PBXBuildFile; fileRef = 63FAA81426C3398300EE3DAD /* AsyncUnicodeScalarSequence.swift */; };
63FAA81A26C3398400EE3DAD /* AsyncLineSequence.swift in Sources */ = {isa = PBXBuildFile; fileRef = 63FAA81526C3398400EE3DAD /* AsyncLineSequence.swift */; };
63FAA81B26C3398400EE3DAD /* AsyncCharacterSequence.swift in Sources */ = {isa = PBXBuildFile; fileRef = 63FAA81626C3398400EE3DAD /* AsyncCharacterSequence.swift */; };
63FAA81C26C3398400EE3DAD /* FileHandle+Async.swift in Sources */ = {isa = PBXBuildFile; fileRef = 63FAA81726C3398400EE3DAD /* FileHandle+Async.swift */; };
63FAA81D26C3398400EE3DAD /* URL+AsyncBytes.swift in Sources */ = {isa = PBXBuildFile; fileRef = 63FAA81826C3398400EE3DAD /* URL+AsyncBytes.swift */; };
63FAA81F26C33DE500EE3DAD /* TestFileHandle+Async.swift in Sources */ = {isa = PBXBuildFile; fileRef = 63FAA81E26C33DE500EE3DAD /* TestFileHandle+Async.swift */; };
659FB6DE2405E5E300F5F63F /* TestBridging.swift in Sources */ = {isa = PBXBuildFile; fileRef = 659FB6DD2405E5E200F5F63F /* TestBridging.swift */; };
684C79011F62B611005BD73E /* TestNSNumberBridging.swift in Sources */ = {isa = PBXBuildFile; fileRef = 684C79001F62B611005BD73E /* TestNSNumberBridging.swift */; };
6EB768281D18C12C00D4B719 /* UUID.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6EB768271D18C12C00D4B719 /* UUID.swift */; };
Expand Down Expand Up @@ -1102,6 +1108,12 @@
61F8AE7C1C180FC600FB62F0 /* TestNotificationCenter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestNotificationCenter.swift; sourceTree = "<group>"; };
63DCE9D11EAA430100E9CB02 /* ISO8601DateFormatter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ISO8601DateFormatter.swift; sourceTree = "<group>"; };
63DCE9D31EAA432400E9CB02 /* TestISO8601DateFormatter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestISO8601DateFormatter.swift; sourceTree = "<group>"; };
63FAA81426C3398300EE3DAD /* AsyncUnicodeScalarSequence.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AsyncUnicodeScalarSequence.swift; sourceTree = "<group>"; };
63FAA81526C3398400EE3DAD /* AsyncLineSequence.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AsyncLineSequence.swift; sourceTree = "<group>"; };
63FAA81626C3398400EE3DAD /* AsyncCharacterSequence.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AsyncCharacterSequence.swift; sourceTree = "<group>"; };
63FAA81726C3398400EE3DAD /* FileHandle+Async.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "FileHandle+Async.swift"; sourceTree = "<group>"; };
63FAA81826C3398400EE3DAD /* URL+AsyncBytes.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "URL+AsyncBytes.swift"; sourceTree = "<group>"; };
63FAA81E26C33DE500EE3DAD /* TestFileHandle+Async.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "TestFileHandle+Async.swift"; sourceTree = "<group>"; };
659FB6DD2405E5E200F5F63F /* TestBridging.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestBridging.swift; sourceTree = "<group>"; };
684C79001F62B611005BD73E /* TestNSNumberBridging.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestNSNumberBridging.swift; sourceTree = "<group>"; };
6E203B8C1C1303BB003B2576 /* TestBundle.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestBundle.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -1856,6 +1868,7 @@
B9D9733E23D19D3900AB249C /* TestDimension.swift */,
BDBB658F1E256BFA001A7286 /* TestEnergyFormatter.swift */,
D512D17B1CD883F00032E6A5 /* TestFileHandle.swift */,
63FAA81E26C33DE500EE3DAD /* TestFileHandle+Async.swift */,
525AECEB1BF2C96400D15BB0 /* TestFileManager.swift */,
BF85E9D71FBDCC2000A79793 /* TestHost.swift */,
848A30571C137B3500C83206 /* TestHTTPCookie.swift */,
Expand Down Expand Up @@ -2118,6 +2131,11 @@
EADE0B5B1BD15DFF00C49C64 /* EnergyFormatter.swift */,
5B4092111D1B30B40022B067 /* ExtraStringAPIs.swift */,
EADE0B5D1BD15DFF00C49C64 /* FileHandle.swift */,
63FAA81626C3398400EE3DAD /* AsyncCharacterSequence.swift */,
63FAA81526C3398400EE3DAD /* AsyncLineSequence.swift */,
63FAA81426C3398300EE3DAD /* AsyncUnicodeScalarSequence.swift */,
63FAA81726C3398400EE3DAD /* FileHandle+Async.swift */,
63FAA81826C3398400EE3DAD /* URL+AsyncBytes.swift */,
EADE0B5E1BD15DFF00C49C64 /* FileManager.swift */,
91B668A22252B3C5001487A1 /* FileManager+POSIX.swift */,
91B668A42252B3E7001487A1 /* FileManager+Win32.swift */,
Expand Down Expand Up @@ -2971,6 +2989,7 @@
5BF7AEA81BCD51F9008F214A /* NSData.swift in Sources */,
5B424C761D0B6E5B007B39C8 /* IndexPath.swift in Sources */,
EADE0BB51BD15E0000C49C64 /* Scanner.swift in Sources */,
63FAA81A26C3398400EE3DAD /* AsyncLineSequence.swift in Sources */,
EADE0BA01BD15DFF00C49C64 /* NSIndexPath.swift in Sources */,
5BF7AEB51BCD51F9008F214A /* NSPathUtilities.swift in Sources */,
B96C113725BA376D00985A32 /* NSDateComponents.swift in Sources */,
Expand Down Expand Up @@ -3017,8 +3036,10 @@
5BA0106E1DF212B300E56898 /* NSPlatform.swift in Sources */,
D3BCEB9E1C2EDED800295652 /* NSLog.swift in Sources */,
15CA750A24F8336A007DF6C1 /* NSCFTypeShims.swift in Sources */,
63FAA81D26C3398400EE3DAD /* URL+AsyncBytes.swift in Sources */,
61E0117D1C1B5590000037DD /* RunLoop.swift in Sources */,
B96C110025BA20A600985A32 /* NSURLQueryItem.swift in Sources */,
63FAA81C26C3398400EE3DAD /* FileHandle+Async.swift in Sources */,
5B23AB8B1CE62F9B000DB898 /* PersonNameComponents.swift in Sources */,
EADE0BA61BD15E0000C49C64 /* MassFormatter.swift in Sources */,
5BECBA3A1D1CAE9A00B39B1F /* NSMeasurement.swift in Sources */,
Expand All @@ -3042,6 +3063,8 @@
EADE0BAC1BD15E0000C49C64 /* NSOrderedSet.swift in Sources */,
474E124D26BCD6D00016C28A /* AttributedString+Locking.swift in Sources */,
5BC1B9A421F2757F00524D8C /* ContiguousBytes.swift in Sources */,
63FAA81926C3398400EE3DAD /* AsyncUnicodeScalarSequence.swift in Sources */,
63FAA81B26C3398400EE3DAD /* AsyncCharacterSequence.swift in Sources */,
EADE0B971BD15DFF00C49C64 /* Decimal.swift in Sources */,
5B78185B1D6CB5D2004A01F2 /* CGFloat.swift in Sources */,
5BF7AEB71BCD51F9008F214A /* PropertyListSerialization.swift in Sources */,
Expand Down Expand Up @@ -3218,6 +3241,7 @@
5B13B33D1C582D4C00651CE2 /* TestPipe.swift in Sources */,
F9E0BB371CA70B8000F7FF3C /* TestURLCredential.swift in Sources */,
5B13B3341C582D4C00651CE2 /* TestNSKeyedArchiver.swift in Sources */,
63FAA81F26C33DE500EE3DAD /* TestFileHandle+Async.swift in Sources */,
5B13B3441C582D4C00651CE2 /* TestNSSet.swift in Sources */,
3E55A2331F52463B00082000 /* TestUnit.swift in Sources */,
5B13B3321C582D4C00651CE2 /* TestIndexSet.swift in Sources */,
Expand Down
53 changes: 53 additions & 0 deletions Sources/Foundation/AsyncCharacterSequence.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

/// An asynchronous sequence of `Character` values built from a sequence of
/// bytes.
///
/// The bytes of `Base` are turned into Unicode scalars by
/// `AsyncUnicodeScalarSequence<Base>`; this type groups those scalars into
/// `Character`s (extended grapheme clusters).
@available(macOS 12.0, iOS 15.0, tvOS 15.0, watchOS 8.0, *)
public struct AsyncCharacterSequence<Base: AsyncSequence>: AsyncSequence where Base.Element == UInt8 {
    public typealias Element = Character

    /// The scalar sequence wrapping the original byte sequence.
    var underlying: AsyncUnicodeScalarSequence<Base>

    /// Iterator that assembles `Character`s from the scalars vended by
    /// `AsyncUnicodeScalarSequence<Base>.AsyncIterator`.
    @frozen
    public struct AsyncIterator: AsyncIteratorProtocol {
        /// Source of the scalars that have not been consumed yet.
        @usableFromInline var remaining: AsyncUnicodeScalarSequence<Base>.AsyncIterator
        /// Scalars received so far that have not been returned as a
        /// `Character` yet. Holds at most two characters at any time.
        @usableFromInline var accumulator = ""

        /// Returns the next `Character`, or `nil` once the scalar source is
        /// exhausted and no buffered character remains.
        ///
        /// - Throws: Rethrows any error thrown by the underlying iterator.
        @inlinable @inline(__always)
        public mutating func next() async rethrows -> Character? {
            while let scalar = try await remaining.next() {
                accumulator.unicodeScalars.append(scalar)
                // A newly appended scalar may still combine with the scalars
                // already buffered, so the first character is only known to be
                // complete once the buffer holds a second one.
                if accumulator.count > 1 {
                    return accumulator.removeFirst()
                }
            }
            // End of input: flush whatever character is still buffered.
            return accumulator.isEmpty ? nil : accumulator.removeFirst()
        }
    }

    public func makeAsyncIterator() -> AsyncIterator {
        return AsyncIterator(remaining: underlying.makeAsyncIterator())
    }

    internal init(underlyingSequence: Base) {
        underlying = AsyncUnicodeScalarSequence(underlyingSequence: underlyingSequence)
    }
}

@available(macOS 12.0, iOS 15.0, tvOS 15.0, watchOS 8.0, *)
public extension AsyncSequence where Self.Element == UInt8 {
    /// A non-blocking sequence of `Character`s created by decoding the
    /// elements of `self` as UTF8.
    var characters: AsyncCharacterSequence<Self> {
        return AsyncCharacterSequence<Self>(underlyingSequence: self)
    }
}
166 changes: 166 additions & 0 deletions Sources/Foundation/AsyncLineSequence.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

/// An asynchronous sequence of `String` lines built from a sequence of bytes.
///
/// The iterator scans the bytes of `Base` for line terminators and returns the
/// bytes between two terminators decoded as UTF-8. Empty lines are skipped
/// rather than returned as empty strings.
@available(macOS 12.0, iOS 15.0, tvOS 15.0, watchOS 8.0, *)
public struct AsyncLineSequence<Base: AsyncSequence>: AsyncSequence where Base.Element == UInt8 {
public typealias Element = String

// The byte sequence that is split into lines.
var base: Base

/// Iterator that accumulates bytes until a line terminator is found and then
/// returns the accumulated bytes as a `String`.
@available(macOS 12.0, iOS 15.0, tvOS 15.0, watchOS 8.0, *)
public struct AsyncIterator: AsyncIteratorProtocol {
public typealias Element = String

// Source of the raw bytes.
var byteSource: Base.AsyncIterator
// Bytes of the line currently being assembled (terminator bytes excluded).
var buffer: Array<UInt8> = []
// A byte that was read while looking for the LF of a CR-LF pair but turned
// out not to be LF; it is handed out again by the next `nextByte()` call.
var leftover: UInt8? = nil

internal init(underlyingIterator: Base.AsyncIterator) {
byteSource = underlyingIterator
}

// We'd like to reserve flexibility to improve the implementation of
// next() in the future, so aren't marking it @inlinable. Manually
// specializing for the common source types helps us get back some of
// the performance we're leaving on the table.
@_specialize(where Base == URL.AsyncBytes)
@_specialize(where Base == FileHandle.AsyncBytes)
//@_specialize(where Base == URLSession.AsyncBytes)
/// Returns the next non-empty line, or `nil` when the byte source is
/// exhausted and no buffered bytes remain.
///
/// - Throws: Rethrows any error thrown by the underlying byte iterator.
public mutating func next() async rethrows -> String? {
/*
Recognized line terminators, as UTF-8 byte sequences:
0D 0A: CR-LF
0A | 0B | 0C | 0D: LF, VT, FF, CR
C2 85: U+0085 (NEXT LINE)
E2 80 A8: U+2028 (LINE SEPARATOR)
E2 80 A9: U+2029 (PARAGRAPH SEPARATOR)
*/
let _CR: UInt8 = 0x0D
let _LF: UInt8 = 0x0A
let _NEL_PREFIX: UInt8 = 0xC2
let _NEL_SUFFIX: UInt8 = 0x85
let _SEPARATOR_PREFIX: UInt8 = 0xE2
let _SEPARATOR_CONTINUATION: UInt8 = 0x80
let _SEPARATOR_SUFFIX_LINE: UInt8 = 0xA8
let _SEPARATOR_SUFFIX_PARAGRAPH: UInt8 = 0xA9

// Decodes the buffered bytes as UTF-8 and returns them as a String, or
// returns nil when the buffer is empty. The buffer is always cleared
// afterwards (its capacity is kept for the next line).
func yield() -> String? {
defer {
buffer.removeAll(keepingCapacity: true)
}
if buffer.isEmpty {
return nil
}
return String(decoding: buffer, as: UTF8.self)
}

// Returns the pending `leftover` byte if there is one, otherwise the next
// byte from the source. `leftover` is reset on every call.
func nextByte() async throws -> UInt8? {
defer { leftover = nil }
if let leftover = leftover {
return leftover
}
return try await byteSource.next()
}

while let first = try await nextByte() {
switch first {
case _CR:
// The CR ends the current line; capture it before looking ahead.
let result = yield()
// Swallow up any subsequent LF
guard let next = try await byteSource.next() else {
// If we ran out of bytes, the last byte was a CR.
return result
}
// Anything other than LF belongs to the following line: keep it for
// the next `nextByte()` call.
if next != _LF {
leftover = next
}
if let result = result {
return result
}
// The line was empty: skip it and keep scanning.
continue
// LF, VT or FF (0x0A...0x0C): end the current line; an empty line is
// skipped instead of being returned.
case _LF..<_CR:
guard let result = yield() else {
continue
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgive me if I'm oversimplifying... there's a lot going on here. 😊 It looks like this completely removes empty lines, which seems suboptimal, given that...

  1. blank lines are meaningful in countless data applications, and
  2. it's easy enough to add your own isEmpty filter on the sequence if you really do want to strip them out.

Based on this, the default behavior should probably be to include empty lines (i.e., change continue to return "" in the _CR and _LF cases).

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If in doubt, please cross-check with Foundation's behavior. It needs to have the same semantics, else we widen the (already large) gap between APPLE and !APPLE.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yeah, good point. This does match Foundation's behavior, so the issue would be against Apple, not this code. Thanks for the check!

}
return result
case _NEL_PREFIX: // this may be used to compose other UTF8 characters
// Try to read: 85. C2 85 is a line terminator; C2 followed by any other
// byte is ordinary content.
guard let next = try await byteSource.next() else {
// technically invalid UTF8 but it should be repaired to "\u{FFFD}"
buffer.append(first)
return yield()
}
if next != _NEL_SUFFIX {
// Not NEL: both bytes are line content. Note that `next` is appended
// as-is and is not re-examined as a possible terminator byte.
buffer.append(first)
buffer.append(next)
} else {
guard let result = yield() else {
continue
}
return result
}
case _SEPARATOR_PREFIX:
// Try to read: 80 [A8 | A9].
// If we can't, then we put the byte in the buffer for error correction
guard let next = try await byteSource.next() else {
buffer.append(first)
return yield()
}
guard next == _SEPARATOR_CONTINUATION else {
// E2 followed by something other than 80: ordinary content.
buffer.append(first)
buffer.append(next)
continue
}
guard let fin = try await byteSource.next() else {
// Input ended after E2 80: flush the incomplete sequence with the line.
buffer.append(first)
buffer.append(next)
return yield()

}
guard fin == _SEPARATOR_SUFFIX_LINE || fin == _SEPARATOR_SUFFIX_PARAGRAPH else {
// E2 80 xx that is neither U+2028 nor U+2029: ordinary content.
buffer.append(first)
buffer.append(next)
buffer.append(fin)
continue
}
// U+2028 or U+2029: end the current line, skipping it if it is empty.
if let result = yield() {
return result
}
continue
default:
// Any other byte is part of the current line.
buffer.append(first)
}
}
// Don't emit an empty newline when there is no more content (e.g. end of file)
if !buffer.isEmpty {
return yield()
}
return nil
}

}

public func makeAsyncIterator() -> AsyncIterator {
return AsyncIterator(underlyingIterator: base.makeAsyncIterator())
}

internal init(underlyingSequence: Base) {
base = underlyingSequence
}
}

@available(macOS 12.0, iOS 15.0, tvOS 15.0, watchOS 8.0, *)
public extension AsyncSequence where Self.Element == UInt8 {
    /// A non-blocking sequence of newline-separated `String`s created by
    /// decoding the elements of `self` as UTF8.
    @available(macOS 12.0, iOS 15.0, tvOS 15.0, watchOS 8.0, *)
    var lines: AsyncLineSequence<Self> {
        return AsyncLineSequence<Self>(underlyingSequence: self)
    }
}
Loading