From dd59d5caa51b216d2fd2ca27bd2374cb834ec73e Mon Sep 17 00:00:00 2001
From: George Barnett
Date: Mon, 13 Sep 2021 13:29:12 +0100
Subject: [PATCH] Improve performance of `HTTPHeaders.subscript(canonicalForm:)`

Motivation:

When getting the canonical form of header values, any ASCII whitespace is
stripped from the values. The current implementation does this by doing
equality checks on `Character`, which is quite expensive.

Modifications:

- Update the `Substring.trimWhitespace()` function to trim on a UTF-8 view

Result:

Retrieving the canonical form of header values is cheaper, significantly
so when values contain whitespace.
---
 Sources/NIOHTTP1/HTTPTypes.swift | 33 ++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/Sources/NIOHTTP1/HTTPTypes.swift b/Sources/NIOHTTP1/HTTPTypes.swift
index ddc7c1430e..50009dff03 100644
--- a/Sources/NIOHTTP1/HTTPTypes.swift
+++ b/Sources/NIOHTTP1/HTTPTypes.swift
@@ -540,28 +540,37 @@ extension HTTPHeaders: RandomAccessCollection {
     }
 }
 
-extension Character {
+extension UTF8.CodeUnit {
     var isASCIIWhitespace: Bool {
-        return self == " " || self == "\t" || self == "\r" || self == "\n" || self == "\r\n"
+        switch self {
+        case UInt8(ascii: " "),
+             UInt8(ascii: "\t"),
+             UInt8(ascii: "\r"),
+             UInt8(ascii: "\n"):
+            return true
+
+        default:
+            return false
+        }
     }
 }
 
 extension String {
     func trimASCIIWhitespace() -> Substring {
-        return self.dropFirst(0).trimWhitespace()
+        return Substring(self).trimWhitespace()
     }
 }
 
-private extension Substring {
-    func trimWhitespace() -> Substring {
-        var me = self
-        while me.first?.isASCIIWhitespace == .some(true) {
-            me = me.dropFirst()
-        }
-        while me.last?.isASCIIWhitespace == .some(true) {
-            me = me.dropLast()
+extension Substring {
+    fileprivate func trimWhitespace() -> Substring {
+        guard let firstNonWhitespace = self.utf8.firstIndex(where: { !$0.isASCIIWhitespace }) else {
+            // The whole substring is ASCII whitespace.
+            return Substring()
         }
-        return me
+
+        // There must be at least one non-ascii whitespace character, so banging here is safe.
+        let lastNonWhitespace = self.utf8.lastIndex(where: { !$0.isASCIIWhitespace })!
+        return Substring(self.utf8[firstNonWhitespace...lastNonWhitespace])
     }
 }
 