我对这个问题做了相当深入的研究,最终想出了一个扩展方法,我认为它可以回答你的问题:
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Linq;
/// <summary>
/// Search helpers for <see cref="ReadOnlySequence{T}"/> instances holding bytes.
/// </summary>
public static class ReadOnlySequenceExtensions
{
    /// <summary>
    /// Finds the last occurrence of a (possibly multi-byte) delimiter in a sequence.
    /// </summary>
    /// <param name="source">The sequence to search.</param>
    /// <param name="delimiter">The bytes to look for. Must be non-null and non-empty.</param>
    /// <returns>
    /// The position of the first byte of the last occurrence of
    /// <paramref name="delimiter"/>, or <c>null</c> if it does not occur.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="delimiter"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is empty.</exception>
    public static SequencePosition? LastPositionOf(
        this ReadOnlySequence<byte> source,
        byte[] delimiter)
    {
        if (delimiter == null)
        {
            throw new ArgumentNullException(nameof(delimiter));
        }

        if (delimiter.Length == 0)
        {
            throw new ArgumentException($"{nameof(delimiter)} is empty", nameof(delimiter));
        }

        var reader = new SequenceReader<byte>(source);
        var delimiterToFind = new ReadOnlySpan<byte>(delimiter);

        // Offset (from the start of the sequence) of the most recent match; -1 means none yet.
        long lastStart = -1;

        // The discard is explicitly typed: newer frameworks also expose an
        // out ReadOnlySpan<T> overload taking a span delimiter, which makes a bare
        // "out _" ambiguous. Passing false leaves the reader at the first byte of the match.
        while (reader.TryReadTo(out ReadOnlySequence<byte> _, delimiterToFind, false))
        {
            lastStart = reader.Consumed;

            // Step over a single byte only (not the whole delimiter) so that a later
            // occurrence overlapping this one is still found.
            reader.Advance(1);
        }

        if (lastStart < 0)
        {
            return null;
        }

        return source.GetPosition(lastStart);
    }
}
这里还有一些测试用例:
// Sample inputs for LastPositionOf, added in the same order as the numbered cases.
var cases = new List<byte[]>();

// Case 1: empty input
cases.Add(new byte[] { });

// Case 2: input that contains no delimiter bytes at all
cases.Add(new byte[] { 0x0f });

// Case 3: only the first byte of the delimiter
cases.Add(new byte[] { 0x1c });

// Case 4: only the second byte of the delimiter
cases.Add(new byte[] { 0x0d });

// Case 5: both delimiter bytes, but in the wrong order
cases.Add(new byte[] { 0x0d, 0x1c });

// Case 6: exactly one correct delimiter
cases.Add(new byte[] { 0x1c, 0x0d });

// Case 7: one byte followed by a correct delimiter
cases.Add(new byte[] { 0x01, 0x1c, 0x0d });

// Case 8: several correct delimiters
cases.Add(new byte[] { 0x01, 0x1c, 0x0d, 0x02, 0x1c, 0x0d });

// Case 9: several correct delimiters, with a trailing byte after the last one
cases.Add(new byte[] { 0x01, 0x1c, 0x0d, 0x02, 0x1c, 0x0d, 0x03 });

// Case 10: the first delimiter byte repeated immediately before a correct delimiter
cases.Add(new byte[] { 0x01, 0x1c, 0x0d, 0x02, 0x1c, 0x1c, 0x0d, 0x03 });
// The two-byte delimiter searched for in every case.
var delimiter = new byte[] { 0x1c, 0x0d };

for (var index = 0; index < cases.Count; index++)
{
    var source = new ReadOnlySequence<byte>(cases[index]);
    var result = source.LastPositionOf(delimiter);
} // Put a breakpoint here and examine result
用例 1 到 5 全部正确返回 null。
用例 6 到 10 全部正确返回一个 SequencePosition,它指向分隔符的第一个字节(在本例中即 0x1c)。
我还尝试编写一个迭代器版本,每找到一个分隔符就 yield 返回一个位置,如下所示:
// NOTE: illustrative only; this loop does not compile inside an iterator method,
// because SequenceReader<T> and ReadOnlySpan<T> cannot be used in iterator blocks.
while (reader.TryReadTo(out _, delimiterToFind, true))
{
// Consumed is now just past the delimiter, so subtract its length to get its first byte.
yield return reader.Sequence.GetPosition(reader.Consumed - delimiter.Length);
}
但是 SequenceReader<T> 和 ReadOnlySpan<T> 不能在迭代器块中使用,所以我改为编写了 AllPositionsOf:
/// <summary>
/// Finds the positions of all occurrences of a (possibly multi-byte) delimiter in a sequence.
/// </summary>
/// <remarks>
/// The scan runs front to back and resumes immediately after each match, so the
/// reported occurrences never overlap one another. The results are collected eagerly
/// into a list because <see cref="SequenceReader{T}"/> and <see cref="ReadOnlySpan{T}"/>
/// cannot be used in an iterator block.
/// </remarks>
/// <param name="source">The sequence to search.</param>
/// <param name="delimiter">The bytes to look for. Must be non-null and non-empty.</param>
/// <returns>
/// The position of the first byte of each occurrence found, in order of appearance;
/// empty if <paramref name="delimiter"/> does not occur.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="delimiter"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="delimiter"/> is empty.</exception>
public static IEnumerable<SequencePosition> AllPositionsOf(
    this ReadOnlySequence<byte> source,
    byte[] delimiter)
{
    if (delimiter == null)
    {
        throw new ArgumentNullException(nameof(delimiter));
    }

    if (delimiter.Length == 0)
    {
        throw new ArgumentException($"{nameof(delimiter)} is empty", nameof(delimiter));
    }

    var reader = new SequenceReader<byte>(source);
    var delimiterToFind = new ReadOnlySpan<byte>(delimiter);
    var results = new List<SequencePosition>();

    // The discard is explicitly typed: newer frameworks also expose an
    // out ReadOnlySpan<T> overload taking a span delimiter, which makes a bare
    // "out _" ambiguous.
    while (reader.TryReadTo(out ReadOnlySequence<byte> _, delimiterToFind, true))
    {
        // The reader now sits just past the delimiter; back up by its length
        // to get the offset of its first byte.
        results.Add(reader.Sequence.GetPosition(reader.Consumed - delimiter.Length));
    }

    return results;
}
测试用例也能正常工作。
Update
现在我睡了一觉,也有时间仔细想了想,我认为上面的代码还可以改进,原因如下:
- SequenceReader<T> 有一个 Rewind() 方法,这让我觉得 SequenceReader<T> 被设计为可重复使用
- SequenceReader<T> 的设计目的似乎是让 ReadOnlySequence<T> 在一般情况下更易于使用
- 为 ReadOnlySequence<T> 创建扩展方法,却在其中使用 SequenceReader<T> 来读取 ReadOnlySequence<T>,似乎本末倒置
鉴于以上几点,我认为应当尽量避免直接操作 ReadOnlySequence<T>,而是在可能的情况下优先使用并重用 SequenceReader<T>。考虑到这一点,下面是另一个版本的 LastPositionOf,它现在是 SequenceReader<T> 的扩展方法:
/// <summary>
/// Search helpers for <see cref="SequenceReader{T}"/> instances reading bytes.
/// </summary>
public static class SequenceReaderExtensions
{
    /// <summary>
    /// Finds the last occurrence of a delimiter in the unread part of a reader's sequence.
    /// </summary>
    /// <param name="reader">The reader to read from. The search starts at its current position.</param>
    /// <param name="delimiter">The delimiter to look for. Must be non-null and non-empty.</param>
    /// <param name="rewind">
    /// If true, rewinds the reader to its position prior to this method being called.
    /// If false and a delimiter is found, the reader is left just past the last occurrence;
    /// if none is found, the reader is left where it started.
    /// </param>
    /// <returns>
    /// A SequencePosition pointing at the first byte of the last occurrence of the
    /// delimiter if one is found, otherwise null.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="delimiter"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is empty.</exception>
    public static SequencePosition? LastPositionOf(
        this ref SequenceReader<byte> reader,
        byte[] delimiter,
        bool rewind)
    {
        if (delimiter == null)
        {
            throw new ArgumentNullException(nameof(delimiter));
        }

        if (delimiter.Length == 0)
        {
            throw new ArgumentException($"{nameof(delimiter)} is empty", nameof(delimiter));
        }

        var delimiterToFind = new ReadOnlySpan<byte>(delimiter);
        var consumedAtEntry = reader.Consumed;

        // Offset (from the start of the sequence) of the most recent match; -1 means none yet.
        long lastStart = -1;

        // The discard is explicitly typed: newer frameworks also expose an
        // out ReadOnlySpan<T> overload taking a span delimiter, which makes a bare
        // "out _" ambiguous. Passing false leaves the reader at the first byte of the match.
        while (reader.TryReadTo(out ReadOnlySequence<byte> _, delimiterToFind, false))
        {
            lastStart = reader.Consumed;

            // Step over a single byte only (not the whole delimiter) so that a later
            // occurrence overlapping this one is still found.
            reader.Advance(1);
        }

        SequencePosition? result = null;
        if (lastStart >= 0)
        {
            result = reader.Sequence.GetPosition(lastStart);

            // The reader is one byte into the last match; skip the remainder so that
            // everything up to and including the last delimiter has been consumed.
            reader.Advance(delimiter.Length - 1);
        }

        if (rewind)
        {
            reader.Rewind(reader.Consumed - consumedAtEntry);
        }

        return result;
    }
}
上面的测试用例仍然可以通过,而且我们现在可以重用同一个 reader。此外,它还允许你指定是否要将 reader 回退到调用该方法之前的位置。