Combine framework serialize async operations
Use flatMap(maxPublishers:transform:)
with .max(1)
, e.g.
func imagesPublisher(for urls: [URL]) -> AnyPublisher<UIImage, URLError> {
Publishers.Sequence(sequence: urls.map { self.imagePublisher(for: $0) })
.flatMap(maxPublishers: .max(1)) { $0 }
.eraseToAnyPublisher()
}
Where
func imagePublisher(for url: URL) -> AnyPublisher<UIImage, URLError> {
URLSession.shared.dataTaskPublisher(for: url)
.compactMap { UIImage(data: $0.data) }
.receive(on: RunLoop.main)
.eraseToAnyPublisher()
}
and
var imageRequests: AnyCancellable?
func fetchImages() {
imageRequests = imagesPublisher(for: urls).sink { completion in
switch completion {
case .finished:
print("done")
case .failure(let error):
print("failed", error)
}
} receiveValue: { image in
// do whatever you want with the images as they come in
}
}
That resulted in:
But we should recognize that you take a big performance hit doing them sequentially, like that. For example, if I bump it up to 6 at a time, it’s more than twice as fast:
Personally, I’d recommend only downloading sequentially if you absolutely must (which, when downloading a series of images/files, is almost certainly not the case). Yes, performing requests concurrently can result in them not finishing in a particular order, but we just use a structure that is order independent (e.g. a dictionary rather than a simple array), but the performance gains are so significant that it’s generally worth it.
But, if you want them downloaded sequentially, the maxPublishers
parameter can achieve that.
You could create custom Subscriber where receive returning Subscribers.Demand.max(1). In that case the subscriber will request next value only when received one. The example is for Int.publisher, but some random delay in map mimics network traffic :-)
import PlaygroundSupport
import SwiftUI
import Combine
class MySubscriber: Subscriber {
typealias Input = String
typealias Failure = Never
func receive(subscription: Subscription) {
print("Received subscription", Thread.current.isMainThread)
subscription.request(.max(1))
}
func receive(_ input: Input) -> Subscribers.Demand {
print("Received input: \(input)", Thread.current.isMainThread)
return .max(1)
}
func receive(completion: Subscribers.Completion<Never>) {
DispatchQueue.main.async {
print("Received completion: \(completion)", Thread.current.isMainThread)
PlaygroundPage.current.finishExecution()
}
}
}
(110...120)
.publisher.receive(on: DispatchQueue.global())
.map {
print(Thread.current.isMainThread, Thread.current)
usleep(UInt32.random(in: 10000 ... 1000000))
return String(format: "%02x", $0)
}
.subscribe(on: DispatchQueue.main)
.subscribe(MySubscriber())
print("Hello")
PlaygroundPage.current.needsIndefiniteExecution = true
Playground print ...
Hello
Received subscription true
false <NSThread: 0x600000064780>{number = 5, name = (null)}
Received input: 6e false
false <NSThread: 0x60000007cc80>{number = 9, name = (null)}
Received input: 6f false
false <NSThread: 0x60000007cc80>{number = 9, name = (null)}
Received input: 70 false
false <NSThread: 0x60000007cc80>{number = 9, name = (null)}
Received input: 71 false
false <NSThread: 0x60000007cc80>{number = 9, name = (null)}
Received input: 72 false
false <NSThread: 0x600000064780>{number = 5, name = (null)}
Received input: 73 false
false <NSThread: 0x600000064780>{number = 5, name = (null)}
Received input: 74 false
false <NSThread: 0x60000004dc80>{number = 8, name = (null)}
Received input: 75 false
false <NSThread: 0x60000004dc80>{number = 8, name = (null)}
Received input: 76 false
false <NSThread: 0x60000004dc80>{number = 8, name = (null)}
Received input: 77 false
false <NSThread: 0x600000053400>{number = 3, name = (null)}
Received input: 78 false
Received completion: finished true
UPDATE
finally i found .flatMap(maxPublishers: )
, which force me to update this interesting topic with little bit different approach. Please, see that I am using global queue for scheduling, not only some random delay, just to be sure that receiving serialized stream is not "random" or "lucky" behavior :-)
import PlaygroundSupport
import Combine
import Foundation
PlaygroundPage.current.needsIndefiniteExecution = true
let A = (1 ... 9)
.publisher
.flatMap(maxPublishers: .max(1)) { value in
[value].publisher
.flatMap { value in
Just(value)
.delay(for: .milliseconds(Int.random(in: 0 ... 100)), scheduler: DispatchQueue.global())
}
}
.sink { value in
print(value, "A")
}
let B = (1 ... 9)
.publisher
.flatMap { value in
[value].publisher
.flatMap { value in
Just(value)
.delay(for: .milliseconds(Int.random(in: 0 ... 100)), scheduler: RunLoop.main)
}
}
.sink { value in
print(" ",value, "B")
}
prints
1 A
4 B
5 B
7 B
1 B
2 B
8 B
6 B
2 A
3 B
9 B
3 A
4 A
5 A
6 A
7 A
8 A
9 A
Based on written here
.serialize()?
defined by Clay Ellis accepted answer could be replaced by
.publisher.flatMap(maxPublishers: .max(1)){$0}
while "unserialzed" version must use
.publisher.flatMap{$0}
"real world example"
import PlaygroundSupport
import Foundation
import Combine
let path = "postman-echo.com/get"
let urls: [URL] = "... which proves the downloads are happening serially .-)".map(String.init).compactMap { (parameter) in
var components = URLComponents()
components.scheme = "https"
components.path = path
components.queryItems = [URLQueryItem(name: parameter, value: nil)]
return components.url
}
//["https://postman-echo.com/get?]
struct Postman: Decodable {
var args: [String: String]
}
let collection = urls.compactMap { value in
URLSession.shared.dataTaskPublisher(for: value)
.tryMap { data, response -> Data in
return data
}
.decode(type: Postman.self, decoder: JSONDecoder())
.catch {_ in
Just(Postman(args: [:]))
}
}
extension Collection where Element: Publisher {
func serialize() -> AnyPublisher<Element.Output, Element.Failure>? {
guard let start = self.first else { return nil }
return self.dropFirst().reduce(start.eraseToAnyPublisher()) {
return $0.append($1).eraseToAnyPublisher()
}
}
}
var streamA = ""
let A = collection
.publisher.flatMap{$0}
.sink(receiveCompletion: { (c) in
print(streamA, " ", c, " .publisher.flatMap{$0}")
}, receiveValue: { (postman) in
print(postman.args.keys.joined(), terminator: "", to: &streamA)
})
var streamC = ""
let C = collection
.serialize()?
.sink(receiveCompletion: { (c) in
print(streamC, " ", c, " .serialize()?")
}, receiveValue: { (postman) in
print(postman.args.keys.joined(), terminator: "", to: &streamC)
})
var streamD = ""
let D = collection
.publisher.flatMap(maxPublishers: .max(1)){$0}
.sink(receiveCompletion: { (c) in
print(streamD, " ", c, " .publisher.flatMap(maxPublishers: .max(1)){$0}")
}, receiveValue: { (postman) in
print(postman.args.keys.joined(), terminator: "", to: &streamD)
})
PlaygroundPage.current.needsIndefiniteExecution = true
prints
.w.h i.c hporves ht edownloadsa erh appeninsg eriall y.-) finished .publisher.flatMap{$0}
... which proves the downloads are happening serially .-) finished .publisher.flatMap(maxPublishers: .max(1)){$0}
... which proves the downloads are happening serially .-) finished .serialize()?
Seem to me very useful in other scenarios as well. Try to use default value of maxPublishers in next snippet and compare the results :-)
import Combine
let sequencePublisher = Publishers.Sequence<Range<Int>, Never>(sequence: 0..<Int.max)
let subject = PassthroughSubject<String, Never>()
let handle = subject
.zip(sequencePublisher.print())
//.publish
.flatMap(maxPublishers: .max(1), { (pair) in
Just(pair)
})
.print()
.sink { letters, digits in
print(letters, digits)
}
"Hello World!".map(String.init).forEach { (s) in
subject.send(s)
}
subject.send(completion: .finished)
From the original question:
I did try making an array of the URLs and map it to an array of publishers. I know I can "produce" a publisher and cause it to publish on down the pipeline using
flatMap
. But then I'm still doing all the downloading simultaneously. There isn't any Combine way to walk the array in a controlled manner — or is there?
Here's a toy example to stand in for the real problem:
let collection = (1 ... 10).map {
Just($0).delay(
for: .seconds(Double.random(in:1...5)),
scheduler: DispatchQueue.main)
.eraseToAnyPublisher()
}
collection.publisher
.flatMap() {$0}
.sink {print($0)}.store(in:&self.storage)
This emits the integers from 1 to 10 in random order arriving at random times. The goal is to do something with collection
that will cause it to emit the integers from 1 to 10 in order.
Now we're going to change just one thing: in the line
.flatMap {$0}
we add the maxPublishers
parameter:
let collection = (1 ... 10).map {
Just($0).delay(
for: .seconds(Double.random(in:1...5)),
scheduler: DispatchQueue.main)
.eraseToAnyPublisher()
}
collection.publisher
.flatMap(maxPublishers:.max(1)) {$0}
.sink {print($0)}.store(in:&self.storage)
Presto, we now do emit the integers from 1 to 10, in order, with random intervals between them.
Let's apply this to the original problem. To demonstrate, I need a fairly slow Internet connection and a fairly large resource to download. First, I'll do it with ordinary .flatMap
:
let eph = URLSessionConfiguration.ephemeral
let session = URLSession(configuration: eph)
let url = "https://photojournal.jpl.nasa.gov/tiff/PIA23172.tif"
let collection = [url, url, url]
.map {URL(string:$0)!}
.map {session.dataTaskPublisher(for: $0)
.eraseToAnyPublisher()
}
collection.publisher.setFailureType(to: URLError.self)
.handleEvents(receiveOutput: {_ in print("start")})
.flatMap() {$0}
.map {$0.data}
.sink(receiveCompletion: {comp in
switch comp {
case .failure(let err): print("error", err)
case .finished: print("finished")
}
}, receiveValue: {_ in print("done")})
.store(in:&self.storage)
The result is
start
start
start
done
done
done
finished
which shows that we are doing the three downloads simultaneously. Okay, now change
.flatMap() {$0}
to
.flatMap(maxPublishers:.max(1) {$0}
The result now is:
start
done
start
done
start
done
finished
So we are now downloading serially, which is the problem originally to be solved.
append
In keeping with the principle of TIMTOWTDI, we can instead chain the publishers with append
to serialize them:
let collection = (1 ... 10).map {
Just($0).delay(
for: .seconds(Double.random(in:1...5)),
scheduler: DispatchQueue.main)
.eraseToAnyPublisher()
}
let pub = collection.dropFirst().reduce(collection.first!) {
return $0.append($1).eraseToAnyPublisher()
}
The result is a publisher that serializes the delayed publishers in the original collection. Let's prove it by subscribing to it:
pub.sink {print($0)}.store(in:&self.storage)
Sure enough, the integers now arrive in order (with random intervals between).
We can encapsulate the creation of pub
from a collection of publishers with an extension on Collection, as suggested by Clay Ellis:
extension Collection where Element: Publisher {
func serialize() -> AnyPublisher<Element.Output, Element.Failure>? {
guard let start = self.first else { return nil }
return self.dropFirst().reduce(start.eraseToAnyPublisher()) {
return $0.append($1).eraseToAnyPublisher()
}
}
}
I've only briefly tested this, but at first pass it appears that each request waits for the previous request to finish before starting.
I'm posting this solution in search of feedback. Please be critical if this isn't a good solution.
extension Collection where Element: Publisher {
func serialize() -> AnyPublisher<Element.Output, Element.Failure>? {
// If the collection is empty, we can't just create an arbititary publisher
// so we return nil to indicate that we had nothing to serialize.
if isEmpty { return nil }
// We know at this point that it's safe to grab the first publisher.
let first = self.first!
// If there was only a single publisher then we can just return it.
if count == 1 { return first.eraseToAnyPublisher() }
// We're going to build up the output starting with the first publisher.
var output = first.eraseToAnyPublisher()
// We iterate over the rest of the publishers (skipping over the first.)
for publisher in self.dropFirst() {
// We build up the output by appending the next publisher.
output = output.append(publisher).eraseToAnyPublisher()
}
return output
}
}
A more concise version of this solution (provided by @matt):
extension Collection where Element: Publisher {
func serialize() -> AnyPublisher<Element.Output, Element.Failure>? {
guard let start = self.first else { return nil }
return self.dropFirst().reduce(start.eraseToAnyPublisher()) {
$0.append($1).eraseToAnyPublisher()
}
}
}