Swift Calculate MD5 Checksum for Large Files
Solution (based on Martin R's answer) for SHA256 hash:
func sha256(url: URL) -> Data? {
do {
let bufferSize = 1024 * 1024
// Open file for reading:
let file = try FileHandle(forReadingFrom: url)
defer {
file.closeFile()
}
// Create and initialize SHA256 context:
var context = CC_SHA256_CTX()
CC_SHA256_Init(&context)
// Read up to `bufferSize` bytes, until EOF is reached, and update SHA256 context:
while autoreleasepool(invoking: {
// Read up to `bufferSize` bytes
let data = file.readData(ofLength: bufferSize)
if data.count > 0 {
data.withUnsafeBytes {
_ = CC_SHA256_Update(&context, $0, numericCast(data.count))
}
// Continue
return true
} else {
// End of file
return false
}
}) { }
// Compute the SHA256 digest:
var digest = Data(count: Int(CC_SHA256_DIGEST_LENGTH))
digest.withUnsafeMutableBytes {
_ = CC_SHA256_Final($0, &context)
}
return digest
} catch {
print(error)
return nil
}
}
Usage with instance of type URL
with name fileURL
previously created:
if let digestData = sha256(url: fileURL) {
let calculatedHash = digestData.map { String(format: "%02hhx", $0) }.joined()
DDLogDebug(calculatedHash)
}
Since iOS13
'CC_MD5_Init' was deprecated in iOS 13.0
You may replace the code with CryptoKit
import Foundation
import CryptoKit
extension URL {
func checksumInBase64() -> String? {
let bufferSize = 16*1024
do {
// Open file for reading:
let file = try FileHandle(forReadingFrom: self)
defer {
file.closeFile()
}
// Create and initialize MD5 context:
var md5 = CryptoKit.Insecure.MD5()
// Read up to `bufferSize` bytes, until EOF is reached, and update MD5 context:
while autoreleasepool(invoking: {
let data = file.readData(ofLength: bufferSize)
if data.count > 0 {
md5.update(data: data)
return true // Continue
} else {
return false // End of file
}
}) { }
// Compute the MD5 digest:
let data = Data(md5.finalize())
return data.base64EncodedString()
} catch {
error_log(error)
return nil
}
}
}
You can compute the MD5 checksum in chunks, as demonstrated e.g. in Is there a MD5 library that doesn't require the whole input at the same time?.
Here is a possible implementation using Swift (now updated for Swift 5)
import CommonCrypto
func md5File(url: URL) -> Data? {
let bufferSize = 1024 * 1024
do {
// Open file for reading:
let file = try FileHandle(forReadingFrom: url)
defer {
file.closeFile()
}
// Create and initialize MD5 context:
var context = CC_MD5_CTX()
CC_MD5_Init(&context)
// Read up to `bufferSize` bytes, until EOF is reached, and update MD5 context:
while autoreleasepool(invoking: {
let data = file.readData(ofLength: bufferSize)
if data.count > 0 {
data.withUnsafeBytes {
_ = CC_MD5_Update(&context, $0.baseAddress, numericCast(data.count))
}
return true // Continue
} else {
return false // End of file
}
}) { }
// Compute the MD5 digest:
var digest: [UInt8] = Array(repeating: 0, count: Int(CC_MD5_DIGEST_LENGTH))
_ = CC_MD5_Final(&digest, &context)
return Data(digest)
} catch {
print("Cannot open file:", error.localizedDescription)
return nil
}
}
The autorelease pool is needed to release the memory returned by
file.readData()
, without it the entire (potentially huge) file
would be loaded into memory. Thanks to Abhi Beckert for noticing that
and providing an implementation.
If you need the digest as a hex-encoded string then change the
return type to String?
and replace
return digest
by
let hexDigest = digest.map { String(format: "%02hhx", $0) }.joined()
return hexDigest