Get Unicode characters with charcode values greater hex `FFFF`
The function that works for Unicode characters outside the basic multilingual plane (BMP) is WorksheetFunction.Unichar()
. This example converts cells containing hexadecimal into their Unicode equivalent:
Sub Convert()
For i = 1 To Selection.Cells.Count
n = WorksheetFunction.Hex2Dec(Selection.Cells(i).Text)
Selection.Cells(i) = WorksheetFunction.Unichar(n)
Next
End Sub
Original selection before running macro:
After running macro:
If your Excel is older and WorksheetFunction
is not available, building UTF-16 surrogates manually works, too:
Sub Convert()
For i = 1 To Selection.Cells.Count
n = CLng("&H" + Selection.Cells(i).Text) 'Convert hexadecimal text to integer
If n < &H10000 Then 'BMP characters
Selection.Cells(i) = ChrW(n)
Else
'UTF-16 hi/lo surrogate conversion
'Algorithm:
'1. Code point - 10000h (max U+10FFFF give 9FFFF...20 bits)
'2. In binary, but 10 bits in first surrogate (x) and 10 in 2nd surrogate (y)
' 110110xxxxxxxxxx 110111yyyyyyyyyy
tmp = n - &H10000
h = &HD800 + Int(tmp / (2 ^ 10)) 'bitwise right shift by 10
l = &HDC00 + (tmp And &H3FF) 'bitwise AND of last 10 bits
Selection.Cells(i) = ChrW(h) + ChrW(l)
End If
Next
End Sub
Something like this should work. Most code I didn't write, but I knew what to look for. Basically map the Hex to the byte array equivalent, then get the string back.
Option Explicit
'Pulled from https://www.di-mgt.com.au/howto-convert-vba-unicode-to-utf8.html
''' Maps a character string to a UTF-16 (wide character) string
Private Declare PtrSafe Function MultiByteToWideChar Lib "kernel32" ( _
ByVal CodePage As Long, _
ByVal dwFlags As Long, _
ByVal lpMultiByteStr As LongPtr, _
ByVal cchMultiByte As Long, _
ByVal lpWideCharStr As LongPtr, _
ByVal cchWideChar As Long _
) As Long
' CodePage constant for UTF-8
Private Const CP_UTF8 = 65001
''' Return length of byte array or zero if uninitialized
Private Function BytesLength(abBytes() As Byte) As Long
' Trap error if array is uninitialized
On Error Resume Next
BytesLength = UBound(abBytes) - LBound(abBytes) + 1
End Function
''' Return VBA "Unicode" string from byte array encoded in UTF-8
Public Function Utf8BytesToString(abUtf8Array() As Byte) As String
Dim nBytes As Long
Dim nChars As Long
Dim strOut As String
Utf8BytesToString = ""
' Catch uninitialized input array
nBytes = BytesLength(abUtf8Array)
If nBytes <= 0 Then Exit Function
' Get number of characters in output string
nChars = MultiByteToWideChar(CP_UTF8, 0&, VarPtr(abUtf8Array(0)), nBytes, 0&, 0&)
' Dimension output buffer to receive string
strOut = String(nChars, 0)
nChars = MultiByteToWideChar(CP_UTF8, 0&, VarPtr(abUtf8Array(0)), nBytes, StrPtr(strOut), nChars)
Utf8BytesToString = Left$(strOut, nChars)
End Function
'Grabbed from https://stackoverflow.com/questions/28798759/how-convert-hex-string-into-byte-array-in-vb6
Private Function HexToBytes(ByVal HexString As String) As Byte()
'Quick and dirty hex String to Byte array. Accepts:
'
' "HH HH HH"
' "HHHHHH"
' "H HH H"
' "HH,HH, HH" and so on.
Dim Bytes() As Byte
Dim HexPos As Integer
Dim HexDigit As Integer
Dim BytePos As Integer
Dim Digits As Integer
ReDim Bytes(Len(HexString) \ 2) 'Initial estimate.
For HexPos = 1 To Len(HexString)
HexDigit = InStr("0123456789ABCDEF", _
UCase$(Mid$(HexString, HexPos, 1))) - 1
If HexDigit >= 0 Then
If BytePos > UBound(Bytes) Then
'Add some room, we'll add room for 4 more to decrease
'how often we end up doing this expensive step:
ReDim Preserve Bytes(UBound(Bytes) + 4)
End If
Bytes(BytePos) = Bytes(BytePos) * &H10 + HexDigit
Digits = Digits + 1
End If
If Digits = 2 Or HexDigit < 0 Then
If Digits > 0 Then BytePos = BytePos + 1
Digits = 0
End If
Next
If Digits = 0 Then BytePos = BytePos - 1
If BytePos < 0 Then
Bytes = "" 'Empty.
Else
ReDim Preserve Bytes(BytePos)
End If
HexToBytes = Bytes
End Function
Example call
Public Sub ExampleLock()
Dim LockBytes() As Byte
LockBytes = HexToBytes("F0 9F 94 92") ' Lock Hex representation, found by -->http://www.ltg.ed.ac.uk/~richard/utf-8.cgi
Sheets(1).Range("A1").Value = Utf8BytesToString(LockBytes) ' Output
End Sub
Here's what is outputting to A1.