Extract hostname from a URL
I found that there can be a lot of variation here, especially when running against a table of referrers. For this reason I created a SQL script that gets the host name from a web address that also covers all of the edge cases I found.
DECLARE @WebAddress varchar(300) = 'https://www.stevefenton.co.uk/2015/09/select-the-host-name-from-a-string-in-sql/'
SELECT
/* Get just the host name from a URL */
SUBSTRING(@WebAddress,
/* Starting Position (After any '//') */
(CASE WHEN CHARINDEX('//', @WebAddress)= 0 THEN 1 ELSE CHARINDEX('//', @WebAddress) + 2 END),
/* Length (ending on first '/' or on a '?') */
CASE
WHEN CHARINDEX('/', @WebAddress, CHARINDEX('//', @WebAddress) + 2) > 0 THEN CHARINDEX('/', @WebAddress, CHARINDEX('//', @WebAddress) + 2) - (CASE WHEN CHARINDEX('//', @WebAddress)= 0 THEN 1 ELSE CHARINDEX('//', @WebAddress) + 2 END)
WHEN CHARINDEX('?', @WebAddress, CHARINDEX('//', @WebAddress) + 2) > 0 THEN CHARINDEX('?', @WebAddress, CHARINDEX('//', @WebAddress) + 2) - (CASE WHEN CHARINDEX('//', @WebAddress)= 0 THEN 1 ELSE CHARINDEX('//', @WebAddress) + 2 END)
ELSE LEN(@WebAddress)
END
) AS 'HostName'
This will handle...
- An address starting
www.
(i.e. no scheme) - A address starting
//
- Host names that terminate with a
/
- Host names that terminate with a query string
Please use the below SQL code to extract the domain name:
DECLARE @WebUrl VARCHAR(35);
SET @WebUrl = 'http://stackoverflow.com/questions/ask?title=trim'
SELECT @WebUrl AS 'WebsiteURL',
LEFT(SUBSTRING(@WebUrl,
(CASE WHEN CHARINDEX('//',@WebUrl)=0
THEN 5
ELSE CHARINDEX('//',@WebUrl) + 2
END), 35),
(CASE
WHEN CHARINDEX('/', SUBSTRING(@WebUrl, CHARINDEX('//', @WebUrl) + 2, 35))=0
THEN LEN(@WebUrl)
else CHARINDEX('/', SUBSTRING(@WebUrl, CHARINDEX('//', @WebUrl) + 2, 35))- 1
END)
) AS 'Domain';