mySQL WHERE IN from JSON Array

Before JSON being introduced in MySQL, I use this:

  1. Ur original data: [1,2,3]

  2. After replace comma with '][': [1][2][3]

  3. Wrap ur id in '[]'

  4. Then use REVERSE LIKE instead of IN: WHERE '[1][2][3]' LIKE '%[1]%'

Answer to your question:

SELECT * FROM other_items 
WHERE
    REPLACE(SELECT items.data->"$.matrix[*].id" FROM items, ',', '][')
    LIKE CONCAT('%', CONCAT('[', id, ']'), '%')

Why wrap into '[]'

'[12,23,34]' LIKE '%1%' --> true
'[12,23,34]' LIKE '%12%' --> true

If wrap into '[]'

'[12][23][34]' LIKE '%[1]%' --> false
'[12][23][34]' LIKE '%[12]%' --> true

Take care that the accepted answer won't use index on tmp_other_items leading to slow performances for bigger tables.

In such case, I usually use an integers table, containing integers from 0 to an arbitrary fixed number N (below, about 1 million), and I join on that integers table to get the nth JSON element:

DROP TABLE IF EXISTS `integers`;
DROP TABLE IF EXISTS `tmp_items`;
DROP TABLE IF EXISTS `tmp_other_items`;

CREATE TABLE `integers` (`n` int NOT NULL PRIMARY KEY);
CREATE TABLE `tmp_items` (`id` int NOT NULL PRIMARY KEY AUTO_INCREMENT, `data` json NOT NULL);
CREATE TABLE `tmp_other_items` (`id` int NOT NULL PRIMARY KEY, `text` nvarchar(30) NOT NULL);

INSERT INTO `tmp_items` (`data`) 
VALUES 
    ('{ "matrix": [ { "id": 11 }, { "id": 12 }, { "id": 13 } ] }'),
   ('{ "matrix": [ { "id": 21 }, { "id": 22 }, { "id": 23 }, { "id": 24 } ] }'),
   ('{ "matrix": [ { "id": 31 }, { "id": 32 }, { "id": 33 }, { "id": 34 }, { "id": 35 } ] }')
;

-- Put a lot of rows in integers (~1M)
INSERT INTO `integers` (`n`) 
(
    SELECT 
        a.X
        + (b.X << 1)
        + (c.X << 2)
        + (d.X << 3)
        + (e.X << 4)
        + (f.X << 5)
        + (g.X << 6)
        + (h.X << 7)
        + (i.X << 8)
        + (j.X << 9)
        + (k.X << 10)
        + (l.X << 11)
        + (m.X << 12)
        + (n.X << 13)
        + (o.X << 14)
        + (p.X << 15)
        + (q.X << 16)
        + (r.X << 17)
        + (s.X << 18)
        + (t.X << 19) AS i
    FROM (SELECT 0 AS x UNION SELECT 1) AS a
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS b ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS c ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS d ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS e ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS f ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS g ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS h ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS i ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS j ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS k ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS l ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS m ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS n ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS o ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS p ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS q ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS r ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS s ON TRUE
        INNER JOIN (SELECT 0 AS x UNION SELECT 1) AS t ON TRUE)
;

-- Insert normal rows (a lot!)
INSERT INTO `tmp_other_items` (`id`, `text`) 
    (SELECT n, CONCAT('text for ', n) FROM integers);

Now you cna try again the accepted answer's query, which takes about 11seconds to run (but is simple):

-- Show join working (slow)
SELECT 
    t1.`id` AS json_table_id
,   t2.`id` AS joined_table_id
,   t2.`text` AS joined_table_text
FROM 
    (SELECT st1.id, st1.data->'$.matrix[*].id' as ids FROM `tmp_items` st1) t1
INNER JOIN `tmp_other_items` t2 ON JSON_CONTAINS(t1.ids, CAST(t2.`id` as JSON), '$')
;

And compare it to the faster approach of converting the JSON into a (temporary) table of ids, and then doing a JOIN over it (which lead to instant results, 0.000sec according to heidiSQL):

-- Fast
SELECT
    i.json_table_id,
    t2.id AS joined_table_id,
    t2.`text` AS joined_table_text
FROM (
    SELECT 
        j.json_table_id,
        -- Don't forget to CAST if needed, so the column type matches the index type
        -- Do an "EXPLAIN" and check its warnings if needed
        CAST(JSON_EXTRACT(j.ids, CONCAT('$[', i.n - 1, ']')) AS UNSIGNED) AS id
    FROM (
        SELECT 
            st1.id AS json_table_id,
            st1.data->'$.matrix[*].id' as ids,
            JSON_LENGTH(st1.data->'$.matrix[*].id') AS len
        FROM `tmp_items` AS st1) AS j
        INNER JOIN integers AS i ON i.n BETWEEN 1 AND len) AS i
    INNER JOIN tmp_other_items AS t2 ON t2.id = i.id
    ;

The most inner SELECT retrieves the list of JSON ids, along with their length (for outer join).

The 2nd inner SELECT takes this list of ids, and JOIN on the integers to retrieve the nth id of every JSON list, leading to a table of ids (instead of a table of jsons).

The outer most SELECT now only has to join this table of ids with the table containing the data you wanted.

Below is the same query using WHERE IN, to match the question title:

-- Fast (using WHERE IN)
SELECT t2.*
FROM tmp_other_items AS t2
WHERE t2.id IN (
    SELECT 
        CAST(JSON_EXTRACT(j.ids, CONCAT('$[', i.n - 1, ']')) AS UNSIGNED) AS id
    FROM (
        SELECT 
            st1.data->'$.matrix[*].id' as ids, 
            JSON_LENGTH(st1.data->'$.matrix[*].id') AS len
        FROM `tmp_items` AS st1) AS j
        INNER JOIN integers AS i ON i.n BETWEEN 1 AND len)
    ;

Starting from MySQL 8.0.13, there is MEMBER OF operator, which does exactly what you're looking for.

The query should be rewritten in the form of JOIN, though:

SELECT o.* FROM other_items o
JOIN items i ON o.id MEMBER OF(i.data->>'$.id')

If you want your query to have better performance, consider using multi-valued indexes on your JSON column.


Using of MEMBER OF() can be explained more clearly on the following example:

CREATE TABLE items ( data JSON );

INSERT INTO items
SET data = '{"id":[1,2,3]}';

That is how you find out whether the value is present in the JSON array:

SELECT * FROM items
WHERE 3 MEMBER OF(data->>'$.id');
+-------------------+
| data              |
+-------------------+
| {"id": [1, 2, 3]} |
+-------------------+
1 row in set (0.00 sec)

Note that type of the value matters in this case, unlike regular comparison. If you pass it in a form of string, there will be no match:

SELECT * FROM items
WHERE "3" MEMBER OF(data->>'$.id');

Empty set (0.00 sec)

Although regular comparison would return 1:

SELECT 3 = "3";
+---------+
| 3 = "3" |
+---------+
|       1 |
+---------+
1 row in set (0.00 sec)

Below is a complete answer. You may want a 'use <db_name>;' statement at the top of the script. The point is to show that JSON_CONTAINS() may be used to achieve the desired join.

DROP TABLE IF EXISTS `tmp_items`;
DROP TABLE IF EXISTS `tmp_other_items`;

CREATE TABLE `tmp_items` (`id` int NOT NULL PRIMARY KEY AUTO_INCREMENT, `data` json NOT NULL);
CREATE TABLE `tmp_other_items` (`id` int NOT NULL, `text` nvarchar(30) NOT NULL);

INSERT INTO `tmp_items` (`data`) 
VALUES 
    ('{ "matrix": [ { "id": 11 }, { "id": 12 }, { "id": 13 } ] }')
,   ('{ "matrix": [ { "id": 21 }, { "id": 22 }, { "id": 23 }, { "id": 24 } ] }')
,   ('{ "matrix": [ { "id": 31 }, { "id": 32 }, { "id": 33 }, { "id": 34 }, { "id": 35 } ] }')
;

INSERT INTO `tmp_other_items` (`id`, `text`) 
VALUES 
    (11, 'text for 11')
,   (12, 'text for 12')
,   (13, 'text for 13')
,   (14, 'text for 14 - never retrieved')
,   (21, 'text for 21')
,   (22, 'text for 22')
-- etc...
;

-- Show join working:
SELECT 
    t1.`id` AS json_table_id
,   t2.`id` AS joined_table_id
,   t2.`text` AS joined_table_text
FROM 
    (SELECT st1.id, st1.data->'$.matrix[*].id' as ids FROM `tmp_items` st1) t1
INNER JOIN `tmp_other_items` t2 ON JSON_CONTAINS(t1.ids, CAST(t2.`id` as json), '$')

You should see the following results:

Results

Tags:

Mysql

Arrays

Json