How to replace a list of values in a numpy array?

EDIT: I implemented a TensorFlow version of this in this answer (almost exactly the same, except replacements are a dict).


Here is a simple way to do it:

import numpy as np

# Sample data: the integers 0..39 in random order.
numbers = np.arange(40)
np.random.shuffle(numbers)
# Values to get rid of and, position by position, what to put in their place.
problem_numbers = np.asarray([33, 23, 15])  # table, night_stand, plant
alternative_numbers = np.asarray([12, 14, 26])  # desk, dresser, flower_pot

# Build a lookup table with one slot per integer in [n_min, n_max]. It starts
# out as the identity mapping, so values without a replacement map to themselves.
n_min = numbers.min()
n_max = numbers.max()
replacer = np.arange(n_min, n_max + 1)
# Only replacements whose source value falls inside the table can be stored.
mask = np.logical_and(problem_numbers >= n_min, problem_numbers <= n_max)
replacer[problem_numbers[mask] - n_min] = alternative_numbers[mask]
# Translate every element through the table with a single indexing operation.
numbers = replacer[numbers - n_min]

This works well and should be efficient as long as the range of the values in numbers (the difference between the smallest and the biggest) is not huge (e.g. you don't have something like 1, 7 and 10000000000).

Benchmarking

I've compared the code in the OP with the three (as of now) proposed solutions with this code:

import numpy as np

def method_itzik(numbers, problem_numbers, alternative_numbers):
    """Replace values pair by pair, using one boolean mask per pair.

    Each entry of ``problem_numbers`` is compared against the whole array and
    the matching elements are overwritten with the entry at the same position
    in ``alternative_numbers``.

    The pairs are applied in order on the same array, so a value produced by
    an earlier replacement can be replaced again by a later pair. The array
    obtained from ``np.asarray(numbers)`` is modified in place and returned.
    """
    numbers = np.asarray(numbers)
    for position, unwanted in enumerate(problem_numbers):
        matches = numbers == unwanted
        numbers[matches] = alternative_numbers[position]
    return numbers

def method_mseifert(numbers, problem_numbers, alternative_numbers):
    """Replace values through a plain Python dictionary lookup.

    A dict maps every problem number to its alternative. The array is turned
    into a Python list, each element is looked up with itself as the fallback
    (so values without a replacement stay as they are), and the result is
    packed into a new array. The input array is not modified.
    """
    values = np.asarray(numbers).tolist()
    lookup = dict(zip(problem_numbers, alternative_numbers))
    return np.array([lookup.get(value, value) for value in values])

def method_divakar(numbers, problem_numbers, alternative_numbers):
    """Replace values in ``numbers`` with a sort-and-search lookup.

    The problem numbers are sorted and de-duplicated, those outside the value
    range of ``numbers`` are dropped, and ``np.searchsorted`` then locates each
    remaining problem number in ``numbers``. Located elements are overwritten
    with the matching alternative.

    Parameters
    ----------
    numbers : array_like
        Values to process. The array obtained from ``np.asarray(numbers)`` is
        modified in place.
    problem_numbers : array_like
        Values to be replaced.
    alternative_numbers : array_like
        Replacement values, aligned position by position with
        ``problem_numbers``.

    Returns
    -------
    numpy.ndarray
        The modified array (the same object that was written to).

    Notes
    -----
    ``np.searchsorted`` yields a single position per searched value, so at
    most one element of ``numbers`` is replaced for each distinct problem
    number. If a problem number is listed several times, only one of its
    alternatives is used; which one depends on the order ``argsort`` produces.
    """
    numbers = np.asarray(numbers)
    problem_numbers = np.asarray(problem_numbers)
    alternative_numbers = np.asarray(alternative_numbers)

    # Nothing to replace (or nothing to replace in): min()/max() and the mask
    # construction below both fail on empty input, so return early.
    if numbers.size == 0 or problem_numbers.size == 0:
        return numbers

    # Pre-process problem_numbers and correspondingly alternative_numbers
    # such that repeats and no matches are taken care of
    sidx_pn = problem_numbers.argsort()
    pn = problem_numbers[sidx_pn]
    # Keep only the first entry of every run of equal problem numbers.
    mask = np.concatenate(([True], pn[1:] != pn[:-1]))
    an = alternative_numbers[sidx_pn]

    # Problem numbers outside [min, max] of numbers cannot occur in it.
    minN, maxN = numbers.min(), numbers.max()
    mask &= (pn >= minN) & (pn <= maxN)

    pn = pn[mask]
    an = an[mask]

    # Pre-processing done. Now, we need to use pn and an in place of
    # problem_numbers and alternative_numbers respectively. Map, index and assign.
    sidx = numbers.argsort()
    # Position in numbers of the candidate element for each problem number;
    # pn <= maxN guarantees the searchsorted result is a valid index.
    idx = sidx[np.searchsorted(numbers, pn, sorter=sidx)]
    # A candidate only counts if it really equals the problem number.
    valid_mask = numbers[idx] == pn
    numbers[idx[valid_mask]] = an[valid_mask]
    return numbers

def method_jdehesa(numbers, problem_numbers, alternative_numbers):
    """Replace values through a dense lookup table indexed by value.

    A table with one slot for every integer between the smallest and the
    largest element of ``numbers`` is created as the identity mapping. The
    slots of the problem numbers that fall inside that range are overwritten
    with their alternatives, and the whole array is then translated by
    indexing the table. A new array is returned; the input is not modified.

    The table size equals the value range of ``numbers``, so memory grows
    with ``max - min`` rather than with the number of elements.
    """
    values = np.asarray(numbers)
    sources = np.asarray(problem_numbers)
    targets = np.asarray(alternative_numbers)

    lowest = values.min()
    highest = values.max()
    # Identity table: slot k holds the value lowest + k.
    table = np.arange(lowest, highest + 1)

    # Replacements whose source lies outside the table cannot be stored.
    in_range = np.logical_and(sources >= lowest, sources <= highest)
    table[sources[in_range] - lowest] = targets[in_range]

    return table[values - lowest]

The results:

import numpy as np

# Seed NumPy's global random generator so the benchmark data is reproducible.
np.random.seed(100)

# Upper bound passed to every randint call below.
MAX_NUM = 100000
# The array the replacement methods operate on: 100000 random integers.
numbers = np.random.randint(0, MAX_NUM, size=100000)
# Values to replace: 500 random draws with repeated draws removed by np.unique,
# so the final count can be smaller than 500.
problem_numbers = np.unique(np.random.randint(0, MAX_NUM, size=500))
# One random replacement value for each problem number.
alternative_numbers = np.random.randint(0, MAX_NUM, size=len(problem_numbers))

%timeit method_itzik(numbers, problem_numbers, alternative_numbers)
10 loops, best of 3: 63.3 ms per loop

# This method expects lists
problem_numbers_l = list(problem_numbers)
alternative_numbers_l = list(alternative_numbers)
%timeit method_mseifert(numbers, problem_numbers_l, alternative_numbers_l)
10 loops, best of 3: 20.5 ms per loop

%timeit method_divakar(numbers, problem_numbers, alternative_numbers)
100 loops, best of 3: 9.45 ms per loop

%timeit method_jdehesa(numbers, problem_numbers, alternative_numbers)
1000 loops, best of 3: 822 µs per loop

In case not all problem_values are in numbers and they may even occur multiple times:

In that case I would just use a dict to keep the values to be replaced and use dict.get to translate problematic numbers:

# Map every problematic value to the value that should take its place.
replacer = {
    problem: alternative
    for problem, alternative in zip(problem_numbers, alternative_numbers)
}
# Work on plain Python values so they can be used for dictionary lookups.
numbers_list = numbers.tolist()
# Looking a value up with itself as the default leaves unmapped values as is.
numbers = np.array([replacer.get(value, value) for value in numbers_list])

Even though it has to go "through Python", this is almost self-explanatory and it's (probably) not much slower than a NumPy solution.

In case every problem_value is actually present in the numbers array and only once:

If you have the numpy_indexed package you could simply use numpy_indexed.indices:

>>> import numpy_indexed as ni
>>> numbers[ni.indices(numbers, problem_numbers)] = alternative_numbers

That should be pretty efficient even for big arrays.