How to write a case when like statement in numpy array

This statement can be written using np.select as follows:

import numpy as np

# Sample input: uniform draws shifted down by 0.3 so that some values are negative.
residual = np.random.rand(10) - 0.3

# condlist[i] selects the elements that receive choicelist[i]. The first two
# conditions both match 0.5; np.select resolves that by taking the first match.
condlist = [
    (residual >= 0.0) & (residual <= 0.5),
    (residual >= 0.5) & (residual <= 0.7),
    residual > 0.7,
]
choicelist = [
    -2 * 1.2 * residual,
    -2 * 1.0 * residual,
    -2 * 2.0 * residual,
]

# Elements matching no condition (the negative ones) keep their value.
residual = np.select(condlist, choicelist, default=residual)

Note that when multiple conditions in condlist are satisfied, the first one encountered is used. When all conditions evaluate to False, the default value is used. Also, you need to use the bitwise operator & on boolean numpy arrays, because the Python keyword `and` won't work on them.

Let's benchmark these answers:

Click to copy

# Benchmark input: 10000 uniform draws shifted down by 0.3, so the values lie
# in [-0.3, 0.7) and include negatives.
residual = np.random.rand(10000) -0.3

def charl_3where(residual):
    """Rescale ``residual`` piecewise using three chained ``np.where`` passes.

    Values in [0, 0.5] are multiplied by -2*1.2, values in [0.5, 0.7] by
    -2*1.0 and values above 0.7 by -2*2.0; everything else is left as is.
    Each pass tests the output of the previous one. A value that has already
    been rescaled is <= 0, so it cannot match a later band.
    """
    in_low_band = (residual >= 0.0) & (residual <= 0.5)
    after_low = np.where(in_low_band, -2 * 1.2 * residual, residual)

    in_mid_band = (after_low >= 0.5) & (after_low <= 0.7)
    after_mid = np.where(in_mid_band, -2 * 1.0 * after_low, after_low)

    return np.where(after_mid > 0.7, -2 * 2.0 * after_mid, after_mid)

def yaco_select(residual):
    """Rescale ``residual`` piecewise with a single ``np.select`` call.

    Each entry of ``cases`` pairs a boolean mask with the values to use where
    that mask holds. Where several masks match (only at exactly 0.5),
    ``np.select`` takes the first; where none match, the input value is kept.
    """
    cases = (
        ((residual >= 0.0) & (residual <= 0.5), -2 * 1.2 * residual),
        ((residual >= 0.5) & (residual <= 0.7), -2 * 1.0 * residual),
        (residual > 0.7, -2 * 2.0 * residual),
    )
    masks = [mask for mask, _ in cases]
    replacements = [values for _, values in cases]
    return np.select(masks, replacements, default=residual)


%timeit charl_3where(residual)
>>> 112 µs ± 1.7 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

%timeit yaco_select(residual)
>>> 141 µs ± 2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

Let's try to optimize these with numba:

Click to copy

from numba import jit

@jit(nopython=True)
def yaco_numba(residual):
    """Rescale ``residual`` piecewise with an explicit loop compiled by numba.

    Iterates over the first axis and writes into a new array of the same
    shape and dtype as the input. The branches are tested in ascending order,
    so each ``elif`` only sees values that failed every earlier test.
    """
    n_items = residual.shape[0]
    out = np.empty_like(residual)
    for idx in range(n_items):
        value = residual[idx]
        if value < 0.0:
            # Negative values pass through unchanged.
            out[idx] = value
        elif value <= 0.5:
            out[idx] = -2 * 1.2 * value
        elif value <= 0.7:
            out[idx] = -2 * 1.0 * value
        else:
            # Everything that failed the tests above, i.e. value > 0.7.
            out[idx] = -2 * 2.0 * value
    return out

%timeit yaco_numba(residual)
>>> 6.65 µs ± 123 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

Final check

Click to copy

res1 = charl_3where(residual)
res2 = yaco_select(residual)
res3 = yaco_numba(residual)
np.allclose(res1,res3)
>>> True
np.allclose(res2,res3)
>>> True

The numba version is roughly 17x faster than the previous best one (6.65 µs versus 112 µs). Hope this helps.

You can use the syntax (condition1) & (condition2) in np.where() calls, so you would modify your function's np.where() calls like so:

Click to copy

def custom_asymmetric_train(y_true, y_pred):
    """Rescale the residual ``y_true - y_pred`` band by band (excerpt).

    Only the start of the function is shown; the trailing ``...`` stands for
    code that is not part of this snippet.

    NOTE(review): the inputs presumably are NumPy arrays, since their
    difference must provide ``.astype`` -- confirm against the caller.
    """
    # Work on a float copy of the element-wise difference.
    residual = (y_true - y_pred).astype("float")
    # Each np.where keeps the current value where its condition is False, so
    # the three passes can be chained on the same variable.
    residual = np.where((residual>=0.0)&(residual<=0.5), -2*1.2*residual, residual)
    residual = np.where((residual>=0.5)&(residual<=0.7), -2*1.0*residual, residual)
    residual = np.where(residual>0.7, -2*2.0*residual, residual)
    ...

The first argument is the condition to meet, the second argument is the value to use if the condition is met, the third argument is the value to use if the condition is not met.

You can also use vectorization with boolean masks, which works as long as the conditions are mutually exclusive:

Click to copy

residual = (y_true - y_pred).astype(float)
m1 = (residual>=0.0)&(residual<=0.5)
m2 = (residual>=0.5)&(residual<=0.7)
m3 = (residual >0.7)

new_residual = -2*(m1 *1.2 *residual + m2*residual + m3*2.0*residual)
return new_residual

This has the following performance:

Click to copy

# Benchmark input: 10000 uniform draws shifted down by 0.3, so the values lie
# in [-0.3, 0.7) and include negatives.
residual = np.random.rand(10000) -0.3
def speed_test(residual):
    """Rescale ``residual`` piecewise using boolean-mask arithmetic.

    Negative values are returned unchanged, values in [0, 0.5] are multiplied
    by -2*1.2, values in (0.5, 0.7] by -2*1.0 and values above 0.7 by -2*2.0.

    Args:
        residual: array-like of residuals; it is converted to a float array.

    Returns:
        A float array of the same shape holding the rescaled residuals.
    """
    # Use the argument itself. The earlier version rebuilt the residual from
    # the undefined names y_true and y_pred, which fails with NameError.
    residual = np.asarray(residual, dtype=float)

    # The masks must be mutually exclusive because the masked terms are
    # summed: the middle band starts strictly above 0.5 so that 0.5 is only
    # counted once.
    keep = residual < 0.0
    m1 = (residual >= 0.0) & (residual <= 0.5)
    m2 = (residual > 0.5) & (residual <= 0.7)
    m3 = residual > 0.7

    # A boolean mask acts as 0/1 in arithmetic, so every element picks up
    # exactly one of the four terms.
    return keep * residual - 2 * (m1 * 1.2 * residual + m2 * residual + m3 * 2.0 * residual)

%timeit speed_test(residual)
123 µs ± 35.3 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

How to write a case when like statement in numpy array

Tags:

Python

Numpy

Numpy Ndarray

Related

Recent Posts