Find Index Given Multiple Values Of Array With Numpy

May 30, 2024 Post a Comment

I understand that Numpy can generate index of an array given the value that we are looking for with numpy.where. My question: Is there a function that can generate index given mult

Solution 1:

Seems like I give this answer once a week or so. Fastest and most memory-efficient way is to use void views over as_strided views

defrolling_window(a, window):  #based on @senderle's answer: https://stackoverflow.com/q/7100242/2901002
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    c = np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
    return c

defvview(a):  #based on @jaime's answer: https://stackoverflow.com/a/16973510/4427777return np.ascontiguousarray(a).view(np.dtype((np.void, a.dtype.itemsize * a.shape[1])))

defpattwhere_void(pattern, a): # Using @PaulPanzer's template form above
    k, n = map(len, (pattern, a))
    pattern = np.atleast_2d(pattern)
    a = np.asanyarray(a)
    if k>n:return np.empty([0], int)
    return np.flatnonzero(np.in1d(vview(rolling_window(a, k)), vview(pattern)))

Solution 2:

This should work and be quite efficient:

a = np.array([1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0])
a0 = a==0
np.where(a0[:-3] & a0[1:-2] & a0[2:-1] & a0[3:])

(array([6, 12], dtype=int64),) # indices of first of 4 consecutive 0's

Solution 3:

Here are three different approaches.

Approach 1: linear correlation

import numpy as np

def pattwhere_corr(pattern, a):
    pattern, a = map(np.asanyarray, (pattern, a))
    k = len(pattern)
    if k>len(a):
        return np.empty([0], int)
    n = np.dot(pattern, pattern)
    a2 = a*a
    slf = a2[:k].sum() + np.r_[0, np.cumsum(a2[k:] - a2[:-k])]
    crs = np.correlate(a, pattern, 'valid')
    return np.flatnonzero(np.isclose(slf, n) & np.isclose(crs, n))

Approach 2: whittle down element by element

def pattwhere_sequ(pattern, a):
    pattern, a = map(np.asanyarray, (pattern, a))
    k = len(pattern)
    if k>len(a):
        return np.empty([0], int)
    hits = np.flatnonzero(a ==pattern[-1])
    for p inpattern[-2::-1]:
        hits -=1
        hits = hits[a[hits] == p]
    return hits

Approach 3: brute force

defpattwhere_direct(pattern, a):
    pattern, a = map(np.asanyarray, (pattern, a))
    k, n = map(len, (pattern, a))
    if k>n:
        return np.empty([0], int)
    astr = np.lib.stride_tricks.as_strided(a, (n-k+1, k), 2*a.strides)
    return np.flatnonzero((astr == pattern).all(axis=1))

Some testing:

k, n, p = 4, 100, 5
pattern, a = np.random.randint(0, p, (k,)), np.random.randint(0, p, (n,))

print('results consistent:',
      np.all(pattwhere_sequ(pattern, a) == pattwhere_corr(pattern, a)) &
      np.all(pattwhere_sequ(pattern, a) == pattwhere_direct(pattern, a)))

from timeit import timeit

for k, n, p in [(4, 100, 5), (10, 1000000, 4), (1000, 10000, 3)]:
    print('k, n, p = ', k, n, p)
    pattern, a = np.random.randint(0, p, (k,)), np.random.randint(0, p, (n,))

    glb = {'pattern': pattern, 'a': a}
    kwds = {'number': 1000, 'globals': glb}
    for name, glb['func'] inlist(locals().items()):
        ifnot name.startswith('pattwhere'):
            continueprint(name.replace('pattwhere_', '').ljust(8), '{:8.6f} ms'.format(
            timeit('func(pattern, a)', **kwds)))

Sample output. Note that these benchmarks are in a scenario where pattern occurs with random frequency. Results may change if it is for example overrepresented.

results consistent:Truek,n,p=41005corr0.090752mssequ0.015759msdirect0.023338msk,n,p=1010000004corr39.290270mssequ8.182161msdirect34.399724msk,n,p=1000 100003corr6.319400mssequ2.225807msdirect9.001689ms

Solution 4:

Try this code !

This will return you the range of consecutive zero location of numpy array . So you can replace the zero with any integer values in between these range.

Code :

import numpy as np
from itertools import groupby
a = np.array([1.,0.,0.,0.,1.,1.,0.,0.,0.,0.,1.,1.])
b = range(len(a))
for group in groupby(iter(b), lambda x: a[x]):
    if group[0]==0:
        lis=list(group[1])
        print([min(lis),max(lis)])

Output :

[1, 3]
[6, 9]

Solution 5:

Not the fastest method, but you can generate a robust solution using scipy for n-dimensional arrays and n-dimensional patterns.

import scipy
from scipy.ndimage import label


#=================# Helper functions#=================# Nested list to nested tuple helper function# from https://stackoverflow.com/questions/27049998/convert-a-mixed-nested-list-to-a-nested-tupledefto_tuple(L):
    returntuple(to_tuple(i) ifisinstance(i, list) else i for i in L)

# Helper function to convert array to set of tuplesdefarr2set(arr):
    returnset(to_tuple(arr.tolist()))

#===============# Main algorithm#===============# First pass: filter for exact matches
a1 = scipy.zeros_like(a, dtype=bool)
freq_dict = {}
notnan = ~scipy.isnan(pattern)
for i in scipy.unique(pattern[notnan]):
    a1 = a1 + (a == i)
    freq_dict[i] = (pattern == i).sum()

# Minimise amount of pattern checking by choosing least frequent occurrence
check_val = freq_dict.keys()[scipy.argmin(freq_dict.values())]

# Get set of indices of pattern
pattern_inds = scipy.transpose(scipy.nonzero(scipy.ones_like(pattern)*notnan))
check_ind = scipy.transpose(scipy.nonzero(pattern == check_val))[0]
pattern_inds = pattern_inds - check_ind
pattern_inds_set = arr2set(pattern_inds)

# Label different regions found in first pass which may contains pattern
label_arr, n = label(a1)

found_inds_list = []
pattern_size = len(pattern_inds)

for i inrange(1, n+1):
    arr_inds = scipy.transpose(scipy.nonzero(label_arr == i))
    bbox_inds = [ind for ind in arr_inds if a[tuple(ind)] == check_val]
    for ind in bbox_inds:
        check_inds_set = arr2set(arr_inds - ind)
        iflen(pattern_inds_set - check_inds_set) == 0:
            found_inds_list.append(tuple(scipy.transpose(pattern_inds + ind)))

# Replace valuesfor inds in found_inds_list:
    a[inds] = replace_value

Generate a random test array, pattern, and final replace value for a 4D case

replace_value = scipy.random.rand() # Final value that you want to replace everything with
nan = scipy.nan # Use this for places in the rectangular pattern array that you don't care about checking# Generate random data
a = scipy.random.random([12,12,12,12])*12
pattern = scipy.random.random([3,3,3,3])*12

# Put the pattern in random places
for i in range(4):
    j1, j2, j3, j4 = scipy.random.choice(xrange(10), 4, replace=True)
    a[j1:j1+3, j2:j2+3, j3:j3+3, j4:j4+3] = pattern

a_org = scipy.copy(a)

# Randomly insert nans in the pattern
for i in range(20):
    j1, j2, j3, j4 = scipy.random.choice(xrange(3), 4, replace=True)
    pattern[j1, j2, j3, j4] = nan

After running the main algorithm...

>>>print found_inds_list[-1]
(array([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
        9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
       10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
       11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11], dtype=int64), array([1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 1, 1,
       1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1,
       1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3],
      dtype=int64), array([5, 5, 5, 6, 6, 6, 7, 7, 5, 5, 6, 6, 7, 7, 7, 5, 5, 6, 6, 7, 5, 5,
       5, 6, 6, 7, 7, 5, 5, 6, 7, 7, 7, 5, 5, 5, 6, 6, 6, 7, 7, 7, 5, 5,
       5, 6, 6, 7, 5, 5, 5, 6, 6, 6, 7, 5, 5, 6, 6, 6, 7, 7, 7],
      dtype=int64), array([1, 2, 3, 1, 2, 3, 1, 3, 1, 3, 1, 3, 1, 2, 3, 2, 3, 1, 2, 2, 1, 2,
       3, 1, 2, 1, 3, 1, 2, 2, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2,
       3, 1, 3, 1, 1, 2, 3, 1, 2, 3, 2, 1, 3, 1, 2, 3, 1, 2, 3],
      dtype=int64))
>>>>>>replace_value # Display value that's going to be replaced
0.9263912485289564
>>>>>>print a_org[9:12, 1:4, 5:8, 1:4] # Display original rectangular window of replacement
[[[[ 9.68507479  1.77585089  5.06069382]
   [10.63768984 11.41148096  1.13120712]
   [ 6.83684611  2.46838238 11.40490158]]

  [[ 9.17344668 11.21669704  7.60737639]
   [ 3.14870787  6.22857282  5.61295454]
   [ 4.32709261  8.00493326  9.96124294]]

  [[ 4.16785078 10.66054365  2.95677408]
   [11.53789218  2.70725911 11.98647139]
   [ 5.00346525  4.75230895  4.05213149]]]


 [[[11.23856096  8.45979355  7.53268864]
   [ 6.14703327 11.90052117  5.48127994]
   [ 2.16777734 10.27373562  7.75420214]]

  [[10.04726853 11.44895046  7.78071007]
   [ 0.79030038  3.69735083  1.51921116]
   [11.29782542  2.58494314  9.8714708 ]]

  [[ 7.9356587   1.48053473  9.71362122]
   [ 5.11866341  3.43895455  6.86491947]
   [ 8.33774813  5.66923131  2.27884056]]]


 [[[ 0.75091443  2.02917445  5.68207987]
   [ 4.58299978  7.14960394  9.13853129]
   [10.60912932  4.52190424  0.6557605 ]]

  [[ 0.54393627  8.02341744 11.69489975]
   [ 9.09878676 10.60836714  2.41188805]
   [ 9.13098333  6.12284334  8.9349382 ]]

  [[ 5.84489355 10.19848245  1.65080169]
   [ 2.75161562  1.05154552  0.17804374]
   [ 3.3166642  10.74081484  5.13601563]]]]
>>>>>>print a[9:12, 1:4, 5:8, 1:4] # Same window in the replaced array
[[[[ 0.92639125  0.92639125  0.92639125]
   [ 0.92639125  0.92639125  0.92639125]
   [ 0.92639125  2.46838238  0.92639125]]

  [[ 0.92639125 11.21669704  0.92639125]
   [ 0.92639125  6.22857282  0.92639125]
   [ 0.92639125  0.92639125  0.92639125]]

  [[ 4.16785078  0.92639125  0.92639125]
   [ 0.92639125  0.92639125 11.98647139]
   [ 5.00346525  0.92639125  4.05213149]]]


 [[[ 0.92639125  0.92639125  0.92639125]
   [ 0.92639125  0.92639125  5.48127994]
   [ 0.92639125 10.27373562  0.92639125]]

  [[ 0.92639125  0.92639125  7.78071007]
   [ 0.79030038  0.92639125  1.51921116]
   [ 0.92639125  0.92639125  0.92639125]]

  [[ 0.92639125  0.92639125  0.92639125]
   [ 0.92639125  0.92639125  0.92639125]
   [ 0.92639125  0.92639125  0.92639125]]]


 [[[ 0.92639125  0.92639125  0.92639125]
   [ 0.92639125  7.14960394  0.92639125]
   [ 0.92639125  4.52190424  0.6557605 ]]

  [[ 0.92639125  0.92639125  0.92639125]
   [ 0.92639125  0.92639125  0.92639125]
   [ 9.13098333  0.92639125  8.9349382 ]]

  [[ 0.92639125 10.19848245  0.92639125]
   [ 0.92639125  0.92639125  0.92639125]
   [ 0.92639125  0.92639125  0.92639125]]]]
>>>>>>print pattern # The pattern that was matched and replaced
[[[[ 9.68507479  1.77585089  5.06069382]
   [10.63768984 11.41148096  1.13120712]
   [ 6.83684611         nan 11.40490158]]

  [[ 9.17344668         nan  7.60737639]
   [ 3.14870787         nan  5.61295454]
   [ 4.32709261  8.00493326  9.96124294]]

  [[        nan 10.66054365  2.95677408]
   [11.53789218  2.70725911         nan]
   [        nan  4.75230895         nan]]]


 [[[11.23856096  8.45979355  7.53268864]
   [ 6.14703327 11.90052117         nan]
   [ 2.16777734         nan  7.75420214]]

  [[10.04726853 11.44895046         nan]
   [        nan  3.69735083         nan]
   [11.29782542  2.58494314  9.8714708 ]]

  [[ 7.9356587   1.48053473  9.71362122]
   [ 5.11866341  3.43895455  6.86491947]
   [ 8.33774813  5.66923131  2.27884056]]]


 [[[ 0.75091443  2.02917445  5.68207987]
   [ 4.58299978         nan  9.13853129]
   [10.60912932         nan         nan]]

  [[ 0.54393627  8.02341744 11.69489975]
   [ 9.09878676 10.60836714  2.41188805]
   [        nan  6.12284334         nan]]

  [[ 5.84489355         nan  1.65080169]
   [ 2.75161562  1.05154552  0.17804374]
   [ 3.3166642  10.74081484  5.13601563]]]]

Learn Python Tutorials