numpy slicing using user defined input - arrays

I have (in a larger project) data contained in numpy.array.
Based on user input I need to move a selected axis (dimAxisNr) to the first dimension of the array and slice one or more dimensions (including the first) based on user input (such as Select2 and Select0 in the example).
Using this input I generate a DataSelect which contains the information needed to slice. But the output size of the sliced array is different from the one using inline indexing. So basically I need a way to generate the '37:40:2' and '0:2' from an input list.
import numpy as np
# Axis that is moved to the front of the array further down.
dimAxisNr = 1
# Index lists chosen for axis 2 and axis 0 (see the DataSelect assignments below).
Select2 = [37,39]
Select0 = [0,1]
# Random 4-D test array.
plotData = np.random.random((102,72,145,2))
DataSetSize = np.shape(plotData)
# Start with one full-range slice per axis ...
DataSelect = [slice(0,item) for item in DataSetSize]
# ... then replace the entries for axes 2 and 0 with index arrays.
DataSelect[2] = np.array(Select2)
DataSelect[0] = np.array(Select0)
def shift(seq, n):
    """Rotate ``seq`` left by ``n`` positions and return the result.

    ``n`` is reduced modulo ``len(seq)``, so values larger than the length
    wrap around and negative values rotate to the right. ``seq`` is not
    modified; it must support ``len``, slicing and ``+`` (for example a
    list or a tuple).

    An empty sequence yields an empty copy instead of raising
    ``ZeroDivisionError`` from the modulo operation.
    """
    if len(seq) == 0:
        # Nothing to rotate, and ``n % 0`` would raise.
        return seq[:]
    n = n % len(seq)
    return seq[n:] + seq[:n]
# Sort and slice the data
print(np.shape(plotData))
print(DataSelect)
# Rotate the axis order left by dimAxisNr so that axis dimAxisNr comes first.
plotData = np.transpose(plotData, np.roll(range(plotData.ndim),-dimAxisNr))
# Rotate the per-axis selections the same way so they still line up with the axes.
DataSelect = shift(DataSelect,dimAxisNr)
print(DataSelect)
print(np.shape(plotData))
# NOTE(review): DataSelect is a list mixing slices and index arrays; this is the
# indexing step whose result shape differs from the inline indexing below.
# NOTE(review): plotData is overwritten here, so the "direct" indexing below
# operates on the already-sliced array rather than on the transposed one.
plotData = plotData[DataSelect]
print(np.shape(plotData))
# Hand-written form of the intended selection, used for comparison.
plotDataDirect = plotData[slice(0, 72, None), 37:40:2, slice(0, 2, None), 0:2]
print(np.shape(plotDataDirect))

I'm not sure I've understood your question at all...
But if the question is "How do I generate a slice based on a list of indices like [37,39,40,23] ?"
then I would answer : you don't have to, just use the list as is to select the right indices, like so :
# Small 2-D example array
a = np.random.rand(4,5)
print(a)
# An index list selects exactly these columns, in this order
indices = [2,3,1]
# Rows 0-1 via a slice, columns via the index list
print(a[0:2,indices])
Note that the sorting of the list matters: [2,3,1] yields a different result from [1,2,3]
Output :
>>> a
array([[ 0.47814802, 0.42069094, 0.96244966, 0.23886243, 0.86159478],
[ 0.09248812, 0.85569145, 0.63619014, 0.65814667, 0.45387509],
[ 0.25933109, 0.84525826, 0.31608609, 0.99326598, 0.40698516],
[ 0.20685221, 0.1415642 , 0.21723372, 0.62213483, 0.28025124]])
>>> a[0:2,[2,3,1]]
array([[ 0.96244966, 0.23886243, 0.42069094],
[ 0.63619014, 0.65814667, 0.85569145]])

I have found the answer to my question. I need to use numpy.ix_.
Here is the working code:
import numpy as np
dimAxisNr = 1        # axis that is moved to the front
Select2 = [37, 39]   # indices to keep along original axis 2
Select0 = [0, 1]     # indices to keep along original axis 0
plotData = np.random.random((102, 72, 145, 2))
DataSetSize = np.shape(plotData)

# np.ix_ works on 1-D index sequences, so every axis gets an explicit
# index array (the full range by default) instead of a slice object.
DataSelect = [np.arange(0, item) for item in DataSetSize]
DataSelect[2] = Select2  # same elements as the slice 37:40:2
DataSelect[0] = Select0  # same elements as the slice 0:2


def shift(seq, n):
    """Rotate ``seq`` left by ``n`` positions and return the result.

    ``n`` is reduced modulo ``len(seq)``; an empty sequence yields an empty
    copy instead of raising ``ZeroDivisionError``.
    """
    if len(seq) == 0:
        return seq[:]
    n = n % len(seq)
    return seq[n:] + seq[:n]


# Sort and slice the data
print(np.shape(plotData))
print(DataSelect)
# Rotate the axis order left by dimAxisNr so that axis dimAxisNr comes first,
# and rotate the per-axis selections the same way so they stay aligned.
plotData = np.transpose(plotData, np.roll(range(plotData.ndim), -dimAxisNr))
DataSelect = shift(DataSelect, dimAxisNr)
# np.ix_ turns the per-axis index lists into an open mesh, so each axis is
# selected independently (outer-product style), just like slicing would.
plotDataSlice = plotData[np.ix_(*DataSelect)]
print(np.shape(plotDataSlice))
# The same selection written inline for comparison. The last axis must be
# 0:2 to match Select0 = [0, 1]; both results then have shape (72, 2, 2, 2).
plotDataDirect = plotData[slice(0, 72, None), 37:40:2, slice(0, 2, None), 0:2]
print(np.shape(plotDataDirect))

Related

IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed

I am trying to identify Global Feature Relationships with SHAP values. The SHAP library returns three matrices and I am trying to select the SHAP matrix; however, I am getting this error: "IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed".
The code I have is below:
df_score = spark.sql("select * from sandbox.yt_trng_churn_device")
#XGBoost Model
import pickle
from xgboost import XGBClassifier
from mlflow.tracking import MlflowClient
client = MlflowClient()
local_dir = "/dbfs/FileStore/"
local_path = client.download_artifacts
model_path = '/dbfs/FileStore/'
model = XGBClassifier()
model = pickle.load(open(model_path, 'rb'))
HorizonDate = datetime.datetime(2022, 9, 5)
df = df_score
score_data = df.toPandas()
results = model.predict_proba(score_data)
results_l = model.predict(score_data)
score_data["p"]=pd.Series( (v[1] for v in results) )
score_data["l"]=pd.Series( (v for v in results_l) )
spark.createDataFrame(score_data).createOrReplaceTempView("yt_vw_tmp_dev__scores")
spark.sql("create or replace table sandbox.yt_vw_tmp_dev__scores as select * from yt_vw_tmp_dev__scores")
#SHAP Analysis on XGBoost
from shap import KernelExplainer, summary_plot
sql = """
select d_a.*
from
hive_metastore.sandbox.yt_trng_device d_a
right join
(select decile, msisdn, MSISDN_L2L
from(
select ntile(10) over (order by p desc) as decile, msisdn, MSISDN_L2L
from sandbox.yt_vw_tmp_dev__scores
) inc
order by decile) d_b
on d_a.MSISDN_L2L = d_b.MSISDN_L2L and d_a.msisdn = d_b.msisdn
"""
df = spark.sql(sql).drop('msisdn', 'imei', 'imsi', 'event_date', 'MSISDN_L2L', 'account_id')
score_df = df.toPandas()
mode = score_df.mode().iloc[0]
sample = score_df.sample(n=min(100, score_df.shape[0]), random_state=508502835).fillna(mode)
predict = lambda x: model.predict(pd.DataFrame(x, columns=score_df.columns))
explainer = KernelExplainer(predict, sample, link="identity")
shap_values = explainer.shap_values(sample, l1_reg=False)
# The return of the explainer has three matrices, we will get the shap values one
shap_values = shap_values[ :, :, 0]
I am fairly new to coding but it would be great if someone could give some direction on this

RandomizedSearchCV - IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

I'm using HDBSCAN clustering algorithm and using RandomizedSearchCV. When I fit the features with labels, I get error "IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed". Shape of embedding is (5000,4) and of hdb_labels is (5000,). Below is my code
# UMAP
umap_hdb = umap.UMAP(n_components=4, random_state = 42)
embedding = umap_hdb.fit_transform(customer_data_hdb)
# creating HDBSCAN wrapper
class HDBSCANWrapper(hdbscan.HDBSCAN):
def predict(self,X):
return self.labels_.astype(int)
# HDBSCAN
clusterer_hdb = HDBSCANWrapper(min_samples=40, min_cluster_size=1000, metric='manhattan', gen_min_span_tree=True).fit(embedding)
hdb_labels = clusterer_hdb.labels_
# specify parameters and distributions to sample from
param_dist = {'min_samples': [10,30,50,60,100,150],
'min_cluster_size':[100,200,300,400,500],
'cluster_selection_method' : ['eom','leaf'],
'metric' : ['euclidean','manhattan']
}
# validity_scorer
validity_scorer = make_scorer(hdbscan.validity.validity_index,greater_is_better=True)
n_iter_search = 20
random_search = RandomizedSearchCV(clusterer_hdb
,param_distributions=param_dist
,n_iter=n_iter_search
,scoring=validity_scorer
,random_state=42)
random_search.fit(embedding, hdb_labels)
I'm getting an error in the random_search.fit and could not get rid of it. Any suggestions/help would be appreciated.

Storing numpy.ndarrays from a loop

I am trying to store the numpy.ndarrays defined as x_c, y_c, and z_c for every iteration of the loop:
for z_value in np.arange(0, 5, 1):
ms.set_current_mesh(0)
planeoffset : float = z_value
ms.compute_planar_section(planeaxis = 'Z Axis', planeoffset = planeoffset)
m = ms.current_mesh()
matrix_name = m.vertex_matrix()
x_c = matrix_name[:,0]
y_c = matrix_name[:,1]
z_c = matrix_name[:,2]
I would like to be able to recall the three arrays at any z_value, preferably with reference to the z_value, i.e. x_c @ z_value = 2 or similar.
Thanks for any help!
p.s very new to coding, so please go easy on me.
You have to store each array in an external variable, for example a dictionary
# One dictionary per coordinate, keyed by the plane offset (a float).
x_c = {}
y_c = {}
z_c = {}
for z_value in np.arange(0, 5, 1):
    ms.set_current_mesh(0)
    # Convert to a plain float so all dictionary keys share one type.
    planeoffset = float(z_value)
    ms.compute_planar_section(planeaxis = 'Z Axis', planeoffset = planeoffset)
    m = ms.current_mesh()
    # Called before vertex_matrix(); see the explanation that follows the code.
    m.compact()
    print(m.vertex_number(), "vertices in Planar Section Z =", planeoffset)
    matrix_name = m.vertex_matrix()
    # Columns 0, 1 and 2 of the vertex matrix are stored as x, y and z.
    x_c[planeoffset] = matrix_name[:, 0]
    y_c[planeoffset] = matrix_name[:, 1]
    z_c[planeoffset] = matrix_name[:, 2]
Please ensure you call m.compact() before accessing the vertex_matrix or you will get a MissingCompactnessException error. Please note that storing something in x_c[2] is not the same as storing it in x_c[2.0], so choose whether your index has to be integers or floats and keep the same type (in this example, they are floats).
Later, you can recall values like this:
print("X Values with z=2.0")
print(x_c[2.0])

Python, face_recognition convert string to array

I want to convert a variable to a string and then to an array that I can use to compare, but I don't know how to do that.
my code:
import face_recognition
import numpy as np
a = face_recognition.load_image_file('C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191115_10_32_24_Pro.jpg') # my picture 1
b = face_recognition.load_image_file('C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191115_09_48_56_Pro.jpg') # my picture 2
c = face_recognition.load_image_file(
'C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191115_09_48_52_Pro.jpg') # my picture 3
d = face_recognition.load_image_file('C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\ziv sion.jpg') # my picture 4
e = face_recognition.load_image_file(
'C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191120_17_46_40_Pro.jpg') # my picture 5
f = face_recognition.load_image_file(
'C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191117_16_19_11_Pro.jpg') # my picture 6
a = face_recognition.face_encodings(a)[0]
b = face_recognition.face_encodings(b)[0]
c = face_recognition.face_encodings(c)[0]
d = face_recognition.face_encodings(d)[0]
e = face_recognition.face_encodings(e)[0]
f = face_recognition.face_encodings(f)[0]
Here I tried to convert the variable to a string
str_variable = str(a)
array_variable = np.array(str_variable)
my_face = a, b, c, d, e, f, array_variable
while True:
new = input('path: ')
print('Recognizing...')
unknown = face_recognition.load_image_file(new)
unknown_encodings = face_recognition.face_encodings(unknown)[0]
The program cannot use the variable:
results = face_recognition.compare_faces(array_variable, unknown_encodings, tolerance=0.4)
print(results)
recognize_times = int(results.count(True))
if (3 <= recognize_times):
print('hello boss!')
my_face = *my_face, unknown_encodings
please help me
The error shown:
Traceback (most recent call last):
File "C:/Users/zivsi/PycharmProjects/AI/pytt.py", line 37, in <module>
results = face_recognition.compare_faces(my_face, unknown_encodings, tolerance=0.4)
File "C:\Users\zivsi\AppData\Local\Programs\Python\Python36\lib\site-
packages\face_recognition\api.py", line 222, in compare_faces
return list(face_distance(known_face_encodings, face_encoding_to_check) <= tolerance)
File "C:\Users\zivsi\AppData\Local\Programs\Python\Python36\lib\site-packages\face_recognition\api.py", line 72, in face_distance
return np.linalg.norm(face_encodings - face_to_compare, axis=1)
ValueError: operands could not be broadcast together with shapes (7,) (128,)
First of all, the array_variable should actually be a list of the known encodings and not a numpy array.
Also you do not need str.
Now, in your case, if the input images i.e., a,b,c,d,f,e do NOT have the same dimensions, the error will persist. You can not compare images that have different sizes using this function. The reason is that the comparison is based on the distance and distance is defined on vectors of the same length.
Here is a working simple example using the photos from https://github.com/ageitgey/face_recognition/tree/master/examples:
import face_recognition
import numpy as np
from PIL import Image, ImageDraw
from IPython.display import display
# Load a sample picture and learn how to recognize it.
obama_image = face_recognition.load_image_file("obama.jpg")
obama_face_encoding = face_recognition.face_encodings(obama_image)[0]
# Load a second sample picture and learn how to recognize it.
biden_image = face_recognition.load_image_file("biden.jpg")
biden_face_encoding = face_recognition.face_encodings(biden_image)[0]
array_variable = [obama_face_encoding,biden_face_encoding] # list of known encodings
# compare the list with the biden_face_encoding
results = face_recognition.compare_faces(array_variable, biden_face_encoding, tolerance=0.4)
print(results)
[False, True] # True means match, False mismatch
# False: coming from obama_face_encoding VS biden_face_encoding
# True: coming from biden_face_encoding VS biden_face_encoding
To run it go here: https://beta.deepnote.com/project/09705740-31c0-4d9a-8890-269ff1c3dfaf#
Documentation: https://face-recognition.readthedocs.io/en/latest/face_recognition.html
EDIT
To save the known encodings you can use numpy.save
np.save('encodings',biden_face_encoding) # save
load_again = np.load('encodings.npy') # load again

Accessing instance variables inside an array

I am trying to access a specific value inside an array. The array contains specific class instance variables and is as follows:
[[#<Supermarket:0x007f8e989daef8 @id=1, @name="Easybuy">,
#<Delivery:0x007f8e989f98a8 @type=:standard, @price=5.0>],
[#<Supermarket:0x007f8e99039f88 @id=2, @name="Walmart">,
#<Delivery:0x007f8e989f98a8 @type=:standard, @price=5.0>],
[#<Supermarket:0x007f8e9901a390 @id=3, @name="Forragers">,
#<Delivery:0x007f8e989eae20 @type=:express, @price=10.0>]]
I want to iterate over each array inside the array and find out how many Delivery objects within the array have @type set to :standard. Is this possible? Thank you in advance.
# Count, across all inner arrays, the Delivery objects whose @type is :standard.
array_of_array.inject(0) do |sum, array|
  # Instance variables are addressed with a leading "@", so the symbol is :@type.
  sum + array.count { |el| el.class == Delivery && el.instance_variable_get(:@type) == :standard }
end
You can use select() to filter the elements of an array.
Reconstructing your data:
require 'ostruct'
require 'pp'
supermarket_data = [
['Easybuy', 1],
['Walmart', 2],
['Forragers', 3],
]
supermarkets = supermarket_data.map do |(name, id)|
supermarket = OpenStruct.new
supermarket.name = name
supermarket.id = id
supermarket
end
delivery_data = [
['standard', 5.0],
['standard', 5.0],
['express', 10.0],
]
deliveries = delivery_data.map do |(type, price)|
delivery = OpenStruct.new
delivery.type = type
delivery.price = price
delivery
end
combined = supermarkets.zip deliveries
pp combined
[[#<OpenStruct name="Easybuy", id=1>,
#<OpenStruct type="standard", price=5.0>],
[#<OpenStruct name="Walmart", id=2>,
#<OpenStruct type="standard", price=5.0>],
[#<OpenStruct name="Forragers", id=3>,
#<OpenStruct type="express", price=10.0>]]
Filtering the array with select():
standard_deliveries = combined.select do |(supermarket, delivery)|
delivery.type == 'standard'
end
pp standard_deliveries # pretty print
p standard_deliveries.count
[[#<OpenStruct name="Easybuy", id=1>,
#<OpenStruct type="standard", price=5.0>],
[#<OpenStruct name="Walmart", id=2>,
#<OpenStruct type="standard", price=5.0>]]
2

Resources