Dictionary output has arrays inside it

I am trying one of the online tutorials: build a dictionary of nine numbers and create another dictionary with statistics. Below is the code with the input data, and the result as well.
import numpy as np

a = [0, 1, 2, 3, 4, 5, 6, 7, 8]
arr = np.array(a).reshape(3, 3).astype(int)

# One generic function replaces the six near-identical calculate1..calculate6
# (each of which shadowed its own name and duplicated the same three calls).
def calculate(stat, axis):
    """Apply *stat* (a bound ndarray method such as arr.mean) along *axis*.

    axis=0 gives per-column values, axis=1 per-row values, and axis=None
    collapses the whole array to a single scalar.
    """
    return stat(axis=axis)

# Map each output label to the ndarray method that computes that statistic.
stats = {
    "mean": arr.mean,
    "variance": arr.var,
    "standard deviation": arr.std,
    "max": arr.max,
    "min": arr.min,
    "sum": arr.sum,
}

# Same structure as before: each key maps to [axis-0 result, axis-1 result,
# flattened result]. The first two entries are ndarrays, the last a scalar.
result = {
    name: [calculate(stat, axis) for axis in (0, 1, None)]
    for name, stat in stats.items()
}

for k, v in result.items():
    print(k, v)
And here is the result
mean [array([3., 4., 5.]), array([1., 4., 7.]), 4.0]
variance [array([6., 6., 6.]), array([0.66666667, 0.66666667, 0.66666667]), 6.666666666666667]
standard deviation [array([2.44948974, 2.44948974, 2.44948974]), array([0.81649658, 0.81649658, 0.81649658]), 2.581988897471611]
max [array([6, 7, 8]), array([2, 5, 8]), 8]
min [array([0, 1, 2]), array([0, 3, 6]), 0]
sum [array([ 9, 12, 15]), array([ 3, 12, 21]), 36]
I have two questions here:
1- Is there a way that I can combine or minimize the number of functions to one or something like that. Please note that I (have to) use the function.
2- The output is correct (in values), however I am not sure why the word (array) is printing as well, and when I check the type of the values inside the dictionary, it shows that they are <class 'list'>, so where this array word is coming from?
I tried the tolist() method and plenty of other online suggestions, but nothing worked
Any help or suggestion is highly appreciated

You can store your functions inside a dict and then iterate over it:
from pprint import pprint
import numpy as np
def main():
    """Print per-axis and overall summary statistics of a random 3x3 array."""
    arr = np.random.rand(3, 3)

    # Each statistic is already exposed as a bound method of arr,
    # so the lookup table can hold the methods directly.
    functions = {
        "mean": arr.mean,
        "var": arr.var,
        "std": arr.std,
        "max": arr.max,
        "min": arr.min,
        "sum": arr.sum,
    }
    axes = (0, 1, None)

    result = {}
    for funcname, func in functions.items():
        values = []
        for axis in axes:
            # tolist() turns ndarray results into plain lists
            # (and 0-d results into plain Python scalars).
            values.append(func(axis=axis).tolist())
        result[funcname] = values

    # Alternatively, the same dict built in one comprehension:
    result = {
        funcname: [func(axis=axis).tolist() for axis in axes]
        for funcname, func in functions.items()
    }

    pprint(result)

if __name__ == "__main__":
    main()
Prints:
{'max': [[0.33149413492721314, 0.9252576833729358, 0.9616249059176883],
[0.37580580905770067, 0.9616249059176883, 0.9252576833729358],
0.9616249059176883],
'mean': [[0.23391570323037428, 0.4063894010374775, 0.6764668740080081],
[0.20197437573445387, 0.4652236940918113, 0.6495739084495947],
0.43892399275862],
'min': [[0.0958037701384552, 0.13431354800720574, 0.37580580905770067],
[0.0958037701384552, 0.15959697173229104, 0.33149413492721314],
0.0958037701384552],
'std': [[0.10039824223253171, 0.3670404461719236, 0.23941075106262735],
[0.1239187264736742, 0.35412651334119355, 0.24424967197333333],
0.3170854368356986],
'sum': [[0.7017471096911229, 1.2191682031124325, 2.029400622024024],
[0.6059231272033616, 1.395671082275434, 1.948721725348784],
3.95031593482758],
'var': [[0.010079807043382115, 0.13471868912608476, 0.057317507724371324],
[0.015355850770857285, 0.12540558745119054, 0.05965790225908093],
0.10054317425328584]}
As for why there is "array" printed, it is because, e.g., np.mean(arr, axis=1) returns a numpy array.

Related

Plotting a list vs a list of arrays with matplotlib

Let's say I have two lists a and b, whereas one is a list of arrays
a = [1200, 1400, 1600, 1800]
b = [array([ 1.84714754, 4.94204658, 11.61580355, ..., 17.09772144,
17.09537562, 17.09499705]), array([ 3.08541849, 5.11338795, 10.26957508, ..., 16.90633304,
16.90417909, 16.90458781]), array([ 4.61916789, 4.58351918, 4.37590053, ..., -2.76705271,
-2.46715664, -1.94577492]), array([7.11040853, 7.79529924, 8.48873734, ..., 7.78736448, 8.47749987,
9.36040364])]
The shape of both is said to be (4,)
If I now try to plot these via plt.scatter(a, b)
I get an error I can't relate to: ValueError: setting an array element with a sequence.
At the end I want a plot where per n-th value in a a set of values stored as n-th array in b shall be plotted.
I'm pretty sure I've done this before, but I can't get this working.
Any ideas? ty
You need to adjust the elements in a to match the elements in b
# Repeat each element of a once per point in the matching sub-array of b.
len_b = [len(sub_array) for sub_array in b]
a = [repeat_a for i, repeat_a in enumerate(a) for _ in range(len_b[i])]
# Flatten the list of arrays into one flat list of values.
# NOTE: np.ravel(b) only flattens when every sub-array has the same length;
# for ragged input it produces one element per sub-array (see the follow-up
# later in this thread). np.concatenate handles both cases correctly.
b = np.concatenate(b).tolist()
# check if lengths are same
assert len(a) == len(b)
# if yes, now this should work
plt.scatter(a, b)
I am afraid repetition it is. If all lists in b have the same length, you can use numpy.repeat:
import numpy as np
import matplotlib.pyplot as plt

# fake data
np.random.seed(123)
a = [1200, 1400, 1600, 1800]
b = np.random.randint(1, 100, (4, 11)).tolist()

# Repeat each x-value once per column of b so x and y have matching lengths.
x_coords = np.repeat(a, len(b[0]))
plt.scatter(x_coords, b)
plt.show()
If you are not sure and want to be on the safe side, list comprehension it is.
import numpy as np
import matplotlib.pyplot as plt

# fake data
np.random.seed(123)
a = [1200, 1400, 1600, 1800]
b = np.random.randint(1, 100, (4, 11)).tolist()

# Build x-coordinates row by row: each x repeated to match its row of b.
x_rows = []
for row_index, x in enumerate(a):
    x_rows.append([x] * len(b[row_index]))
plt.scatter(x_rows, b)
plt.show()
The output is the same:
Referring to the suggestion of #sai I tried
import numpy as np
arr0 = np.array([1, 2, 3, 4, 5])
arr1 = np.array([6, 7, 8, 9])
arr2 = np.array([10, 11])
# Three arrays of DIFFERENT lengths -- the ragged case.
old_b = [arr0, arr1, arr2]
# Because the lengths differ, no rectangular array can be formed: ravel
# yields a 1-D object array with one element per input array, so len(b)
# is 3, not 11. (The thread's later edit switches to np.concatenate,
# which does flatten ragged input.)
# NOTE(review): on recent NumPy versions building this ragged object
# array may raise instead of silently succeeding -- verify locally.
b = np.ravel(old_b).tolist()
print(len(b))
Which will give me length 3 instead of the length 11 I expected. How can I collapse a list of arrays to a single list?
edit:
b = np.concatenate(old_b).ravel().tolist()
will lead to the desired result. Thanks all.

Numpy make the product between all elemens and then insert into a triangular 2d array

Suppose we got a 1D array below
arr = np.array([a,b,c])
The first thing I need to do is the make the product of all of the elments, i.e
[ab,ac,bc]
Then construct a 2d triangular array with this element
[
[a,ab,ac],
[0,b,bc],
[0,0,c]
]
Create a diagonal with your 1-D array and fill the upper triangle of it with upper triangle of outer:
# Pairwise products: pairwise[i, j] == arr[i] * arr[j].
pairwise = np.outer(arr, arr)
# Square matrix with arr on the diagonal and zeros elsewhere.
out = np.diag(arr)
# Indices of the strictly-upper triangle (k=1 excludes the diagonal),
# then copy the corresponding outer-product entries into place.
upper = np.triu_indices(arr.size, k=1)
out[upper] = pairwise[upper]
One way to do this is to calculate the outer product of your 1d array and then use masking informed by the knowledge that you only want the upper triangle of the 2d triangular matrix.
import numpy as np

a = np.array([5, 4, 3])
n = len(a)
# Full outer product via broadcasting: outer[i, j] = a[i] * a[j].
outer = a[:, None] * a
# Zero the diagonal and everything below it...
outer[np.tril_indices(n)] = 0
# ...then restore the original vector on the diagonal.
outer[np.diag_indices(n)] = a
outer
array([[ 5, 20, 15],
[ 0, 4, 12],
[ 0, 0, 3]])
We can use masking to achieve our desired result, like so -
def upper_outer(a):
    """Outer product of a with itself, keeping only the upper triangle
    and placing the original vector on the diagonal."""
    prod = np.outer(a, a)
    # Boolean mask of the strictly-lower triangle (k=-1) -> zero it out.
    prod[np.tri(len(a), k=-1, dtype=bool)] = 0
    np.fill_diagonal(prod, a)
    return prod
Sample run -
In [84]: a = np.array([3,6,2])
In [86]: upper_outer(a)
Out[86]:
array([[ 3, 18, 6],
[ 0, 6, 12],
[ 0, 0, 2]])
Benchmarking
Other approaches :
# #Nick Becker's soln
def tril_diag(a):
    """Upper-triangular outer product with the input vector on the diagonal."""
    size = len(a)
    # Broadcasted outer product: result[i, j] = a[i] * a[j].
    result = a[:, None] * a
    result[np.tril_indices(size)] = 0
    result[np.diag_indices(size)] = a
    return result
# #Ehsan's soln
def triu_outer(arr):
    """Diagonal matrix of arr with the upper triangle filled from the
    outer product arr[i] * arr[j]."""
    pairwise = np.outer(arr, arr)
    result = np.diag(arr)
    idx = np.triu_indices(arr.size, k=1)
    result[idx] = pairwise[idx]
    return result
Using benchit package (few benchmarking tools packaged together; disclaimer: I am its author) to benchmark proposed solutions.
import benchit
# Benchmark inputs of increasing size; one timing per (function, size) pair.
in_ = [np.random.rand(n) for n in [10,100,200,500,1000,5000]]
# The three competing implementations defined earlier in the thread.
funcs = [upper_outer, tril_diag, triu_outer]
t = benchit.timings(funcs, in_)
# Rank the functions by speed, then save a log-x timings plot to disk.
t.rank()
t.plot(logx=True, save='timings.png')
For large datasets, we can also use numexpr to leverage multi-cores -
import numexpr as ne
# Multi-core variant of upper_outer: numexpr evaluates the masked outer
# product in parallel across cores.
def upper_outer_v2(a):
# ~np.tri(...) is True strictly ABOVE the diagonal (np.tri's lower
# triangle, including the diagonal, is inverted), so the product is
# zeroed on and below the diagonal...
mask = ~np.tri(len(a), dtype=bool)
out = ne.evaluate('a2D*a*mask',{'a2D':a[:,None], 'a':a, 'mask':mask})
# ...and the diagonal is then restored to the original vector.
np.fill_diagonal(out,a)
return out
New timings plot :
There is a blas function for (almost) that:
# example
from scipy.linalg.blas import dsyr

a = np.array([1., 2., 5.])
# dsyr accumulates alpha * x * x^T into one triangle of a symmetric
# matrix (lower=1 here); transpose afterwards since BLAS returns
# FORTRAN (column-major) order, which puts the data in the upper triangle.
out = dsyr(1, a, 1).T
# The diagonal currently holds a**2; overwrite it with a itself.
np.fill_diagonal(out, a)
out
# array([[ 1., 2., 5.],
# [ 0., 2., 10.],
# [ 0., 0., 5.]])
benchit (thanks #Divakar)

How to check if a numpy array is inside a Python sequence?

I'd like to check if a given array is inside a regular Python sequence (list, tuple, etc). For example, consider the following code:
import numpy as np
xs = np.array([1, 2, 3])
ys = np.array([4, 5, 6])
myseq = (xs, 1, True, ys, 'hello')
I would expect that simple membership checking with in would work, e.g.:
>>> xs in myseq
True
But apparently it fails if the element I'm trying to find isn't at the first position of myseq, e.g.:
>>> ys in myseq
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
So how can I perform this check?
If possible I'd like to do this without having to cast myseq into a numpy array or any other sort of data structure.
You can use any with the test that is appropriate:
import numpy as np

xs = np.array([1, 2, 3])
ys = np.array([4, 5, 6])
zs = np.array([7, 8, 9])
myseq = (xs, 1, True, ys, 'hello')

def arr_in_seq(arr, seq):
    """Return True if seq contains an element of arr's exact type that is
    broadcast-equal to arr (np.array_equiv)."""
    wanted_type = type(arr)
    for element in seq:
        # Type check first so array_equiv is only called on comparable items.
        if isinstance(element, wanted_type) and np.array_equiv(element, arr):
            return True
    return False
Testing:
# Show the membership result for each candidate array.
for candidate in (xs, ys, zs):
    print(arr_in_seq(candidate, myseq))
True
True
False
This is probably not the most beautiful or fastest solution, but I think it works:
import numpy as np

def array_in_tuple(array, tpl):
    """Return True if tpl contains an ndarray equal in shape and values
    to *array* (np.array_equal)."""
    for element in tpl:
        # Only compare against actual ndarrays to avoid the ambiguous
        # truth-value error raised by `array in tuple`.
        if isinstance(element, np.ndarray) and np.array_equal(array, element):
            return True
    return False

xs = np.array([1, 2, 3])
ys = np.array([4, 5, 6])
myseq = (xs, 1, True, ys, 'hello')
print(array_in_tuple(xs, myseq), array_in_tuple(ys, myseq), array_in_tuple(np.array([7, 8, 9]), myseq))

Turn array into array of arrays following structure of another array

I would like to turn an array into an array of arrays following another array of arrays. I'm not sure how to do this, here are the arrays:
orig_array = [[0,1],[4],[3],[],[3,2,6],[]]
my_array = [2,0,1,3,3,4,5]
wanted_array = [[2,0],[1],[3],[],[3,4,5],[]]
I would like to keep the empty arrays.
Thanks
Get the lengths of each element in orig_array, perform a cumulative summation over those lengths to get the indices at which my_array needs to be split, and finally use np.split to actually perform the splitting. Thus, the implementation would look something like this -
# The split points are the cumulative lengths of orig_array's sublists.
lens = list(map(len, orig_array))
# np.split always yields one trailing piece after the last split point;
# it is empty here, so drop it with [:-1].
out = np.split(my_array, np.cumsum(lens))[:-1]
Sample run -
In [72]: orig_array = np.array([[0,1],[4],[3],[],[3,2,6],[]])
...: my_array = np.array([2,0,1,3,3,4,5])
...:
In [73]: lens = [len(item) for item in orig_array]
...: out = np.split(my_array,np.cumsum(lens))[:-1]
...:
In [74]: out
Out[74]:
[array([2, 0]),
array([1]),
array([3]),
array([], dtype=int64),
array([3, 4, 5]),
array([], dtype=int64)]
def do(format, values):
    """Rebuild the nested list structure of *format*, consuming *values*
    front-to-back.

    Note: pops from *values*, so the caller's list is emptied.
    """
    # isinstance (rather than type(...) == list) also accepts list
    # subclasses and is the idiomatic type check.
    if isinstance(format, list):
        return [do(v, values) for v in format]
    else:
        return values.pop(0)
# Python 3 print call (the original used Python 2 print-statement syntax).
print(do(orig_array, my_array))
Note: this destroys the array where the values come from.
You could do the following:
import copy

def reflect_array(orig_array, order):
    """Return a deep copy of orig_array's nested structure, refilled with
    the values of *order* taken front-to-back.

    Note: *order* is consumed (mutated) by this call.
    """
    wanted_array = copy.deepcopy(orig_array)
    for i, part_list in enumerate(orig_array):
        for j, _ in enumerate(part_list):
            # pop(0) keeps left-to-right order; the original pop() took
            # values from the END, filling the structure in reverse
            # (contradicting the sample output shown below).
            wanted_array[i][j] = order.pop(0)
    return wanted_array
Test run:
orig_array = [[0,1],[4],[3],[],[3,2,6],[]]
my_array = [2,0,1,3,3,4,5]
# Python 3 print call (the original used Python 2 print-statement syntax).
print(reflect_array(orig_array, my_array))
# [[2, 0], [1], [3], [], [3, 4, 5], []]
In [858]: my_array = [2,0,1,3,3,4,5]
In [859]: [[my_array.pop(0) for _ in range(len(x))] for x in orig_array]
Out[859]: [[2, 0], [1], [3], [], [3, 4, 5], []]
Use b=my_array[:] if you don't want to change my_array.
This operates on the same principle as #karoly's answer; just more direct because it assumes only one level of nesting.

Perform different calculation on each element of an array

I have an array. I need to perform a different calculation on each element. I thought I could do something like the following:
# A map block can only yield ONE value per element, so the original
# comma-separated expression list was invalid Ruby. Pair each element
# with its own factor by position instead.
def calc(a, b, c)
  arr = [a, b, c]
  factors = [600, 800, 1000]
  arr.each_with_index.map { |value, i| value * factors[i] }
end

calc(5, 8, 15)
but this does not work. How can I perform different calculations on each element of a single array?
Here are some other implementations that might be helpful. By putting the multipliers into an array, we can use zip to connect the element in the input array with the appropriate multiplier value. In addition, that makes it simple to abstract the logic further by removing the multiplier values from the logic that does the multiplication (in multiply_arrays and transform_arrays).
#!/usr/bin/env ruby

VALUES = [1, 1, 1]
MULTIPLIERS = [600, 800, 1000]

# Multiply each value by the multiplier in the same position.
def transform(*values)
  values.zip(MULTIPLIERS).map { |value, factor| value * factor }
end

# Element-wise product of two same-length arrays.
def multiply_arrays(array1, array2)
  array1.zip(array2).map { |left, right| left * right }
end

# Element-wise application of an arbitrary binary method (e.g. :*).
def transform_arrays(array1, array2, method_name)
  array1.zip(array2).map { |left, right| left.public_send(method_name, right) }
end

p transform(*VALUES) # [600, 800, 1000]
p multiply_arrays(VALUES, MULTIPLIERS) # [600, 800, 1000]
p transform_arrays(VALUES, MULTIPLIERS, :*) # [600, 800, 1000]
If the calculations need to be substantially different (different operators, values, more complex logic), than I'd consider using an array of lambdas:
def transform_with_lambdas(values, transforms)
values.zip(transforms).map do |value, transform|
transform.(value)
end
end
TRANSFORMS = [
->(x) { x * 600 },
->(x) { x + 100 },
->(x) { x / 3.0 },
]
p transform_with_lambdas(VALUES, TRANSFORMS) # [600, 101, 0.3333333333333333]
Here is a solution which will help you to apply different operations on two different operands:
# Apply operator_arr[i] to first_operand_arr[i] and second_operand_arr[i].
def calc(first_operand_arr, operator_arr, second_operand_arr)
  result_arr = []
  operator_arr.each_with_index do |op, i|
    # Look the operator up by name and call it with the matching right operand.
    result_arr.push(first_operand_arr[i].method(op).call(second_operand_arr[i]))
  end
  result_arr
end

calc([5, 8, 15], ['+', '-', '*'], [5, 3, 2])
def calc(*arr)
  ops = [600, 800, 1000]
  # Pair each argument with its factor by position.
  arr.each_with_index.map { |x, i| x * ops[i] }
end

calc(5, 8, 15)
#=> [3000, 6400, 15000]
You could generalize this as follows:
def calc(*arr)
  # Each element is [operand1, operand2, operator-symbol]; dispatch dynamically.
  arr.map { |triple| triple[0].send(triple[2], triple[1]) }
end

calc [5, 6, :*], [2, 3, :+], [10, 8, :-]
#=> [30, 5, 2]
Here is an option using a second array of lambda's that can be arbitrary functions of each entry in your main array.
operands = [1.0, 2.0, 3.0]
operations = [
  ->(e) { e * 10 },
  ->(e) { e + 10 },
  ->(e) { e * e }
]
# zip pairs each operand with its operation; apply them pairwise.
results = operands.zip(operations).map { |operand, operation| operation.call(operand) }
puts results
Edit
I just noticed this is a really a variation on Keith Bennett's answer above, I will leave it here, since it is different in how the lambda is retrieved from the array.

Resources