How to change the order in dimensions xarray.Dataset? - dataset

I am creating a xarray dataset as below:
import numpy as np
import xarray as xr
x_example = np.random.rand(1488,)
y_example = np.random.rand(1331,)
time_example = np.random.rand(120,)
rainfall_example = np.random.rand(120, 1331, 1488)
rainfall_dataset = xr.Dataset(
data_vars=dict(
rainfall_depth=(['time', 'y', 'x'], rainfall_example),
),
coords=dict(
time=(['time'], time_example),
x=(['x'], x_example),
y=(['y'], y_example)
)
)
The results are like this
And the dimensions when I run rainfall_example.dims are like this Frozen({'time': 120, 'y': 1331, 'x': 1488}) (this can also be seen in the above results). I know the xarray.Dataset.dims cannot be modified according to here
My question is: How can we change the order of those dimensions into the dimensions like this Frozen({'time': 120, 'x': 1488, 'y': 1331}) without changing anything else (everything will be the same only the order in dimensions is changed)?

You can reorder your coordinates and variables by selecting them both in order using a list:
In [3]: rainfall_dataset[["time", "y", "x", "rainfall_depth"]]
Out[3]:
<xarray.Dataset>
Dimensions: (time: 120, y: 1331, x: 1488)
Coordinates:
* time (time) float64 0.2848 0.7556 0.9501 ... 0.694 0.734 0.198
* y (y) float64 0.1941 0.1132 0.2504 ... 0.1501 0.5085 0.006135
* x (x) float64 0.2776 0.4504 0.1886 ... 0.4071 0.3327 0.5555
Data variables:
rainfall_depth (time, y, x) float64 ...

Related

Plotting many pie charts using a loop to create a single figure using matplotlib

I'm having trouble converting a script I wrote to create and save 15 pie charts separately which I would like to save as a single figure with 15 subplots instead. I have tried taking fig, ax = plt.subplots(5, 3, figsize=(7, 7)) out of the loop and specifying the number of rows and columns for the plot but I get this error AttributeError: 'numpy.ndarray' object has no attribute 'pie'. This error doesn't occur if I leave that bit of code in the script as is seen below. Any help with tweaking the code below to create a single figure with 15 subplots (one for each site) would be enormously appreciated.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_excel(path)
df_1 = df.groupby(['Site', 'group'])['Abundance'].sum().reset_index(name='site_count')
site = ['Ireland', 'England', 'France', 'Scotland', 'Italy', 'Spain',
'Croatia', 'Sweden', 'Denmark', 'Germany', 'Belgium', 'Austria', 'Poland', 'Stearman', 'Hungary']
for i in site:
df_1b = df_1.loc[df_1['Site'] == i]
colors = {'Dog': 'orange', 'Cat': 'cyan', 'Pig': 'darkred', 'Horse': 'lightcoral', 'Bird':
'grey', 'Rat': 'lightsteelblue', 'Whale': 'teal', 'Fish': 'plum', 'Shark': 'darkgreen'}
wp = {'linewidth': 1, 'edgecolor': "black"}
fig, ax = plt.subplots(figsize=(7, 7))
texts, autotexts = ax.pie(df_1b['site_count'],
labels=None,
shadow=False,
colors=[colors[key] for key in labels],
startangle=90,
wedgeprops=wp,
textprops=dict(color="black"))
plt.setp(autotexts, size=16)
ax.set_title(site, size=16, weight="bold", y=0)
plt.savefig('%s_group_diversity.png' % i, bbox_inches='tight', pad_inches=0.05, dpi=600)
It's hard to guess how exactly you'd like the plot to look like.
The main changes the code below makes, are:
adding fig, axs = plt.subplots(nrows=5, ncols=3, figsize=(12, 18)). Here axs is a 2d array of subplots. figsize should be large enough to fit the 15 subplots.
df_1b['group'] is used for the labels that decide the color (it's not clear where the labels themselves should be shown, maybe in a common legend)
autopct='%.1f%%' is added to ax.pie(...). This shows the percentages with one decimal.
With autopct, ax.pie(...) now returns 3 lists: texts, autotexts, wedges. The texts are the text objects for the labels (currently empty texts), autotexts are the percentages (that are calculated "automatically"), wedges are the triangular wedges.
ax.set_title now uses the site name, and puts it at a negative y-value (y=0 would overlap with the pie)
plt.tight_layout() at the end tries to optimize the surrounding white space
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
site = ['Ireland', 'England', 'France', 'Scotland', 'Italy', 'Spain',
'Croatia', 'Sweden', 'Denmark', 'Germany', 'Belgium', 'Austria', 'Poland', 'Stearman', 'Hungary']
colors = {'Dog': 'orange', 'Cat': 'cyan', 'Pig': 'darkred', 'Horse': 'lightcoral', 'Bird': 'grey',
'Rat': 'lightsteelblue', 'Whale': 'teal', 'Fish': 'plum', 'Shark': 'darkgreen'}
wedge_properties = {'linewidth': 1, 'edgecolor': "black"}
# create some dummy test data
df = pd.DataFrame({'Site': np.random.choice(site, 1000),
'group': np.random.choice(list(colors.keys()), 1000),
'Abundance': np.random.randint(1, 11, 1000)})
df_1 = df.groupby(['Site', 'group'])['Abundance'].sum().reset_index(name='site_count')
fig, axs = plt.subplots(nrows=5, ncols=3, figsize=(12, 18))
for site_i, ax in zip(site, axs.flat):
df_1b = df_1[df_1['Site'] == site_i]
labels = df_1b['group']
texts, autotexts, wedges = ax.pie(df_1b['site_count'],
labels=None,
shadow=False,
colors=[colors[key] for key in labels],
startangle=90,
wedgeprops=wedge_properties,
textprops=dict(color="black"),
autopct='%.1f%%')
plt.setp(autotexts, size=10)
ax.set_title(site_i, size=16, weight="bold", y=-0.05)
plt.tight_layout()
plt.savefig('group_diversity.png', bbox_inches='tight', pad_inches=0.05, dpi=600)
plt.show()

IndexError: The shape of the mask [183, 10] at index 1 does not match the shape of the indexed tensor [183, 1703] at index 1

I'm trying to load the Cornell dataset from PyTorch Geometric to train my Graph Neural Network. I want to apply a mask but I achieve this error (also on Chameleon, Wisconsin, Texas datasets). My Dataset class works perfectly with all the datasets of Planetoid that are mono dimensional tensors, probable bidimensional tensors give problem.
I insert my code that can be ruined on Colab without problems.
!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.12.0+cu113.html
import torch_geometric
from torch_geometric.datasets import Planetoid, WebKB
from torch_geometric.utils import to_dense_adj, to_undirected, remove_self_loops
class Dataset(object):
def __init__(self, name):
super(Dataset, self).__init__()
self.name = name
if (name == 'Cora'):
dataset = Planetoid(root='/tmp/Cora', name='Cora', split="full")
if(name == 'Citeseer'):
dataset = Planetoid(root='/tmp/Cora', name='Citeseer', split="full")
if(name == 'PubMed'):
dataset = Planetoid(root='/tmp/Cora', name='Pubmed', split="full")
if(name == 'Cornell'):
dataset = WebKB(root='/tmp/WebKB', name='Cornell')
self.data = dataset[0]
print(self.data)
self.train_mask = self.data.train_mask
self.valid_mask = self.data.val_mask
self.test_mask = self.data.test_mask
def train_val_test_split(self):
train_x = self.data.x[self.data.train_mask]
train_y = self.data.y[self.data.train_mask]
valid_x = self.data.x[self.data.val_mask]
valid_y = self.data.y[self.data.val_mask]
test_x = self.data.x[self.data.test_mask]
test_y = self.data.y[self.data.test_mask]
return train_x, train_y, valid_x, valid_y, test_x, test_y
def get_fullx(self):
return self.data.x
def get_edge_index(self):
return self.data.edge_index
def get_adjacency_matrix(self):
# We will ignore this for the first part
adj = to_dense_adj(self.data.edge_index)[0]
return adj
The error that I achieve is in the title and is obtained in this snippet:
cornell_dataset = Dataset(name = 'Cornell')
train_x, train_y, valid_x, valid_y, test_x, test_y = cornell_dataset.train_val_test_split()
# check and confirm our data shapes match our expectations
print(f"Train shape x: {train_x.shape}, y: {train_y.shape}")
print(f"Val shape x: {valid_x.shape}, y: {valid_y.shape}")
print(f"Test shape x: {test_x.shape}, y: {test_y.shape}")

baseline fitting using Numpy poly1d

i have the following baseline:
and as it can be seen, it has an almost sinusoidal shape. i am trying to use polyfit on it. Actually what I have are two arrays of data,one called x and the other y. So what i am using is:
porder = 2
coefs = np.polyfit(x, y, porder)
baseline = np.poly1d(coefs)
cleanspec = y - baseline(x)
My goal is to obtain a clean spectrum in the end, who has a straight baseline with no ondulation.
However, the fitting is not working. Any suggestions on using another more efficient method?
I have tried changing porder to 3, but i have this warning, and it doesn't change anything:
Polyfit may be poorly conditioned
My data for x:
[1.10192816e+11 1.10192893e+11 1.10192969e+11 1.10193045e+11
1.10193122e+11 1.10193198e+11 1.10193274e+11 1.10193350e+11
1.10193427e+11 1.10193503e+11 1.10193579e+11 1.10193656e+11
1.10193732e+11 1.10193808e+11 1.10193885e+11 1.10193961e+11
1.10194037e+11 1.10194113e+11 1.10194190e+11 1.10194266e+11
1.10194342e+11 1.10194419e+11 1.10194495e+11 1.10194571e+11
1.10194647e+11 1.10194724e+11 1.10194800e+11 1.10194876e+11
1.10194953e+11 1.10195029e+11 1.10195105e+11 1.10195182e+11
1.10195258e+11 1.10195334e+11 1.10195410e+11 1.10195487e+11
1.10195563e+11 1.10195639e+11 1.10195716e+11 1.10195792e+11
1.10195868e+11 1.10195944e+11 1.10196021e+11 1.10196097e+11
1.10196173e+11 1.10196250e+11 1.10196326e+11 1.10196402e+11
1.10196479e+11 1.10196555e+11 1.10196631e+11 1.10196707e+11
1.10196784e+11 1.10196860e+11 1.10196936e+11 1.10197013e+11
1.10197089e+11 1.10197165e+11 1.10197241e+11 1.10197318e+11
1.10197394e+11 1.10197470e+11 1.10197547e+11 1.10197623e+11
1.10197699e+11 1.10197776e+11 1.10197852e+11 1.10197928e+11
1.10198004e+11 1.10198081e+11 1.10198157e+11 1.10198233e+11
1.10198310e+11 1.10198386e+11 1.10198462e+11 1.10198538e+11
1.10198615e+11 1.10198691e+11 1.10198767e+11 1.10198844e+11
1.10198920e+11 1.10198996e+11 1.10199073e+11 1.10199149e+11
1.10199225e+11 1.10199301e+11 1.10199378e+11 1.10199454e+11
1.10199530e+11 1.10199607e+11 1.10199683e+11 1.10199759e+11
1.10199835e+11 1.10199912e+11 1.10199988e+11 1.10200064e+11
1.10200141e+11 1.10202582e+11 1.10202658e+11 1.10202735e+11
1.10202811e+11 1.10202887e+11 1.10202963e+11 1.10203040e+11
1.10203116e+11 1.10203192e+11 1.10203269e+11 1.10203345e+11
1.10203421e+11 1.10203498e+11 1.10203574e+11 1.10203650e+11
1.10203726e+11 1.10203803e+11 1.10203879e+11 1.10203955e+11
1.10204032e+11 1.10204108e+11 1.10204184e+11 1.10204260e+11
1.10204337e+11 1.10204413e+11 1.10204489e+11 1.10204566e+11
1.10204642e+11 1.10204718e+11 1.10204795e+11 1.10204871e+11
1.10204947e+11 1.10205023e+11 1.10205100e+11 1.10205176e+11
1.10205252e+11 1.10205329e+11 1.10205405e+11 1.10205481e+11
1.10205557e+11 1.10205634e+11 1.10205710e+11 1.10205786e+11
1.10205863e+11 1.10205939e+11 1.10206015e+11 1.10206092e+11
1.10206168e+11 1.10206244e+11 1.10206320e+11 1.10206397e+11
1.10206473e+11 1.10206549e+11 1.10206626e+11 1.10206702e+11
1.10206778e+11 1.10206854e+11 1.10206931e+11 1.10207007e+11
1.10207083e+11 1.10207160e+11 1.10207236e+11 1.10207312e+11
1.10207389e+11 1.10207465e+11 1.10207541e+11 1.10207617e+11
1.10207694e+11 1.10207770e+11 1.10207846e+11 1.10207923e+11
1.10207999e+11 1.10208075e+11 1.10208151e+11 1.10208228e+11
1.10208304e+11 1.10208380e+11 1.10208457e+11 1.10208533e+11
1.10208609e+11 1.10208686e+11 1.10208762e+11 1.10208838e+11
1.10208914e+11 1.10208991e+11 1.10209067e+11 1.10209143e+11
1.10209220e+11 1.10209296e+11 1.10209372e+11 1.10209448e+11
1.10209525e+11 1.10209601e+11 1.10209677e+11 1.10209754e+11
1.10209830e+11]
and for y:
[ 0.00143858 0.05495827 0.07481739 0.03287334 -0.06275658 0.03744501
-0.04392341 0.02849104 0.03173781 0.09748282 0.02854265 0.06573162
0.08215295 0.0240697 0.00931477 0.17572605 0.06783381 0.04853354
-0.00226023 0.03722596 0.09687121 0.10767829 0.04922701 0.08036865
0.02371989 0.13885361 0.13903188 0.09910567 0.08793601 0.06048823
0.03932097 0.04061129 0.03706228 0.13764936 0.14150589 0.12226208
0.09041878 0.13638676 0.11107155 0.12261369 0.11765545 0.07425344
0.06643712 0.1449991 0.14256909 0.0924173 0.09291525 0.12216271
0.11272059 0.07618891 0.16787807 0.07832849 0.10786856 0.12381844
0.14182937 0.08078092 0.11932429 0.06383649 0.02923562 0.0864741
0.07806758 0.04514088 0.12929371 0.11769577 0.03619867 0.02811366
0.06401639 0.06883735 0.01162673 0.0956252 0.11206549 0.0485106
0.07269545 0.01662149 0.01287365 0.13401546 0.06300487 0.01994627
0.00721926 0.04863274 -0.01578364 0.0235379 0.03102316 0.00392559
0.05662182 0.04643381 -0.00665026 0.05532307 -0.01533339 0.04838893
0.02097954 0.02551123 0.03727188 -0.04001189 -0.04294883 0.02837669
-0.06062512 -0.0743994 -0.04665618 -0.03553261 -0.07057554 -0.07028277
-0.07502298 -0.07247965 -0.03540266 -0.03226398 -0.08014487 -0.11907543
-0.18521053 -0.1117617 -0.14377897 -0.07113503 -0.02480966 -0.07459746
-0.07994097 -0.02648713 -0.10288478 -0.13328137 -0.08121377 -0.13742166
-0.024583 -0.11391389 -0.02717251 -0.08876166 -0.04369363 -0.0790144
-0.09589054 -0.12058701 0.00041344 -0.06646403 -0.06368366 -0.10335613
-0.04508286 -0.18360729 -0.0551775 -0.06476622 -0.0834523 -0.01276785
-0.04145486 -0.14549992 -0.11186823 -0.07663398 -0.11920359 -0.0539315
-0.10507118 -0.09112374 -0.09751319 -0.06848278 -0.09031172 -0.07218853
-0.03129234 -0.04543539 -0.00942861 -0.06711099 -0.00712202 -0.11696418
-0.06344093 0.03624227 -0.04798777 0.01174394 -0.08326314 -0.06761215
-0.12063419 -0.05236908 -0.03914692 -0.05370061 -0.01620056 0.06731788
-0.06600111 -0.04601257 -0.02144361 0.00256863 -0.00093034 0.00629604
-0.0252835 -0.00907992 0.03583489 -0.03761906 0.10325763 0.08016437
-0.04900467 0.0110328 0.05019604 -0.04428984 -0.03208058 0.05095359
-0.01807463 0.0691733 0.07472691 0.00659871 0.00947692 0.0014422
0.05227057]
Having this huge offset in x is probably not helping. It definitively works when removing it for the fitting process. Looks like this:
import matplotlib.pyplot as plt
import numpy as np
scaledx = xdata * 1e-8 - 1100
coefs = np.polyfit( scaledx, ydata, 7)
base = np.poly1d( coefs )
xt = np.linspace( 1.9,2.1,150)
yt = base( xt )
fig = plt.figure()
ax = fig.add_subplot( 2, 1, 1 )
bx = fig.add_subplot( 2, 1, 2 )
ax.scatter( scaledx , ydata )
ax.plot( xt , yt )
bx.plot( scaledx , ydata - base( scaledx ) )
plt.show()
with xdata and ydata being numpy arrays of the OP data lists.
Provides:
Addon
Concerning the poorly conditioned one should remember how simple linear optimization works. In case of a polynomial one builds the matrix:
A = [
[1, x1, x1**2, ...],
[1, x2, x2**2, ...],
...
[1, xn, xn**2, ...]
]
and one needs B^(-1) the inverse of B with B = AT.A and AT being the transposed of A. Now looking at the x values in the order of 1e11, B will have order 1 on one side of the diagonal and for a second order polynomial order 1e44 on the other. In case of a third order polynomial this is getting worse, accordingly. Making the inverse, hence, is becoming unstable, numerically. Luckily, and as used above, this can be solved easily by simple re-scaling of the problem at hand.

Data arrays must have the same length, and match time discretization in dynamic problems error in GEKKO

I want to find the value of the parameter m that minimizes my variable x subject to a system of differential equations. I have the following code
from gekko import GEKKO
def run_model_m(days, population, case, k_val, b_val, u0_val, sigma_val, Kmax0, a_val, c_val):
list_x =[]
list_u =[]
list_Kmax =[]
for i in range(len(days)):
list_xi=[]
list_ui=[]
list_Ki=[]
for j in range(len(days[i])):
#try:
m = GEKKO(remote=False)
#m.time= days[i][j]
eval = np.linspace(days[i][j][0], days[i][j][-1], 100, endpoint=True)
m.time = eval
x_data= population[i][j]
variable= np.linspace(population[i][j][0], population[i][j][-1], 100, endpoint=True)
x = m.Var(value=population[i][j][0], lb=0)
sigma= m.Param(sigma_val)
d = m.Param(c_val)
k = m.Param(k_val)
b = m.Param(b_val)
r = m.Param(a_val)
step = np.ones(len(eval))
step= 0.2*step
step[0]=1
m_param = m.CV(value=1, lb=0, ub=1, integer=True); m_param.STATUS=1
u = m.Var(value=u0_val, lb=0, ub=1)
#m.free(u)
a = m.Param(a_val)
c= m.Param(c_val)
Kmax= m.Param(Kmax0)
if case == 'case0':
m.Equations([x.dt()== x*(r*(1-x/(Kmax))-m_param/(k+b*u)-d), u.dt()== sigma*(m_param*b/((k+b*u)**2))])
elif case == 'case4':
m.Equations([x.dt()== x*(r*(1-u**2)*(1-x/(Kmax))-m_param/(k+b*u)-d), u.dt() == sigma*(-2*u*r*(1-x/(Kmax))+(b*m_param)/(b*u+k)**2)])
p = np.zeros(len(eval))
p[-1] = 1.0
final = m.Param(value=p)
m.Obj(x)
m.options.IMODE = 6
m.options.MAX_ITER=15000
m.options.SOLVER=1
# optimize
m.solve(disp=False, GUI=False)
#m.open_folder(dataset_path+'inf')
list_xi.append(x.value)
list_ui.append(u.value)
list_Ki.append(m_param.value)
list_x.append(list_xi)
list_Kmax.append(list_Ki)
list_u.append(list_ui)
return list_x, list_u, list_Kmax, m.options.OBJFCNVAL
scaled_days[i][j] =[-7.0, 42.0, 83.0, 125.0, 167.0, 217.0, 258.0, 300.0, 342.0]
scaled_pop[i][j] = [0.01762491277346285, 0.020592540360308997, 0.017870838266697213, 0.01690069378982034,0.015512320147187675,0.01506701796298272,0.014096420738841563,0.013991224004743027,0.010543380664478205]
k0,b0,group, case0, u0, sigma0, K0, a0, c0 = (100, 20, 'Size3, Inc', 'case0', 0.1, 0.05, 2, 0, 0.01)
list_x2, list_u2, list_Kmax2,final =run_model_m(days=[[scaled_days[i][j]]], population=
[[scaled_pop[i][j]]],case=case1, k_val=list_b1[i0][0], b_val=b1, u0_val=list_u1[i0][j0],
sigma_val=sigma1, Kmax0=K1, a_val=list_Kmax1[0][0], c_val=c1)
I get the error Data arrays must have the same length, and match time discretization in dynamic problems error but I don't understand why. I have tried making x and m_param arrays, with x=m.Var, m_param =m.MV... But still get the same error, even if they are all arrays of the same length. Is this the right way to find the solution of the minimization problem?
I think the error was just that in run_model_m I was passing a list as u0_val and it didn't have the same dimensions as m.time. So it should be u0_val=list_u1[0][0][0]

Q2: AttributeError: 'builtin_function_or_method' object has no attribute 'size'

Could anyone tell me why I'm getting the error type: AttributeError: 'builtin_function_or_method' object has no attribute
'size' in like 57?
for this synthax: out=np.zeros((x.size,y.size))
import numpy as np
import sympy as sp
from numpy import exp,sqrt,pi
from sympy import Integral, log, exp, sqrt, pi
import math
from numpy import array
import matplotlib.pyplot as plt
import scipy.integrate
from scipy.special import erf
from scipy.stats import norm, gaussian_kde
from quantecon import LAE
from sympy.abc import q
#from sympy import symbols
#var('q')
#q= symbols('q')
## == Define parameters == #
mu=80
sigma=20
b=0.2
Q=80
Q1=Q*(1-b)
Q2=Q*(1+b)
d = (sigma*np.sqrt(2*np.pi))
phi = norm()
n = 500
#Phi(z) = 1/2[1 + erf(z/sqrt(2))].
def p_k_positive(x, y):
# x, y = np.array(x, dtype=float), np.array(y, dtype=float)
Positive_RG = norm.pdf(x[:, None] - y[None, :]+Q1, mu, sigma)
print('Positive_R = ', Positive_RG)
return Positive_RG
def p_k_negative(x, y):
# x, y = np.array(x, dtype=float), np.array(y, dtype=float)
Negative_RG = norm.pdf(x[:, None] - y[None, :]+Q2, mu, sigma)
print('Negative_RG = ', Negative_RG)
return Negative_RG
def p_k_zero(x, y):
# x, y = np.array(x, dtype=float), np.array(y, dtype=float)
Zero_RG = (1/(2*math.sqrt(2*math.pi)))*(erf((x[:, None]+Q2-mu)/(sigma*math.sqrt(2)))-erf((x[:, None]+Q1-mu)/(sigma*math.sqrt(2))))
#Zero_RG =norm.pdf
print('Zero_RG',Zero_RG)
return Zero_RG
def myFilter(x,y):
x, y = x.squeeze, y.squeeze
out=np.zeros((x.size,y.size))
xyDiff = x[:, None] - y[None, :]
out=np.where(np.bitwise_and(y[None, :] > 0.0, xyDiff >= -Q1), p_k_positive(x, y), out) # unless the sum functions are different
out=np.where(np.bitwise_and(y[None, :] < 0.0, x[:, None] >= -Q1), p_k_negative(x, y), out)
out=np.where(np.bitwise_and(y[None, :] ==0.0, xyDiff >= -Q1), p_k_zero(x, y), out)
return out
Z = phi.rvs(n)
X = np.empty(n)
for t in range(n-1):
X[t+1] = X[t] + Z[t]
#X[t+1] = np.abs(X[t]) + Z[t]
psi_est = LAE(myFilter, X)
k_est = gaussian_kde(X)
fig, ax = plt.subplots(figsize=(10,7))
ys = np.linspace(-200.0, 200.0, 200)
ax.plot(ys, psi_est(ys), 'g-', lw=2, alpha=0.6, label='look ahead estimate')
ax.plot(ys, k_est(ys), 'k-', lw=2, alpha=0.6, label='kernel based estimate')
ax.legend(loc='upper left')
plt.show()
x, y = x.squeeze, y.squeeze
Should be
x, y = x.squeeze(), y.squeeze()
or you're trying to take the size of a function.

Resources