Bokeh MultiSelect plotting in infinite loop, distorting plot - loops

I'm trying to plot multiple lines on a graph based on a user's "MultiSelect" options. I read in two separate Excel files of data and plot their axes based on the user's request. I'm using Python 3.5 and running on a Mac.
1). As soon as I make a multiselection the figure gets distorted
2). It seems the plot is running in an infinite loop.
3). The plot does not properly update when the user changes selections. It just adds more plots without removing the previous plot.
from os.path import dirname, join
from pandas import *
import numpy as np
import pandas.io.sql as psql
import sqlite3 as sql
import sys, os
from bokeh.plotting import figure
from bokeh.layouts import layout, widgetbox
from bokeh.models import ColumnDataSource, HoverTool, Div
from bokeh.models.widgets import Slider, Select, TextInput, MultiSelect
from bokeh.io import curdoc
import matplotlib.pyplot as plt
# Collect every Excel workbook in the working directory except the template.
path = os.getcwd()
files = [
    x
    for x in os.listdir(path)
    if x.endswith(".xlsx") and x != 'template.xlsx'
]
# Labels offered by the "voids" selector; only the keys are read below.
axis_map = {
    "0% void": "0% void",
    "40% void": "40% void",
    "70% void": "70% void",
}

# Read the selectable file names from files.txt (whitespace separated).
# The context manager closes the handle, which the bare open() call did not.
with open(join(dirname(__file__), 'files.txt')) as files_txt:
    file_options = files_txt.read().split()

files_list = MultiSelect(title="Files", value=["dummy2.xlsx"],
                         options=file_options)
voids = MultiSelect(title="At what void[s]", value=["0% void"],
                    options=sorted(axis_map.keys()))

# `p` is created here but not referenced again in the visible script.
p = figure(plot_height=600, plot_width=700, title="", toolbar_location=None)
pline = figure(plot_height=600, plot_width=700, title="")
# Parse the first sheet of every .xlsx file in the working directory and keep
# it as a nested dict keyed by file name.
path = os.getcwd()
data_dict = {}
for file in os.listdir(path):
    if not file.endswith(".xlsx"):
        continue
    xls = ExcelFile(file)
    df = xls.parse(xls.sheet_names[0])
    data = df.to_dict()
    data_dict[file] = data

# converting dictionary to dataframe:
# flatten to {(file name, column name): column dict}, build one row per key in
# sorted order under a two-level index, then transpose so the keys become columns.
newdict = {
    (k1, k2): v2
    for k1, v1 in data_dict.items()
    for k2, v2 in v1.items()
}
ordered_keys = sorted(newdict)
xxs = DataFrame(
    [newdict[key] for key in ordered_keys],
    index=MultiIndex.from_tuples(ordered_keys),
)
master_data = xxs.transpose()
def select_data():
    """Add a line and circle glyph to `pline` for each selected file/void pair.

    Reads the current values of the `files_list` and `voids` widgets and plots
    the matching `master_data` column against that file's 'Burnup' column.
    Glyphs are only added; nothing already on the figure is removed.
    """
    for file_name in files_list.value:
        for void_label in voids.value:
            frame = master_data[file_name]
            burnup = frame['Burnup']
            series = frame[void_label]
            pline.line(x=burnup, y=series)
            pline.circle(x=burnup, y=series)
def update():
    """Widget callback entry point: delegate to select_data()."""
    select_data()


# Every control triggers update() when its 'value' property changes.
controls = [files_list, voids]
for control in controls:
    control.on_change('value', lambda attr, old, new: update())

sizing_mode = 'fixed'  # 'scale_width' also looks nice with this example

# Widgets on the left, line plot on the right.
inputs = widgetbox(*controls, sizing_mode=sizing_mode)
l = layout([[inputs, pline]], sizing_mode=sizing_mode)

update()  # draw the initial selection once before attaching the layout
curdoc().add_root(l)
curdoc().title = "Calculations"

I am not 100% certain, since the code above is not self-contained and cannot be run and investigated, but there are some issues (as of Bokeh 0.12.4) with adding new components to documents being problematic in some situations. These issues are high on the priority list for the next two point releases.
Are the data sizes reasonable such that you could create all the combinations up front? If so, I would recommend doing that, and then having the multi-select values toggle the visibility on/off appropriately. E.g., here's a similar example using a checkbox:
import numpy as np
from bokeh.io import curdoc
from bokeh.layouts import row
from bokeh.palettes import Viridis3
from bokeh.plotting import figure
from bokeh.models import CheckboxGroup
# Build one figure and draw all three lines up front; the callback defined
# further down only toggles their visibility.
p = figure()
# Line styling shared by all three glyphs.
props = dict(line_width=4, line_alpha=0.7)
x = np.linspace(0, 4 * np.pi, 100)
# Keep the returned renderers so their `visible` attribute can be set later.
l0 = p.line(x, np.sin(x), color=Viridis3[0], legend="Line 0", **props)
l1 = p.line(x, 4 * np.cos(x), color=Viridis3[1], legend="Line 1", **props)
l2 = p.line(x, np.tan(x), color=Viridis3[2], legend="Line 2", **props)
# All three boxes start checked (active indices 0, 1 and 2).
checkbox = CheckboxGroup(labels=["Line 0", "Line 1", "Line 2"], active=[0, 1, 2], width=100)
def update(attr, old, new):
    """Show each line only while its index is in `checkbox.active`.

    The arguments follow the `on_change` callback signature and are not used;
    the current state is read from the checkbox widget directly.
    """
    for index, renderer in enumerate((l0, l1, l2)):
        renderer.visible = index in checkbox.active
# Call update() whenever the set of checked boxes changes.
checkbox.on_change('active', update)
# Checkbox group to the left of the plot.
layout = row(checkbox, p)
curdoc().add_root(layout)
If the data sizes are not such that you can create all the combinations up front, then I would suggest making an issue on the project issue tracker (https://github.com/bokeh/bokeh/issues) that has complete, minimal, self-contained code that is runnable as-is to reproduce the problem (i.e. it generates random or synthetic data but is otherwise identical). This is the number one thing that would help the core devs address the issue more promptly.

#bigreddot Thanks for your response.
I edited the code to now make it self contained.
1). The plot does not reset. The newly selected plots are drawn over the previous plot.
2). When the user makes multiple selections (ctrl+shift) the plot axis gets distorted and it seems to be running in an infinite loop
from pandas import *
import numpy as np
import sys, os
from bokeh.plotting import figure
from bokeh.layouts import layout, widgetbox
from bokeh.models.widgets import MultiSelect
from bokeh.io import curdoc
from bokeh.plotting import reset_output
import math
# Option labels for the MultiSelect below; only the keys are read in this script.
axis_map = {
"y1": "y3",
"y2": "y2",
"y3": "y1",
}
# Synthetic data: one shared x axis and three cosine curves of 62 points each.
x1 = np.linspace(0,20,62)
y1 = [1.26 * math.cos(x) for x in np.linspace(-1,1,62) ]
y2 = [1.26 * math.cos(x) for x in np.linspace(-0.95,.95,62) ]
y3 = [1.26 * math.cos(x) for x in np.linspace(-.9,.90,62) ]
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,hover"
# NOTE(review): `vars` shadows the Python builtin of the same name.
vars = MultiSelect(title="At what void[s]", value=["y1"], options=sorted(axis_map.keys()))
# Lookup table used by select_data(): 'rate' is the x axis, the rest are y series.
master_data = { 'rate' : x1,
'y1' : y1,
'y2' : y2,
'y3' : y3
}
# `p` is created but not referenced again in the visible script.
p = figure(plot_height=600, plot_width=700, title="", toolbar_location=None)
pline = figure(plot_height=600, plot_width=700, title="", tools=TOOLS)
def select_data():
    """Add a line and circle glyph to `pline` for every selected series.

    Each entry of `vars.value` is plotted against master_data['rate'].
    Glyphs are only added; nothing already on the figure is removed.
    """
    for series_name in vars.value:
        glyph_args = dict(
            x=master_data['rate'],
            y=master_data[series_name],
            line_width=2,
        )
        pline.line(**glyph_args)
        pline.circle(**glyph_args)
# Re-run select_data() whenever a control's 'value' property changes.
controls = [vars]
for control in controls:
    control.on_change('value', lambda attr, old, new: select_data())

sizing_mode = 'fixed'

# Widgets on the left, line plot on the right.
inputs = widgetbox(*controls)
l = layout([[inputs, pline]])

select_data()  # draw the initial selection before attaching the layout
curdoc().add_root(l)
curdoc().title = "Plot"

Related

Matplotlib scatterplot subplot legends overwrite one another

I have a scatterplot figure with subplots generated using a for loop. Within the figure, I am trying to create a single legend but each time a subplot and legend is rendered the legend is overwritten by the next subplot, so the figure that is generated contains a single legend pertaining only to the last subplot. I would like the legend to pertain to all subplots (i.e., it should include years 2019, 2020, 2021 and 2022). Here is my code, please let me know how I can tweak it.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches
# NOTE(review): `path` is not defined in the visible snippet.
# NOTE(review): the original indentation was lost; the statements after the
# `for` line are presumably the loop body, but whether plt.legend() runs
# inside or after the loop cannot be told from here.
df = pd.read_excel(path)
spp = df.SPP.unique()
# 8 x 4 grid of axes; zip() pairs each unique SPP value with one axes.
fig, axs = plt.subplots(nrows=8, ncols=4, figsize=(14, 14))
for spp_i, ax in zip(spp, axs.flat):
df_1 = df[df['SPP'] == spp_i]
labels = list(df_1.Year.unique())
x = df_1['Length_mm']
y = df_1['Weight_g']
# factorize() numbers the years of THIS subset only; `levels` is the per-row code.
levels, categories = pd.factorize(df_1['Year'])
colors = [plt.cm.tab10(i) for i in levels]
# One legend patch per year present in this subset.
handles = [matplotlib.patches.Patch(color=plt.cm.tab10(i), label=c) for i, c in enumerate(categories)]
ax.scatter(x, y, c=colors)
# plt.legend() is called on pyplot rather than on `ax` or `fig`.
plt.legend(handles=handles)
plt.savefig('Test.png', bbox_inches='tight', pad_inches=0.1, dpi=600)
Here is the figure; as you can see, the legend in the bottom right is for the last subplot only.
enter image description here
Creating this type of plots is quite cumbersome with standard matplotlib. Seaborn automates a lot of the steps.
In this case, sns.relplot(...) can be used. If you don't want all the subplots to have the same x and/or y ranges, you can add facet_kws={'sharex': False, 'sharey': False}.
The size of the individual subplots is controlled via height=, while the width will be calculated as the height multiplied by the aspect. col_wrap= tells how many columns of subplots will be put before starting a new row.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Synthetic stand-in for the question's data: 15 groups x 4 years x 25 rows.
spp_list = ["Aeloria", "Baelun", "Caelondia", "Draeden", "Eldrida", "Faerun", "Gorandor", "Haldira", "Ilysium",
"Jordheim", "Kaltara", "Lorlandia", "Myridia", "Nirathia", "Oakenfort"]
df = pd.DataFrame({'SPP': np.repeat(spp_list, 100),
'Year': np.tile(np.repeat(np.arange(2019, 2023), 25), 15),
'Length_mm': np.abs(np.random.randn(1500).cumsum()) + 10,
'Weight_g': np.abs(np.random.randn(1500).cumsum()) + 20})
# One subplot per SPP value, coloured by Year, wrapping after 6 columns.
g = sns.relplot(df, x='Length_mm', y='Weight_g', col='SPP', col_order=spp_list,
hue='Year', palette='turbo',
height=3, aspect=1.5, col_wrap=6,
facet_kws={'sharex': False, 'sharey': False})
g.set_axis_labels(x_var='Length (mm)', y_var='Weight (g)', clear_inner=True)
g.fig.tight_layout() # nicely fit subplots with their titles, labels and ticks
g.fig.subplots_adjust(right=0.97) # space for the legend after fitting the subplots
plt.show()

How to remove no data values from merged raster using rioxarray?

I have a collection of raster stored in a directory. They are rasters of glaciers in same region. When I remove no data using rioxarray.where method on individual rasters it works. But when I use this method on the merged product generated using rioxarray.merge.merge_arrays method, it does not remove the no data value (which is -9999). Below is the code. I apologize that I was not able to provide a reproducible example.
import rioxarray as rxr
from rioxarray import merge
import xarray as xr
import numpy as np
import glob
import matplotlib.pyplot as plt
# Open every GeoTIFF matching the pattern.
path = r'ice_thickness_pandit/*.tif'
files = glob.glob(path)
files
elements = [rxr.open_rasterio(item) for item in files]

# Merge with -9999 as the nodata value, then mask cells equal to -9999
# on the merged result.
merged = merge.merge_arrays(elements, nodata=-9999)
merged = merged.where(merged != -9999, drop = False)
I was having this same problem and was able to fix it by using the where command on the input data arrays.
import numpy as np
import pandas as pd
import rioxarray as rxr
import xarray as xr
import matplotlib.pyplot as plt
# Mask each input BEFORE merging: cells equal to 0 fail the where() condition
# and are masked out.
path = '/path/to/files/'
file1 = 'file1.dat'
cube1 = xr.open_dataarray(path+file1,engine='rasterio')
cube1_masked = cube1.where(cube1 != 0)
cube1.close()
# Same treatment for the second file.
file2 = 'file2.dat'
cube2 = xr.open_dataarray(path+file2,engine='rasterio')
cube2_masked = cube2.where(cube2 != 0)
cube2.close()
#%%
from rioxarray import merge
# Merge the already-masked arrays, passing NaN as the nodata value.
ds = [
cube1_masked,
cube2_masked,
]
merged = merge.merge_arrays(ds, nodata=np.nan)
# Take the first two slices along the leading axis.
# NOTE(review): presumably the band axis - confirm against the input files.
merged_arr1 = merged[0,:,:]
merged_arr2 = merged[1,:,:]
# Plot the two slices side by side without colorbars.
fig, axes = plt.subplots(ncols=2, figsize=(12,4))
merged_arr1.plot(ax=axes[0], add_colorbar=False)
merged_arr2.plot(ax=axes[1], add_colorbar=False)
plt.draw()

How can I hide markers and markerclusters outside a specific zoom level in folium?

Is it possible to hide a marker and markercluster on folium map in some specific zoom level?
My code needs to react to zoom change and decide what points I want to share and register/deregister them from the map.
I know that it is possible to do it with Leaflet using get.Zoom() and zoomend. As folium uses map from Leaflet I guess that is also possible to do it with folium, but I am not sure how to do it yet.
This is what I have so far (any idea on how to improve my code and make it "smarter" is also appreciated, I am just a beginner in Python):
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn
import folium
import mplleaflet
import os
import json
from folium import plugins
from folium.plugins import MarkerCluster
from folium import FeatureGroup, LayerControl, Map, Marker
# Load the point data; the 'Latitude', 'Longitude' and 'S' columns are read below.
df = pd.read_csv(r'Pakistan.csv')
# `data` and `ID` are not referenced again in the visible snippet.
data = df[['Latitude', 'Longitude']].values.tolist()
# Note the naming: x holds latitudes and y holds longitudes.
x = list(df['Latitude'])
y = list(df['Longitude'])
ID = list(df['S'])
# Centre the map on the mean coordinate of all points.
latmean = df['Latitude'].mean()
lonmean = df['Longitude'].mean()
m = folium.Map(location=[latmean, lonmean], zoom_start= 10, zoom_control=True)
folium.TileLayer('openstreetmap').add_to(m)
folium.TileLayer('Stamen Terrain').add_to(m)
# Path to the Vega spec embedded in the "Polo" popups
vis1 = os.path.join('data', 'vis1.json')
# Path to the GeoJSON overlay added to the map
overlay = os.path.join('data', 'overlay.json')
# Each group below is a FeatureGroup wrapping a MarkerCluster, filled with
# markers for a fixed range of row indices from x (latitudes) / y (longitudes).

# Distrital: rows 1-3
fgDistrital = FeatureGroup(name='Distrital', control=True)
my_Circle1 = MarkerCluster().add_to(fgDistrital)
for i in range(1, 4):
    folium.Marker(location=[x[i], y[i]], popup=str("Distrital")).add_to(my_Circle1)

# Polo: rows 5-7, hidden by default, each popup embedding the Vega chart.
# The spec is parsed once and the file handle is closed, instead of calling
# json.load(open(vis1)) on every iteration and leaking the handle.
with open(vis1) as vis1_file:
    vis1_spec = json.load(vis1_file)

fgPolo = FeatureGroup(name='Polo', show=False)
my_Circle2 = MarkerCluster().add_to(fgPolo)
for i in range(5, 8):
    polo_popup = folium.Popup(str("Polo"), max_width=450, show=True).add_child(
        folium.Vega(vis1_spec, width=450, height=250))
    folium.Marker(location=[x[i], y[i]], popup=polo_popup).add_to(my_Circle2)

# Rota: rows 9-19
fgRota = FeatureGroup(name='Rota', control=True)
my_Circle3 = MarkerCluster().add_to(fgRota)
for i in range(9, 20):
    folium.Marker(location=[x[i], y[i]], popup=str("Rota")).add_to(my_Circle3)
# Attach the three marker groups to the map.
m.add_child(fgDistrital)
m.add_child(fgPolo)
m.add_child(fgRota)
# `overlay` is the path to data/overlay.json built earlier in the script.
folium.GeoJson(overlay, name = 'vis1').add_to(m)
# Added after the groups and the overlay have been attached to the map.
folium.LayerControl(collapsed=True).add_to(m)
m.save('example.html')

Non conformable array error when using rpart with rpy2

I'm using rpart with rpy2 (version 2.8.6) on python 3.5, and want to train a decision tree for classification. My code snippet looks like this:
import rpy2.robjects.packages as rpackages
from rpy2.robjects.packages import importr
from rpy2.robjects import numpy2ri
from rpy2.robjects import pandas2ri
from rpy2.robjects import DataFrame, Formula
# Load the R package and switch on rpy2's numpy/pandas conversion.
rpart = importr('rpart')
numpy2ri.activate()
pandas2ri.activate()
# `DataFrame` here is rpy2.robjects.DataFrame (imported above), not pandas.
# owner_train_label / owner_train_data are defined outside this snippet.
dataf = DataFrame({'responsev': owner_train_label,
'predictorv': owner_train_data})
# Model the response on all remaining columns.
formula = Formula('responsev ~.')
clf = rpart.rpart(formula = formula, data = dataf, method = "class", control=rpart.rpart_control(minsplit = 10, xval = 10))
where owner_train_label is a numpy float64 array of shape (12610,) and
owner_train_data is a numpy float64 array of shape (12610,88)
This is the error I'm getting when I run the last line of code to fit the data.
RRuntimeError: Error in ((xmiss %*% rep(1, ncol(xmiss))) < ncol(xmiss)) & !ymiss :
non-conformable arrays
I get that it is telling me they are non-conformable arrays but I don't know why as for the same training data, I can train using sklearn's Decision tree successfully.
Thanks for your help.
I got around this by creating the dataframe using pandas and passing the pandas dataframe to rpart, using rpy2's pandas2ri to convert it to an R dataframe.
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
from rpy2.robjects import Formula
import pandas as pd  # pd.DataFrame is used below but pandas was never imported

rpart = importr('rpart')
# With pandas conversion active, the pandas frame can be passed to rpart as `data`.
pandas2ri.activate()

# One column per predictor, plus the label in column 'l'.
# owner_train_data / owner_train_label are defined outside this snippet.
df = pd.DataFrame(data = owner_train_data)
df['l'] = owner_train_label

# Model the label on all remaining columns.
formula = Formula('l ~.')
clf = rpart.rpart(formula = formula, data = df, method = "class", control=rpart.rpart_control(minsplit = 10, xval = 10))

Python 2.7: Tkinter/ttk Linked ComboBoxes

I've made a simple GUI using a GUI editor called PAGE. It contains two Tkinter.ttk combo boxes. My first combo box gets its values as column names from a connected sqlite database table. When I choose a column name from first combo box, second combo box should update its values automatically related to first values.
BTW, my codes below work properly in this current condition. If I choose a value from first combo box, It'll print the values to interactive shell. But these values should insert into second combo box. Does anyone know how can I figure it out?
Any help is greatly appreciated. Thanks in advance...
Here are my GUI codes:
from pysqlite2 import dbapi2 as db
from Tkinter import *
import ttk
def getdata():
    """Read the column names of the `states` table into the global `colnames`.

    Returns the same list, so `initcombo` below holds the names instead of
    None. The connection is closed even if the query raises.
    """
    global colnames
    conn = db.connect("blabla.sqlite")
    try:
        cur = conn.execute("select * from states")
        # The first item of each cursor.description entry is the column name.
        colnames = [abu[0] for abu in cur.description]
    finally:
        conn.close()
    return colnames


initcombo = getdata()
def vp_start_gui():
    """Create the Tk root window, build New_Toplevel_1 on it and run the main loop."""
    global val, w, root
    root = Tk()
    root.geometry('301x230+556+208')
    root.title('Linked Comboboxes')
    set_Tk_var()
    w = New_Toplevel_1(root)
    init()
    root.mainloop()
# Module-level handle to the window; None until one is created.
w = None


def create_New_Toplevel_1(root):
    """Open the UI in a new Toplevel under *root* unless `w` is already set.

    Returns the New_Toplevel_1 instance that was built, or None when `w`
    was already truthy.
    """
    global w, w_win
    if not w:
        w = Toplevel(root)
        w.title('New_Toplevel_1')
        w.geometry('301x230+556+208')
        set_Tk_var()
        w_win = New_Toplevel_1(w)
        init()
        return w_win
def destroy_New_Toplevel_1():
    """Destroy the window held in the global `w` and reset `w` to None."""
    global w
    w.destroy()
    w = None
def set_Tk_var():
    """Create the global `combobox` StringVar."""
    global combobox
    combobox = StringVar()


def init():
    """Start-up hook; intentionally does nothing."""
    return None
class New_Toplevel_1:
def __init__(self, master=None):
style = ttk.Style()
theme = style.theme_use()
default = style.lookup(theme, 'background')
master.configure(background=default)
def choose1(event=None):
conn2 = db.connect("blabla.sqlite")
cur2 = conn2.execute("select %s from states" % self.TCombobox1.get())
results = cur2.fetchall()
for row in results:
print row
self.TCombobox1 = ttk.Combobox (master, state='readonly')
self.TCombobox1.place(relx=0.03,rely=0.13,relheight=0.09,relwidth=0.48)
self.TCombobox1["values"] = colnames
self.TCombobox1.set("Choose one...")
self.TCombobox1.bind("<<ComboboxSelected>>", choose1)
self.TCombobox2 = ttk.Combobox (master, state='readonly')
self.TCombobox2.place(relx=0.03,rely=0.33,relheight=0.09,relwidth=0.48)
self.TLabel1 = ttk.Label (master)
self.TLabel1.place(relx=0.03,rely=0.83,height=19,width=28)
self.TLabel1.configure(relief="flat")
self.TLabel1.configure(text='''Info:''')
self.TButton1 = ttk.Button (master)
self.TButton1.place(relx=0.63,rely=0.22,height=25,width=76)
self.TButton1.configure(takefocus="")
self.TButton1.configure(text='''Run''')
if __name__ == '__main__':
vp_start_gui()
Set the 'results' as the value of the 'values' config of the second combobox.
def choose1(event=None):
conn2 = db.connect("blabla.sqlite")
cur2 = conn2.execute("select %s from states" % self.TCombobox1.get())
results = cur2.fetchall()
self.TCombobox2['values'] = results
for row in results:
print row

Resources