Finding the nearest neighbour between coordinates in two separate lists - loops

I have two galaxy catalogs and want to pair up each galaxy in catalog 1 with the closest galaxy from catalog 2. I have already written a script that does this for only one input catalog (not very efficiently but it works for what I need), which is posted below.
When I adapted the code to take two catalogs, the loop over catalog 2 doesn't iterate further down the list than j = 3. It gets stuck there for some reason.
Code for one catalog input:
def nnpairs(x):
    """
    find nearest neighbour of each galaxy and pairs them up
    """
    nearest_neighbours = []
    shortest_distances = []
    displacements = []
    m_diffs = []
    galaxies1 = []
    galaxies2 = []
    for i in range(len(x)):
        nearest_neighbour = []
        shortest_distance = []
        displacement = []
        m_diff = []
        galaxy1 = []
        galaxy2 = []
        for j in range(len(x)):
            if j == 0 and i != j:
                shortest_distance = separation(x[i], x[j])
            if (separation(x[i], x[j])) < (shortest_distance) and i != j:
                shortest_distance = separation(x[i], x[j])
                nearest_neighbour = CATAID[i], CATAID[j]
                displacement = displacement_along_loa(x[i], x[j])
                dRA1, dRA2, dDEC1, dDEC2 = displacement
                galaxy1 = (CATAID[i], shortest_distance, dRA1, dDEC1)
                galaxy2 = (CATAID[j], shortest_distance, dRA2, dDEC2)
                m_diff = mass_diff(x[i], x[j])
        nearest_neighbours.append(nearest_neighbour)
        shortest_distances.append(shortest_distance)
        displacements.append(displacement)
        m_diffs.append(m_diff)
        galaxies1.append(galaxy1)
        galaxies2.append(galaxy2)
    data = list(zip(nearest_neighbours))
    return data
Code for two catalogs:
def nnpairs_2(catalog1, catalog2):
    """
    find nearest neighbour of each galaxy and pairs them up
    """
    nearest_neighbours = []
    shortest_distances = []
    displacements = []
    m_diffs = []
    for i in range(len(catalog1)):
        nearest_neighbour = []
        shortest_distance = []
        displacement = []
        m_diff = []
        j = 0
        while j < len(catalog2):
            if j == 0 and i != 0:
                shortest_distance = separation(catalog1[i], catalog2[j])
            if (separation(catalog1[i], catalog2[j])) < (shortest_distance) and i != j:
                shortest_distance = separation(catalog1[i], catalog2[j])
                nearest_neighbour = (i, j)
                displacement = displacement_along_loa(catalog1[i], catalog2[j])
                m_diff = mass_diff(catalog1[i], catalog2[j])
            j = j + 1
        nearest_neighbours.append(nearest_neighbour)
        shortest_distances.append(shortest_distance)
        displacements.append(displacement)
        m_diffs.append(m_diff)
    data = list(zip(nearest_neighbours))
    return data
The idea is that the code finds the separation between every pair of galaxies and records the nearest-neighbour pair with the smallest separation, but the second version only matches every galaxy in catalog 1 with the third entry in catalog 2.
I understand there is quite a lot of information here but any help would be much appreciated, thanks in advance!
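For reference, here is a minimal sketch of how the two-catalogue version could look, assuming separation(), displacement_along_loa() and mass_diff() behave as in the code above. The main changes are initialising the shortest distance to infinity instead of an empty list, and dropping the i != j guard, which only makes sense when a catalogue is compared with itself; they look like the likely culprits here.

def nnpairs_2_sketch(catalog1, catalog2):
    """Pair every galaxy in catalog1 with its nearest neighbour in catalog2 (sketch)."""
    nearest_neighbours = []
    shortest_distances = []
    for i in range(len(catalog1)):
        shortest_distance = float('inf')   # "no match found yet"
        nearest_neighbour = None
        for j in range(len(catalog2)):     # the catalogues are different, so no i != j test
            d = separation(catalog1[i], catalog2[j])
            if d < shortest_distance:
                shortest_distance = d
                nearest_neighbour = (i, j)
        nearest_neighbours.append(nearest_neighbour)
        shortest_distances.append(shortest_distance)
    return list(zip(nearest_neighbours, shortest_distances))

For large catalogues, scipy.spatial.cKDTree (Cartesian coordinates) or astropy's SkyCoord.match_to_catalog_sky (RA/Dec) will do the same matching far faster than a double loop.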

Related

How do I provide dimensions of an array to be reshaped as a vector variable?

I want to reshape the array G in every iteration, and the new dimensions come from a vector, say tensorSize. But MATLAB's reshape command does not accept the method given below:
tensorSize = [5,6,7,9,3,4];
r=[1,5,25,225,45,9,1];
G1(1) = {randn(1,tensorSize(1),r(2))};
G1(2) = {randn(r(2),tensorSize(2),r(3))};
G1(3) = {randn(r(3),tensorSize(3),r(4))};
G1(4) = {randn(r(4),tensorSize(4),r(5))};
G1(5) = {randn(r(5),tensorSize(5),r(6))};
G1(6) = {randn(r(6),tensorSize(6),1)};
for j = 1:length(tensorSize)-1
    if j == 1
        G = G1(j);
    end
    G = reshape(G,[],r(j+1));
    H = reshape(G1(j+1),r(j+1),[]);
    G = G*H;
    G = reshape(G,tensorSize(1:j+1),[]);
end
I have also tried to use other alternatives like:
str2num(regexprep(num2str(tensorSize(1:j+1)),'\s+',','))
str2num(strjoin(cellstr(tensorSize(1:j+1)),','))
but these build a string, and once converted back to numbers the values are no longer a comma-separated list, so reshape does not accept them.
Is there any work around?
Thanks to @beaker in the comments below for proposing this solution!
tensorSize = [5,6,7,9,3,4];
r=[1,5,25,225,45,9,1];
G1(1) = {randn(1,tensorSize(1),r(2))};
G1(2) = {randn(r(2),tensorSize(2),r(3))};
G1(3) = {randn(r(3),tensorSize(3),r(4))};
G1(4) = {randn(r(4),tensorSize(4),r(5))};
G1(5) = {randn(r(5),tensorSize(5),r(6))};
G1(6) = {randn(r(6),tensorSize(6),1)};
tensorSizeCell = {zeros(1,length(tensorSize))};
for i = 1:length(tensorSize)
    tensorSizeCell(i) = {tensorSize(i)};
end
for j = 1:length(tensorSize)-1
    if j == 1
        G = cell2mat(G1(j));
    end
    G = reshape(G,[],r(j+1));
    H = reshape(cell2mat(G1(j+1)),r(j+1),[]);
    G = G*H;
    % tensorSizeCell{1:j+1} expands to a comma-separated list of sizes,
    % which is exactly the form reshape expects.
    G = reshape(G,tensorSizeCell{1:j+1},[]);
end

Brute Force Transposition

Hello, I have an assignment that I can't figure out. The questions for the assignment are:
Make a loop that tries to decrypt the ciphertext with all possible keys one at a time.
For each loop, each individual word is looked up in the dictionary. If 85% of the words are found in the dictionary, then it is probably the right key in the current run, and then the loop must be broken.
Decrypt the text with the found key and print it.
I have code that reads all the words from a dictionary and counts them. I have linked the csv file. Hope you can help me.
import csv
import pickle
import math

orddict = {}
item = 0
with open('alle_dkord.csv', 'r', encoding='utf-8') as file:
    reader = csv.reader(file, delimiter=';')
    for row in reader:
        orddict[row[0].upper()] = row[1]
print(len(orddict))

pkfile = open('wordlist.pkl', 'ab')
pickle.dump(orddict, pkfile)
pkfile.close()

def main():
    msg = "This is a cypher text"
    kryptmsg = "Ta h ticesyx ptihse r"
    key = 8
    krypteret_tekst = krypter(key, msg)
    print(krypteret_tekst)
    dekrypteret_tekst = dekrypter(key, kryptmsg)
    print(dekrypteret_tekst)

def krypter(key, msg):
    ciffer_string = [""] * key
    for kolonne in range(key):
        curIndex = kolonne
        while curIndex < len(msg):
            ciffer_string[kolonne] += msg[curIndex]
            curIndex += key
    return ''.join(ciffer_string)

def dekrypter(key, kryptmsg):
    numKolonner = int(math.ceil(len(kryptmsg)/float(key)))
    numRows = key
    numOfGreyBox = (numKolonner * numRows) - len(kryptmsg)
    plaintekst = [''] * numKolonner
    kolonne = 0
    row = 0
    for symbol in kryptmsg:
        plaintekst[kolonne] += symbol
        kolonne += 1
        if (kolonne == numKolonner) or (kolonne == numKolonner - 1 and row >= numRows - numOfGreyBox):
            kolonne = 0
            row += 1
    return ''.join(plaintekst)

if __name__ == '__main__':
    main()
The csv file
I have tried to write the loop, but it didn't work.
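Here is a rough sketch of what the brute-force loop could look like, assuming the dictionary has already been loaded into orddict (keys uppercased, as above), dekrypter is the decryption function from the code above, and the range of keys to try is a guess:

def brute_force(kryptmsg, orddict, max_key=20):
    # Try every key in turn; stop at the first one where at least 85% of
    # the decrypted words appear in the dictionary.
    for key in range(1, max_key + 1):
        kandidat = dekrypter(key, kryptmsg)
        ord_liste = kandidat.upper().split()
        if not ord_liste:
            continue
        fundet = sum(1 for w in ord_liste if w in orddict)
        if fundet / float(len(ord_liste)) >= 0.85:
            print("Probable key:", key)
            print(kandidat)
            return key
    return None

Punctuation attached to words (commas, full stops) will make the dictionary lookups fail, so it may need to be stripped before the check.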

Use numpy arrays as arguments in odeint

I am trying to solve a system of differential equations using odeint. I have 4 txt files (that look like the picture below). I read them and save them in numpy arrays of length 8000 (maybe not in the most efficient way, but anyway...). I want to pass these 4 arrays as arguments to odeint and solve the system, so that at every time step odeint takes (one of the 8000), it uses the corresponding value from these arrays. Is there any way to do this automatically without getting lost in for loops? I tried to do it like this (see the code below) but I get the error:
if g2>0: ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
g2 is supposed to be 1x1 at every step of odeint, so it must have something to do with the way I use the 4 arrays (xdot, ydot, xdotdot, ydotdot).
I am new to python and I use python 2.7.12 on Ubuntu 16.04 LTS.
Thank you in advance.
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
added_mass_x = 0.03 # kg
added_mass_y = 0.04
mb = 0.3 # kg
m1 = mb-added_mass_x
m2 = mb-added_mass_y
l1 = 0.07 # m
l2 = 0.05 # m
J = 0.00050797 # kgm^2
Sa = 0.0110 # m^2
Cd = 2.44
Cl = 3.41
Kd = 0.000655 # kgm^2
r = 1000 # kg/m^3
c1 = 0.5*r*Sa*Cd
c2 = 0.5*r*Sa*Cl
c3 = 0.5*mb*(l1**2)
c4 = Kd/J
c5 = (1/(2*J))*(l1**2)*mb*l2
c6 = (1/(3*J))*(l1**3)*mb
theta_0 = 10*(np.pi/180) # rad
theta_A = 20*(np.pi/180) # rad
f = 2 # Hz
###################################################################
t = np.linspace(0,100,8000) # s
###################################################################
# Save data from txt files into numpy arrays
xdot_list = []
ydot_list = []
xdotdot_list = []
ydotdot_list = []
with open('xdot.txt', 'r') as filehandle:
    filecontents = filehandle.readlines()
    for line in filecontents:
        current_place = line[:-1]
        xdot_list.append(current_place)
xdot = np.array(xdot_list, dtype=np.float32)
with open('ydot.txt', 'r') as filehandle:
    filecontents = filehandle.readlines()
    for line in filecontents:
        current_place = line[:-1]
        ydot_list.append(current_place)
ydot = np.array(ydot_list, dtype=np.float32)
with open('xdotdot.txt', 'r') as filehandle:
    filecontents = filehandle.readlines()
    for line in filecontents:
        current_place = line[:-1]
        xdotdot_list.append(current_place)
xdotdot = np.array(xdotdot_list, dtype=np.float32)
with open('ydotdot.txt', 'r') as filehandle:
    filecontents = filehandle.readlines()
    for line in filecontents:
        current_place = line[:-1]
        ydotdot_list.append(current_place)
ydotdot = np.array(ydotdot_list, dtype=np.float32)
def inverse(k,t,xdot,ydot,xdotdot,ydotdot):
    vcx_i = k[0]
    vcy_i = k[1]
    psi_i = k[2]
    wz_i = k[3]
    theta_i = k[4]
    theta_deg_i = k[5]
    # Subsystem 4
    vcx_i = xdot*np.cos(psi_i)-ydot*np.sin(psi_i)
    vcy_i = ydot*np.cos(psi_i)+xdot*np.sin(psi_i)
    psidot_i = wz_i
    vcxdot_i = xdotdot*np.cos(psi_i)-xdot*np.sin(psi_i)*psidot_i-ydotdot*np.sin(psi_i)-ydot*np.cos(psi_i)*psidot_i
    vcydot_i = ydotdot*np.cos(psi_i)-ydot*np.sin(psi_i)*psidot_i+xdotdot*np.sin(psi_i)+xdot*np.cos(psi_i)*psidot_i
    g1 = -(m1/c3)*vcxdot_i+(m2/c3)*vcy_i*wz_i-(c1/c3)*vcx_i*np.sqrt((vcx_i**2)+(vcy_i**2))+(c2/c3)*vcy_i*np.sqrt((vcx_i**2)+(vcy_i**2))*np.arctan2(vcy_i,vcx_i)
    g2 = (m2/c3)*vcydot_i+(m1/c3)*vcx_i*wz_i+(c1/c3)*vcy_i*np.sqrt((vcx_i**2)+(vcy_i**2))+(c2/c3)*vcx_i*np.sqrt((vcx_i**2)+(vcy_i**2))*np.arctan2(vcy_i,vcx_i)
    A = 12*np.sin(2*np.pi*f*t+np.pi) # tail_frequency equation from Simulink
    if A>=0.1:
        wzdot_i = ((m1-m2)/J)*vcx_i*vcy_i-c4*wz_i**2*np.sign(wz_i)-c5*g2-c6*np.sqrt((g1**2)+(g2**2))
    elif A<-0.1:
        wzdot_i = ((m1-m2)/J)*vcx_i*vcy_i-c4*wz_i**2*np.sign(wz_i)-c5*g2+c6*np.sqrt((g1**2)+(g2**2))
    else:
        wzdot_i = ((m1-m2)/J)*vcx_i*vcy_i-c4*wz_i**2*np.sign(wz_i)-c5*g2
    # Subsystem 5
    if g2>0:
        theta_i = np.arctan2(g1,g2)
    elif g2<0 and g1>=0:
        theta_i = np.arctan2(g1,g2)-np.pi
    elif g2<0 and g1<0:
        theta_i = np.arctan2(g1,g2)+np.pi
    elif g2==0 and g1>0:
        theta_i = -np.pi/2
    elif g2==0 and g1<0:
        theta_i = np.pi/2
    elif g1==0 and g2==0:
        theta_i = 0
    theta_deg_i = (theta_i*180)/np.pi
    return [vcxdot_i, vcydot_i, psidot_i, wzdot_i, theta_i, theta_deg_i]
# initial conditions
vcx_i_0 = 0.1257
vcy_i_0 = 0
psi_i_0 = 0
wz_i_0 = 0
theta_i_0 = 0
theta_deg_i_0 = 0
#theta_i_0 = 0.1745
#theta_deg_i_0 = 9.866
k0 = [vcx_i_0, vcy_i_0, psi_i_0, wz_i_0, theta_i_0, theta_deg_i_0]
# solve the system of differential equations
k = odeint(inverse, k0, t, args=(xdot,ydot,xdotdot,ydotdot), tfirst=False)
# save the results
vcx_i = k[:,0]
vcy_i = k[:,1]
psi_i = k[:,2]
wz_i = k[:,3]
theta_i = k[:,4]
theta_deg_i = k[:,5]
# Repeat Subsystem 5 so that theta_i, theta_deg_i can be plotted
theta_i = [inverse(k_i, t_i)[4] for t_i, k_i in zip(t, k)]
theta_deg_i = [inverse(k_i, t_i)[5] for t_i, k_i in zip(t, k)]
# Compute the mean angle theta and the oscillation amplitude
mesi_gwnia = sum(theta_i)/len(theta_i) # rad
platos = (max(theta_i)-min(theta_i))/2
UPDATE:
The most relevant solution I found so far is this:
Solving a system of odes (with changing constant!) using scipy.integrate.odeint?
But since I only have the values of my variables in arrays, and not the equations of the variables as functions of time (e.g. xdot = f(t)), I tried to apply an interpolation between the values in my arrays, as shown here: ODEINT with multiple parameters (time-dependent)
I managed to get the code running without errors, but the total run time increased dramatically and the results of the solved system are completely wrong. I tried every type of interpolation I found here: https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.interp1d.html but the outcome is still wrong. That means either my interpolation isn't the best possible, or my arrays (8000 values) have too many points to interpolate between and still solve the system correctly.
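For what it's worth, one common pattern is to build the interpolants once, outside the right-hand-side function, and evaluate them at the scalar t that odeint passes in, so that g1 and g2 stay scalars. A toy sketch of that pattern (the placeholder dynamics and the use of np.loadtxt are assumptions, not the model above):

import numpy as np
from scipy.integrate import odeint
from scipy.interpolate import interp1d

t = np.linspace(0, 100, 8000)
xdot = np.loadtxt('xdot.txt')                         # hypothetical: one value per line
xdot_f = interp1d(t, xdot, fill_value="extrapolate")  # built once, outside the RHS

def rhs(k, t):
    xdot_i = float(xdot_f(t))   # scalar value at this time step, so ordinary if-tests work
    return -0.1 * k + xdot_i    # placeholder dynamics, not the real equations

k = odeint(rhs, 0.0, t)

The same idea applies to ydot, xdotdot and ydotdot; inside inverse() each of them would be replaced by the scalar obtained from its interpolant.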

Biopython for Loop IndexError

I get "IndexError: list index out of range" when I run this code. Also, retmax is set to 614 because that's the total number of results when I make the request. Is there a way to make retmax equal to the number of results, using a variable that changes depending on the search results?
#!/usr/bin/env python
from Bio import Entrez
Entrez.email = "something@gmail.com"
handle1 = Entrez.esearch(db = "nucleotide", term = "dengue full genome", retmax = 614)
record = Entrez.read(handle1)
IdNums = [int(i) for i in record['IdList']]
while i >= 0 and i <= len(IdNums):
    handle2 = Entrez.esearch(db = "nucleotide", id = IdNums[i], type = "gb", retmode = "text")
    record = Entrez.read(handle2)
    print(record)
    i += 1
Rather than using a while loop, you can use a for loop...
from Bio import Entrez
Entrez.email = 'youremailaddress'
handle1 = Entrez.esearch(db = 'nucleotide', term = 'dengue full genome', retmax = 614)
record = Entrez.read(handle1)
IdNums = [int(i) for i in record['IdList']]
for i in IdNums:
    print(i)
    handle2 = Entrez.esearch(db = 'nucleotide', term = 'dengue full genome', id = i, rettype = 'gb', retmode = 'text')
    record = Entrez.read(handle2)
    print(record)
I ran it on my computer and it seems to work. The for loop solved the out-of-bounds error, and adding the term to handle2 solved the calling error.
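As an aside on the retmax part of the question: the result of Entrez.esearch also carries a Count field, so one approach (sketched here, not tested against this exact query) is to run the search once to learn the count, then repeat it with retmax set from that count; and to download the actual GenBank records, Entrez.efetch is normally used rather than a second esearch:

from Bio import Entrez

Entrez.email = 'youremailaddress'

# First search only to find out how many hits there are.
handle = Entrez.esearch(db='nucleotide', term='dengue full genome')
count = int(Entrez.read(handle)['Count'])

# Repeat the search asking for all of the ids.
handle = Entrez.esearch(db='nucleotide', term='dengue full genome', retmax=count)
id_list = Entrez.read(handle)['IdList']

for acc_id in id_list:
    # efetch retrieves the record itself; rettype='gb' gives GenBank text.
    fetch = Entrez.efetch(db='nucleotide', id=acc_id, rettype='gb', retmode='text')
    print(fetch.read())

Fetching 600+ records one by one is slow, so passing batches of ids to efetch is worth considering.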

Is it better to change the db schema?

I'm building a web app with Django and use PostgreSQL for the db. The app code is getting really messy (my beginner skills being a big factor) and slow, even when I run the app locally.
This is an excerpt of my models.py file:
REPEATS_CHOICES = (
    (NEVER, 'Never'),
    (DAILY, 'Daily'),
    (WEEKLY, 'Weekly'),
    (MONTHLY, 'Monthly'),
    ...some more...
)

class Transaction(models.Model):
    name = models.CharField(max_length=30)
    type = models.IntegerField(max_length=1, choices=TYPE_CHOICES) # 0 = 'Income' , 1 = 'Expense'
    amount = models.DecimalField(max_digits=12, decimal_places=2)
    date = models.DateField(default=date.today)
    frequency = models.IntegerField(max_length=2, choices=REPEATS_CHOICES)
    ends = models.DateField(blank=True, null=True)
    active = models.BooleanField(default=True)
    category = models.ForeignKey(Category, related_name='transactions', blank=True, null=True)
    account = models.ForeignKey(Account, related_name='transactions')
The problem is with date, frequency and ends. With this info I can work out all the dates on which transactions occur and use them to fill a cashflow table. Doing things this way involves creating a lot of structures (dictionaries, lists and tuples) and iterating over them a lot. Maybe there is a very simple way of solving this with the current schema, but I couldn't figure out how.
I think that the app would be easier to code if, at the creation of a transaction, I could save all the dates in the db. I don't know if it's possible or if it's a good idea.
I'm reading a book about Google App Engine and the Datastore's multivalued properties. What do you think about this for solving my problem?
Edit: I didn't know about the PickleField. I'm now reading about it, maybe I could use it to store all the transaction's datetime objects.
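If storing the dates turns out to be the way to go, dateutil's rrule can generate the occurrence dates from date, frequency and ends without hand-rolled loops. A rough sketch; the mapping from the REPEATS_CHOICES values to rrule frequencies is a guess (only 0 = 'Never' and 4 = 'Monthly' are visible in the excerpt):

from datetime import datetime, time
from dateutil import rrule

# Hypothetical mapping from the frequency field to rrule frequencies.
FREQ_MAP = {1: rrule.DAILY, 2: rrule.WEEKLY, 4: rrule.MONTHLY}

def occurrence_dates(tx):
    """Return every date on which the transaction occurs."""
    if tx.frequency == 0 or tx.ends is None:        # 'Never' repeats, or no end date
        return [tx.date]
    start = datetime.combine(tx.date, time.min)     # rrule expects datetimes
    end = datetime.combine(tx.ends, time.min)
    return [d.date() for d in rrule.rrule(FREQ_MAP[tx.frequency],
                                          dtstart=start, until=end)]

Whether to store these dates (in a separate table or a pickled/JSON field) or keep computing them on the fly is mostly a trade-off between write cost and read cost.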
Edit2: This is an excerpt of my cashflow2 view (sorry for the horrible code):
def cashflow2(request, account_name="Initial"):
    if account_name == "Initial":
        uri = "/cashflow/new_account"
        return HttpResponseRedirect(uri)
    month_info = {}
    cat_info = {}
    m_y_list = [] # [(month,year),]
    trans = []
    min, max = [], []
    account = Account.objects.get(name=account_name, user=request.user)
    categories = account.categories.all()
    for year in range(2006, 2017):
        for month in range(1, 13):
            month_info[(month, year)] = [0, 0, 0]
            for cat in categories:
                cat_info[(cat, month, year)] = 0
    previous_months = 1 # previous months from the current one
    next_months = 5
    dates_list = month_year_list(previous_months, next_months) # Returns [(month,year)] from the requested range
    m_y_list = [(date.month, date.year) for date in month_year_list(1, 5)]
    min, max = dates_list[0], dates_list[-1]
    INCOME = 0
    EXPENSE = 1
    ONHAND = 2
    transacs_in_dates = []
    txs = account.transactions.order_by('date')
    for tx in txs:
        monthyear = ()
        monthyear = (tx.date.month, tx.date.year)
        if tx.frequency == 0:
            if tx.type == 0:
                month_info[monthyear][INCOME] += tx.amount
                if tx.category:
                    cat_info[(tx.category, monthyear[0], monthyear[1])] += tx.amount
            else:
                month_info[monthyear][EXPENSE] += tx.amount
                if tx.category:
                    cat_info[(tx.category, monthyear[0], monthyear[1])] += tx.amount
            if monthyear in m_y_list:
                if tx not in transacs_in_dates:
                    transacs_in_dates.append(tx)
        elif tx.frequency == 4: # frequency = 'Monthly'
            months_dif = relativedelta.relativedelta(tx.ends, tx.date).months
            if tx.ends.day < tx.date.day:
                months_dif += 1
            years_dif = relativedelta.relativedelta(tx.ends, tx.date).years
            dif = months_dif + (years_dif*12)
            dates_range = dif + 1
            for i in range(dates_range):
                dt = tx.date + relativedelta.relativedelta(months=+i)
                if (dt.month, dt.year) in m_y_list:
                    if tx not in transacs_in_dates:
                        transacs_in_dates.append(tx)
                if tx.type == 0:
                    month_info[(dt.month, dt.year)][INCOME] += tx.amount
                    if tx.category:
                        cat_info[(tx.category, dt.month, dt.year)] += tx.amount
                else:
                    month_info[(dt.month, dt.year)][EXPENSE] += tx.amount
                    if tx.category:
                        cat_info[(tx.category, dt.month, dt.year)] += tx.amount
    import operator
    thelist = []
    thelist = sorted((my + tuple(v) for my, v in month_info.iteritems()),
                     key=operator.itemgetter(1, 0))
    thelistlist = []
    for atuple in thelist:
        thelistlist.append(list(atuple))
    for i in range(len(thelistlist)):
        if i != 0:
            thelistlist[i][4] = thelistlist[i-1][2] - thelistlist[i-1][3] + thelistlist[i-1][4]
    list = []
    for el in thelistlist:
        if (el[0], el[1]) in m_y_list:
            list.append(el)
    transactions = account.transactions.all()
    cats_in_dates_income = []
    cats_in_dates_expense = []
    for t in transacs_in_dates:
        if t.category and t.type == 0:
            if t.category not in cats_in_dates_income:
                cats_in_dates_income.append(t.category)
        elif t.category and t.type == 1:
            if t.category not in cats_in_dates_expense:
                cats_in_dates_expense.append(t.category)
    cat_infos = []
    for k, v in cat_info.items():
        cat_infos.append((k[0], k[1], k[2], v))
Depends on how relevant App Engine is here. P.S. If you'd like to store pickled objects as well as JSON objects in the Google Datastore, check out these two code snippets:
http://kovshenin.com/archives/app-engine-json-objects-google-datastore/
http://kovshenin.com/archives/app-engine-python-objects-in-the-google-datastore/
Also note that the Google Datastore is a non-relational database, so you might have other trouble refactoring your code to switch to that.
Cheers and good luck!
