Binary search sort, Time out - arrays

Climbing the Leaderboard. Terminated due to timeout :(
A HackerRank challenge of algorithm category.
My approach: to reduce the overhead when the inputs are large arrays, I used map() together with a binary search function. No luck: only 7 of the 11 test cases passed. The code below is my solution so far. Could I get some help improving it?
Problem description
class Score_board:
    """Dense-ranked leaderboard that reports the rank each new score would get.

    Equal scores share a rank, so only the distinct board scores matter.
    """

    def climbingLeaderboard(self, scores, alice):
        """Return the rank obtained by each of Alice's scores.

        scores -- existing leaderboard scores (duplicates allowed, any order).
        alice  -- Alice's scores; each one is ranked independently.

        Returns a list with one 1-based rank per entry of ``alice``.
        """
        # Duplicates never change a dense rank, so keep each score once, highest first.
        self.boardScores = sorted(set(scores), reverse=True)
        return [self.rankBoard(score) for score in alice]

    def rankBoard(self, current):
        """Return the 1-based dense rank of ``current`` against ``self.boardScores``.

        ``self.boardScores`` must hold distinct scores in descending order. The
        rank is one more than the number of board scores strictly greater than
        ``current``, found by binary search on index bounds. No slicing,
        ``in`` tests or ``index()`` calls are used, since each of those is a
        linear scan. An empty board yields rank 1.
        """
        board = self.boardScores
        low, high = 0, len(board)
        # Invariant: board[:low] are all > current, board[high:] are all <= current.
        while low < high:
            middle = (low + high) // 2
            if board[middle] > current:
                low = middle + 1
            else:
                high = middle
        return low + 1
if __name__ == '__main__':
    # Input format: a count line followed by the space-separated values, twice.
    # The counts are read only to consume their lines; the lists carry the data.
    scores_count = int(input())
    scores = [int(token) for token in input().rstrip().split()]
    alice_count = int(input())
    alice = [int(token) for token in input().rstrip().split()]

    result = Score_board().climbingLeaderboard(scores, alice)

    # One rank per line, followed by the same trailing newline output as before.
    print('\n'.join(str(rank) for rank in result))
    print('\n')

Related

Restricted boltzmann machine - array

I'm doing a college assignment about restricted Boltzmann machines (RBMs), but this code raises an error. I am confused about how to get it working. Can anybody help me with this error?
def reconstruct(self, v):
    """Map visible data to hidden activations and back to a visible reconstruction.

    Applies sigmoid(v . W + hbias), then sigmoid(h . W^T + vbias), and returns
    the second result.
    NOTE(review): np.dot requires v's last dimension to match W's first
    dimension. Confirm the caller's data layout.
    """
    hidden = sigmoid(np.dot(v, self.W) + self.hbias)
    return sigmoid(np.dot(hidden, self.W.T) + self.vbias)
# Train an RBM on `datainput`, report the cost per epoch on stderr, then print
# the reconstruction of `datatarget`.
#   learning_rate   -- passed to contrastive_divergence as `lr`
#   k               -- passed to contrastive_divergence as `k`
#   training_epochs -- number of training iterations
# NOTE(review): datainput, datatarget, RBM, sigmoid and sys are not defined in
# this excerpt. The indentation was lost, so which statements sit inside the
# `for` loop must be confirmed against the original file.
def test_rbm(learning_rate=0.1, k=1, training_epochs=10):
data = datainput
# fixed seed so runs are repeatable
rng = np.random.RandomState(123)
# construct RBM with 40 visible and 20 hidden units
rbm = RBM(input=data, n_visible=40, n_hidden=20, np_rng=rng)
# train: one contrastive-divergence update per epoch, then log the cost
for epoch in range(training_epochs):
rbm.contrastive_divergence(lr=learning_rate, k=k)
cost = rbm.get_reconstruction_cross_entropy()
print ('Training epoch %d, cost is ' % epoch, cost, file = sys.stderr)
# test: reconstruct the target data
# NOTE(review): the reported failure had v of shape (1979, 1) against weights of
# shape (40, 20), so datatarget presumably needs 40 columns (n_visible). Confirm.
v = datatarget
print (rbm.reconstruct(v))
# Run the experiment only when executed as a script.
if __name__ == "__main__":
test_rbm()
ValueError: shapes (1979,1) and (40,20) not aligned: 1 (dim 1) != 40 (dim 0)
The error is raised at run time by print(rbm.reconstruct(v)); the failing line is h = sigmoid(np.dot(v, self.W) + self.hbias)

Ruby Array Reference Function [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 6 years ago.
Improve this question
I don't understand how, in Ruby, the array variables (best and vector) relate to the return values of the functions (fitness or bitstring, for example).
Consider this case:
best[:fitness]
Here best is an array and fitness is a function: what is required of the variable, and what is referenced by the function?
See the complete code:
# Sum of squares of the components of +vector+.
#
# @param vector [Enumerable<Numeric>] candidate solution
# @return [Float] 0.0 for an empty vector, otherwise the sum of x**2.0
def objective_function(vector)
  total = 0.0
  vector.each { |component| total += component ** 2.0 }
  total
end
# Build a random string of "0"/"1" characters.
#
# One rand draw is made per bit; a draw below 0.5 yields "1".
#
# @param num_bits [Integer] number of characters to generate
# @return [String] bitstring of length +num_bits+
def random_bitstring(num_bits)
  bits = +""
  (0...num_bits).each do
    bits << (rand < 0.5 ? "1" : "0")
  end
  bits
end
# Decode a bitstring into one real value per search-space dimension.
#
# Each dimension owns +bits_per_param+ consecutive characters, read as an
# unsigned binary number (most significant bit first) and scaled linearly
# into that dimension's [min, max] bounds.
#
# @param bitstring [String] concatenated "0"/"1" parameters
# @param search_space [Array<Array(Numeric, Numeric)>] [min, max] per dimension
# @param bits_per_param [Integer] characters used per dimension
# @return [Array<Float>] decoded vector, one entry per dimension
def decode(bitstring, search_space, bits_per_param)
  search_space.each_with_index.map do |(lower, upper), index|
    start = index * bits_per_param
    # Reversed so that character position equals the power of two it carries.
    chunk = bitstring[start...(start + bits_per_param)].reverse
    weight = 0.0
    chunk.each_char.with_index do |bit, position|
      weight += (bit == '1' ? 1.0 : 0.0) * (2.0 ** position.to_f)
    end
    lower + ((upper - lower) / ((2.0 ** bits_per_param.to_f) - 1.0)) * weight
  end
end
# Evaluate +candidate+ in place: decode its bitstring into :vector and store
# the objective value of that vector under :fitness.
#
# @param candidate [Hash] must contain :bitstring; gains :vector and :fitness
# @param search_space [Array] bounds forwarded to decode
# @param param_bits [Integer] bits per parameter forwarded to decode
# @return [Object] the value stored in candidate[:fitness]
def fitness(candidate, search_space, param_bits)
  decoded = decode(candidate[:bitstring], search_space, param_bits)
  candidate[:vector] = decoded
  candidate[:fitness] = objective_function(decoded)
end
# Pick two distinct random members of +pop+ and return the one with the lower
# :fitness (the second pick wins ties).
#
# A one-member population used to loop forever while searching for a distinct
# opponent; that member is now returned directly.
#
# @param pop [Array<Hash>] candidates carrying a :fitness value
# @return [Hash] the tournament winner
# @raise [ArgumentError] if +pop+ is empty
def binary_tournament(pop)
  raise ArgumentError, "population must not be empty" if pop.empty?
  # No distinct opponent exists, so the sole member wins by default.
  return pop.first if pop.size == 1

  i = rand(pop.size)
  j = rand(pop.size)
  j = rand(pop.size) while j == i
  pop[i][:fitness] < pop[j][:fitness] ? pop[i] : pop[j]
end
# Return a mutated copy of +bitstring+.
#
# Every character gets its own rand draw; when the draw is below +rate+ the
# character is replaced: "1" becomes "0", anything else becomes "1".
#
# @param bitstring [String] parent bitstring
# @param rate [Float] per-character mutation probability (default 1/length)
# @return [String] new string of the same length
def point_mutation(bitstring, rate=1.0/bitstring.size)
  mutated = +""
  bitstring.each_char do |bit|
    if rand < rate
      mutated << (bit == '1' ? "0" : "1")
    else
      mutated << bit
    end
  end
  mutated
end
# Uniform crossover of two bitstrings.
#
# A first rand draw decides whether crossover happens at all: when it is not
# below +rate+, a copy of +parent1+ is returned. Otherwise each position takes
# its character from parent1 or parent2 on a fair coin flip.
#
# @param parent1 [String] first parent; defines the child's length
# @param parent2 [String] second parent
# @param rate [Float] probability of performing crossover
# @return [String] the child bitstring (always a new object)
def crossover(parent1, parent2, rate)
  return "" + parent1 if rand >= rate

  offspring = +""
  parent1.each_char.with_index do |bit, position|
    offspring << (rand < 0.5 ? bit : parent2[position].chr)
  end
  offspring
end
# Produce children from the selected parents.
#
# Parents are paired with their neighbour (even index with the next entry, odd
# index with the previous one); the final entry is always paired with the
# first. Each pair yields one child via crossover followed by point mutation.
# Generation stops once +pop_size+ children exist.
#
# @param selected [Array<Hash>] parents carrying :bitstring
# @param pop_size [Integer] maximum number of children
# @param p_cross [Float] crossover rate forwarded to crossover
# @param p_mut [Float] mutation rate forwarded to point_mutation
# @return [Array<Hash>] children, each with a :bitstring
def reproduce(selected, pop_size, p_cross, p_mut)
  offspring = []
  last_index = selected.size - 1
  selected.each_with_index do |parent, index|
    mate = if index == last_index
      selected[0]
    elsif index.even?
      selected[index + 1]
    else
      selected[index - 1]
    end
    crossed = crossover(parent[:bitstring], mate[:bitstring], p_cross)
    offspring << { :bitstring => point_mutation(crossed, p_mut) }
    break if offspring.size >= pop_size
  end
  offspring
end
# Local search by repeated point mutation.
#
# Starting from +child+, mutate the current best +max_local_gens+ times; a
# mutant replaces the current best whenever its fitness is lower or equal.
#
# @param child [Hash] starting candidate with :bitstring and :fitness
# @param search_space [Array] bounds forwarded to fitness
# @param p_mut [Float] mutation rate forwarded to point_mutation
# @param max_local_gens [Integer] number of mutation attempts
# @param bits_per_param [Integer] bits per parameter forwarded to fitness
# @return [Hash] the best candidate found (+child+ itself if none was accepted)
def bitclimber(child, search_space, p_mut, max_local_gens, bits_per_param)
  max_local_gens.times.reduce(child) do |best_so_far, _|
    neighbour = { :bitstring => point_mutation(best_so_far[:bitstring], p_mut) }
    fitness(neighbour, search_space, bits_per_param)
    neighbour[:fitness] <= best_so_far[:fitness] ? neighbour : best_so_far
  end
end
# Memetic algorithm main loop: a genetic search whose children are sometimes
# refined by local bit climbing.
#
# Builds a random population, then for +max_gens+ generations selects parents
# by binary tournament, reproduces, evaluates the children, applies bitclimber
# to each child with probability +p_local+, and tracks the best candidate seen.
# A progress line is printed per generation.
#
# @param max_gens [Integer] number of generations
# @param search_space [Array<Array(Numeric, Numeric)>] [min, max] per dimension
# @param pop_size [Integer] population size
# @param p_cross [Float] crossover rate
# @param p_mut [Float] mutation rate
# @param max_local_gens [Integer] local-search steps per refined child
# @param p_local [Float] probability of refining a child
# @param bits_per_param [Integer] bits encoding each dimension
# @return [Hash] best candidate found (lowest :fitness)
def search(max_gens, search_space, pop_size, p_cross, p_mut, max_local_gens, p_local, bits_per_param=16)
  pop = Array.new(pop_size) do
    { :bitstring => random_bitstring(search_space.size * bits_per_param) }
  end
  pop.each { |candidate| fitness(candidate, search_space, bits_per_param) }
  # Only the minimum is needed here, so no full sort.
  best = pop.min_by { |candidate| candidate[:fitness] }
  max_gens.times do |gen|
    selected = Array.new(pop_size) { binary_tournament(pop) }
    children = reproduce(selected, pop_size, p_cross, p_mut)
    children.each { |cand| fitness(cand, search_space, bits_per_param) }
    # One rand draw per child, in order, decides whether it gets local search.
    pop = children.map do |child|
      if rand < p_local
        bitclimber(child, search_space, p_mut, max_local_gens, bits_per_param)
      else
        child
      end
    end
    pop.sort_by! { |candidate| candidate[:fitness] }
    best = pop.first if pop.first[:fitness] <= best[:fitness]
    puts ">gen=#{gen}, f=#{best[:fitness]}, b=#{best[:bitstring]}"
  end
  best
end
# Run the search with the demo configuration when executed as a script.
if __FILE__ == $PROGRAM_NAME
  # problem configuration: three dimensions, each bounded to [-5, 5]
  problem_size = 3
  search_space = Array.new(problem_size) { [-5, +5] }

  # algorithm configuration
  max_gens = 100
  pop_size = 100
  p_cross = 0.98
  # one expected mutation per bitstring at 16 bits per parameter
  p_mut = 1.0 / (problem_size * 16).to_f
  max_local_gens = 20
  p_local = 0.5

  # execute the algorithm
  best = search(max_gens, search_space, pop_size, p_cross, p_mut, max_local_gens, p_local)
  puts "done! Solution: f=#{best[:fitness]}, b=#{best[:bitstring]}, v=#{best[:vector].inspect}"
end
PS: This code is an implementation of a Memetic Algorithm, which in turn is a variation of an Evolutionary Algorithm.
A Memetic Algorithm searches for the best solution to a problem by combining a global search over a population of solutions with a local search that, in every cycle/generation, refines variations of the good solutions found so far.
My goal is to translate the code to Matlab programming.

OpenMDAO v0.13: performing an optimization when using multiple instances of a components initiated in a loop

I am setting up an optimization in OpenMDAO v0.13 using several components that are used many times. My assembly seems to be working just fine with the default driver, but when I run with an optimizer it does not solve. The optimizer simply runs with the inputs given and returns the answer using those inputs. I am not sure what the issue is, but I would appreciate any insights. I have included a simple code mimicking my structure that reproduces the error. I think the problem is in the connections, summer.fs does not update after initialization.
from openmdao.main.api import Assembly, Component
from openmdao.lib.datatypes.api import Float, Array, List
from openmdao.lib.drivers.api import DOEdriver, SLSQPdriver, COBYLAdriver, CaseIteratorDriver
from pyopt_driver.pyopt_driver import pyOptDriver
import numpy as np
class component1(Component):
x = Float(iotype='in')
y = Float(iotype='in')
term1 = Float(iotype='out')
a = Float(iotype='in', default_value=1)
def execute(self):
x = self.x
a = self.a
term1 = a*x**2
self.term1 = term1
print "In comp1", self.name, self.a, self.x, self.term1
def list_deriv_vars(self):
return ('x',), ('term1',)
def provideJ(self):
x = self.x
a = self.a
dterm1_dx = 2.*a*x
J = np.array([[dterm1_dx]])
print 'In comp1, J = %s' % J
return J
class component2(Component):
x = Float(iotype='in')
y = Float(iotype='in')
term1 = Float(iotype='in')
f = Float(iotype='out')
def execute(self):
y = self.y
x = self.x
term1 = self.term1
f = term1 + x + y**2
self.f = f
print "In comp2", self.name, self.x, self.y, self.term1, self.f
class summer(Component):
total = Float(iotype='out', desc='sum of all f values')
def __init__(self, size):
super(summer, self).__init__()
self.size = size
self.add('fs', Array(np.ones(size), iotype='in', desc='f values from all cases'))
def execute(self):
self.total = sum(self.fs)
print 'In summer, fs = %s and total = %s' % (self.fs, self.total)
# Assembly that builds `size` component1/component2 pairs, feeds every comp2 result
# into a single `summer`, and minimises summer.total over x and y.
# NOTE(review): indentation was lost in this excerpt; which statements belong to the
# `for` loop in configure() must be confirmed against the original file.
class assembly(Assembly):
# Design variables shared by every component pair, and the summed objective value.
x = Float(iotype='in')
y = Float(iotype='in')
total = Float(iotype='out')
# size: number of component pairs; also the length of the a_vals and fs arrays.
def __init__(self, size):
super(assembly, self).__init__()
self.size = size
# Array traits are added per instance because their length depends on `size`.
self.add('a_vals', Array(np.zeros(size), iotype='in', dtype='float'))
self.add('fs', Array(np.zeros(size), iotype='out', dtype='float'))
print 'in init a_vals = %s' % self.a_vals
# Build the driver, the components, their connections and the optimisation problem.
def configure(self):
# self.add('driver', SLSQPdriver())
self.add('driver', pyOptDriver())
self.driver.optimizer = 'SNOPT'
# self.driver.pyopt_diff = True
#create this first, so we can connect to it
self.add('summer', summer(size=len(self.a_vals)))
self.connect('summer.total', 'total')
print 'in configure a_vals = %s' % self.a_vals
# create instances of components
for i in range(0, self.size):
c1 = self.add('comp1_%d'%i, component1())
# component1 declares a derivative only for x; other derivatives are treated as zero.
c1.missing_deriv_policy = 'assume_zero'
c2 = self.add('comp2_%d'%i, component2())
# Pair i reads a from a_vals[i], shares x and y, and chains term1 into comp2.
self.connect('a_vals[%d]' % i, 'comp1_%d.a' % i)
self.connect('x', ['comp1_%d.x'%i, 'comp2_%d.x'%i])
self.connect('y', ['comp1_%d.y'%i, 'comp2_%d.y'%i])
self.connect('comp1_%d.term1'%i, 'comp2_%d.term1'%i)
self.connect('comp2_%d.f'%i, 'summer.fs[%d]'%i)
self.driver.workflow.add(['comp1_%d'%i, 'comp2_%d'%i])
# NOTE(review): this promotes a component *input* (summer.fs) as an assembly output.
# It has been reported that the optimizer does not move while this connection is
# present; exposing the comp2_N.f outputs instead was suggested as a workaround. Verify.
self.connect('summer.fs[:]', 'fs[:]')
self.driver.workflow.add(['summer'])
# set up main driver (optimizer)
self.driver.iprint = 1
self.driver.maxiter = 100
self.driver.accuracy = 1.0e-6
# Both design variables are bounded to [-5, 5]; the objective is the summed total.
self.driver.add_parameter('x', low=-5., high=5.)
self.driver.add_parameter('y', low=-5., high=5.)
self.driver.add_objective('summer.total')
if __name__ == "__main__":
""" the result should be -1 at (x, y) = (-0.5, 0) """
import time
from openmdao.main.api import set_as_top
a_vals = np.array([1., 1., 1., 1.])
test = set_as_top(assembly(size=len(a_vals)))
test.a_vals = a_vals
print test.a_vals
test.x = 2.
test.y = 2.
tt = time.time()
test.run()
print "Elapsed time: ", time.time()-tt, "seconds"
print 'result = ', test.summer.total
print '(x, y) = (%s, %s)' % (test.x, test.y)
print test.fs
I played around with your model, and found that the following line caused problems:
#self.connect('summer.fs[:]', 'fs[:]')
When I commented it out, I got the optimization to move.
I am not sure what is happening there, but the graph transformations sometimes have some issues with component input nodes that are promoted as outputs on the assembly boundary. If you still want those values to be available on the assembly, you could try promoting the outputs from the comp2_n components instead.

customizable PageRank algorithm in Gremlin?

I'm looking for a Gremlin version of a customizable PageRank algorithm. There are a few old versions out there, one (from: http://www.infoq.com/articles/graph-nosql-neo4j) is pasted below. I'm having trouble fitting the flow into the current GremlinGroovyPipeline-based structure. What is the modernized equivalent of this or something like it?
$_g := tg:open()
g:load('data/graph-example-2.xml')
$m := g:map()
$_ := g:key('type', 'song')[g:rand-nat()]
repeat 2500
$_ := ./outE[#label='followed_by'][g:rand-nat()]/inV
if count($_) > 0
g:op-value('+',$m,$_[1]/#name, 1.0)
end
if g:rand-real() > 0.85 or count($_) = 0
$_ := g:key('type', 'song')[g:rand-nat()]
end
end
g:sort($m,'value',true())
Another version is available on slide 55 of http://www.slideshare.net/slidarko/gremlin-a-graphbased-programming-language-3876581. The ability to use the if statements and change the traversal based on them is valuable for customization.
many thanks
I guess I'll answer it myself in case somebody else needs it. Be warned that this is not a very efficient PageRank calculation. It should only be viewed as a learning example.
// Random-walk approximation of PageRank over the 'followed_by' edges of song vertices.
// Visit counts are collected in the map m (song name -> count) and then normalised.
g = new TinkerGraph()
g.loadGraphML('graph-example-2.xml')
m = [:]
// Start every song at a count of zero.
// NOTE(review): outside the interactive console this pipeline may need to be
// iterated explicitly for the side effect to run. Confirm.
g.V('type','song').sideEffect{m[it.name] = 0}
// Pick a random song vertex that has at least one outgoing 'followed_by' edge.
def randnode(g) {
return(g.V('type','song').filter{it.outE('followed_by').hasNext()}.shuffle[0].next())
}
v = randnode(g)
// NOTE(review): the range 0..2500 is inclusive, so this performs 2501 steps.
for(i in 0..2500) {
// Follow one randomly chosen 'followed_by' edge; v becomes null at a dead end.
v = v.outE('followed_by').shuffle[0].inV
v = v.hasNext()?v.next():null
if (v != null) {
m[v.name] += 1
}
// Teleport: jump to a fresh random song when the draw exceeds 0.85 or at a dead end.
if ((Math.random() > 0.85) || (v == null)) {
v = randnode(g)
}
}
// Normalise the visit counts so they sum to 1.
msum = m.values().sum()
m.each{k,v -> m[k] = v / msum}
println "top 10 songs: (normalized PageRank)"
// Sort by descending score.
// NOTE(review): the inclusive range 0..10 spans 11 positions although the label
// says "top 10"; also confirm that range indexing applies to the result of sort here.
m.sort {-it.value }[0..10]
Here's a good reference for a simplified one-liner:
https://groups.google.com/forum/m/#!msg/gremlin-users/CRIlDpmBT7g/-tRgszCTOKwJ
(as well as the Gremlin wiki: https://github.com/tinkerpop/gremlin/wiki)

Is it better to change the db schema?

I'm building a web app with Django. I use PostgreSQL for the db. The app code is getting really messy (my beginner skills being a big factor) and slow, even when I run the app locally.
This is an excerpt of my models.py file:
# (value, label) pairs passed as `choices` to Transaction.frequency.
# NOTE(review): the constants NEVER, DAILY, WEEKLY and MONTHLY are defined outside this
# excerpt, and the "...some more..." line is a placeholder from the post, not valid Python.
REPEATS_CHOICES = (
(NEVER, 'Never'),
(DAILY, 'Daily'),
(WEEKLY, 'Weekly'),
(MONTHLY, 'Monthly'),
...some more...
)
class Transaction(models.Model):
    """An income or expense entry on an account, optionally repeating until `ends`."""
    name = models.CharField(max_length=30)
    # max_length is not an IntegerField option (Django ignores it), so it is not passed.
    type = models.IntegerField(choices=TYPE_CHOICES)  # 0 = 'Income' , 1 = 'Expense'
    amount = models.DecimalField(max_digits=12, decimal_places=2)
    # Date of the (first) occurrence; defaults to the day the row is created.
    date = models.DateField(default=date.today)
    # How often the transaction repeats; see REPEATS_CHOICES.
    frequency = models.IntegerField(choices=REPEATS_CHOICES)
    # Optional last date of a repeating transaction.
    ends = models.DateField(blank=True, null=True)
    active = models.BooleanField(default=True)
    category = models.ForeignKey(Category, related_name='transactions', blank=True, null=True)
    account = models.ForeignKey(Account, related_name='transactions')
The problem is with date, frequency and ends. With this info I can know all the dates in which transactions occurs and use it to fill a cashflow table. Doing things this way involves creating a lot of structures(dictionaries, lists and tuples) and iterating them a lot. Maybe there is a very simple way of solving this with the actual schema, but I couldn't realize how.
I think that the app would be easier to code if, at the creation of a transaction, I could save all the dates in the db. I don't know if it's possible or if it's a good idea.
I'm reading a book about google app engine and the datastore's multivalued properties. What do you think about this for solving my problem?.
Edit: I didn't know about the PickleField. I'm now reading about it, maybe I could use it to store all the transaction's datetime objects.
Edit2: This is an excerpt of my cashflow2 view(sorry for the horrible code):
# View that accumulates per-month and per-category income/expense totals for one account.
# NOTE(review): this is an excerpt whose indentation was stripped and which shows no
# return statement; the nesting of several if/else branches must be confirmed against
# the original file before changing any logic.
def cashflow2(request, account_name="Initial"):
# The default account name means "no account chosen": send the user to account creation.
if account_name == "Initial":
uri = "/cashflow/new_account"
return HttpResponseRedirect(uri)
# month_info maps (month, year) -> [income, expense, on hand];
# cat_info maps (category, month, year) -> amount.
month_info = {}
cat_info = {}
m_y_list = [] # [(month,year),]
trans = []
# NOTE(review): `min` and `max` shadow the built-ins for the rest of this function.
min, max = [] , []
account = Account.objects.get(name=account_name, user=request.user)
categories = account.categories.all()
# Pre-fill every month of 2006-2016 with zeroes so the later `+=` never misses a key.
for year in range(2006,2017):
for month in range(1,13):
month_info[(month, year)] = [0, 0, 0]
for cat in categories:
cat_info[(cat, month, year)] = 0
previous_months = 1 # previous months from actual
next_months = 5
# NOTE(review): `previous_month` is not defined in the visible code (the variable
# assigned above is `previous_months`); this looks like a NameError. Confirm.
dates_list = month_year_list(previous_month, next_months) # Returns [(month,year)] from the requested range
# NOTE(review): the elements are used via .month/.year here, so month_year_list
# presumably returns date-like objects rather than tuples. Confirm.
m_y_list = [(date.month, date.year) for date in month_year_list(1,5)]
min, max = dates_list[0], dates_list[-1]
# Positions inside each month_info value.
INCOME = 0
EXPENSE = 1
ONHAND = 2
# Transactions that fall inside the displayed month window.
transacs_in_dates = []
txs = account.transactions.order_by('date')
for tx in txs:
monthyear = ()
monthyear = (tx.date.month, tx.date.year)
# frequency 0: handled as a single occurrence in the transaction's own month.
if tx.frequency == 0:
if tx.type == 0:
month_info[monthyear][INCOME] += tx.amount
if tx.category:
cat_info[(tx.category, monthyear[0], monthyear[1])] += tx.amount
else:
month_info[monthyear][EXPENSE] += tx.amount
if tx.category:
cat_info[(tx.category, monthyear[0], monthyear[1])] += tx.amount
# NOTE(review): `lista_m_a` is not defined in the visible code; presumably it is the
# list named `m_y_list` here. Confirm.
if monthyear in lista_m_a:
if tx not in transacs_in_dates:
transacs_in_dates.append(tx)
elif tx.frequency == 4: # frequency = 'Monthly'
# Count the monthly occurrences between the start date and the end date.
# NOTE(review): the model allows `ends` to be null; a monthly transaction without an
# end date would presumably fail on the lines below. Confirm.
months_dif = relativedelta.relativedelta(tx.ends, tx.date).months
if tx.ends.day < tx.date.day:
months_dif += 1
years_dif = relativedelta.relativedelta(tx.ends, tx.date).years
dif = months_dif + (years_dif*12)
dates_range = dif + 1
for i in range(dates_range):
# Date of the i-th monthly occurrence.
dt = tx.date+relativedelta.relativedelta(months=+i)
if (dt.month, dt.year) in m_y_list:
if tx not in transacs_in_dates:
transacs_in_dates.append(tx)
# NOTE(review): `fch` is not defined in the visible code; presumably the occurrence
# date `dt` was intended. Confirm.
if tx.type == 0:
month_info[(fch.month,fch.year)][INCOME] += tx.amount
if tx.category:
cat_info[(tx.category, fch.month, fch.year)] += tx.amount
else:
month_info[(fch.month,fch.year)][EXPENSE] += tx.amount
if tx.category:
cat_info[(tx.category, fch.month, fch.year)] += tx.amount
import operator
thelist = []
# Flatten to (month, year, income, expense, on hand) tuples ordered by year, then month.
# NOTE(review): dict.iteritems() exists only on Python 2.
thelist = sorted((my + tuple(v) for my, v in month_info.iteritems()),
key = operator.itemgetter(1, 0))
thelistlist = []
for atuple in thelist:
# NOTE(review): `list` is assigned further down in this function, which makes it a local
# name throughout; calling it here before that assignment raises UnboundLocalError.
thelistlist.append(list(atuple))
# Running balance: on hand = previous income - previous expense + previous on hand.
for i in range(len(thelistlist)):
if i != 0:
thelistlist[i][4] = thelistlist[i-1][2] - thelistlist[i-1][3] + thelistlist[i-1][4]
# Keep only the rows inside the displayed month window (this rebinds the name `list`).
list = []
for el in thelistlist:
if (el[0],el[1]) in lista_m_a:
list.append(el)
transactions = account.transactions.all()
# Distinct categories that appear in the window, split by transaction type.
cats_in_dates_income = []
cats_in_dates_expense = []
for t in transacs_in_dates:
if t.category and t.type == 0:
if t.category not in cats_in_dates_income:
cats_in_dates_income.append(t.category)
elif t.category and t.type == 1:
if t.category not in cats_in_dates_expense:
cats_in_dates_expense.append(t.category)
# Flatten cat_info into (category, month, year, amount) tuples.
cat_infos = []
for k, v in cat_info.items():
cat_infos.append((k[0], k[1], k[2], v))
Depends on how relevant App Engine is here. P.S. If you'd like to store pickled objects as well as JSON objects in the Google Datastore, check out these two code snippets:
http://kovshenin.com/archives/app-engine-json-objects-google-datastore/
http://kovshenin.com/archives/app-engine-python-objects-in-the-google-datastore/
Also note that the Google Datastore is a non-relational database, so you might have other trouble refactoring your code to switch to that.
Cheers and good luck!

Resources