I'm looking to run a hierarchical logistic regression in a Bayesian framework, but am having trouble adapting codes for my data. I have the great book "Doing Bayesian Data Analysis", but I'm not sure how to modify the script the author provided (will paste below) to rerun the analysis on my thesis data. Specifically, I have the following questions:
How do I add more terms to this model? I had 5 predictors in my thesis, and this model only has 2
How do I make it hierarchical/include multiple steps or blocks?
How do I add interaction terms among my predictors?
I believe this script is set up for metric predictors (the book's example used height and weight data); If I am running a mixture of metric and nominal independent variables, how do I need to adapt it?
any help on any of these questions would be wonderful.
# Jags-Ydich-XmetMulti-Mlogistic.R
# Accompanies the book:
# Kruschke, J. K. (2015). Doing Bayesian Data Analysis, Second Edition:
# A Tutorial with R, JAGS, and Stan. Academic Press / Elsevier.
source("DBDA2E-utilities.R")
#===============================================================================
genMCMC = function( data , xName="x" , yName="y" ,
numSavedSteps=10000 , thinSteps=1 , saveName=NULL ,
runjagsMethod=runjagsMethodDefault ,
nChains=nChainsDefault ) {
require(runjags)
#-----------------------------------------------------------------------------
# THE DATA.
y = data[,yName]
x = as.matrix(data[,xName],ncol=length(xName))
# Do some checking that data make sense:
if ( any( !is.finite(y) ) ) { stop("All y values must be finite.") }
if ( any( !is.finite(x) ) ) { stop("All x values must be finite.") }
cat("\nCORRELATION MATRIX OF PREDICTORS:\n ")
show( round(cor(x),3) )
cat("\n")
flush.console()
# Specify the data in a list, for later shipment to JAGS:
dataList = list(
x = x ,
y = y ,
Nx = dim(x)[2] ,
Ntotal = dim(x)[1]
)
#-----------------------------------------------------------------------------
# THE MODEL.
modelString = "
# Standardize the data:
data {
for ( j in 1:Nx ) {
xm[j] <- mean(x[,j])
xsd[j] <- sd(x[,j])
for ( i in 1:Ntotal ) {
zx[i,j] <- ( x[i,j] - xm[j] ) / xsd[j]
}
}
}
# Specify the model for standardized data:
model {
for ( i in 1:Ntotal ) {
# In JAGS, ilogit is logistic:
y[i] ~ dbern( ilogit( zbeta0 + sum( zbeta[1:Nx] * zx[i,1:Nx] ) ) )
}
# Priors vague on standardized scale:
zbeta0 ~ dnorm( 0 , 1/2^2 )
for ( j in 1:Nx ) {
zbeta[j] ~ dnorm( 0 , 1/2^2 )
}
# Transform to original scale:
beta[1:Nx] <- zbeta[1:Nx] / xsd[1:Nx]
beta0 <- zbeta0 - sum( zbeta[1:Nx] * xm[1:Nx] / xsd[1:Nx] )
}
" # close quote for modelString
# Write out modelString to a text file
writeLines( modelString , con="TEMPmodel.txt" )
#-----------------------------------------------------------------------------
# INTIALIZE THE CHAINS.
# Let JAGS do it...
#-----------------------------------------------------------------------------
# RUN THE CHAINS
parameters = c( "beta0" , "beta" ,
"zbeta0" , "zbeta" )
adaptSteps = 500 # Number of steps to "tune" the samplers
burnInSteps = 1000
runJagsOut <- run.jags( method=runjagsMethod ,
model="TEMPmodel.txt" ,
monitor=parameters ,
data=dataList ,
#inits=initsList ,
n.chains=nChains ,
adapt=adaptSteps ,
burnin=burnInSteps ,
sample=ceiling(numSavedSteps/nChains) ,
thin=thinSteps ,
summarise=FALSE ,
plots=FALSE )
codaSamples = as.mcmc.list( runJagsOut )
# resulting codaSamples object has these indices:
# codaSamples[[ chainIdx ]][ stepIdx , paramIdx ]
if ( !is.null(saveName) ) {
save( codaSamples , file=paste(saveName,"Mcmc.Rdata",sep="") )
}
return( codaSamples )
} # end function
#===============================================================================
smryMCMC = function( codaSamples ,
saveName=NULL ) {
summaryInfo = NULL
mcmcMat = as.matrix(codaSamples)
paramName = colnames(mcmcMat)
for ( pName in paramName ) {
summaryInfo = rbind( summaryInfo , summarizePost( mcmcMat[,pName] ) )
}
rownames(summaryInfo) = paramName
if ( !is.null(saveName) ) {
write.csv( summaryInfo , file=paste(saveName,"SummaryInfo.csv",sep="") )
}
return( summaryInfo )
}
#===============================================================================
plotMCMC = function( codaSamples , data , xName="x" , yName="y" ,
showCurve=FALSE , pairsPlot=FALSE ,
saveName=NULL , saveType="jpg" ) {
# showCurve is TRUE or FALSE and indicates whether the posterior should
# be displayed as a histogram (by default) or by an approximate curve.
# pairsPlot is TRUE or FALSE and indicates whether scatterplots of pairs
# of parameters should be displayed.
#-----------------------------------------------------------------------------
y = data[,yName]
x = as.matrix(data[,xName])
mcmcMat = as.matrix(codaSamples,chains=TRUE)
chainLength = NROW( mcmcMat )
zbeta0 = mcmcMat[,"zbeta0"]
zbeta = mcmcMat[,grep("^zbeta$|^zbeta\\[",colnames(mcmcMat))]
if ( ncol(x)==1 ) { zbeta = matrix( zbeta , ncol=1 ) }
beta0 = mcmcMat[,"beta0"]
beta = mcmcMat[,grep("^beta$|^beta\\[",colnames(mcmcMat))]
if ( ncol(x)==1 ) { beta = matrix( beta , ncol=1 ) }
#-----------------------------------------------------------------------------
if ( pairsPlot ) {
# Plot the parameters pairwise, to see correlations:
openGraph()
nPtToPlot = 1000
plotIdx = floor(seq(1,chainLength,by=chainLength/nPtToPlot))
panel.cor = function(x, y, digits=2, prefix="", cex.cor, ...) {
usr = par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y))
txt = format(c(r, 0.123456789), digits=digits)[1]
txt = paste(prefix, txt, sep="")
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex=1.5 ) # was cex=cex.cor*r
}
pairs( cbind( beta0 , beta )[plotIdx,] ,
labels=c( "beta[0]" ,
paste0("beta[",1:ncol(beta),"]\n",xName) ) ,
lower.panel=panel.cor , col="skyblue" )
if ( !is.null(saveName) ) {
saveGraph( file=paste(saveName,"PostPairs",sep=""), type=saveType)
}
}
#-----------------------------------------------------------------------------
# Data with posterior predictive:
# If only 1 predictor:
if ( ncol(x)==1 ) {
openGraph(width=7,height=6)
par( mar=c(3.5,3.5,2,1) , mgp=c(2.0,0.7,0) )
plot( x[,1] , y , xlab=xName[1] , ylab=yName ,
cex=2.0 , cex.lab=1.5 , col="black" , main="Data with Post. Pred." )
abline(h=0.5,lty="dotted")
cVec = floor(seq(1,chainLength,length=30))
xWid=max(x)-min(x)
xComb = seq(min(x)-0.1*xWid,max(x)+0.1*xWid,length=201)
for ( cIdx in cVec ) {
lines( xComb , 1/(1+exp(-(beta0[cIdx]+beta[cIdx,1]*xComb ))) , lwd=1.5 ,
col="skyblue" )
xInt = -beta0[cIdx]/beta[cIdx,1]
arrows( xInt,0.5, xInt,-0.04, length=0.1 , col="skyblue" , lty="dashed" )
}
if ( !is.null(saveName) ) {
saveGraph( file=paste(saveName,"DataThresh",sep=""), type=saveType)
}
}
# If only 2 predictors:
if ( ncol(x)==2 ) {
openGraph(width=7,height=7)
par( mar=c(3.5,3.5,2,1) , mgp=c(2.0,0.7,0) )
plot( x[,1] , x[,2] , pch=as.character(y) , xlab=xName[1] , ylab=xName[2] ,
col="black" , main="Data with Post. Pred.")
cVec = floor(seq(1,chainLength,length=30))
for ( cIdx in cVec ) {
abline( -beta0[cIdx]/beta[cIdx,2] , -beta[cIdx,1]/beta[cIdx,2] , col="skyblue" )
}
if ( !is.null(saveName) ) {
saveGraph( file=paste(saveName,"DataThresh",sep=""), type=saveType)
}
}
#-----------------------------------------------------------------------------
# Marginal histograms:
decideOpenGraph = function( panelCount , saveName , finished=FALSE ,
nRow=1 , nCol=3 ) {
# If finishing a set:
if ( finished==TRUE ) {
if ( !is.null(saveName) ) {
saveGraph( file=paste0(saveName,ceiling((panelCount-1)/(nRow*nCol))),
type=saveType)
}
panelCount = 1 # re-set panelCount
return(panelCount)
} else {
# If this is first panel of a graph:
if ( ( panelCount %% (nRow*nCol) ) == 1 ) {
# If previous graph was open, save previous one:
if ( panelCount>1 & !is.null(saveName) ) {
saveGraph( file=paste0(saveName,(panelCount%/%(nRow*nCol))),
type=saveType)
}
# Open new graph
openGraph(width=nCol*7.0/3,height=nRow*2.0)
layout( matrix( 1:(nRow*nCol) , nrow=nRow, byrow=TRUE ) )
par( mar=c(4,4,2.5,0.5) , mgp=c(2.5,0.7,0) )
}
# Increment and return panel count:
panelCount = panelCount+1
return(panelCount)
}
}
# Original scale:
panelCount = 1
panelCount = decideOpenGraph( panelCount , saveName=paste0(saveName,"PostMarg") )
histInfo = plotPost( beta0 , cex.lab = 1.75 , showCurve=showCurve ,
xlab=bquote(beta[0]) , main="Intercept" )
for ( bIdx in 1:ncol(beta) ) {
panelCount = decideOpenGraph( panelCount , saveName=paste0(saveName,"PostMarg") )
histInfo = plotPost( beta[,bIdx] , cex.lab = 1.75 , showCurve=showCurve ,
xlab=bquote(beta[.(bIdx)]) , main=xName[bIdx] )
}
panelCount = decideOpenGraph( panelCount , finished=TRUE , saveName=paste0(saveName,"PostMarg") )
# Standardized scale:
panelCount = 1
panelCount = decideOpenGraph( panelCount , saveName=paste0(saveName,"PostMargZ") )
histInfo = plotPost( zbeta0 , cex.lab = 1.75 , showCurve=showCurve ,
xlab=bquote(z*beta[0]) , main="Intercept" )
for ( bIdx in 1:ncol(beta) ) {
panelCount = decideOpenGraph( panelCount , saveName=paste0(saveName,"PostMargZ") )
histInfo = plotPost( zbeta[,bIdx] , cex.lab = 1.75 , showCurve=showCurve ,
xlab=bquote(z*beta[.(bIdx)]) , main=xName[bIdx] )
}
panelCount = decideOpenGraph( panelCount , finished=TRUE , saveName=paste0(saveName,"PostMargZ") )
#-----------------------------------------------------------------------------
}
#===============================================================================
My code works for the first few iterations, but after a few times through the while loop, it seems that my array elements are being deleted.
I'm taking numbers off the array constructed from the input parameters and all I can tell is that when I get to a number which was passed in twice, I get an error.
I am calling my script like this
./branchandboundNoComments.pl 1 2 3 4 5 5 7 7 9 9 10 10 12 14 19
I should get this as output
0, 7, 9, 10, 14, 19
This is my script
#!/usr/bin/perl -w
use strict;
my #input = #ARGV;
my $maxAll = $input[-1];
$#input = $#input - 1;
my #multiset = ( 0, $maxAll );
my #stack;
my $rotation = 0; # this is 0,1, or 2.
while ( #input != 0 ) {
my $max = $input[-1];
my #deltamultiset;
for ( my $i = 1; $i <= $#multiset; $i++ ) {
push #deltamultiset, $multiset[$i] - $max;
}
push #deltamultiset, $max;
my #deltamultiset2;
for ( my $i = 1; $i <= $#multiset; $i++ ) {
push #deltamultiset2, $multiset[$i] - ( $maxAll - $max );
}
push #deltamultiset2, $max;
if ( subset( \#deltamultiset, \#input ) and $rotation == 0 ) {
for ( my $i = 0; $i < $#deltamultiset; $i++ ) {
pop #input;
}
push #multiset, $max;
push #stack, $max;
push #stack, 0;
}
elsif ( subset( \#deltamultiset2, \#input ) and $rotation <= 1 ) {
for ( my $j = 0; $j < $#deltamultiset; $j++ ) {
pop #input;
}
push #multiset, ( $maxAll - $max );
push #stack, ( $maxAll - $max );
push #stack, 1;
$rotation = 0;
}
elsif ( #stack != 0 ) {
$rotation = $stack[-1];
$#stack--;
$max = $stack[-1];
$#stack--;
$rotation++;
for ( my $i = 0; $i < $#multiset; $i++ ) {
if ( $multiset[$i] == $max ) {
delete $multiset[$i];
last;
}
}
for ( my $i = 0; $i < $#deltamultiset; $i++ ) {
push #input, $deltamultiset[$i];
}
}
else {
print "no solutions \n";
exit;
}
}
print "#multiset is a solution \n";
sub subset {
my ( $deltamultisetSubref, $multisetSubref ) = #_;
my #deltamultisetSub = #{$deltamultisetSubref};
my #multisetSub = #{$multisetSubref};
while ( #deltamultisetSub != 0 ) {
for ( my $i = $#multisetSub; $i >= -1; $i-- ) {
if ( $multisetSub[$i] == $deltamultisetSub[-1] ) {
pop #deltamultisetSub;
$#multisetSub--;
last;
}
if ( $i == -1 ) {
return 0;
}
}
}
return 1;
}
This is what is output
Use of uninitialized value in subtraction (-) at ./branchandboundNoComments.pl line 20.
Use of uninitialized value in subtraction (-) at ./branchandboundNoComments.pl line 26.
no solutions
I can't understand the algorithm you're trying to implement, so there are probably more errors, but the immediate problem is that the statement
delete $multiset[$i]
won't remove that element from the array unless it is the last element; otherwise the array stays the same length, exists on that element will return false, and it will evaluate to undef
If you want to remove the element, which seems most likely, then you want
splice #multiset, $i, 1;
But I have tested your code with that fix in place, and while it no longer produces Use of uninitialized value in subtraction errors, the result is still
no solutions
Unfortunately I can't understand what you're trying to implement, and can't make any useful guesses about what may be wrong unless you can offer me a description of the underlying algorithm
I'm working on a 20 year old project with some ... interesting problems, among them: there's some shared objects with circular dependencies.
I'm attempting to map out the relationships between all the libraries, but it would be rather helpful if there's an existing tool capable of searching a list of libraries to see what can satisfy the missing dependencies.
For reference, they got around the problem by doing something like the following:
# True list of dependencies:
A: B
B: A
C: A
# Dependencies used in practice:
A:
B: A
C: A B
I haven't tested the following code, since I've just attempted to re-write this from memory, but the one I wrote to solve this earlier (it looks roughly like this one) works fine:
#!/usr/bin/env perl
using IPC::Open3;
my $queryFile = $ARGV[0];
shift;
my %missingSymbols = getSymbols( "nm -Aau", $queryFile );
my %symtbl;
foreach $lib ( #ARGV ) {
my %temp = getSymbols( "nm -Aa --defined-only", $lib );
foreach $key ( keys( %temp ) ) {
$symtbl{$key} = (defined($symtbl{$key}) ? "${symtbl{$key}} " : "")
. $temp{$key};
}
}
my %dependencies;
foreach $symbol ( keys( %missingSymbols ) ) {
if( defined( $symtbl{$symbol} ) ) {
foreach $lib ( split( / +/, $symtbl{$symbol} ) ) {
if( !defined( $dependencies{$lib} ) ) {
$dependencies{$lib} = 1;
}
}
}
}
if( scalar( keys( %dependencies ) ) > 0 ) {
print( "$queryFile depends on one or more of the following libs:\n\n" );
print join( "\n", sort( keys( %dependencies ) ) ) . "\n\n";
} else {
print( "Unable to resolve dependencies for $queryFile.\n\n" );
}
# Done.
sub getSymbols {
my $cmd = shift;
my $fname = shift;
checkFileType( $fname );
open3( IN, OUT, ERR, "$cmd $fname" );
my %symhash;
close( IN );
# If you leave ERR open and nm prints to STDERR, reads from
# OUT can block. You could make reads from both handles be
# non-blocking so you could examine STDERR if needed, but I
# don't need to.
close( ERR );
while( <OUT> ) {
chomp;
if( m/^(?:[^:]*:)+[a-zA-Z0-9]*\s*[a-zA-Z] ([^\s]+)$/ ) {
my $temp = defined( $symhash{$1} ) ? "${symhash{$1}} " : "";
$symhash{$1} = $temp . $fname;
}
}
close( OUT );
return %symhash;
}
sub checkFileType {
my $fname = shift;
die "$fname does not exist\n" if( ! -e $fname );
die "Not an ELF or archive file\n" if( `file $fname` !~ m/ELF| ar archive/ );
}