RSQLite RS-DBI driver: (error in statement: no such table: test) - database

I have just started using RSQLite for analysis of a very large survey data set using R and the survey package by Thomas Lumley. I am getting an error message that has been asked about before on Stack Overflow and the R help archive, but the solutions don't apply to my data (one solution was that the original poster was using the POSIX data type, but my data doesn't have that). I don't think it is a problem with the survey package; rather, I think I am doing something wrong with the database/table creation. One thing that may help: when I use the sample from my data that I posted below, I don't get an error with a SELECT query, but when I do the same thing with my full data set, I do get the same error. Here is a sample of my data and some reproducible code:
test=structure(list(household = c(0, 0, 0, 0, 0), NUMADULT = c(2L,
1L, 2L, 1L, 1L), CHILDREN = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), SEX = c(1L, 2L, 1L, 2L, 2L), X_STATE = c(36L, 5L,
53L, 41L, 10L), X_FINALWT = c(665.97647582, 53.293518032, 72.60538811,
61.223634396, 5.5921160216), AGE = c(30L, 65L, 9L, 49L, 48L),
X_INCOMG = structure(c(6L, 6L, 6L, 6L, 6L), .Label = c("1",
"2", "3", "4", "5", "9"), class = "factor"), X_MAM502Y = structure(c(NA,
1L, NA, NA, NA), .Label = c("1", "2", "9"), class = "factor"),
HLTHPLAN = structure(c(2L, 1L, 1L, 1L, 1L), .Label = c("1",
"2"), class = "factor"), MEDCOST = structure(c(1L, 2L, 2L,
2L, 2L), .Label = c("1", "2"), class = "factor"), QLACTLM2 = c(2L,
2L, 2L, 2L, 2L), CTYCODE = structure(c(30L, 53L, 33L, 26L,
1L), .Label = c("1", "3", "5", "6", "7", "9", "10", "11",
"13", "14", "15", "17", "19", "20", "21", "23", "25", "27",
"28", "29", "30", "31", "33", "35", "37", "39", "41", "43",
"45", "47", "49", "51", "53", "55", "57", "59", "61", "63",
"65", "67", "69", "71", "73", "75", "77", "79", "81", "83",
"85", "86", "87", "89", "91", "93", "95", "97", "99", "101",
"103", "105", "107", "109", "111", "113", "115", "117", "119",
"121", "123", "125", "127", "129", "131", "133", "135", "137",
"139", "141", "143", "145", "147", "149", "151", "153", "155",
"157", "159", "161", "163", "165", "167", "169", "171", "173",
"175", "177", "179", "181", "183", "185", "187", "189", "191",
"193", "195", "197", "199", "201", "205", "209", "215", "227",
"235", "245", "297", "303", "309", "339", "355", "439", "453",
"491", "510", "550", "590", "650", "700", "710", "740", "760",
"770", "777", "800", "810", "999", "203", "207", "217", "221",
"223", "275", "277", "295", "313", "381", "423", "680", "12",
"54", "186", "211", "213", "219", "225", "229", "231", "233",
"237", "239", "241", "247", "249", "251", "253", "255", "257",
"259", "261", "265", "267", "271", "273", "279", "281", "285",
"287", "289", "291", "293", "299", "305", "311", "321", "323",
"325", "329", "331", "337", "341", "343", "347", "349", "351",
"353", "361", "363", "365", "367", "371", "373", "375", "387",
"395", "397", "401", "407", "409", "415", "419", "427", "441",
"449", "451", "455", "457", "459", "463", "465", "467", "469",
"471", "473", "477", "479", "481", "485", "487", "489", "493",
"497", "499", "503", "520", "540", "570", "600", "630", "660",
"670", "683", "690", "730", "750", "775", "820", "830", "840",
"790"), class = "factor"), X_RACEGR2 = structure(c(1L, 1L,
NA, 1L, NA), .Label = c("1", "2", "3", "4", "5"), class = "factor"),
PERSDOC2 = structure(c(3L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3"), class = "factor"), POORHLTH = c(0, NA, NA, 0,
0), X_EDUCAG = structure(c(3L, 2L, 4L, 4L, 4L), .Label = c("1",
"2", "3", "4"), class = "factor"), X_PSU = c(2004006698L,
2004014294L, 2004100796L, 2004024220L, 2004005537L), X_STSTR = c(36011L,
5012L, 53271L, 41012L, 10011L), X_RFMAM2Y = structure(c(NA,
1L, NA, 1L, 1L), .Label = c("1", "2", "9"), class = "factor"),
X_RFSMOK3 = structure(c(2L, 1L, 1L, 2L, 1L), .Label = c("1",
"2"), class = "factor"), X_RFHLTH = structure(c(1L, 1L, 1L,
1L, 1L), .Label = c("1", "2", "3"), class = "factor"), YEAR = c(2004,
2004, 2004, 2004, 2004), bcccp = structure(c(2L, 2L, 2L,
2L, 1L), .Label = c("0", "1"), class = "factor"), pov.limit = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), cutoff = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), elig = c(NA, NA,
NA, NA, NA), bcccp_elig = c(NA, NA, NA, NA, NA)), .Names = c("household",
"NUMADULT", "CHILDREN", "SEX", "X_STATE", "X_FINALWT", "AGE",
"X_INCOMG", "X_MAM502Y", "HLTHPLAN", "MEDCOST", "QLACTLM2", "CTYCODE",
"X_RACEGR2", "PERSDOC2", "POORHLTH", "X_EDUCAG", "X_PSU", "X_STSTR",
"X_RFMAM2Y", "X_RFSMOK3", "X_RFHLTH", "YEAR", "bcccp", "pov.limit",
"cutoff", "elig", "bcccp_elig"), row.names = c(NA, 5L), class = "data.frame")
library(survey)
library(sqldf)
library(RSQLite)
# Connect to the SQLite database file 'brfsagg.db' and write the data frame `test` into it.
drv=dbDriver('SQLite')
con=dbConnect(drv,'brfsagg.db')
# NOTE(review): the table is created under the name 'brfs0210', not 'test'.
dbWriteTable(con,'brfs0210',test)
dbListFields(con,'brfs0210') #This function works
# NOTE(review): sqldf() is not given `con` here -- confirm which database this query actually runs against.
sqldf("select SEX from brfs0210") #This works with my sample data but I get the same error message when I use the full data set.
# NOTE(review): this checks for a table named 'test', whereas the table written above is named 'brfs0210'.
dbExistsTable(con,'test') #This proves that the table exists
# NOTE(review): data='test' names a table that was not written above, and dbname is looked up
# inside the installed survey package via system.file() instead of pointing at the 'brfsagg.db'
# file opened by dbConnect() -- presumably the source of the "no such table" error; verify.
brfsvy=svydesign(id=~X_PSU, strata=~X_STSTR, weights=~X_FINALWT,nest=TRUE,
data='test',dbtype='SQLite',dbname=system.file('brfsagg.db',package='survey')) #This always generates the error message, regardless of whether I am using the test sample data or my full data set.

The R code that you are trying to write has already been written here, with an accompanying blog post here. Why would you bother re-inventing the wheel? Googling "r brfss" or "import brfss into r" gets you to those posts.
Is there a reason you want to re-write everything from scratch yourself? There is lots of example syntax using SQLite with the survey package here. Here's how to fix this particular issue. :)
library(survey)
library(RSQLite)
# One path is used both to write the table and to build the design, so the
# design reads from the same database file the table was written to.
db.filename <- 'brfsagg.db'
con <- dbConnect(SQLite(), db.filename)
# Write the data frame under the table name that `data =` refers to below.
dbWriteTable(con, 'test', test)
# svydesign() below is given the file name, not `con`, so this connection is
# no longer needed; close it instead of leaving it open.
dbDisconnect(con)

# Database-backed design: `data` is the table name inside `dbname`.
brfsvy <- svydesign(
  id = ~X_PSU,
  strata = ~X_STSTR,
  weights = ~X_FINALWT,
  nest = TRUE,
  data = 'test',
  dbtype = 'SQLite',
  dbname = db.filename
)

svymean(~SEX, brfsvy)
# Change the survey package's lonely-PSU option, then repeat the estimate.
options('survey.lonely.psu' = 'adjust')
svymean(~SEX, brfsvy)
# Same estimate with SEX wrapped in factor().
svymean(~factor(SEX), brfsvy)

Related

Ruby set hash inside the Hash for the Array of Hashes

I am working on Rails 6 API. This is what I get
"data": [
{
"invoice_details": {
"customer_name": "Dylan Sollfrank",
"invoice_number": "1060",
"invoice_status": "paid"
}
},
{
"transaction_number": "QB1589148496",
"customer_name": "Freeman Sporting Goods:55 Twin Lane",
"amount": {
"amount_to_pay": 86.4,
"payment_fee": 0.0
},
"created_time": "03:38 AM",
"created_date": "May 11, 2020",
"payment_method": "qb_payment",
"payment_status": "completed"
},
Following is my code
# Builds a flat array for the payment report: for each group of transactions
# sharing a paymentable_id it pushes an { invoice_details: {...} } hash followed
# by a transaction hash, and returns the array.
def get_payment_report_activity(invoice_transactions, timezone = Time.zone.name)
invoice_details = []
# NOTE(review): these two hashes are created once, outside both loops, so every
# iteration below writes into the same two objects.
transaction_details = {}
amount = {}
invoice_transactions.group_by(&:paymentable_id).each do |key, transactions|
invoice = Invoice.find key
invoice_details.push(invoice_details:{
customer_name: invoice&.customer&.fully_qualified_name&.strip,
invoice_number: invoice&.doc_number,
invoice_status: invoice&.invoice_status
})
transactions.each do |transaction|
customer = transaction&.paymentable&.customer
amount[:amount_to_pay] = transaction&.amount_to_pay.to_f
amount[:payment_fee] = transaction&.payment_fee.to_f
# Each transaction overwrites the keys set by the previous one.
transaction_details[:transaction_number] = transaction&.transaction_number
transaction_details[:customer_name] = customer&.fully_qualified_name&.strip
transaction_details[:amount] = amount
transaction_details[:created_time] = Customer.time_format(transaction.created_at.in_time_zone(timezone))
transaction_details[:created_date] = Customer.date_format(transaction.created_at.in_time_zone(timezone))
transaction_details[:payment_method] = transaction&.payment_method
transaction_details[:payment_status] = transaction&.payment_status
end
# NOTE(review): pushed once per group, after the inner loop, as a sibling of the
# invoice hash (not nested inside it); only the values of the group's last
# transaction are present, and the same hash object is pushed for every group.
invoice_details << transaction_details
end
invoice_details
end
Now I need the transaction details hash nested inside the invoice_details hash, under the key transaction_details, and there can be multiple transaction details inside each invoice_details:
"data": [
{
"invoice_details": {
"customer_name": "Dylan Sollfrank",
"invoice_number": "1060",
"invoice_status": "paid",
"transaction_details": [{
"transaction_number": "QB1589148496",
"customer_name": "Freeman Sporting Goods:55 Twin Lane",
"amount": {
"amount_to_pay": 86.4,
"payment_fee": 0.0
},
"created_time": "03:38 AM",
"created_date": "May 11, 2020",
"payment_method": "qb_payment",
"payment_status": "completed"
},
{
"transaction_number": "QB1589148496",
"customer_name": "Freeman Sporting Goods:55 Twin Lane",
"amount": {
"amount_to_pay": 86.4,
"payment_fee": 0.0
},
"created_time": "03:38 AM",
"created_date": "May 11, 2020",
"payment_method": "qb_payment",
"payment_status": "completed"
}]
},
"invoice_details": {
"customer_name": "Dylan Sollfrank",
"invoice_number": "1060",
"invoice_status": "paid",
"transaction_details": {
"transaction_number": "QB1589148496",
"customer_name": "Freeman Sporting Goods:55 Twin Lane",
"amount": {
"amount_to_pay": 86.4,
"payment_fee": 0.0
},
"created_time": "03:38 AM",
"created_date": "May 11, 2020",
"payment_method": "qb_payment",
"payment_status": "completed"
}
},
}
you can try like this:
# Builds the payment-report payload: one entry per invoice, each carrying the
# invoice summary plus the list of that invoice's transactions.
#
# invoice_transactions - transactions to report on; grouped here by paymentable_id
# timezone             - zone used to render created_at (defaults to Time.zone.name)
#
# Returns an Array of { invoice_details: { ..., transaction_details: [...] } } hashes.
def get_payment_report_activity(invoice_transactions, timezone = Time.zone.name)
  invoice_transactions.group_by(&:paymentable_id).map do |invoice_id, grouped|
    invoice = Invoice.find(invoice_id)

    # A fresh hash is built for every transaction, so entries never share state.
    transaction_rows = grouped.map do |txn|
      payer = txn&.paymentable&.customer
      amounts = {
        amount_to_pay: txn&.amount_to_pay.to_f,
        payment_fee: txn&.payment_fee.to_f
      }

      {
        transaction_number: txn&.transaction_number,
        customer_name: payer&.fully_qualified_name&.strip,
        created_time: Customer.time_format(txn.created_at.in_time_zone(timezone)),
        created_date: Customer.date_format(txn.created_at.in_time_zone(timezone)),
        payment_method: txn&.payment_method,
        payment_status: txn&.payment_status,
        amount: amounts
      }
    end

    {
      invoice_details: {
        customer_name: invoice&.customer&.fully_qualified_name&.strip,
        invoice_number: invoice&.doc_number,
        invoice_status: invoice&.invoice_status,
        transaction_details: transaction_rows
      }
    }
  end
end

How to split a data.frame into an array by a factor?

If we want to split a data.frame by a "factor" f we usually do:
split(df1, df1$f)
But how do we do that when we want to split the data.frame into an array? I find my code a little awkward because of the two t, also the max could be a bit unreliable:
A <- array(t(as.matrix(df1)), dim=c(ncol(df1), max(df1$id), max(df1$f)))
apply(A, c(1, 3), t)
Is there a less complicated base R solution?
The following won't give me what I want, since str still yields lists.
as.array(split(df1, df1$f))
as.array(lapply(split(df1, df1$f), as.matrix))
as.array(lapply(split(df1, df1$f), function(x) matrix(unlist(x), nrow(x))))
Data
df1 <- structure(list(id = c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L), f = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L), x1 = c(14L, 15L, 4L, 10L, 8L, 6L, 7L,
2L, 5L, 9L, 3L, 12L, 13L, 1L, 11L), x2 = c(12L, 15L, 6L, 9L,
1L, 14L, 11L, 2L, 7L, 4L, 8L, 5L, 10L, 13L, 3L)), class = "data.frame", row.names = c(NA,
-15L))
# Split df1 by f, reshape each piece to the dimensions of the first piece, and
# stack the pieces along a third dimension (one slice per level of f).
mylist <- split(df1, df1$f)
dims <- dim(mylist[[1]])
array(
  unlist(
    lapply(mylist, function(piece) array(as.matrix(piece), dim = dims)),
    use.names = FALSE
  ),
  dim = c(dims, length(mylist))
)

Record Linkage with multiple datasets

The problem
fastLink and RecordLinkage packages do extremely well in matching records (rows) from database A to database B and vice-versa. The developers are working on extending from matching only 2 databases to multiple databases.
A simple example of both I gave here.
In the meantime, how would we go about matching multiple data frames? For example, I have multiple medical records of patients from clinic A, B, C, D, E, F, and I want to merge them into a single one.
A reproducible example:
dfA <-
structure(list(fname = c("Jafar", "Nemo", "Simba", "Belle", "Nala",
"Jasmine"), lname = c("Evil", "Water", "King", "Beauty", "Princess",
"Princess"), gender = c("M", "M", "M", "F", "F", "F"), dob = c(1987,
2000, 2011, 1989, 1970, 1989), city = c("Arabtown", "Atlantic",
"Sahara", "Nice", "Sahara", "Arabtown")), row.names = c(NA, -6L
), class = c("tbl_df", "tbl", "data.frame"))
dfB <-
structure(list(fname = c("Jafar Jr", "Nemo", "Simba", "Belle",
"Nala", "Jasmine"), lname = c("Evil", "Waterson", "King", "Beauty",
"Princess", "Princess of Arabtown"), gender = c("M", "M", "M",
"F", "F", "F"), dob = c(NA, 2000, 2011, NA, NA, 1989), city = c("Arabtown",
"Atlantica", "Sahara", "Nice-France", "Sahara", "Arabia")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
dfC <-
structure(list(fname = c("Jafar Jr", "Fishy", "Lion", "Belle",
"Sarabi", "Jasmine"), lname = c("Evil", "Waterpal", "King", "Beauty",
"Queen", NA), gender = c("M", "M", NA, "F", "F", "F"), dob = c(NA,
2000, 2011, NA, 1940, 1989), city = c("Arabia", NA, "Sahara",
"France", "Sahara", NA)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
dfD <-
structure(list(fname = c("Jafar Jr", "Nemo", "Simba", "Belle",
"Sarabi", "Jasmine"), lname = c("Evil", "Waterson", "King", "Beast",
"Queen", "Evil"), gender = c("M", "M", "M", "F", "F", "M"), dob = c(NA,
2000, 2011, 1989, NA, 1989), city = c("Arabtown", "Atlantica",
"Sahara", NA, "Sahara", "Arabtown")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
dfE <-
structure(list(fname = c("Jafar Jr", "Nemo", "Simba", "Belle",
"Nala", "Aladdin"), lname = c("Evil", "Pateron", NA, "Gaston",
NA, "Streetrat"), gender = c("M", NA, "M", "F", "F", "M"), dob = c(1987,
NA, NA, NA, 1970, 1989), city = c("Arabtown", "Atlantica", "Sahara",
"France", "Sahara", "Arabia")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
dfF <-
structure(list(fname = c("Jafar Jr", "Nemo", "Simba", "Belle",
"Nala", "Al"), lname = c("Evil", "Waterson", "Dead", "Beauty",
"Princess", "Streetrat"), gender = c("M", "M", NA, "F", "F",
"M"), dob = c(1987, 2000, 2011, NA, NA, 1989), city = c("Arabia",
"Atlantic", "Sahara", "Nice-France", "Sahara", "Arabia")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
Expected result :
In the end I want unique identified records :
1 Jafar Evil M 1987 Arabtown
2 Nemo Water M 2000 Atlantic
3 Simba King M 2011 Sahara
4 Belle Beauty F 1989 Nice
5 Nala Princess F 1970 Sahara
6 Jasmine Princess F 1989 Arabtown
7 Sarabi Queen F 1940 Sahara
8 Aladdin Streetrat M 1989 Arabia
Even if the result isn't as clean as above, it's alright. The goal is to find a unified record from all 6 records and belong to the same entity.
Both fastLink & RecordLinkage take care of deduping (removing duplicates).
How can I develop an approach to deal with more than two databases in this scenario?

Excel macros Help. Creating a new column based of arrays.

I am trying to write a macro in Excel to create a new column that assigns each of these states to one of these regions. I keep getting runtime error 13 (type mismatch).
Here is the code I have so far.
' Intended to read a state abbreviation from F1 and write the name of its
' region to Z1, using one array of abbreviations per region.
Sub Region ()
Dim Pacific As Variant
Pacific = Array("WA", "OR", "ID", "CA", "NV", "AZ", "NM", "HI", "AK")
Dim Continental As Variant
Continental = Array("AR", "IA", "CO", "KS", "LA", "MS", "MT", "ND", "NE", "OK", "SD", "UT", "WY")
Dim SouthEast As Variant
SouthEast = Array("GA", "AL", "FL", "SC", "KY", "TN")
Dim Midwest As Variant
Midwest = Array("MN", "WI", "IL", "IN", "MI", "OH")
Dim NorthAtlantic As Variant
NorthAtlantic = Array("ME", "NH", "MA", "RI", "CT", "VT", "NY", "PA", "NJ", "DE", "MD", "WV", "VA", "NC")
Dim Texas As Variant
' NOTE(review): the closing quotation mark below is a curly quote, not a straight one.
Texas = Array("TX”)
Dim state As String , result As String
' NOTE(review): the cell value is stored in "score"; "state" is never assigned in this Sub.
score = Range("F1").Value
' NOTE(review): each branch compares the String "state" with a whole Variant array,
' not with the array's elements -- presumably the source of the reported runtime error 13; verify.
If state = Pacific Then
result = "PACIFIC"
ElseIf state = Continental Then
result = "Continental"
ElseIf state = SouthEast Then
result = "SouthEast"
ElseIf state = Midwest Then
result = "Midwest"
ElseIf state = NorthAtlantic Then
result = "North Atlantic"
ElseIf state = Texas Then
result = "Texas"
Else
result = "fail"
End If
Range("Z1").Value = result
End Sub
AFAIK, to search for the occurrence of a string within an array isn't a simple matter within VBA. You either have to use a loop, or possibly use WorksheetFunction.Match.
A simpler way may be to avoid arrays altogether - your code could be easily refactored to use a Select Case statement:
' Reads the state abbreviation in F1 and writes the name of its region to Z1
' ("fail" when the abbreviation is not in any list).
Sub Region()
    Dim stateCode As String
    Dim regionName As String

    stateCode = Range("F1").Value

    Select Case stateCode
        Case "WA", "OR", "ID", "CA", "NV", "AZ", "NM", "HI", "AK"
            regionName = "PACIFIC"
        Case "AR", "IA", "CO", "KS", "LA", "MS", "MT", "ND", "NE", "OK", "SD", "UT", "WY"
            regionName = "Continental"
        Case "GA", "AL", "FL", "SC", "KY", "TN"
            regionName = "SouthEast"
        Case "MN", "WI", "IL", "IN", "MI", "OH"
            regionName = "Midwest"
        Case "ME", "NH", "MA", "RI", "CT", "VT", "NY", "PA", "NJ", "DE", "MD", "WV", "VA", "NC"
            regionName = "North Atlantic"
        Case "TX"
            regionName = "Texas"
        Case Else
            regionName = "fail"
    End Select

    Range("Z1").Value = regionName
End Sub
Note: You also had two code problems.
You had
score = Range("F1").Value
when I think you meant
state = Range("F1").Value
You had "TX” instead of "TX" - I'm not sure whether the ” causes a problem in your version of Excel, but it does in mine.
To extend this function so that it applies to all cells in column F, you will need to loop through each row:
' Fills column Z of the active sheet with the region name for each state
' abbreviation in column F, from row 1 down to the last used row of column F.
Sub Region()
    Dim lastUsedRow As Long
    Dim rowIndex As Long
    Dim stateCode As String

    With ActiveSheet
        lastUsedRow = .Cells(.Rows.Count, "F").End(xlUp).Row
        For rowIndex = 1 To lastUsedRow
            stateCode = .Cells(rowIndex, "F").Value
            .Cells(rowIndex, "Z").Value = RegionForState(stateCode)
        Next rowIndex
    End With
End Sub

' Maps a state abbreviation to its region name; returns "fail" when the
' abbreviation is not in any list.
Private Function RegionForState(ByVal stateCode As String) As String
    Select Case stateCode
        Case "WA", "OR", "ID", "CA", "NV", "AZ", "NM", "HI", "AK"
            RegionForState = "PACIFIC"
        Case "AR", "IA", "CO", "KS", "LA", "MS", "MT", "ND", "NE", "OK", "SD", "UT", "WY"
            RegionForState = "Continental"
        Case "GA", "AL", "FL", "SC", "KY", "TN"
            RegionForState = "SouthEast"
        Case "MN", "WI", "IL", "IN", "MI", "OH"
            RegionForState = "Midwest"
        Case "ME", "NH", "MA", "RI", "CT", "VT", "NY", "PA", "NJ", "DE", "MD", "WV", "VA", "NC"
            RegionForState = "North Atlantic"
        Case "TX"
            RegionForState = "Texas"
        Case Else
            RegionForState = "fail"
    End Select
End Function
Why don't you use Access? Create the tables as you did, and then link them to the further logical tables you are going to create (I presume there is some practical use for the code you wrote). That is why Access was created in the first place...

Complex data transformation

I need to transform following (simplified) dataset, created by following code:
structure(list(W1.1 = structure(c(1L, NA, NA), .Names = c("case1",
"case2", "case3"), .Label = "1", class = "factor"), R1.1 = structure(c(1L,
NA, NA), .Names = c("case1", "case2", "case3"), .Label = "2", class = "factor"),
W1.2 = structure(c(NA, 1L, NA), .Names = c("case1", "case2",
"case3"), .Label = "1", class = "factor"), R1.2 = structure(c(NA,
1L, NA), .Names = c("case1", "case2", "case3"), .Label = "1", class = "factor"),
W2.1 = structure(c(NA, 1L, NA), .Names = c("case1", "case2",
"case3"), .Label = "1", class = "factor"), R2.1 = structure(c(NA,
1L, NA), .Names = c("case1", "case2", "case3"), .Label = "1", class = "factor"),
W2.2 = structure(c(1L, NA, NA), .Names = c("case1", "case2",
"case3"), .Label = "2", class = "factor"), R2.2 = structure(c(1L,
NA, NA), .Names = c("case1", "case2", "case3"), .Label = "1", class = "factor"),
W3.1 = structure(c(1L, NA, NA), .Names = c("case1", "case2",
"case3"), .Label = "1", class = "factor"), R3.1 = structure(c(1L,
NA, NA), .Names = c("case1", "case2", "case3"), .Label = "1", class = "factor"),
W3.2 = structure(c(1L, 1L, NA), .Names = c("case1", "case2",
"case3"), .Label = "1", class = "factor"), R3.2 = structure(c(1L,
1L, NA), .Names = c("case1", "case2", "case3"), .Label = "1", class = "factor"),
age = structure(c(3L, 1L, 2L), .Names = c("case1", "case2",
"case3"), .Label = c("20", "48", "56"), class = "factor"),
gender = structure(c(2L, 1L, 2L), .Names = c("case1", "case2",
"case3"), .Label = c("female", "male"), class = "factor")), .Names = c("W1.1",
"R1.1", "W1.2", "R1.2", "W2.1", "R2.1", "W2.2", "R2.2", "W3.1",
"R3.1", "W3.2", "R3.2", "age", "gender"), row.names = c(NA, 3L
), class = "data.frame")
For the new data I want:
- a row dedicated to every x.x, with info on the Rx.x value, age and gender.
- only have a row returned when Wx.x was 1. When 2 or NA, I don't need it.
For my example this dataset should look something like this:
incident type Where Reported age gender
1 1 1.1 1 2 56 male
2 2 3.1 1 1 56 male
3 3 3.2 1 1 56 male
4 4 1.2 1 1 20 female
5 5 2.1 1 1 20 female
6 6 3.2 1 1 20 female
Note: the "Where" column can even be omitted since it should be a constant vector of 1, and I don't need it for the analysis.
This is (mostly) a problem to be tackled by reshape(). Assuming your original dataset is called "temp":
First, reshape it from a wide format to a long format.
# Wide -> long: every column other than age and gender is treated as a
# measurement column; sep = "" splits each name into its leading letter
# (W or R) and the x.x suffix, which becomes the "time" column.
temp.long <- reshape(
  temp,
  idvar = c("age", "gender"),
  varying = which(!(names(temp) %in% c("age", "gender"))),
  sep = "",
  direction = "long"
)
temp.long
# age gender time W R
# 56.male.1.1 56 male 1.1 1 2
# 20.female.1.1 20 female 1.1 <NA> <NA>
# 48.male.1.1 48 male 1.1 <NA> <NA>
# 56.male.1.2 56 male 1.2 <NA> <NA>
# 20.female.1.2 20 female 1.2 1 1
# 48.male.1.2 48 male 1.2 <NA> <NA>
# 56.male.2.1 56 male 2.1 <NA> <NA>
# 20.female.2.1 20 female 2.1 1 1
# 48.male.2.1 48 male 2.1 <NA> <NA>
# 56.male.2.2 56 male 2.2 2 1
# 20.female.2.2 20 female 2.2 <NA> <NA>
# 48.male.2.2 48 male 2.2 <NA> <NA>
# 56.male.3.1 56 male 3.1 1 1
# 20.female.3.1 20 female 3.1 <NA> <NA>
# 48.male.3.1 48 male 3.1 <NA> <NA>
# 56.male.3.2 56 male 3.2 1 1
# 20.female.3.2 20 female 3.2 1 1
# 48.male.3.2 48 male 3.2 <NA> <NA>
Second, do some cleanup.
# Keep only complete rows; after this W contains no NA, so the comparison
# below never yields NA.
temp.long <- na.omit(temp.long)
# Drop rows where W is 2. A logical mask is used instead of -which(...):
# when no row matches, -which(...) is an empty index and would drop every row.
temp.long <- temp.long[temp.long$W != 2, ]
# Sort by gender in descending order (male before female, as in the expected
# output), then by time. -xtfrm() reverses the sort key itself; rev() would
# only reverse the positions of the values, which is not a descending sort.
temp.long <- temp.long[order(-xtfrm(temp.long$gender), temp.long$time), ]
rownames(temp.long) <- NULL
# seq_len() also works when no rows are left (seq(0) would give c(1, 0)).
temp.long$incident <- seq_len(nrow(temp.long))
temp.long
# age gender time W R incident
# 1 56 male 1.1 1 2 1
# 2 56 male 3.1 1 1 2
# 3 56 male 3.2 1 1 3
# 4 20 female 1.2 1 1 4
# 5 20 female 2.1 1 1 5
# 6 20 female 3.2 1 1 6
You can do further cleanup to change your column names and column order if it's important.

Resources