I need to run some experiments on custom datasets using pytorch. The question is, how can I create a dataset using torch.Dataloader?
I have two lists, one is called Values and has a datapoint tensor at every entry, and the other one is called Labels, that has the corresponding label. What I did is the following:
# Pair every datapoint with its label: one [value, label] entry per sample.
for i in range(samples):
    dataset[i] = [values[i], labels[i]]
So I have a list with datapoint and respective label, and then tried the following:
# Try to turn the list of [value, label] pairs into a single float tensor;
# this is the call the question reports failing with "Not a sequence".
dataset = torch.tensor(dataset).float()
# Wrap the tensor in a TensorDataset, then batch and shuffle it with a DataLoader.
dataset = torch.utils.data.TensorDataset(dataset)
data_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=100, shuffle=True, num_workers=4, pin_memory=True)
But, first of all, I get the error "Not a sequence" in the torch.tensor command, and second, I'm not sure this is the right way of creating one. Any suggestion?
Thank you very much!
You do not need to overload DataLoader, but rather create a Dataset for your data.
For instance,
class MyDataset(Dataset):
    """Dataset that returns each datapoint together with its label.

    The constructor takes no arguments; it reads the ``values`` and ``labels``
    sequences that must already exist in the enclosing (module) scope.
    """

    def __init__(self):
        super().__init__()
        # do stuff here?
        self.values = values
        self.labels = labels

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.values)

    def __getitem__(self, index):
        """Return the ``(value, label)`` pair stored at ``index``."""
        return self.values[index], self.labels[index]
Just to enrich the answer by @shai
class MyDataset(Dataset):
    """Dataset over a single indexable collection of datapoints.

    Args:
        values: Any object supporting ``len()`` and integer indexing.
    """

    def __init__(self, values):
        super().__init__()
        self.values = values

    def __len__(self):
        """Return the number of datapoints."""
        return len(self.values)

    def __getitem__(self, index):
        """Return the datapoint stored at ``index``."""
        return self.values[index]


# Example: 51000 random datapoints with 3 features each.
values = np.random.rand(51000, 3)
dataset = MyDataset(values)
Related
I am new to pytorch. I am trying to create a DataLoader for a dataset of images where each image got a corresponding ground truth (same name):
root:
--->RGB:
------>img1.png
------>img2.png
------>...
------>imgN.png
--->GT:
------>img1.png
------>img2.png
------>...
------>imgN.png
When I use the path for root folder (that contains RGB and GT folders) as input for the torchvision.datasets.ImageFolder it reads all of the images as if they were all intended for input (classified as RGB and GT), and it seems like there is no way to pair the RGB-GT images. I would like to pair the RGB-GT images, shuffle, and divide it to batches of defined size. How can it be done? Any advice will be appreciated.
Thanks.
I think a good starting point is to use the VisionDataset class as a base. What we are going to use here is the DatasetFolder source code, so we are going to create something similar. You can notice this class depends on two other functions from the datasets.folder module: default_loader and make_dataset.
We are not going to modify default_loader, because it's already fine, it just helps us to load images, so we will import it.
But we need a new make_dataset function that prepares the right pairs of images from the root folder. The original make_dataset pairs images (image paths, to be more precise) with their root folder as the target class (class index), so it gives a list of (path, class_to_idx[target]) pairs, but we need (rgb_path, gt_path). Here is the code for the new make_dataset:
def make_dataset(root: str) -> list:
    """Read a data directory and pair RGB images with their ground truth.

    Args:
        root: Directory that contains the ``RGB`` and ``GT`` sub-folders.

    Returns:
        A list of ``(rgb_path, gt_path)`` tuples, one for every file name that
        exists in both sub-folders, ordered by file name.
    """
    # Our dir names
    rgb_dir = os.path.join(root, 'RGB')
    gt_dir = os.path.join(root, 'GT')
    # A set gives a constant-time membership test for every GT file name.
    rgb_fnames = set(os.listdir(rgb_dir))
    # Keep only the GT files that have an RGB image with the same name.
    return [
        (os.path.join(rgb_dir, fname), os.path.join(gt_dir, fname))
        for fname in sorted(os.listdir(gt_dir))
        if fname in rgb_fnames
    ]
What do we have now? Let's compare our function with original one:
# Import torchvision's helper under an alias so it does not shadow our make_dataset.
from torchvision.datasets.folder import make_dataset as make_dataset_original
# Original helper: pairs every image path with the class index of its folder.
dataset_original = make_dataset_original(root, {'RGB': 0, 'GT': 1}, extensions='png')
# Our helper: pairs every RGB path with the GT path of the same file name.
dataset = make_dataset(root)
# Print both results, one item per line, for comparison.
print('Original make_dataset:')
print(*dataset_original, sep='\n')
print('Our make_dataset:')
print(*dataset, sep='\n')
Original make_dataset:
('./data/GT/img1.png', 1)
('./data/GT/img2.png', 1)
...
('./data/RGB/img1.png', 0)
('./data/RGB/img2.png', 0)
...
Our make_dataset:
('./data/RGB/img1.png', './data/GT/img1.png')
('./data/RGB/img2.png', './data/GT/img2.png')
...
I think it works great) It's time to create our Dataset class. The most important part here is the __getitem__ method, because it imports the images, applies the transformations and returns tensors that can be used by dataloaders. We need to read a pair of images (rgb and gt) and return a tuple of 2 tensor images:
from torchvision.datasets.folder import default_loader
from torchvision.datasets.vision import VisionDataset
class CustomVisionDataset(VisionDataset):
    """Dataset of paired RGB / ground-truth images found under ``root``.

    Args:
        root: Directory handed to ``make_dataset`` to collect the
            ``(rgb_path, gt_path)`` pairs.
        loader: Callable that loads one image from a path
            (``default_loader`` unless overridden).
        rgb_transform: Optional callable applied to every loaded RGB image.
        gt_transform: Optional callable applied to every loaded GT image.
    """

    def __init__(self,
                 root,
                 loader=default_loader,
                 rgb_transform=None,
                 gt_transform=None):
        super().__init__(root,
                         transform=rgb_transform,
                         target_transform=gt_transform)
        # Prepare dataset
        samples = make_dataset(self.root)
        self.loader = loader
        self.samples = samples
        # list of RGB image paths (first element of every pair)
        self.rgb_samples = [s[0] for s in samples]
        # list of GT image paths (second element of every pair)
        self.gt_samples = [s[1] for s in samples]

    def __getitem__(self, index):
        """Return the ``(rgb_sample, gt_sample)`` pair stored at ``index``."""
        # getting our paths to images
        rgb_path, gt_path = self.samples[index]
        # import each image using the loader
        rgb_sample = self.loader(rgb_path)
        gt_sample = self.loader(gt_path)
        # here go the transforms if needed;
        # each type of image has its own, so they can differ
        if self.transform is not None:
            rgb_sample = self.transform(rgb_sample)
        if self.target_transform is not None:
            gt_sample = self.target_transform(gt_sample)
        # now we return the matching imported pair of images
        return rgb_sample, gt_sample

    def __len__(self):
        """Return the number of image pairs."""
        return len(self.samples)
Let's test it:
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
bs = 4  # batch size
transforms = ToTensor()  # we need this to convert PIL images to Tensor
shuffle = True

dataset = CustomVisionDataset('./data', rgb_transform=transforms, gt_transform=transforms)
dataloader = DataLoader(dataset, batch_size=bs, shuffle=shuffle)

for i, (rgb, gt) in enumerate(dataloader):
    print(f'batch {i+1}:')
    # some plots
    # Loop over the images actually present in this batch: the last batch holds
    # fewer than `bs` images when len(dataset) is not a multiple of `bs`.
    # A separate index `j` keeps the batch counter `i` intact.
    for j in range(len(rgb)):
        plt.figure(figsize=(10, 5))
        plt.subplot(221)
        plt.imshow(rgb[j].squeeze().permute(1, 2, 0))
        plt.title(f'RGB img{j+1}')
        plt.subplot(222)
        plt.imshow(gt[j].squeeze().permute(1, 2, 0))
        plt.title(f'GT img{j+1}')
        plt.show()
Out:
batch 1:
...
Here you can find a notebook with code and simple dummy dataset.
I have a sqlite table defined as:
class HourlyUserWebsite(Base):
    """ORM model for one row of the ``hourly_user_website`` table."""

    __tablename__ = 'hourly_user_website'

    id = Column(Integer, primary_key=True)
    user = Column(String(600), index=True)
    domain = Column(String(600))
    time_secs = Column(Integer, index=True)

    def __repr__(self):
        # %r rather than %d: time_secs is None until it has been assigned, and
        # %d would raise TypeError; for integers both render identically.
        return "HourlyUserWebsite(user='%s', domain='%s', time_secs=%r)" % (
            self.user, self.domain, self.time_secs)
and I add elements to it with a class method as:
def add_elements_to_hourly_db(self, data, start_secs, end_secs, engine):
    """Add every element of ``data`` to the database and commit.

    Args:
        data: Iterable of objects to add to the session.
        start_secs: Unused; kept so existing callers keep working.
        end_secs: Unused; kept so existing callers keep working.
        engine: Passed to ``self._get_session`` to obtain the session.

    Raises:
        Exception: Whatever ``add_all`` or ``commit`` raised, re-raised after
            the session has been rolled back.
    """
    session = self._get_session(engine)
    try:
        session.add_all(data)
        session.commit()
    except Exception:
        # Roll back so the session is usable again before propagating.
        # NOTE(review): assumes _get_session may hand out a reused session.
        session.rollback()
        raise
as the data is time series I am expecting to add always elements with increasing or equal time_secs value (not decreasing).
I get the data from the table with a query like:
session.query(HourlyUserWebsite)
I'd like to have the results from the query sorted by time_secs and by user.
Is there any way I can do it? Can the data be stored in such a way that query for sorted data is optimised keeping in mind that it is a time series?
# Sort by user first, then by time_secs in descending order within each user;
# .all() runs the query and returns the rows as a list.
session.query(HourlyUserWebsite).order_by(HourlyUserWebsite.user,HourlyUserWebsite.time_secs.desc()).all()
I have a list like [gender, axonVoucherCode]
Also I have a Json Array [[gender:Gender?], [concussionHistory:History of previous concussion?], [previousConcussion:Number of previous concussions?], [historyMigraineChronic:History of migraine or chronic headaches?], [edTreatment:ED treatment ?], [axonVoucherCode:Axon Voucher Code ?]]
I want to make a list with corresponding value of 1st list like [Gender?,Axon Voucher Code ?] .I use JsonSlurper for parsing Json .
// Loads the "QuestionPart1" JSON file from the web root's /jsons/ folder and
// prints its entries. The `list` argument (the requested keys) is not used
// yet, so the returned `newlists` is always empty.
def fetchQuestion(def list){
def webRootDir = SCH.servletContext.getRealPath("/")
def f = new File(webRootDir + "/jsons/" + "QuestionPart1")
// Parse the whole file content as JSON.
def questionList = new JsonSlurper().parseText(f.text)
def newlists=[]
println questionList.QuestionPart1
// Print every entry of the QuestionPart1 array.
questionList.QuestionPart1.each{
println(it)
}
println(newlists);
return newlists
}
//I want to put matching value to newlists
Here is my JSON file format .
{"QuestionPart1":[
{"gender":"Gender?"},
{"concussionHistory":"History of previous concussion?"},
{"previousConcussion":"Number of previous concussions?"},
{"historyMigraineChronic":"History of migraine or chronic headaches?"},
{"edvisitTime":"Date and time of ED visit?"},
{"injuryTime":"Date and time of Injury?"},
{"mechanismInjury":"Mechanism of Injury?"},
{"sportsType":"Choose Sport?"},
{"signAndSymptom":"Select the signs and symptoms the subject experienced following injury?"},
{"durationLossConsciousness":"Duration of loss of Conciousness ? "},
{"durationBeforeAmnesia":"Duration of Amnesia for events BEFORE injury ?"},
{"durationAfterAmnesia":"Duration of Amnesia for events AFTER injury ?"},
{"ctObtainedED":"Head CT obtained in ED ?"},
{"edTreatment":"ED treatment ?"},
{"axonVoucherCode":"Axon Voucher Code ?"}
]}
Please help me to solve this problem .Thanks
Assuming I understand your question, this should work:
/**
 * Returns the question texts whose keys are listed in {@code keys}, in the
 * order the entries appear in the QuestionPart1 JSON file.
 */
def fetchQuestion(String ...keys){
    def webRootDir = SCH.servletContext.getRealPath("/")
    def f = new File(webRootDir + "/jsons/" + "QuestionPart1")
    def questionList = new JsonSlurper().parseText(f.text)
    println questionList.QuestionPart1
    // For each entry take the value of the first pair whose key was requested;
    // findResults drops the null produced by entries with no requested key.
    return questionList.QuestionPart1.findResults { it.find { k, v -> k in keys }?.value }
}

def listOfValues = fetchQuestion('edvisitTime', 'axonVoucherCode')
Here, listOfValues will equal ['Date and time of ED visit?', 'Axon Voucher Code ?']
I'm trying to subclass Array to implement a map method that returns instances of my Record class. I'm trying to create a sort of "lazy" array that only instantiates objects as they are needed to try and avoid allocating too many Ruby objects at once. I'm hoping to make better use of the garbage collector by only instantiating an object on each iteration.
# Array subclass from the question: keeps the raw results and is meant to
# build a Record per element inside +map+ (the +map+ body is still unfinished).
class LazyArray < Array
  def initialize(results)
    @results = results
  end

  def map(&block)
    record = Record.new(@results[i]) # how to get each item from @results for each iteration?
    # how do I pass the record instance to the block for each iteration?
  end
end
# Wrap a plain array of attribute hashes in the LazyArray under test.
simple_array = [{name: 'foo'}, {name: 'bar'}]
lazy_array_instance = LazyArray.new(simple_array)
# It must still be an Array and respond to map.
expect(lazy_array_instance).to be_an Array
expect(lazy_array_instance).to respond_to :map
# Every element yielded by map must already be a Record instance.
lazy_array_instance.map do |record|
expect(record).to be_a Record
end
How can I subclass Array so that I can return an instance of my Record class in each iteration?
From what I know, you shouldn't have to do anything like this at all. Using .lazy you can perform lazy evaluation of arrays:
simple_array_of_results.lazy.map do |record|
# do something with Record instance
end
Now, you've got some odd situation where you're doing something like -
SomeOperation(simple_array_of_results)
and either you want SomeOperation to do its thing lazily, or you want the output to be something lazy -
lazily_transformed_array_of_results = SomeOperation(simple_array_of_results)
page_of_results = lazily_transformed_array_of_results.take(10)
If that sounds right... I'd expect it to be as simple as:
SomeOperation(simple_array_of_results.lazy)
Does that work? array.lazy returns an object that responds to map, after all...
Edit:
...after reading your question again, it seems like what you actually want is something like:
SomeOperation(simple_array_of_results.lazy.collect{|r| SomeTransform(r)})
SomeTransform is whatever you're thinking of that takes that initial data and uses it to create your objects ("as needed" becoming "one at a time"). SomeOperation is whatever it is that needs to be passed something that responds to map.
So you have an array of simple attributes or some such and you want to instantiate an object before calling the map block. Sort of pre-processing on a value-by-value basis.
# Simple value object with a readable and writable +name+.
class Record
  attr_accessor :name

  # params - Hash of attributes; only the :name entry is read.
  def initialize(params = {})
    @name = params[:name]
  end
end
require 'delegate'
# Delegator around a collection of attribute hashes whose +map+ hands the
# block a Record built from each element instead of the raw hash.
class MapEnhanced < SimpleDelegator
  def map(&block)
    # __getobj__ is SimpleDelegator's public accessor for the wrapped object.
    __getobj__.map do |attributes|
      object = Record.new(attributes)
      block.call(object)
    end
  end
end
array = MapEnhanced.new([{name: 'Joe'}, {name: 'Pete'}])
array.map {|record| record.name }
# => ["Joe", "Pete"]
An alternative (which will allow you to keep object.is_a? Array)
# Array subclass whose +map+ hands the block a Record built from each element;
# the stock Array#map stays reachable as +old_map+.
class MapEnhanced < Array
  alias_method :old_map, :map

  def map(&block)
    old_map { |attributes| block.call(Record.new(attributes)) }
  end
end
I use a custom delegate to display a column of comboBoxes in my QTableView.
The values are the same for all the comboBoxes so it's not really the population part that gives me trouble.
I want them to show as the selected item, some value that I can retrieve from a database. I have access to the database from the delegate, but in order to send my request, I need the row of the comboBox.
So I guess my question is : how can you iterate over all the rows of the table and do some action from inside the custom delegate ?
If it can help here is my custom delegate class :
class ComboBoxDelegate(QtGui.QItemDelegate):
    """Item delegate that shows every cell of its column as a combo box.

    Args:
        parent: The view that owns the delegate; also used to open the
            persistent editors and to reach the database handle (``parent.db``).
        itemslist: Entries offered by every combo box.
    """

    def __init__(self, parent, itemslist):
        QtGui.QItemDelegate.__init__(self, parent)
        self.itemslist = itemslist
        # NOTE(review): this attribute hides QObject.parent() on the instance.
        self.parent = parent

    def paint(self, painter, option, index):
        """Draw the cell as a combo box showing the item selected by its value."""
        # Get Item Data
        value = index.data(QtCore.Qt.DisplayRole).toInt()[0]
        # value = self.itemslist[index.data(QtCore.Qt.DisplayRole).toInt()[0]]
        # fill style options with item data
        style = QtGui.QApplication.style()
        opt = QtGui.QStyleOptionComboBox()
        opt.currentText = str(self.itemslist[value])
        opt.rect = option.rect
        # draw item data as ComboBox
        style.drawComplexControl(QtGui.QStyle.CC_ComboBox, opt, painter)
        self.parent.openPersistentEditor(index)

    def createEditor(self, parent, option, index):
        """Create the combo box for the cell at ``index`` (``index.row()`` is its row)."""
        ##get the "check" value of the row
        # for row in range(self.parent.model.rowCount(self.parent)):
        #     print row
        # self.editor is kept for existing readers; it always refers to the
        # most recently created editor, not to the editor of a given row.
        self.editor = editor = QtGui.QComboBox(parent)
        editor.addItems(self.itemslist)
        editor.setCurrentIndex(0)
        editor.installEventFilter(self)
        self.connect(editor, QtCore.SIGNAL("currentIndexChanged(int)"), self.editorChanged)
        return editor

    # def setEditorData(self, editor, index):
    #     value = index.data(QtCore.Qt.DisplayRole).toInt()[0]
    #     editor.setCurrentIndex(value)

    def setEditorData(self, editor, index):
        """Select in ``editor`` the entry matching the model value at ``index``."""
        text = self.itemslist[index.data(QtCore.Qt.DisplayRole).toInt()[0]]
        # Act on the editor passed in: with one persistent editor per row,
        # self.editor would be the wrong widget for all but the last row.
        pos = editor.findText(text)
        if pos == -1:
            pos = 0
        editor.setCurrentIndex(pos)

    def setModelData(self, editor, model, index):
        """Store the current index of ``editor`` in the model at ``index``."""
        value = editor.currentIndex()
        model.setData(index, QtCore.QVariant(value))

    def updateEditorGeometry(self, editor, option, index):
        """Make ``editor`` fill the rectangle of its cell."""
        editor.setGeometry(option.rect)

    def editorChanged(self, index):
        """Write the newly chosen entry to the database.

        ``index`` is the position of the chosen entry in the combo box, not a
        model index. Every editor holds the same ``itemslist``, so reading the
        text from ``self.editor`` gives the same result for any row.
        """
        check = self.editor.itemText(index)
        # NOTE(review): selectedIndexes is indexed like an attribute here;
        # confirm the view defines it that way rather than as a method.
        id_seq = self.parent.selectedIndexes[0][0]
        update.updateCheckSeq(self.parent.db, id_seq, check)
And I call it from the QTableView like this:
# Start editing a cell as soon as it becomes the current item.
self.setEditTriggers(QtGui.QAbstractItemView.CurrentChanged)
# Let the view filter the events of its own viewport.
self.viewport().installEventFilter(self)
# Column 13 is rendered through the combo-box delegate, offering self.checkValues.
self.setItemDelegateForColumn(13,ComboBoxDelegate(self, self.checkValues))
Hope I was clear enough, thanks for your attention
Not sure if accessing the database from the delegate is a right thing to do. Your delegate can contain reference to the instance of QAbstractTableModel which the QTableView refers to. You can then use methods in the model to iterate over rows of the table.