I have an array of floating numbers and a certain cutoff:
myData = [1.3,1.5,1.7,1.7,16.7,18.4,19.2,19.5,19.6,20.2,20.8,58.4,60.7,
61.2,61.2,116.4,121.2,122.7,123.2,123.2,138.5,149.5,149.5]
myBin = 5.3
I'd like to build a hash of arrays so that the difference by subtraction between the last element and the first element of the array is less or equal to myBin (5.3)
# Expected result: consecutive values are grouped while the difference between
# a value and the first value of its group stays <= myBin (5.3).
myHash = {
  'hap_1' => [1.3, 1.5, 1.7, 1.7],
  'hap_2' => [16.7, 18.4, 19.2, 19.5, 19.6, 20.2, 20.8],
  'hap_3' => [58.4, 60.7, 61.2, 61.2],
  'hap_4' => [116.4, 121.2],
  'hap_5' => [122.7, 123.2, 123.2],
  'hap_6' => [138.5],
  'hap_7' => [149.5, 149.5]
}
Thank you so much in advance for your time and helpful assistance.
Cheers
Enumerable#slice_before can solve your problem:
first = myData[0]
myData.slice_before { |e| first = e if e - first > myBin }.to_a
#=> [[1.3, 1.5, 1.7, 1.7],
# [16.7, 18.4, 19.2, 19.5, 19.6, 20.2, 20.8],
# [58.4, 60.7, 61.2, 61.2],
# [116.4, 121.2],
# [122.7, 123.2, 123.2],
# [138.5],
# [149.5, 149.5]]
myData.drop(1).each_with_object([[myData.first]]) { |n,a|
n - a.last.first <= myBin ? (a.last << n) : a << [n] }.
each.with_index(1).with_object({}) { |(a,i),h| h["hap_#{i}"] = a }
#=> {"hap_1"=>[1.3, 1.5, 1.7, 1.7],
# "hap_2"=>[16.7, 18.4, 19.2, 19.5, 19.6, 20.2, 20.8],
# "hap_3"=>[58.4, 60.7, 61.2, 61.2],
# "hap_4"=>[116.4, 121.2],
# "hap_5"=>[122.7, 123.2, 123.2],
# "hap_6"=>[138.5],
# "hap_7"=>[149.5, 149.5]}
You could build a custom enumerator that works like chunk_while, but compares each chunk's first element to the current one, i.e. 1.3 to 1.5, then 1.3 to 1.7 and so on:
module Enumerable
  # Splits the receiver into consecutive chunks. Unlike chunk_while, the block
  # is given the FIRST element of the chunk being built together with the
  # current element; a truthy result appends the element to that chunk, a
  # falsy result opens a new chunk.
  #
  # Returns an Array of Arrays, or an Enumerator when no block is given.
  def chunk_while1
    return enum_for(__method__) unless block_given?

    chunks = []
    each_entry do |item|
      current = chunks.last
      if current && yield(current.first, item)
        current << item
      else
        chunks << [item]
      end
    end
    chunks
  end
end
Usage:
data = [
1.3, 1.5, 1.7, 1.7, 16.7, 18.4, 19.2, 19.5, 19.6, 20.2, 20.8, 58.4, 60.7,
61.2, 61.2, 116.4, 121.2, 122.7, 123.2, 123.2, 138.5, 149.5, 149.5
]
result = data.chunk_while1 { |i, j| j - i <= 5.3 }
#=> [
# [1.3, 1.5, 1.7, 1.7],
# [16.7, 18.4, 19.2, 19.5, 19.6, 20.2, 20.8],
# [58.4, 60.7, 61.2, 61.2],
# [116.4, 121.2],
# [122.7, 123.2, 123.2],
# [138.5],
# [149.5, 149.5]
# ]
The result can then be converted to a hash, e.g. via:
result.map.with_index(1) { |a, i| ["hap_#{i}", a] }.to_h
#=> {
# "hap_1"=>[1.3, 1.5, 1.7, 1.7],
# "hap_2"=>[16.7, 18.4, 19.2, 19.5, 19.6, 20.2, 20.8],
# "hap_3"=>[58.4, 60.7, 61.2, 61.2],
# "hap_4"=>[116.4, 121.2],
# "hap_5"=>[122.7, 123.2, 123.2],
# "hap_6"=>[138.5],
# "hap_7"=>[149.5, 149.5]
# }
Related
I am implementing the following model:
def ConnectomeCNNAutoencoder(input_shape, keep_pr=0.65, n_filter=32, n_dense1=64, n_classes=2,
mode="autoencoder", sign="neg"):
input_1 = Input(shape=input_shape)
# Convolutional Encoder
bias_init = tf.constant_initializer(value=0.001)
conv1 = Conv2D(filters=n_filter , kernel_size=(1,input_shape[1]), strides=(1, 1),
padding= "valid", activation="selu", # "selu"
kernel_initializer="glorot_uniform",
bias_initializer=bias_init, name="conv1")(input_1)
dropout1 = Dropout(keep_pr, name="dropout1")(conv1)
conv2 = Conv2D(filters=n_filter*2 , kernel_size=(input_shape[1],1), strides=(1, 1),
padding= "valid", activation="selu",
kernel_initializer="glorot_uniform",
bias_initializer=bias_init, name="conv2")(dropout1)
encoded = Dropout(keep_pr, name="dropout2")(conv2)
# Classification
reshape = Reshape((n_filter*2,), name="reshape1")(encoded)
dense1 = Dense(n_dense1, activation="selu", name="dense1", kernel_regularizer=keras.regularizers.l1_l2())(reshape)
if n_classes == 1:
activation = "sigmoid"
else:
activation = "softmax"
output = Dense(n_classes, activation=activation, name="output")(dense1)
# Decoder
dense2 = Dense(n_dense1, activation="selu", name="dense2")(output)
dim_reconstruct = tuple(encoded.get_shape().as_list())
reshape2 = Reshape(dim_reconstruct[1:], name="reshape2")(dense2)
conv3 = Conv2DTranspose(filters=n_filter*2 , kernel_size=(1,1), strides=(1, 1),
padding= "valid", activation="selu", # "selu"
kernel_initializer="glorot_uniform",
bias_initializer=bias_init, name="conv3")(reshape2)
conv4 = Conv2DTranspose(filters=n_filter , kernel_size=(input_shape[1],1), strides=(1, 1),
padding= "valid", activation="selu", # "selu"
kernel_initializer="glorot_uniform",
bias_initializer=bias_init, name="conv4")(conv3)
if sign == "pos":
reconstructed_activation = "sigmoid"
elif sign == "neg":
reconstructed_activation = "tanh"
reconstructed_input = Conv2DTranspose(filters=input_shape[-1], kernel_size=(1,input_shape[1]), strides=(1, 1),
padding= "valid", activation=reconstructed_activation,
kernel_initializer="glorot_uniform",
bias_initializer=bias_init, name='autoencoder')(conv4)
if mode == "autoencoder":
model = keras.models.Model(inputs=input_1, outputs=[output, reconstructed_input])
elif mode =="encoder":
model = keras.models.Model(inputs=input_1, outputs=encoded)
elif mode == "decoder":
model = keras.models.Model(inputs=input_1, outputs=reconstructed_input)
return model
The model works fine when n_filter=32 and n_dense1=64, but when I change these variables to other values, this error pops up: "ValueError: total size of new array must be unchanged".
I know it is related to the use of Reshape in reshape2, but I don't know how to solve it.
How can I solve this?
Thanks!
The problem appears in this line:
reshape2 = Reshape(dim_reconstruct[1:], name="reshape2")(dense2)
Tensor dense2 should be of the shape that could be 'transformed' into shape of dim_reconstruct[1:]. It means that the product of values of dim_reconstruct[1:] should be equal to the shape of dense2 (excluding zeroth dimension - batch size, because keras doesn't count it when derives dimensionalities of tensors).
If n_filter = 30, dim_reconstruct[1:] will be [1, 1, 60], because conv2 uses n_filter * 2 filters. The number of units in dense2 therefore has to be equal to the product of the values in [1, 1, 60], i.e. 60.
I couldn't find an image illustrating the transformation of a 1-D array into a 3-D one, but here is an example with 2-D arrays: [1, 2, 3, 4, 5] cannot fit into a 2x3 array, whereas [1, 2, 3, 4, 5, 6] can be reshaped into [[1, 2, 3], [4, 5, 6]].
So you could either set n_dense1 to 60 when calling ConnectomeCNNAutoencoder, or derive the size automatically instead:
# Decoder
# The shape includes the batch axis, which is why it is sliced off with [1:] below.
dim_reconstruct = tuple(encoded.get_shape().as_list())  # say, (None, 1, 1, 60)
# np.prod replaces the deprecated alias np.product (removed in NumPy 2.0);
# int() hands the layer a plain Python integer rather than a NumPy scalar.
n_dense2 = int(np.prod(dim_reconstruct[1:]))  # will be 60
dense2 = Dense(n_dense2, activation="selu", name="dense2")(output)
reshape2 = Reshape(dim_reconstruct[1:], name="reshape2")(dense2)
Complete example (I removed some arguments that were equal to default values):
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Input
from tensorflow.keras.layers import Conv2D, Dropout, Reshape, Dense, Conv2DTranspose
def ConnectomeCNNAutoencoder(input_shape,
                             keep_pr=0.65,
                             n_filter=32,
                             n_dense1=64,
                             n_classes=2,
                             mode="autoencoder",
                             sign="neg"):
    """Build the convolutional classifier/autoencoder as a Keras model.

    Args:
        input_shape: shape of one sample without the batch axis. ``reshape1``
            flattens the encoder output to ``n_filter * 2`` values, so the two
            convolutions must reduce the spatial axes to a single position
            (presumably a square input such as ``(32, 32, 3)`` - confirm).
        keep_pr: value passed as the first argument of both Dropout layers.
            NOTE(review): Keras documents that argument as the fraction of
            units to DROP, so despite the name 0.65 drops 65 % - confirm intent.
        n_filter: filters of conv1; conv2 uses ``n_filter * 2``.
        n_dense1: units of the ``dense1`` classification layer.
        n_classes: units of the ``output`` layer; 1 selects a sigmoid
            activation, anything else softmax.
        mode: "autoencoder" (outputs ``[output, reconstructed_input]``),
            "encoder" (outputs ``encoded``) or "decoder" (outputs
            ``reconstructed_input``).
        sign: "pos" selects a sigmoid reconstruction activation, "neg" tanh.

    Returns:
        A ``keras.models.Model`` whose outputs depend on ``mode``.

    Raises:
        ValueError: if ``mode`` or ``sign`` is not one of the values above.
    """
    input_1 = Input(shape=input_shape)

    # Convolutional Encoder
    bias_init = tf.constant_initializer(value=0.001)
    conv1 = Conv2D(filters=n_filter,
                   kernel_size=(1, input_shape[1]),
                   strides=(1, 1),
                   activation="selu",
                   bias_initializer=bias_init,
                   name="conv1")(input_1)
    dropout1 = Dropout(keep_pr, name="dropout1")(conv1)
    conv2 = Conv2D(filters=n_filter * 2,
                   kernel_size=(input_shape[1], 1),
                   strides=(1, 1),
                   activation="selu",
                   bias_initializer=bias_init,
                   name="conv2")(dropout1)
    encoded = Dropout(keep_pr, name="dropout2")(conv2)

    # Classification
    reshape = Reshape((n_filter * 2,), name="reshape1")(encoded)
    dense1 = Dense(n_dense1,
                   activation="selu",
                   name="dense1",
                   kernel_regularizer=keras.regularizers.l1_l2())(reshape)
    if n_classes == 1:
        activation = "sigmoid"
    else:
        activation = "softmax"
    output = Dense(n_classes, activation=activation, name="output")(dense1)

    # Decoder
    # dense2 must emit exactly as many values as reshape2 needs, so its size is
    # derived from the encoder output instead of being tied to n_dense1.
    dim_reconstruct = tuple(encoded.get_shape().as_list())
    # np.prod replaces the deprecated alias np.product (removed in NumPy 2.0);
    # int() hands the layer a plain Python integer rather than a NumPy scalar.
    n_dense2 = int(np.prod(dim_reconstruct[1:]))
    dense2 = Dense(n_dense2, activation="selu", name="dense2")(output)
    reshape2 = Reshape(dim_reconstruct[1:], name="reshape2")(dense2)
    conv3 = Conv2DTranspose(filters=n_filter * 2,
                            kernel_size=(1, 1),
                            strides=(1, 1),
                            activation="selu",
                            bias_initializer=bias_init,
                            name="conv3")(reshape2)
    conv4 = Conv2DTranspose(filters=n_filter,
                            kernel_size=(input_shape[1], 1),
                            strides=(1, 1),
                            activation="selu",
                            bias_initializer=bias_init,
                            name="conv4")(conv3)

    if sign == "pos":
        reconstructed_activation = "sigmoid"
    elif sign == "neg":
        reconstructed_activation = "tanh"
    else:
        # Previously an unknown sign left reconstructed_activation unbound and
        # failed a few lines later with an UnboundLocalError.
        raise ValueError("Unexpected sign: %s" % sign)

    reconstructed_input = Conv2DTranspose(filters=input_shape[-1],
                                          kernel_size=(1, input_shape[1]),
                                          strides=(1, 1),
                                          activation=reconstructed_activation,
                                          bias_initializer=bias_init,
                                          name='autoencoder')(conv4)

    if mode == "autoencoder":
        model = keras.models.Model(inputs=input_1, outputs=[output, reconstructed_input])
    elif mode == "encoder":
        model = keras.models.Model(inputs=input_1, outputs=encoded)
    elif mode == "decoder":
        model = keras.models.Model(inputs=input_1, outputs=reconstructed_input)
    else:
        raise ValueError("Unexpected mode: %s" % mode)
    return model
model = ConnectomeCNNAutoencoder((32, 32, 3), n_filter=30, n_dense1=65)
guys
I am a beginner in Ruby, and while practising I wrote a small music script. One point has me stuck: when I type Scale.major_by_note('C') in irb everything is fine, but Scale.major_by_note('C#') doesn't work; for it to work I have to pass "C#/Db". Please help me make it work with "C", "C#" and "C#/Db" alike. Thank you! The script is below:
class Scale
NATURAL = %w[C D E F G A B].freeze
ACCIDENT = %w[C# Db D# Eb F# Gb G# Ab A# Bb].freeze
CHROMATIC = %w[C C#/Db D D#/Eb E F F#/Gb G G# A A#/Bb B].freeze
SCALE_MAJOR_PATTERN = [0, 2, 4, 5, 7, 9, 11, 12].freeze # T T st T T T st
SCALE_MINOR_PATTERN = [0, 2, 3, 5, 7, 8, 10, 12].freeze # T st T T st T T
def self.show_all_scales(note)
major = Scale.major_by_note(note)
minor = Scale.minor_by_note(note)
all = { major: major, minor: minor}
end
def self.major_by_note(note)
major_note_index = CHROMATIC.index(note)
SCALE_MAJOR_PATTERN.map do |major_interval| # Interação
major_scale_note_index = major_note_index + major_interval
if major_scale_note_index <= (CHROMATIC.length - 1)
CHROMATIC[major_scale_note_index]
else
reseted_major_scale_note_index = major_scale_note_index - CHROMATIC.length
CHROMATIC[reseted_major_scale_note_index]
end
end
end
def self.minor_by_note(note)
minor_note_index = CHROMATIC.index(note)
SCALE_MINOR_PATTERN.map do |minor_interval|
minor_scale_note_index = minor_note_index + minor_interval
if minor_scale_note_index <= (CHROMATIC.length - 1)
CHROMATIC[minor_scale_note_index]
else
reseted_minor_scale_note_index = minor_scale_note_index - CHROMATIC.length
CHROMATIC[reseted_minor_scale_note_index]
end
end
end
end
When you type
%w[C C#/Db D D#/Eb E F F#/Gb G G# A A#/Bb B]
Ruby is turning this into an Array of Strings:
["C", "C#/Db", "D", "D#/Eb", "E", "F", "F#/Gb", "G", "G#", "A", "A#/Bb", "B"]
Now while you know C# and Db are the same note, Ruby doesn't. It thinks the note in this case is called C#/Db. When it tries to find CHROMATIC.index("C#") it is returning nil because there is no C# in the Array.
A solution could be to write it like this:
CHROMATIC = %w[C C# D D# E F F# G G# A A# B].freeze
# Flat spellings mapped to the enharmonic sharp spelling stored in CHROMATIC.
# Ab is the same pitch as G# ("B#" is not an entry of CHROMATIC), and Bb is
# the same pitch as A#.
CHROMATIC_PAIR_MAP = {
  "Db" => "C#",
  "Eb" => "D#",
  "Gb" => "F#",
  "Ab" => "G#",
  "Bb" => "A#",
}.freeze
...
# Returns the position of +note+ in CHROMATIC. When the note is not listed
# there directly, its CHROMATIC_PAIR_MAP counterpart is looked up instead.
# Returns nil when neither lookup succeeds.
def self.index_of_note(note)
  direct = CHROMATIC.index(note)
  return direct if direct

  CHROMATIC.index(CHROMATIC_PAIR_MAP[note])
end
def self.major_by_note(note)
major_note_index = index_of_note(note)
Here I am making a new helper method that gets the index of the note either straight from the CHROMATIC array or by looking the note up in the CHROMATIC_PAIR_MAP Hash. It only performs the Hash lookup if CHROMATIC.index(note) returns nil.
This is what I get in the console (irb):
irb(main):191:0> Scale.major_by_note("C#")
=> ["C#", "D#", "F", "F#", "G#", "A#", "C", "C#"]
irb(main):192:0> Scale.major_by_note("Db")
=> ["C#", "D#", "F", "F#", "G#", "A#", "C", "C#"]
irb(main):193:0> Scale.major_by_note("D#")
=> ["D#", "F", "G", "G#", "A#", "C", "D", "D#"]
irb(main):194:0> Scale.major_by_note("Eb")
=> ["D#", "F", "G", "G#", "A#", "C", "D", "D#"]
The full new class:
# Builds major and minor scales by walking interval patterns over the
# twelve-note chromatic scale.
class Scale
  NATURAL = %w[C D E F G A B].freeze
  ACCIDENT = %w[C# Db D# Eb F# Gb G# Ab A# Bb].freeze
  CHROMATIC = %w[C C# D D# E F F# G G# A A# B].freeze
  SCALE_MAJOR_PATTERN = [0, 2, 4, 5, 7, 9, 11, 12].freeze # T T st T T T st
  SCALE_MINOR_PATTERN = [0, 2, 3, 5, 7, 8, 10, 12].freeze # T st T T st T T
  # Flat spellings mapped to the enharmonic sharp spelling stored in CHROMATIC.
  CHROMATIC_PAIR_MAP = {
    "Db" => "C#",
    "Eb" => "D#",
    "Gb" => "F#",
    "Ab" => "G#",
    "Bb" => "A#",
  }.freeze

  # Returns both scales for +note+ as { major: [...], minor: [...] }.
  # Raises ArgumentError when the note is not recognised.
  def self.show_all_scales(note)
    { major: major_by_note(note), minor: minor_by_note(note) }
  end

  # Returns the eight notes of the major scale starting on +note+.
  # Raises ArgumentError when the note is not recognised.
  def self.major_by_note(note)
    scale_by_pattern(note, SCALE_MAJOR_PATTERN)
  end

  # Returns the eight notes of the minor scale starting on +note+.
  # Raises ArgumentError when the note is not recognised.
  def self.minor_by_note(note)
    scale_by_pattern(note, SCALE_MINOR_PATTERN)
  end

  # Returns the index of +note+ in CHROMATIC, or nil when it is unknown.
  # Accepts a sharp or natural name ("C#"), a flat name ("Db") or the combined
  # form "C#/Db"; for the combined form the first spelling that resolves wins.
  def self.index_of_note(note)
    note.to_s.split("/").each do |spelling|
      index = CHROMATIC.index(spelling) || CHROMATIC.index(CHROMATIC_PAIR_MAP[spelling])
      return index if index
    end
    nil
  end

  # Maps each interval of +pattern+ onto CHROMATIC, starting at +note+.
  def self.scale_by_pattern(note, pattern)
    root = index_of_note(note)
    raise ArgumentError, "unknown note: #{note.inspect}" if root.nil?

    # The modulo wraps indices that run past the end of the chromatic scale.
    pattern.map { |interval| CHROMATIC[(root + interval) % CHROMATIC.length] }
  end
  private_class_method :scale_by_pattern
end
I have a dataset composed by welds and masks (white for weld and black for background), although I need to use Mask R-CNN so I have to convert them to COCO dataset annotation. Does anybody have any suggestions on how to do this?
I tried this one: https://github.com/chrise96/image-to-coco-json-converter
but I'm getting this error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-3-0ddc235b1528> in <module>
94
95 # Create images and annotations sections
---> 96 coco_format["images"], coco_format["annotations"], annotation_cnt = images_annotations_info(mask_path)
97
98 with open("output/{}.json".format(keyword),"w") as outfile:
<ipython-input-3-0ddc235b1528> in images_annotations_info(maskpath)
57 sub_masks = create_sub_masks(mask_image_open, w, h)
58 for color, sub_mask in sub_masks.items():
---> 59 category_id = category_colors[color]
60
61 # "annotations" info
KeyError: '(1, 1, 1)'
Here is the code; I've only added the weld category:
import glob
from src.create_annotations import *
# Label ids of the dataset
category_ids = {
"outlier": 0,
"window": 1,
"wall": 2,
"balcony": 3,
"door": 4,
"roof": 5,
"sky": 6,
"shop": 7,
"chimney": 8,
"weld": 9,
}
# Define which colors match which categories in the images
category_colors = {
"(0, 0, 0)": 0, # Outlier
"(255, 0, 0)": 1, # Window
"(255, 255, 0)": 2, # Wall
"(128, 0, 255)": 3, # Balcony
"(255, 128, 0)": 4, # Door
"(0, 0, 255)": 5, # Roof
"(128, 255, 255)": 6, # Sky
"(0, 255, 0)": 7, # Shop
"(128, 128, 128)": 8, # Chimney
"(255, 255, 255)": 9 # Weld
}
# Define the ids that are a multiplolygon. In our case: wall, roof and sky
multipolygon_ids = [9, 2, 5, 6]
# Get "images" and "annotations" info
def images_annotations_info(maskpath):
    """Build the "images" and "annotations" lists for every ``*.png`` mask under ``maskpath``.

    ``maskpath`` is concatenated directly with ``"*.png"``, so it is expected
    to end with a path separator.

    Returns:
        ``(images, annotations, annotation_id)``; ``annotation_id`` starts at 0
        and is incremented once per appended annotation, so it equals the
        number of annotations created.

    Raises:
        KeyError: if a mask contains a colour that has no entry in
            ``category_colors``; the message names the mask file and the colour.
    """
    # These ids are increased automatically as we go
    annotation_id = 0
    image_id = 0
    annotations = []
    images = []

    for mask_image in glob.glob(maskpath + "*.png"):
        # The mask image is *.png but the original image is *.jpg.
        # We make a reference to the original file in the COCO JSON file.
        # splitext drops only the final extension, so a name such as
        # "weld.01.png" maps to "weld.01.jpg" instead of "weld.jpg".
        original_file_name = os.path.splitext(os.path.basename(mask_image))[0] + ".jpg"

        # Open the image and (to be sure) convert it to RGB
        mask_image_open = Image.open(mask_image).convert("RGB")
        w, h = mask_image_open.size

        # "images" info
        image = create_image_annotation(original_file_name, w, h, image_id)
        images.append(image)

        sub_masks = create_sub_masks(mask_image_open, w, h)
        for color, sub_mask in sub_masks.items():
            try:
                category_id = category_colors[color]
            except KeyError:
                # Same exception type as before, but it now says which file and
                # which colour need an entry in category_colors.
                raise KeyError(
                    "mask %s contains colour %s, which has no entry in category_colors"
                    % (mask_image, color)
                ) from None

            # "annotations" info
            polygons, segmentations = create_sub_mask_annotation(sub_mask)

            # Check if we have classes that are a multipolygon
            if category_id in multipolygon_ids:
                # Combine the polygons to calculate the bounding box and area
                multi_poly = MultiPolygon(polygons)
                annotation = create_annotation_format(multi_poly, segmentations, image_id, category_id, annotation_id)
                annotations.append(annotation)
                annotation_id += 1
            else:
                for polygon in polygons:
                    # Cleaner to recalculate this variable
                    segmentation = [np.array(polygon.exterior.coords).ravel().tolist()]
                    annotation = create_annotation_format(polygon, segmentation, image_id, category_id, annotation_id)
                    annotations.append(annotation)
                    annotation_id += 1
        image_id += 1
    return images, annotations, annotation_id
if __name__ == "__main__":
# Get the standard COCO JSON format
coco_format = get_coco_json_format()
for keyword in ["train", "val"]:
mask_path = "dataset/{}_mask/".format(keyword)
# Create category section
coco_format["categories"] = create_category_annotation(category_ids)
# Create images and annotations sections
coco_format["images"], coco_format["annotations"], annotation_cnt = images_annotations_info(mask_path)
with open("output/{}.json".format(keyword),"w") as outfile:
json.dump(coco_format, outfile)
print("Created %d annotations for images in folder: %s" % (annotation_cnt, mask_path))
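Check that (255, 255, 255) is really the value of the object pixels in the mask: the KeyError '(1, 1, 1)' means the converter found pixels with the value 1, which has no entry in category_colors.
Also check the bit depth of the masks; it must be the same for all of them.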
I have an array of items with prices, but when an item goes on sale a new item is created. How do I merge the two into one array, so that the sale item replaces the non-sale item but still carries the original price?
Example:
items=[{"id": 123, "price": 100, "sale": false},{"id":456,"price":25,"sale":false},{"id":678, "price":75, "sale":true, "parent_price_id":123}]
Transform into:
items=[{"id":456,"price":25,"sale":false},{"id":678, "price":75, "sale":true, "parent_price_id":123, "original_price": 100}]
It's not the prettiest solution, but here's one way you can do it. I added a minitest spec to check it against the values you provided and it gives the answer you're hoping for.
require "minitest/autorun"
# Folds each parent price into the sale item that refers to it.
#
# Every item that has a :parent_price key is returned as a copy with
# :original_price set to the parent's :price, and every item referenced as a
# parent is removed from the result. Neither the input array nor its hashes
# are modified.
#
# @param prices [Array<Hash>] items with :id and :price, optionally :parent_price
# @return [Array<Hash>] the merged items, in their original relative order
# @raise [KeyError] if a :parent_price refers to an id that is not in +prices+
def merge_prices(prices)
  # Map each ID to its item
  price_map = prices.each_with_object({}) { |price, map| map[price[:id]] = price }
  parent_ids = prices.select { |price| price.key?(:parent_price) }
                     .map { |price| price[:parent_price] }

  # Build a new array rather than deleting from the one being iterated:
  # deleting mid-iteration shifts the remaining elements and skips items.
  prices.reject { |price| parent_ids.include?(price[:id]) }.map do |price|
    next price unless price.key?(:parent_price)

    parent = price_map.fetch(price[:parent_price]) do |id|
      raise KeyError, "parent price #{id.inspect} not found for item #{price[:id].inspect}"
    end
    price.merge(original_price: parent[:price])
  end
end
# Checks merge_prices against the example from the question.
describe "Merge prices" do
  it "should work" do
    input = [
      {"id":123, "price": 100, "sale": false},
      {"id":456,"price":25,"sale": false},
      {"id":678, "price":75, "sale": true, "parent_price":123}
    ].freeze
    expected_output = [
      {"id":456,"price":25,"sale": false},
      {"id":678, "price":75, "sale": true, "parent_price":123, "original_price": 100}
    ].freeze
    # assert_equal takes the expected value first, so a failure message labels
    # the two sides correctly.
    assert_equal(expected_output, merge_prices(input))
  end
end
Let's begin by defining items in an equivalent, but more familiar, way:
items = [
[{:id=>123, :price=>100, :sale=>false}],
[{:id=>456, :price=>25, :sale=>false}],
[{:id=>678, :price=>75, :sale=>true, :parent_price=>123}]
]
with the desired return value being:
[
{:id=>456, :price=>25, :sale=>false},
{:id=>678, :price=>75, :sale=>true, :parent_price=>123,
:original_price=>100}
]
I assume that h[:sale] #=> false for every element of items (a hash) g for which g[:parent_price] = h[:id].
A convenient first step is to create the following hash.
h = items.map { |(h)| [h[:id], h] }.to_h
#=> {123=>{:id=>123, :price=>100, :sale=>false},
# 456=>{:id=>456, :price=>25, :sale=>false},
# 678=>{:id=>678, :price=>75, :sale=>true, :parent_price=>123}}
Then:
h.keys.each { |k| h[k][:original_price] =
h.delete(h[k][:parent_price])[:price] if h[k][:sale] }
#=> [123, 456, 678] (not used)
h #=> {456=>{:id=>456, :price=>25, :sale=>false},
# 678=>{:id=>678, :price=>75, :sale=>true, :parent_price=>123,
# :original_price=>100}}
Notice that Hash#delete returns the value of the deleted key.
The last two steps are to extract the values from this hash and replace items with the resulting array of hashes:
items.replace(h.values)
#=> [{:id=>456, :price=>25, :sale=>false},
# {:id=>678, :price=>75, :sale=>true, :parent_price=>123,
# :original_price=>100}]
See Array#replace.
If desired we could combine these steps as follows.
items.replace(
items.map { |(h)| [h[:id], h] }.to_h.tap do |h|
h.keys.each { |k| h[k][:original_price] =
h.delete(h[k][:parent_price])[:price] if h[k][:sale] }
end.values)
#=> [{:id=>456, :price=>25, :sale=>false},
# {:id=>678, :price=>75, :sale=>true, :parent_price=>123,
# :original_price=>100}]
See Object#tap.
I'm new to Ruby. I have a series of arrays with two strings each:
["[[\"Wayfair \", \"57\"]]", "[[\"Move24 \", \"26\"]]",
"[[\"GetYourGuide \", \"25\"]]", "[[\"Visual Meta \", \"22\"]]",
"[[\"FinLeap \", \"20\"]]", "[[\"Movinga \", \"20\"]]",
"[[\"DCMN \", \"19\"]]", ...
I am trying to convert the string with the number of each array into an integer, but I get something else than I expect:
companies = companies.map do |company|
c = company[0].scan(/(.+)\((\d+)\)/).inspect
[c[0], c[1].to_i]
end
puts:
["[", 0], ["[", 0], ["[", 0], ["[", 0], ["[", 0], ["[", 0],
["[", 0], ["[", 0], ["[", 0], ["[", 0], ["[", 0]]
I am expecting:
["Wayfair", 57], ["Move24", 26], ["GetYourGuide", 25], ...
please help?
Full code :
require 'net/http'
require 'uri'
uri = URI('http://berlinstartupjobs.com/') #URI takes just one url
req = Net::HTTP::Get.new(uri) #get in URI
req['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36' #use this header
res = Net::HTTP.start(uri.hostname, uri.port) {|http| http.request(req)} # URI documentation
puts res.code #status code
puts res.body
puts res.body.scan('<a href="http://berlinstartupjobs.com/companies/') #scan in the body of the document files that match a href=...
puts res.body.scan(/<a href="http:\/\/berlinstartupjobs\.com\/companies\/[^\s]+ class="tag-link">(.*)<\/a>/) #scan
companies = res.body.scan(/<a href="http:\/\/berlinstartupjobs\.com\/companies\/[^\s]+ class="tag-link">(.*)<\/a>/)
companies = companies.map do |company|
c = company[0].scan(/(.+)\((\d+)\)/).inspect
[c[0], c[1].to_i]
end # do ... end = { }
puts companies.inspect
Your code was mostly ok. Just drop that .inspect at the end. It returns a string, not array.
# this is what you get from the scraping.
companies = [["Wayfair (57)"], ["Move24 (26)"], ["GetYourGuide (25)"]]
companies = companies.flatten.map do |company|
  # scan returns [[name, count]] for a match; flatten unwraps the single pair.
  name, count = company.scan(/(.+)\((\d+)\)/).flatten
  # strip removes the space that precedes "(" in the scraped text.
  [name&.strip, count.to_i]
end
p companies
# >> [["Wayfair", 57], ["Move24", 26], ["GetYourGuide", 25]]
You can achieve this by using Enumerable#map & parsing each element using JSON.parse:
require 'json'
# Each element is a JSON string such as "[[\"Wayfair \", \"57\"]]": parse it,
# unwrap the single [name, count] pair, then trim the name and convert the count.
companies.map do |elem|
  name, count = JSON.parse(elem).flatten
  [name.strip, count.to_i]
end
Instead of JSON.parse you also can use eval, but using eval is considered to be a bad practice.
arr = ["[[\"Wayfair \", \"57\"]]", "[[\"Move24 \", \"26\"]]"]
# Every string parses to an array of [name, count] pairs; flat_map joins the
# pairs from all strings into one list.
result = arr.flat_map do |e|
  JSON.parse(e).map { |name, value| [name.strip, value.to_i] }
end
OUTPUT:
[["Wayfair", 57], ["Move24", 26]]