Duplicated output when scraping in r using loops - loops

I'm new to R and I'm having some trouble with scraping. Each page has 60 products and I'm trying to get the first 3 pages, so 180 rows. The problem is that I do get 180 rows, but they are basically the same information from the first page repeated 3 times. I'm using rvest and dplyr.
My code here:
fragance_data = data.frame()
for (i in seq(from = 0, to = 3)) {
fragance_url = paste0("https://www.cultbeauty.co.uk/fragrance.list?pageNumber=", i)
fragance_page = read_html(fragance_url)
links = fragance_page %>% html_nodes("a.productBlock_link") %>%
html_attr("href") %>% paste("https://www.cultbeauty.co.uk", ., sep="") %>% unique()
name <- html_nodes(fragance_page, ".productBlock_productName")
name<- html_text(name)
rating <- html_node(html_nodes(fragance_page, ".productBlock"), "span.productBlock_ratingValue")
rating <- html_text(rating)
price <- html_node(html_nodes(fragance_page, ".productBlock"), ".productBlock_priceValue")
price <- html_text(price)
get_review = function(links) {
pages = read_html(links)
review = html_node(html_nodes(pages,".athenaProductReviews_topReviewSingle"),
".athenaProductReviews_topReviewsExcerpt") %>%
html_text() %>% paste(collapse = ",")
return(review)
}
review = sapply(links, FUN = get_review, USE.NAMES = FALSE)
get_rating = function(links) {
pages = read_html(links)
rating_single = html_node(html_nodes(pages,".athenaProductReviews_topReviewSingle"),
".athenaProductReviews_topReviewsRatingStarsContainer") %>%
html_attr(., "title") %>% paste(collapse = ",")
return(rating_single)
}
rating_single = sapply(links, FUN = get_rating, USE.NAMES = FALSE)
get_title = function(links) {
pages = read_html(links)
title = html_node(html_nodes(pages,".athenaProductReviews_topReviewSingle"),
".athenaProductReviews_topReviewTitle") %>%
html_text() %>% paste(collapse = ",")
return(title)
}
title = sapply(links, FUN = get_title, USE.NAMES = FALSE)
print(i)
fragance_data = rbind (fragance_data , data.frame(links, name, rating, price, review, rating_single, title, stringsAsFactors = FALSE))
}
Could someone help me identify what I'm doing wrong and how can i get the total rows without duplicated values?

Page 0 and page 1 return the same content, so start from page 1.
The following collects the product links for the first three pages.
library(rvest)
library(dplyr)

# Collect the product links from listing pages 1 to 3.
# Start from an empty character vector so the result is a plain vector of URLs
# regardless of what `fragance_data` held before this snippet ran.
fragance_data = character()
for (i in seq(from = 1, to = 3)) {
  fragance_url = paste0("https://www.cultbeauty.co.uk/fragrance.list?pageNumber=", i)
  # Prefix the host to every href and drop links repeated within the same page.
  url = fragance_url %>% read_html() %>% html_nodes("a.productBlock_link") %>%
    html_attr("href") %>% paste("https://www.cultbeauty.co.uk", ., sep="") %>% unique()
  fragance_data = c(fragance_data, url)
}

Related

How to take all text field values to temporary array in swift

I'm fairly new to Swift.
I created a number of sections based on the user's input, and now I want to get all the data from the text fields.
1 section contains 4 rows (4 text fields).
Next I want to do something with the data from each section.
Here's what the app look like:
Now as I mentioned before, the number of sections can be different based on user request.
Here I have created Struct and temporary Array like so:
struct LabelData {
var lbl1: Int
var lbl2: Int
var lbl3: Int
var lbl4: Int
}
var txtFieldArray = [LabelData]()
then in cellForRow I put this:
txtFieldValue?.addTarget(self, action: #selector(sectionTxtFieldDidChange), for: .editingChanged)
and here's the function:
var row1: Int = 0
var row2: Int = 0
var row3: Int = 0
var row4: Int = 0
/// Re-reads the text fields of section 0 whenever one of them changes and stores
/// the parsed values in `row1`...`row4` (0 when the text is not an integer).
@objc func sectionTxtFieldDidChange(_ sender: UITextField) {
    // A half-open range stays valid when the section has no rows;
    // `0...(count - 1)` would trap for an empty section.
    for rowIndex in 0..<tableView.numberOfRows(inSection: 0) {
        let indexPath = IndexPath(row: rowIndex, section: 0)
        // cellForRow(at:) returns nil for rows that are not visible, so skip
        // those instead of crashing on a forced cast.
        guard let cell = tableView.cellForRow(at: indexPath) as? CellOutlet else { continue }
        let value = Int(cell.txtFieldSpecNumb.text ?? "") ?? 0
        switch rowIndex {
        case 0: row1 = value
        case 1: row2 = value
        case 2: row3 = value
        case 3: row4 = value
        default: break
        }
    }
}
/// Packs the current `row1`...`row4` values into a `LabelData`, appends it to
/// `txtFieldArray` and prints the whole array.
@IBAction func btnShowResult(_ sender: UIButton) {
    let entry = LabelData(lbl1: row1, lbl2: row2, lbl3: row3, lbl4: row4)
    txtFieldArray.append(entry)
    // print array
    print(txtFieldArray)
}
with input in first section = 1,2,3,4;
it resulted just fine, like this:
[(lbl1: 1, lbl2: 2, lbl3: 3, lbl4: 4)]
But that's for 1 section. what if I had more than 1 section.
In that case, I also put into var before I add it to LabelData and append it to Array.
How can I do this better?
Thanks
There are a lot of ways that could improve the provided code (e.g. creating a subclass of UITextField with an IndexPath property) but it would fall under opinion-based. To answer your specific question, I'd recommend getting rid of row1, row2, row3 & row4 as they are redundant (you already have txtFieldArray for data storage and access).
In your cellForRow, add the following line
cell.txtFieldValue?.tag = indexPath.section
Then, update your sectionTxtFieldDidChange to something like this
/// Copies the four text-field values of the edited field's section into
/// `txtFieldArray[section]`. The section is read from `sender.tag`, which
/// `cellForRow` sets to `indexPath.section`.
@objc func sectionTxtFieldDidChange(_ sender: UITextField) {
    let section = sender.tag
    // Nothing to update if the backing array has no entry for this section yet.
    guard txtFieldArray.indices.contains(section) else { return }
    // Half-open range: rows 0, 1, 2, 3 (assuming there are always 4 rows in a section).
    // `0...4` would also visit a non-existent fifth row.
    for rowIndex in 0..<4 {
        let indexPath = IndexPath(row: rowIndex, section: section)
        // cellForRow(at:) returns nil for rows that are not visible; skip them
        // instead of crashing on a forced cast.
        guard let cell = tableView.cellForRow(at: indexPath) as? CellOutlet else { continue }
        let value = Int(cell.txtFieldSpecNumb.text ?? "") ?? 0
        switch rowIndex {
        case 0: txtFieldArray[section].lbl1 = value
        case 1: txtFieldArray[section].lbl2 = value
        case 2: txtFieldArray[section].lbl3 = value
        case 3: txtFieldArray[section].lbl4 = value
        default: break
        }
    }
}
This way, each item in your txtFieldArray would represent each section where lbl1, lbl2 lbl3 & lbl4 of each item would represent the respective rows
NOTE: Make sure that txtFieldArray is initialised and has an entry for each of the section before you start editing the text fields
UPDATE: Based on the information provided in the comments, you can pre-populate your txtFieldArray as shown below, right after the user input has been stored in finalSectionNumber
// One zeroed entry per section: the half-open range yields exactly finalSectionNumber items (0...n would yield n + 1).
txtFieldArray = (0..<finalSectionNumber).map { _ in LabelData(lbl1: 0, lbl2: 0, lbl3: 0, lbl4: 0) }

How to merge 2 JSON Arrays in Groovy?

Trying to merge 2 Json arrays into 1 in groovy.
def branchTags = new JsonBuilder()
branchTags branches, { String branch ->
tag branch
type 'b'
}
println(branchTags.toString())
//generates [{"tag":"Branch","type":"b"},{"tag":"Branch1","type":"b"}]
def releaseTags = new JsonBuilder()
releaseTags releases, {String release ->
tag release
type 'r'
}
println(releaseTags.toString())
//generates [{"tag":"Release","type":"r"},{"tag":"Rel1","type":"r"}]
/*def newTags = new JsonBuilder()
branchTags.each {k,v -> newTags.}*/
def slurper = new JsonSlurper()
def input = slurper.parseText(branchTags.toString())
def res = slurper.parseText(releaseTags.toString())
def joined = [input, res].flatten()
println joined.toString()
//this generates [{"tag":"Branch","type":"b"},{"tag":"Branch1","type":"b"}][{"tag":"Release","type":"r"},{"tag":"Rel1","type":"r"}]
I need:
[
{"tag":"Branch","type":"b"},
{"tag":"Branch1","type":"b"},
{"tag":"Release","type":"r"},
{"tag":"Rel1","type":"r"}
]
TIA,
In your case, after parsing the JSON you have two arrays.
Just use + to concatenate the two arrays into one:
import groovy.json.*
def branchTags = '[{"tag":"Branch","type":"b"},{"tag":"Branch1","type":"b"}]'
def releaseTags = '[{"tag":"Release","type":"r"},{"tag":"Rel1","type":"r"}]'
def slurper = new JsonSlurper()
def bArr = slurper.parseText(branchTags)
def rArr = slurper.parseText(releaseTags)
def res = bArr+rArr
println new JsonBuilder(res).toString()

Swift - Joining two tuple arrays based on their values

I've a scenario where I have two arrays of tuples.
tuple1 = [(score1, index1), (score2, index2), (score3, index3)]
tuple2 = [(date1, index1), (date2, index2), (date3, index4)]
I want to get the scores and dates from these tuples and create a new array of tuples such that it contains the score and date having the same index like this:
tuple3 = [(score1, date1), (score2, date2)]
How can I implement this? What is the best practice to follow in this scenario?
Note: The arrays can be of different sizes
My implementation for the scenario is as follows:
var tuple3 = [(Double, Date)]()
for (i,psa) in tuple1.enumerated() {
let date = tuple2.filter({ $0.1 == i })
if date.count == 1 {
let newTuple = (tuple1.0, date[0].0)
tuple3.append(newTuple)
}
}
Is this a right way to do it or is there a better one?
You can try
let v1 = [("1","2"),("3","4")]
let v2 = [("1A","2A"),("3A","4A")]
let res = zip(v1,v2).map { ($0.0 , $1.0) } // [("1", "1A"), ("3", "3A")]
print(res)
let tuple1 = [("score1", "index1"), ("score2", "index2"), ("score3", "index3")]
let tuple2 = [("date1", "index1"), ("date2", "index2"), ("date3", "index4")]
let t2Dict = tuple2.reduce(into: [String:String]()) { (dict, args) in
let (date, index) = args
dict[index] = date
}
let tuple3 = tuple1.compactMap { args -> (String, String)? in
let (score, index) = args
guard let date = t2Dict[index] else { return nil }
return (score, date)
}
It's not as pretty as the others, but it's far more efficient to collapse one of the tuples into a dictionary first.
This should do the trick:
let tuple3 = tuple1.compactMap({ (scoreInTuple1, indexInTuple1) -> (String, String)? in
if let tupleIn2 = tuple2.first(where: { (scoreInTuple2, index2InTuple2) in index2InTuple2 == indexInTuple1 }){
return (scoreInTuple1, tupleIn2.0)
}
return nil
})
(String, String) should be changed to the real types of the score and the date.
Also, the comparison index2InTuple2 == indexInTuple1 may need to change if the index is a custom type that is not Equatable.
With sample code before:
let tuple1 = [("score1", "index1"), ("score2", "index2"), ("score3", "index3")]
let tuple2 = [("date1", "index1"), ("date2", "index2"), ("date3", "index4")]
Debug log after:
print("tuple3: \(tuple3)")
Output:
$> tuple3: [("score1", "date1"), ("score2", "date2")]
This might be what you're looking for:
let tuple1 = [("score1", "index1"), ("score2", "index2"), ("score3", "index3")]
let tuple2 = [("date1", "index1"), ("date2", "index2"), ("date3", "index4")]
// Extract the indexes of tuple2 once instead of rebuilding the array for every element of tuple1.
let indexesInTuple2 = tuple2.map { $0.1 }
let filtered = tuple1.filter { indexesInTuple2.contains($0.1) }
// Pair each remaining score with the first date that shares its index;
// compactMap avoids force-unwrapping the lookup.
let result: [(String, String)] = filtered.compactMap { tuple in
    tuple2.first(where: { $0.1 == tuple.1 }).map { (tuple.0, $0.0) }
}
print (result) // [("score1", "date1"), ("score2", "date2")]
I'm using Strings for simplicity, just make sure you are using Equatable objects in your tuples.

Swift display previous entered item in array

I am trying to display the previously entered exercise attributes in the current workout. If I go to the previous workout everything shows up, but when I go to my current workout the previous exercise attributes don't show and the date label only shows today's date and not the previous workout's date. Here are the two functions related to the issue. Let me know if I need to post more.
/// Returns the workout stored one position before the current workout in the
/// client's sorted workout list, or nil when the client, the current workout or
/// the workout set is unavailable, or when the current workout is at position 0.
/// NOTE(review): the list is sorted newest-first (`>`), so `index - 1` selects a
/// more recent workout than the current one, not an earlier one — confirm intent.
func lastWorkout() -> Workout? {
if let client = currentClient(), let currentWorkout = currentWorkout(), let workouts = client.workouts as? Set<Workout> {
// sort this client's workouts by scheduled time, newest first;
// workouts without a scheduled appointment are treated as 1970-01-01
let sortedWorkouts = workouts.sorted { (one, two) -> Bool in
let scheduledTimeOfOne = one.appointment?.scheduled ?? Date(timeIntervalSince1970: 0)
let scheduledTimeOfTwo = two.appointment?.scheduled ?? Date(timeIntervalSince1970: 0)
return scheduledTimeOfOne > scheduledTimeOfTwo
}
// get the index of this workout (falls back to 0 when it is not in the list)
let indexOfTodaysWorkout = sortedWorkouts.index(of: currentWorkout) ?? 0
// step one position towards the front of the list; nil when already at the front
let lastWorkout: Workout? = (indexOfTodaysWorkout - 1) < 0 ? nil : sortedWorkouts[indexOfTodaysWorkout - 1]
// and return
return lastWorkout
}
return nil
}
/// Looks up, among the exercises of `lastWorkout()`, the one whose `exerciseInfo`
/// equals the currently selected exercise info.
/// Returns nil when nothing is selected, when `lastWorkout()` yields no exercise
/// set, or when no exercise matches.
func lastExercise() -> Exercise? {
    guard let selectedExercise = currentExerciseInfo(),
          let previousExercises = lastWorkout()?.exercises as? Set<Exercise> else {
        return nil
    }
    return previousExercises.first { $0.exerciseInfo == selectedExercise }
}
It turned out the array indexing was off in the lastWorkout function. Here is what the working function looks like. I am still not displaying the proper date, though; it just shows today's date.
/// Returns the workout scheduled immediately before the current one, or nil when
/// the client, the current workout or the workout set is unavailable, or when no
/// earlier workout exists.
func lastWorkout() -> Workout? {
    guard let client = currentClient(),
          let currentWorkout = currentWorkout(),
          let workouts = client.workouts as? Set<Workout> else {
        return nil
    }
    // Sort newest first; workouts without a scheduled appointment are treated as 1970-01-01.
    let sortedWorkouts = workouts.sorted { one, two in
        let scheduledTimeOfOne = one.appointment?.scheduled ?? Date(timeIntervalSince1970: 0)
        let scheduledTimeOfTwo = two.appointment?.scheduled ?? Date(timeIntervalSince1970: 0)
        return scheduledTimeOfOne > scheduledTimeOfTwo
    }
    // Position of the current workout (falls back to 0 when it is not in the list).
    let indexOfTodaysWorkout = sortedWorkouts.index(of: currentWorkout) ?? 0
    // With newest-first ordering the previous workout sits one position later.
    // Checking the bound also covers lists with fewer than two workouts and the
    // case where the current workout is the oldest, which would otherwise index
    // past the end of the array.
    let previousIndex = indexOfTodaysWorkout + 1
    guard previousIndex < sortedWorkouts.count else { return nil }
    return sortedWorkouts[previousIndex]
}

How would I go about categorizing items within an Array in swift?

In swift, I want to categorize items in an existing array and place them accordingly in one new string.
Here is an example of what I want to do:
originalArray = ["hotdog","fries","hotdog","coke","coke","fries","hotdog"]
resultingString = "hotdog x 3, fries x 2, coke x 2"
How would I go about doing this?
Try this:
let originalArray = ["hotdog","fries","hotdog","coke","coke","fries","hotdog"]
// Tally the occurrences of each item. reduce(into:) mutates a single accumulator
// instead of relying on a captured outer variable.
let dict = originalArray.reduce(into: [String: Int]()) { counts, element in
    counts[element, default: 0] += 1
}
// Dictionary iteration order is unspecified, so the order of the items in the
// resulting string may vary between runs.
let resultString = dict
    .map { "\($0.key) x \($0.value)" }
    .joined(separator: ", ")
If you want to keep the original order of the array (ie: hotdog, fries, coke), the code is slightly more complicated:
let originalArray = ["hotdog","fries","hotdog","coke","coke","fries","hotdog"]
// For every item remember the position of its first occurrence together with
// its count, so the original order can be restored afterwards.
let dict = originalArray.enumerated()
    .reduce(into: [String: (index: Int, count: Int)]()) { tally, e in
        // The default is only used the first time an item is seen, which pins
        // `index` to the first occurrence.
        tally[e.element, default: (index: e.offset, count: 0)].count += 1
    }
let resultString = dict
    .sorted { $0.value.index < $1.value.index }
    .map { "\($0.key) x \($0.value.count)" }
    .joined(separator: ", ")
print(resultString)
I think this will help you:
let originalArray = ["hotdog","fries","hotdog","coke","coke","fries","hotdog"]
// Count how many times each item occurs.
var counts: [String: Int] = [:]
for item in originalArray {
    counts[item, default: 0] += 1
}
// Join with ", " so the string matches the documented output format.
// Dictionary order is unspecified, so the item order may vary.
let resultingString = counts
    .map { "\($0.key) x \($0.value)" }
    .joined(separator: ", ")
print(resultingString)
Here is the output: coke x 2, hotdog x 3, fries x 2

Resources