Google analytics organic search loop problems - loops

The loop functions completely different and I'm not sure if it is because of the google analytics package because very little is different between the code.
Doesn't WORK difference in print statement output shows results are not coming out correctly.
library(googleAnalyticsR)
library(tidyverse)
#settings
start_dat <- as.character(Sys.Date()-31)
end_dat <- as.character(Sys.Date()-1)
#Authorize Google Analytics R- this will open a webpage
#You must be logged into your Google Analytics account on your web browser
ga_auth()
account_sum <- ga_account_list()
#Add the start and end date to the date frame, as well as some columns to use to populate the metrics
account_sum$start_dat <- start_dat
account_sum$end_dat <- end_dat
## choose the v3 segment
segment_for_call <- "gaid::-5"
## make the v3 segment object in the v4 segment object:
seg_ob <- segment_ga4("OrganicTraffic", segment_id = segment_for_call)
# cycle through the list of views, pull the data, and add it to the
#account_summary
for (i in 1:5){
view_id <- (Book1CSV[[1]][i])
views=view_id
ga_dat <- google_analytics_4(views,
date_range = c(start_dat, end_dat),
segments = seg_ob,
metrics = c("sessions", "pageviews"),
dimensions = c("year","segment"))
ga_dat <- summarise(ga_dat,
sessions = sum(sessions),
pageviews = sum(pageviews))
account_sum$sessions[i] <- ga_data$sessions
account_sum$pageviews[i] <- ga_data$pageviews
print(account_summary)
}
clean_sum <- select(account_sum,
ID = webPropertyId ,
Account = accountName,
Views = views,
Type = type,
Level = level,
'Start Date' = start_dat,
'End Date' = end_dat,
Sessions = sessions,
Pageviews = pageviews)
write.csv (ga_dat, "doesntwork.csv", row.names = TRUE)
THIS WORKS!!!!!!!!!!!!!!! Print statement prints code
library(googleAnalyticsR)
library(tidyverse)
#settings
start_date <- as.character(Sys.Date()-31)
end_date <- as.character(Sys.Date()-1)
metrics <- c("sessions", "pageviews")
dimensions <- "year"
#Authorize Google Analytics R- this will open a webpage
#You must be logged into your Google Analytics account on your web browser
ga_auth()
account_summary <- ga_account_list()
#Add the start and end date to the date frame, as well as some columns to use to populate the metrics
account_summary$start_date <- start_date
account_summary$end_date <- end_date
# cycle through the list of views, pull the data, and add it to the
#account_summary
for (i in 1:6){
view_id <- (Book1CSV[[1]][i])
ga_data <- google_analytics_4(viewId = view_id,
date_range = c(start_date,end_date),
metrics = metrics,
dimensions = dimensions)
# This query might return multiple rows (if it spans a year boundary), so
#collapse and clean up
ga_data <- summarise(ga_data,
sessions = sum(sessions),
pageviews = sum(pageviews))
#add the totals to the account summary
account_summary$sessions[i] <- ga_data$sessions
account_summary$pageviews[i] <- ga_data$pageviews
print(account_summary)
}
# Make a more compact set of data
clean_summary <- select(account_summary,
Account = accountName,
View = viewId,
Type = type,
Level = level,
'Start Date' = start_date,
'End Date' = end_date,
Sessions = sessions,
Pageviews = pageviews,
ID = webPropertyId)
select
write.csv (clean_summary, "worksfine.csv", row.names = FALSE)
Can't really understand whats going wrong here. Would like some detailed help. I have uploaded a file called Book1CSV as I couldn't get a try-catch statement that would work to catch the error. Google merchant store beta account was causing the crash.

This is an anti-R pattern, modifying an object in a loop. Its better to return a list of data.frames, then merge then at the end and create your summary.
So something like:
my_view_ids <- c(12345,233445,232434)
## fetch data, create a list of
all_data <- lapply(my_view_ids, function(id){
one_data <- tryCatch(google_analytics_4(viewId = id,
date_range = c(start_date,end_date),
metrics = metrics,
dimensions = dimensions),
error = function(ex){message(ex); NULL})
one_data$viewId <- id
one_data
})
## Reduce the list of data.frames to one data.frame
all_dataframe <- Reduce(rbind, all_data)
### make your summary etc.

Related

Fetching Data from SQL Server Through Shiny Application on Date selection bases

Hey Every One i am developing a Shiny Application, where we Extract a data from sql Server through ODBC Connector by selecting Date to and from in a Application. i am unable to identify where the issue is because if i execute the code independently on R studio i am able to extract the data from sql Server But then when the same code is executed in Shiny Environment i am unable to achieve the data on shiny here is the below Kindly Guide me on this Thank you.
# ---------------------ui Code -----------------------------
library(shiny)
shinyUI(pageWithSidebar(
headerPanel("Time Analytics"),
sidebarPanel(
dateRangeInput(inputId = "dateRange",
label = "Date range",
start = "2007-09-17",
max = Sys.Date()
)
),#sidebar Panel Ends
# 09-Main Panel ----
mainPanel(
tabsetPanel(id ="theTabs",
tabPanel("Summary", dataTableOutput("tabi"),textOutput("tabii"))
)
)#Main Panel Ends
))
#------------------Server ----------------------------------
library(shiny);library(sqldf)
library(plyr);library(RODBC)
library(ggplot2)
#Creating the connection
shinyServer(function(input, output, session){ # pass in a session argument
# prep data once and then pass around the program
passData <- reactive({
ch = odbcConnect("Test")
#qry <- "SELECT * FROM Nifty50"
#qry <- cat("SELECT * FROM Nifty50 WHERE Date >= ",as.date(input$dateRange[1])," AND Date <= ",input$dateRange[2])
qry <- paste("SELECT * FROM Nifty50 WHERE Date >= ",input$dateRange[1]," AND Date <= ",input$dateRange[2])
#paste("SELECT * FROM Nifty50 WHERE Date >= ",input$dateRange[1]," AND Date <= ",input$dateRange[2])
subset_Table <- sqlQuery(ch,qry)
odbcClose(ch)
subset_Table <- as.data.frame(subset_Table)
return(subset_Table)
})
output$tabi <- renderDataTable({
d<- as.data.frame(passData())
d
})
output$tabii <- renderText({
paste("Minimium Data :",input$dateRange[1], "Max Date:",input$dateRange[2])
})
# ----------------------------------------------------End
})
Here the task is i need to fetch the data from selected Table on the bases of Date to and From criteria, which will be the subset data as per the selected Date from shiny app.
Modify qry as follows:
qry <- paste("SELECT * FROM Nifty50 WHERE Date >= '", input$dateRange[1], "' AND Date <= '", input$dateRange[2], "'", sep = "")

T-SQL Temp Tables and Stored Procedures from R using RevoScaleR

In the example below, I was able to get the query to work with one exception. When I use q in place of source.query during the RxSqlServerData step, I get the error rxCompleteClusterJob Execution halted.
The first goal is to use a stored procedure in place of a longer query. Is this possible?
The second goal would be to create and call upon a #TEMPORARY table within the stored procedure. I'm wondering if that is possible, as well?
library (RODBC)
library (RevoScaleR)
sqlConnString <- "Driver=SQL Server;Server=SAMPLE_SERVER; Database=SAMPLE_DATABASE;Trusted_Connection=True"
sqlWait <- TRUE
sqlConsoleOutput <- FALSE
sql_share_directory <- paste("D:\\RWork\\AllShare\\", Sys.getenv("USERNAME"), sep = "")
sqlCompute <- RxInSqlServer(connectionString = sqlConnString, wait = sqlWait, consoleOutput = sqlConsoleOutput)
rxSetComputeContext(sqlCompute)
#This Sample Query Works
source.query <- paste("SELECT CASE WHEN [Order Date Key] = [Picked Date Key]",
"THEN 1 ELSE 0 END AS SameDayFulfillment,",
"[City Key] AS city, [STOCK ITEM KEY] AS item,",
"[PICKER KEY] AS picker, [QUANTITY] AS quantity",
"FROM [WideWorldImportersDW].[FACT].[ORDER]",
"WHERE [WWI ORDER ID] >= 63968")
#This Query Does Not
q <- paste("EXEC [dbo].[SAMPLE_STORED_PROCEDURE]")
inDataSource <- RxSqlServerData(sqlQuery=q, connectionString=sqlConnString, rowsPerRead=500)
order.logit.rx <- rxLogit(SameDayFulfillment ~ city + item + picker + quantity, data = inDataSource)
order.logit.rx
Currently, only T-SQL SELECT statements are allowed as input data-set, not stored procedures.

Google analytics API data to SQL via Python 2.7

Out of my depth here.
Have an assignment to download data from web and get it into SQL lite.
have pieced together some different code.
the key code is below. Suggestions on fixing how to get the data into the SQL table appreciated.
The API code works fine, It downloads a header row, and rows containing a country name and visitor numbers from that country. so its the SQL that i'm trying to write thats failing. No errors, just no data going in.
return service.data().ga().get(
ids='ga:' + profile_id,
start_date='2016-04-01',
end_date='today',
metrics='ga:users',
dimensions='ga:country').execute()
def print_results(results):
print()
print('Profile Name: %s' %results.get('profileInfo').get('profileName'))
print()
Print header.
output = []
for header in results.get('columnHeaders'):output.append('%30s' % header.get('name'))
print(''.join(output))
Print data table.
start databasing results
if results.get('rows', []):
for row in results.get('rows'):
output = []
for cell in row:output.append('%30s' % cell)
cur.execute('SELECT max(id) FROM Orign')
try:
row = cur.fetchone()
if row[0] is not None:start = row[0]
except:
start = 0
row = None
cur.execute('''INSERT OR IGNORE INTO Origin (id, Country, Users)
VALUES ( ?, ?, )''', ( Country, Users))
conn.commit()
cur.close()
print(''.join(output))
start = 0
else:
print('No Rows Found')
if __name__ == '__main__':
main(sys.argv)

How to pass data.frame for UPDATE with R DBI

With RODBC, there were functions like sqlUpdate(channel, dat, ...) that allowed you pass dat = data.frame(...) instead of having to construct your own SQL string.
However, with R's DBI, all I see are functions like dbSendQuery(conn, statement, ...) which only take a string statement and gives no opportunity to specify a data.frame directly.
So how to UPDATE using a data.frame with DBI?
Really late, my answer, but maybe still helpful...
There is no single function (I know) in the DBI/odbc package but you can replicate the update behavior using a prepared update statement (which should work faster than RODBC's sqlUpdate since it sends the parameter values as a batch to the SQL server:
library(DBI)
library(odbc)
con <- dbConnect(odbc::odbc(), driver="{SQL Server Native Client 11.0}", server="dbserver.domain.com\\default,1234", Trusted_Connection = "yes", database = "test") # assumes Microsoft SQL Server
dbWriteTable(con, "iris", iris, row.names = TRUE) # create and populate a table (adding the row names as a separate columns used as row ID)
update <- dbSendQuery(con, 'update iris set "Sepal.Length"=?, "Sepal.Width"=?, "Petal.Length"=?, "Petal.Width"=?, "Species"=? WHERE row_names=?')
# create a modified version of `iris`
iris2 <- iris
iris2$Sepal.Length <- 5
iris2$Petal.Width[2] <- 1
iris2$row_names <- rownames(iris) # use the row names as unique row ID
dbBind(update, iris2) # send the updated data
dbClearResult(update) # release the prepared statement
# now read the modified data - you will see the updates did work
data1 <- dbReadTable(con, "iris")
dbDisconnect(con)
This works only if you have a primary key which I created in the above example by using the row names which are a unique number increased by one for each row...
For more information about the odbc package I have used in the DBI dbConnect statement see: https://github.com/rstats-db/odbc
Building on R Yoda's answer, I made myself the helper function below. This allows using a dataframe to specify update conditions.
While I built this to run transaction updates (i.e. single rows), it can in theory update multiple rows passing a condition. However, that's not the same as updating multiple rows using an input dataframe. Maybe somebody else can build on this...
dbUpdateCustom = function(x, key_cols, con, schema_name, table_name) {
if (nrow(x) != 1) stop("Input dataframe must be exactly 1 row")
if (!all(key_cols %in% colnames(x))) stop("All columns specified in 'key_cols' must be present in 'x'")
# Build the update string --------------------------------------------------
df_key <- dplyr::select(x, one_of(key_cols))
df_upt <- dplyr::select(x, -one_of(key_cols))
set_str <- purrr::map_chr(colnames(df_upt), ~glue::glue_sql('{`.x`} = {x[[.x]]}', .con = con))
set_str <- paste(set_str, collapse = ", ")
where_str <- purrr::map_chr(colnames(df_key), ~glue::glue_sql("{`.x`} = {x[[.x]]}", .con = con))
where_str <- paste(where_str, collapse = " AND ")
update_str <- glue::glue('UPDATE {schema_name}.{table_name} SET {set_str} WHERE {where_str}')
# Execute ------------------------------------------------------------------
query_res <- DBI::dbSendQuery(con, update_str)
DBI::dbClearResult(query_res)
return (invisible(TRUE))
}
Where
x: 1-row dataframe that contains 1+ key columns, and 1+ update columns.
key_cols: character vector, of 1 or more column names that are the keys (i.e. used in the WHERE clause)
Here is a little helper function I put together using REPLACE INTO to update a table using DBI, replacing old duplicate entries with the new values. It's basic and for my own needs, but should be easy to modify. All you need to pass to the function is the connection, table name, and dataframe. Note that the table must have a PRIMARY KEY column. I've also included a simple working example.
row_to_list <- function(Y) suppressWarnings(split(Y, f = row(Y)))
sql_val <- function(y){
if(!is.numeric(y)){
return(paste0("'",y,"'"))
}else{
if(is.na(y)){
return("NULL")
}else{
return(as.character(y))
}
}
}
to_sql_row <- function(x) paste0("(",paste(do.call("c", lapply(x, FUN = sql_val)), collapse = ", "),")")
bracket <- function(x) paste0("`",x,"`")
to_sql_string <- function(x) paste0("(",paste(sapply(x, FUN = bracket), collapse = ", "),")")
replace_into_table <- function(con, table_name, new_data){
#new_data <- data.table(new_data)
cols <- to_sql_string(names(new_data))
vals <- paste(lapply(row_to_list(new_data), FUN = to_sql_row), collapse = ", ")
query <- paste("REPLACE INTO", table_name, cols, "VALUES", vals)
rs <- dbExecute(con, query)
return(rs)
}
tb <- data.frame("id" = letters[1:20], "A" = 1:20, "B" = seq(.1,2,.1)) # sample data
dbWriteTable(con, "test_table", tb) # create table
dbExecute(con, "ALTER TABLE test_table ADD PRIMARY KEY (id)") # set primary key
new_data <- data.frame("id" = letters[19:23], "A" = 1:5, "B" = seq(101,105)) # new data
new_data[4,2] <- NA # add some NA values
new_data[5,3] <- NA
table_name <- "test_table"
replace_into_table(con, "test_table", new_data)
result <- dbReadTable(con, "test_table")

Between query equivalent on App Engine datastore?

I have a model containing ranges of IP addresses, similar to this:
class Country(db.Model):
begin_ipnum = db.IntegerProperty()
end_ipnum = db.IntegerProperty()
On a SQL database, I would be able to find rows which contained an IP in a certain range like this:
SELECT * FROM Country WHERE ipnum BETWEEN begin_ipnum AND end_ipnum
or this:
SELECT * FROM Country WHERE begin_ipnum < ipnum AND end_ipnum > ipnum
Sadly, GQL only allows inequality filters on one property, and doesn't support the BETWEEN syntax. How can I work around this and construct a query equivalent to these on App Engine?
Also, can a ListProperty be 'live' or does it have to be computed when the record is created?
question updated with a first stab at a solution:
So based on David's answer below and articles such as these:
http://appengine-cookbook.appspot.com/recipe/custom-model-properties-are-cute/
I'm trying to add a custom field to my model like so:
class IpRangeProperty(db.Property):
def __init__(self, begin=None, end=None, **kwargs):
if not isinstance(begin, db.IntegerProperty) or not isinstance(end, db.IntegerProperty):
raise TypeError('Begin and End must be Integers.')
self.begin = begin
self.end = end
super(IpRangeProperty, self).__init__(self.begin, self.end, **kwargs)
def get_value_for_datastore(self, model_instance):
begin = self.begin.get_value_for_datastore(model_instance)
end = self.end.get_value_for_datastore(model_instance)
if begin is not None and end is not None:
return range(begin, end)
class Country(db.Model):
begin_ipnum = db.IntegerProperty()
end_ipnum = db.IntegerProperty()
ip_range = IpRangeProperty(begin=begin_ipnum, end=end_ipnum)
The thinking is that after i add the custom property i can just import my dataset as is and then run queries on based on the ListProperty like so:
q = Country.gql('WHERE ip_range = :1', my_num_ipaddress)
When i try to insert new Country objects this fails though, complaning about not being able to create the name:
...
File "/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/google/appengine/ext/db/__init__.py", line 619, in _attr_name
return '_' + self.name
TypeError: cannot concatenate 'str' and 'IntegerProperty' objects
I tried defining an attr_name method for the new property or just setting self.name but that does not seem to help. Hopelessly stuck or heading in the right direction?
Short answer: Between queries aren't really supported at the moment. However, if you know a priori that your range is going to be relatively small, then you can fake it: just store a list on the entity with every number in the range. Then you can use a simple equality filter to get entities whose ranges contain a particular value. Obviously this won't work if your range is large. But here's how it would work:
class M(db.Model):
r = db.ListProperty(int)
# create an instance of M which has a range from `begin` to `end` (inclusive)
M(r=range(begin, end+1)).put()
# query to find instances of M which contain a value `v`
q = M.gql('WHERE r = :1', v)
The better solution (eventually - for now the following only works on the development server due to a bug (see issue 798). In theory, you can work around the limitations you mentioned and perform a range query by taking advantage of how db.ListProperty is queried. The idea is to store both the start and end of your range in a list (in your case, integers representing IP addresses). Then to get entities whose ranges contain some value v (i.e., between the two values in your list), you simply perform a query with two inequality filters on the list - one to ensure that v is at least as big as the smallest element in the list, and one to ensure that v is at least as small as the biggest element in the list.
Here's a simple example of how to implement this technique:
class M(db.Model):
r = db.ListProperty(int)
# create an instance of M which has a rnage from `begin` to `end` (inclusive)
M(r=[begin, end]).put()
# query to find instances of M which contain a value `v`
q = M.gql('WHERE r >= :1 AND r <= :1', v)
My solution doesn't follow the pattern you have requested, but I think it would work well on app engine. I'm using a list of strings of CIDR ranges to define the IP blocks instead of specific begin and end numbers.
from google.appengine.ext import db
class Country(db.Model):
subnets = db.StringListProperty()
country_code = db.StringProperty()
c = Country()
c.subnets = ['1.2.3.0/24', '1.2.0.0/16', '1.3.4.0/24']
c.country_code = 'US'
c.put()
c = Country()
c.subnets = ['2.2.3.0/24', '2.2.0.0/16', '2.3.4.0/24']
c.country_code = 'CA'
c.put()
# Search for 1.2.4.5 starting with most specific block and then expanding until found
result = Country.all().filter('subnets =', '1.2.4.5/32').fetch(1)
result = Country.all().filter('subnets =', '1.2.4.4/31').fetch(1)
result = Country.all().filter('subnets =', '1.2.4.4/30').fetch(1)
result = Country.all().filter('subnets =', '1.2.4.0/29').fetch(1)
# ... repeat until found
# optimize by starting with the largest routing prefix actually found in your data (probably not 32)

Resources