Helper to build data.table capable non-sql nodes.
rq_df_funciton_node(
.,
f,
...,
f_db = NULL,
columns_produced,
display_form,
orig_columns = FALSE
)
.: relop node or data.frame input.
f: function that takes a data.table to a data.frame (or data.table).
...: forces later arguments to bind by name.
f_db: database implementation with signature f_db(db, incoming_table_name, outgoing_table_name, nd, ...) (db being a database handle). NULL defaults to using f.
columns_produced: character vector of the column names produced by f.
display_form: display form for node.
orig_columns: if TRUE, assume all input columns are present in derived table.
Returns: relop non-sql node implementation.
# a node generator is something an expert can
# write and part-time R users can use.
# Node generator: attaches a non-SQL step that fits lm(yvar ~ xvar) separately
# within each value of group_col and stacks the per-group coefficient tables.
#
# Arguments:
#   .          incoming value the new node is attached to; handed unchanged to
#              rq_df_funciton_node() as its first argument.
#   group_col  name of the column whose values define the groups.
#   xvar       right-hand side of the model formula (pasted after "~").
#   yvar       left-hand side of the model formula (pasted before "~").
# group_col, xvar and yvar must each be a single, non-NA character string.
#
# Returns whatever rq_df_funciton_node() returns for the constructed function.
grouped_regression_node <- function(.,
                                    group_col = "group",
                                    xvar = "x",
                                    yvar = "y") {
  # Validating also forces the arguments, so the closures below capture the
  # values supplied now. A vector of names is rejected here: it would
  # otherwise produce a vector display_form and a malformed column list.
  stopifnot(
    is.character(group_col), length(group_col) == 1L, !is.na(group_col),
    is.character(xvar), length(xvar) == 1L, !is.na(xvar),
    is.character(yvar), length(yvar) == 1L, !is.na(yvar)
  )

  # The model formula is identical for every group, so build it once.
  model_formula <- stats::as.formula(paste(yvar, "~", xvar))

  # Fit one group's model and return its coefficient table as a data.frame,
  # with the coefficient names and the group value moved into columns.
  fit_group <- function(di) {
    mi <- stats::lm(model_formula, data = di)
    ci <- as.data.frame(summary(mi)$coefficients)
    ci$Variable <- rownames(ci)
    rownames(ci) <- NULL
    # all rows of di share one group value (di is one piece of the split)
    ci[[group_col]] <- di[[group_col]][[1]]
    ci
  }

  # Function run by the node: split by group, fit each piece, stack results.
  # nd is accepted but not used.
  f <- function(df, nd = NULL) {
    dlist <- split(df, df[[group_col]])
    data.table::rbindlist(lapply(dlist, fit_group))
  }

  # Names of the columns f returns: the four summary(lm) coefficient columns,
  # the added Variable column, and the grouping column.
  columns_produced <-
    c("Variable", "Estimate", "Std. Error", "t value", "Pr(>|t|)", group_col)

  rq_df_funciton_node(
    ., f,
    columns_produced = columns_produced,
    display_form = paste0(yvar, "~", xvar, " grouped by ", group_col)
  )
}
# Worked example: fit y ~ x separately within each group of a simulated data set.
# Fix the RNG seed so the simulated data, and the output recorded below, is
# reproducible. The draw order (x, then y, then group) matters for that.
set.seed(3265)
# 1000 rows: two independent standard-normal columns plus a group label
# sampled with replacement from "a".."e".
d <- data.frame(x = rnorm(1000),
y = rnorm(1000),
group = sample(letters[1:5], 1000, replace = TRUE),
stringsAsFactors = FALSE)
# Build the pipeline: a description of table d followed by the grouped
# regression node, using its default column names ("group", "x", "y").
rquery_pipeline <- local_td(d) %.>%
grouped_regression_node(.)
# Print the pipeline definition; the node shows up under its display_form.
cat(format(rquery_pipeline))
#> mk_td("d", c(
#> "x",
#> "y",
#> "group")) %.>%
#> non_sql_node(., y~x grouped by group)
# Apply the pipeline to the data.frame: two coefficient rows
# ((Intercept) and x) for each of the five groups.
d %.>% rquery_pipeline
#> Estimate Std. Error t value Pr(>|t|) Variable group
#> 1 0.05921097 0.06246165 0.9479572 0.34421552 (Intercept) a
#> 2 -0.02301646 0.06093971 -0.3776924 0.70603174 x a
#> 3 0.09793586 0.06666844 1.4689988 0.14335117 (Intercept) b
#> 4 0.05703537 0.06963630 0.8190466 0.41370179 x b
#> 5 -0.05184909 0.07556010 -0.6861967 0.49348193 (Intercept) c
#> 6 0.05554476 0.08019680 0.6926057 0.48945965 x c
#> 7 0.15331654 0.07004124 2.1889469 0.02985964 (Intercept) d
#> 8 0.02056881 0.06921107 0.2971896 0.76665700 x d
#> 9 0.02250647 0.06919627 0.3252556 0.74531773 (Intercept) e
#> 10 -0.08785792 0.06864886 -1.2798162 0.20204920 x e