Giter Club home page Giter Club logo

Comments (1)

andykrause avatar andykrause commented on August 22, 2024

'
#' Hedonic model approach with base estimator
#'
#' Use of base estimator in hedonic model approach
#'
#' @section Further Details:
#' See ?rfModel for more information
#' @inherit rfModel params
#' @method rfModel shap
#' @importFrom ranger ranger
#' @importFrom pdp partial
#' @export

rfModel.shap <- function(estimator,
rf_df,
rf_spec,
ntrees = 200,
seed = 1,
shap_k = 10,
...){

n <- 1

Estimate model

mod_df <- rf_df[, unique(c(list(...)$ind_var, 'trans_period', 'price'))]
mod_df$price <- log(mod_df$price)

regr.task = makeRegrTask(id = "aa", data = mod_df, target = "price")
regr.lrn = mlr::makeLearner("regr.ranger", par.vals = list(num.trees = ntrees))
rf_model = mlr::train(regr.lrn, regr.task)

shap_df <- mod_df %>%
dplyr::mutate(row_id = 1:nrow(.)) %>%
dplyr::group_by(trans_period) %>%
dplyr::slice(1:shap_k) %>%
dplyr::arrange(row_id)

shapvalue_df <- shapleyR::getShapleyValues(
shapley(shap_df$row_id,
task = regr.task,
model = rf_model)) %>%
dplyr::mutate(period = shap_df$trans_period) %>%
dplyr::group_by(period) %>%
dplyr::summarize(value = mean(trans_period)) %>%
dplyr::filter(period %in% rf_df$trans_period)

rf_model$coefficients <- data.frame(time = 1:max(rf_df$trans_period)) %>%
dplyr::left_join(shapvalue_df %>%
dplyr::select(time = period,
coefficient = value),
by = 'time') %>%
dplyr::mutate(coefficient = coefficient - coefficient[1])

a <- as.data.frame(cbind(X$trans_period[kk], x$trans_period))

# Add 'coefficients'

log_dep <- ifelse(grepl('log', rf_spec[2]), TRUE, FALSE)

if(log_dep){

coefs <- pdp_df$yhat - pdp_df$yhat[1]

} else {

coefs <- pdp_df$yhat / pdp_df$yhat[1]

}

rf_model$coefficients <- data.frame(time = 1:max(rf_df$trans_period),

coefficient = coefs)

Structure and return

structure(rf_model, class = c('rfmodel', class(rf_model)))

}

#'
#' Hedonic model approach with base estimator
#'
#' Use of base estimator in hedonic model approach
#'
#' @section Further Details:
#' See ?rfModel for more information
#' @inherit rfModel params
#' @method rfModel sim
#' @importFrom ranger ranger
#' @export

rfModel.sim <- function(estimator,
rf_df,
rf_spec,
ntrees = 200,
seed = 1,
...){

set.seed(seed)

Estimate model

rf_model <- ranger::ranger(rf_spec,
data = rf_df,
num.tree = ntrees,
seed = seed)

Add class

class(rf_model) <- c('rfmodel', class(rf_model))

log_dep <- ifelse(grepl('log', rf_spec[2]), TRUE, FALSE)

rfSimulate(rf_obj = rf_model,
rf_df = rf_df,
log_dep = log_dep,
...)
}

#'
#' Simulate selected properties
#'
#' Handle simulation of all chosen properties
#' '
#' @param rf_obj A ranger random forest object
#' @param rf_df Full data.frame used to build the random forest
#' @param sim_type ['random'] Sampling type to use
#' @param sim_per [0.1] Percentage of the total set to simulate
#' @param sim_count [NULL] If not giving a percentage, the total number of properties to simulate
#' @param seed [1] Seed for reproducibility
#' @param ... Additional arguments
#' @importFrom purrr map
#' @export

rfSimulate <- function(rf_obj,
rf_df,
sim_type = 'random',
sim_per = .1,
sim_count = NULL,
seed = 1,
...){

if no count of simulation is given

if (is.null(sim_count)) sim_count <- floor(sim_per * nrow(rf_df))

Get simulation observations

set.seed(seed)
sim_df <- rf_df[sample(1:nrow(rf_df), sim_count, replace = TRUE), ]

Calculate individual price movements

sim_coefs <- purrr::map(.x = sim_df %>% split(., sim_df$trans_id),
.f = rfSim,
rf_obj = rf_obj,
periods = 1:max(rf_df$trans_period),
...)

rf_obj$coefficients <- data.frame(time = 1:max(rf_df$trans_period),
coefficient = Reduce('+', sim_coefs) / length(sim_coefs) - 1)

rf_obj

}

#'
#' Simulation engine
#'
#' Helper function to simulate each example proeprty over the time period(s)
#' '
#' @param rf_obj A ranger random forest object
#' @param sim_df Single property to simulate over time
#' @param periods Time periods to simulate over
#' @param log_dep [fALSE] Is the dependent variables in log form?
#' @param ... Additional arguments
#' @importFrom dplyr mutate
#' @importFrom stats predict
#' @export

rfSim <- function(rf_obj,
sim_df,
periods,
log_dep = FALSE,
...){

new_data <- sim_df[rep(1,length(periods)), ] %>%
dplyr::mutate(trans_period = periods)

pred <- stats::predict(rf_obj, new_data)$prediction
if (log_dep) pred <- exp(pred)
pred / pred[1]

}

from hpir.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.