This tutorial demonstrates the linear_plateau() function
for fitting a continuous response model and estimating a critical soil
test value (CSTV). This function fits a segmented regression model that
follows two phases: i) a linear phase described as
y = a + b * x, followed by ii) a plateau phase (Anderson
and Nelson, 1975) where the ry response to increasing
stv becomes null (flat), described as
y = a + b * Xc (the plateau), where y represents
the fitted crop relative yield, x the soil test value,
a the intercept (ry when stv = 0), b the
slope (as the change in ry per unit of soil nutrient supply), and
Xc the break point where the plateau phase starts (i.e. the
CSTV).
The parameters of this regression model have a simple interpretation.
Some disadvantages are that: i) the user does not have control to
estimate the CSTV (the model Xc parameter) for a specific
ry level; and ii) the default confidence interval
estimation of the CSTV is generally unreliable (based on
symmetric Wald intervals). We recommend using a resampling
technique (e.g. bootstrapping) for a more reliable confidence interval
estimation for parameters and CSTV (for examples on bootstrapping, see
the nlraa package
vignette). The linear_plateau() function works
automatically with self-starting initial values to facilitate the
model’s convergence.
Load your dataframe with soil test value and relative yield data.
Specify the following arguments in the
linear_plateau() function:
(a). data (optional),
(b). stv (soil test value) and ry (relative
yield) columns or vectors,
(c). target (optional), if you want to know the stv level needed
for a ry different from the plateau,
(d). tidy TRUE (produces a data.frame with results) or
FALSE (stores results as a list),
(e). plot TRUE (produces a ggplot as main output) or
FALSE (no plot, only results as data.frame),
(f). resid TRUE (produces plots with residuals analysis)
or FALSE (no plot).
Run and check results.
Check residuals plot, and warnings related to potential
limitations of this model.
Adjust curve plots as desired.
library(soiltestcorr)
Suggested packages
# Install if needed
library(ggplot2) # Plots
library(dplyr) # Data wrangling
library(tidyr) # Data wrangling
library(utils) # Data wrangling
library(data.table) # Mapping
library(purrr) # Mapping
This is a basic example using three different datasets:
# Example 1: small synthetic dataset typed in by hand
# (relative yield RY paired with soil test value STV)
data_1 <- data.frame(
  RY = c(65, 80, 85, 88, 90, 94, 93, 96, 97, 95, 98, 100, 99, 99, 100),
  STV = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
)
# Example 2. Fake (synthetic) dataset that ships with the soiltestcorr package
data_2 <- soiltestcorr::data_test
# Example 3. Native dataset from soiltestcorr package, Freitas et al. (1966), used by Cate & Nelson (1971)
data_3 <- soiltestcorr::freitas1966
tidy = FALSE: it returns a LIST (more efficient for multiple fits at once)
# Data frame plus bare column names; tidy = FALSE -> results come back as a list
fit_1_tidy_false <- soiltestcorr::linear_plateau(
  data = data_1,
  ry = RY,
  stv = STV,
  tidy = FALSE
)
utils::head(fit_1_tidy_false)
#> $intercept
#> [1] 65.87
#>
#> $slope
#> [1] 5.09
#>
#> $equation
#> [1] "65.9 + 5.09x if x<CSTV"
#>
#> $plateau
#> [1] 97.4
#>
#> $target
#> [1] 97.4
#>
#> $CSTV
#> [1] 6.2
tidy = TRUE: it returns a data.frame (more organized results)
# Using dataframe argument, tidy = TRUE -> returns a data.frame
fit_1_tidy_true <- soiltestcorr::linear_plateau(
  data = data_1,
  ry = RY,
  stv = STV,
  tidy = TRUE
)
fit_1_tidy_true
#> intercept slope equation plateau target CSTV LL_cstv UL_cstv
#> 1 65.87 5.09 65.9 + 5.09x if x<CSTV 97.4 97.4 6.2 5.1 7.4
#> CI_type STVt AIC AICc R2
#> 1 Wald Conf. Interval 6.2 82 86 0.9
You can call stv and ry vectors using the
$ operator.
The tidy argument still applies for controlling the
output type.
# Same fit, but passing the columns as vectors extracted with `$`
# instead of supplying `data`.

# tidy = FALSE variant
fit_1_vectors_list <- soiltestcorr::linear_plateau(
  ry = data_1$RY,
  stv = data_1$STV,
  tidy = FALSE
)

# tidy = TRUE variant
fit_1_vectors_tidy <- soiltestcorr::linear_plateau(
  ry = data_1$RY,
  stv = data_1$STV,
  tidy = TRUE
)
# Example 2: fit on data_2, leaving `tidy` at its default
fit_2 <- soiltestcorr::linear_plateau(
  data = data_2,
  ry = RY,
  stv = STV
)
utils::head(fit_2)
#> $intercept
#> [1] 53.72
#>
#> $slope
#> [1] 1.55
#>
#> $equation
#> [1] "53.7 + 1.55x if x<CSTV"
#>
#> $plateau
#> [1] 96.2
#>
#> $target
#> [1] 96.2
#>
#> $CSTV
#> [1] 27.4
# Example 3: fit on data_3, whose soil test column is named STK
fit_3 <- soiltestcorr::linear_plateau(
  data = data_3,
  ry = RY,
  stv = STK
)
utils::head(fit_3)
#> $intercept
#> [1] 39.24
#>
#> $slope
#> [1] 0.75
#>
#> $equation
#> [1] "39.2 + 0.75x if x<CSTV"
#>
#> $plateau
#> [1] 95.6
#>
#> $target
#> [1] 95.6
#>
#> $CSTV
#> [1] 75
Note: the stv column needs to have the same name for
all datasets.
# Stack the three datasets, tagging rows with an "id" column, then nest the
# STV/RY columns so there is one row per dataset.
# data_3 keeps its soil test values in STK, so it is renamed to STV first.
data.all <- dplyr::bind_rows(
  data_1,
  data_2,
  dplyr::rename(data_3, STV = STK),
  .id = "id"
) %>%
  tidyr::nest(data = c("STV", "RY"))

# Run multiple examples at once with map(): one fit per nested dataset
fit_multiple_map <- data.all %>%
  dplyr::mutate(
    linear_plateau = purrr::map(
      data,
      ~ soiltestcorr::linear_plateau(ry = .x$RY, stv = .x$STV, tidy = TRUE)
    )
  )
utils::head(fit_multiple_map)
#> # A tibble: 3 × 3
#> id data linear_plateau
#> <chr> <list> <list>
#> 1 1 <tibble [15 × 2]> <df [1 × 13]>
#> 2 2 <tibble [137 × 2]> <df [1 × 13]>
#> 3 3 <tibble [24 × 2]> <df [1 × 13]>
Alternatively, with group_map, we do not require nested data.
However, it requires combining the datasets with dplyr::bind_rows and adding an id
column specifying the name of each dataset.
This option returns the models as a list of objects.
# Stack the datasets with an "id" column, group by it, and fit one model
# per group; group_map() collects the per-group results in a list.
fit_multiple_group_map <- dplyr::bind_rows(data_1, data_2, .id = "id") %>%
  dplyr::group_by(id) %>%
  dplyr::group_map(
    ~ soiltestcorr::linear_plateau(data = .x, ry = RY, stv = STV, tidy = TRUE)
  )
utils::head(fit_multiple_group_map)
#> [[1]]
#> intercept slope equation plateau target CSTV LL_cstv UL_cstv
#> 1 65.87 5.09 65.9 + 5.09x if x<CSTV 97.4 97.4 6.2 5.1 7.4
#> CI_type STVt AIC AICc R2
#> 1 Wald Conf. Interval 6.2 82 86 0.9
#>
#> [[2]]
#> intercept slope equation plateau target CSTV LL_cstv UL_cstv
#> 1 53.72 1.55 53.7 + 1.55x if x<CSTV 96.2 96.2 27.4 24 30.7
#> CI_type STVt AIC AICc R2
#> 1 Wald Conf. Interval 27.4 1026 1026 0.52
We can generate a ggplot with the same linear_plateau() function.
We just need to specify the argument plot = TRUE.
# plot = TRUE makes the call return the ggplot instead of the results
linear_plateau_plot <- soiltestcorr::linear_plateau(
  data = data_3,
  ry = RY,
  stv = STK,
  plot = TRUE
)
linear_plateau_plot
### 3.1.2 Fine-tune the plots
As ggplot object, plots can be adjusted in several ways.
For example, modifying titles
# Layer custom labels on top of the plot returned by linear_plateau()
linear_plateau_plot_2 <- linear_plateau_plot +
  # Main title
  ggtitle("My own plot title") +
  # Axis titles
  labs(x = "Soil Test K (ppm)", y = "Cotton RY(%)")
linear_plateau_plot_2
Or modifying axis scales
# Override both axis scales on the relabelled plot
linear_plateau_plot_3 <- linear_plateau_plot_2 +
  # X axis: limits and break positions
  scale_x_continuous(
    limits = c(20, 220),
    breaks = seq(0, 220, by = 20)
  ) +
  # Y axis: limits and break positions
  scale_y_continuous(
    limits = c(30, 100),
    breaks = seq(30, 100, by = 10)
  )
linear_plateau_plot_3
We can generate a residuals plot with the same linear_plateau() function.
We just need to specify the argument resid = TRUE.
# Residuals plot: resid = TRUE requests the residuals analysis plots;
# the fitted results are still printed below.
soiltestcorr::linear_plateau(
  data = data_3,
  ry = RY,
  stv = STK,
  resid = TRUE
)
#> $intercept
#> [1] 39.24
#>
#> $slope
#> [1] 0.75
#>
#> $equation
#> [1] "39.2 + 0.75x if x<CSTV"
#>
#> $plateau
#> [1] 95.6
#>
#> $target
#> [1] 95.6
#>
#> $CSTV
#> [1] 75
#>
#> $LL_cstv
#> [1] 46.4
#>
#> $UL_cstv
#> [1] 103.6
#>
#> $CI_type
#> [1] "Wald Conf. Interval"
#>
#> $STVt
#> [1] 75
#>
#> $AIC
#> [1] 188
#>
#> $AICc
#> [1] 190
#>
#> $R2
#> [1] 0.66
References
Anderson, R. L., and Nelson, L. A. (1975). A Family of Models
Involving Intersecting Straight Lines and Concomitant Experimental
Designs Useful in Evaluating Response to Fertilizer Nutrients.
Biometrics, 31(2), 303–318. https://doi.org/10.2307/2529422