% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generics.R, R/DataFrame.R
\name{group_by}
\alias{group_by}
\alias{groupBy}
\alias{groupBy,SparkDataFrame-method}
\alias{group_by,SparkDataFrame-method}
\title{GroupBy}
\usage{
group_by(x, ...)

groupBy(x, ...)

\S4method{groupBy}{SparkDataFrame}(x, ...)

\S4method{group_by}{SparkDataFrame}(x, ...)
}
\arguments{
\item{x}{a SparkDataFrame.}

\item{...}{character name(s) or Column(s) to group on.}
}
\value{
A GroupedData.
}
\description{
Groups the SparkDataFrame by the specified columns so that aggregations can be run on the resulting groups.
}
\note{
groupBy since 1.4.0

group_by since 1.4.0
}
\examples{
\dontrun{
  # Compute the average for all numeric columns grouped by department.
  avg(groupBy(df, "department"))

  # Compute the max age and average salary, grouped by department and gender.
  agg(groupBy(df, "department", "gender"), salary = "avg", age = "max")
}
}
\seealso{
\link{agg}, \link{cube}, \link{rollup}

Other SparkDataFrame functions: 
\code{\link{SparkDataFrame-class}},
\code{\link{agg}()},
\code{\link{alias}()},
\code{\link{arrange}()},
\code{\link{as.data.frame}()},
\code{\link{attach,SparkDataFrame-method}},
\code{\link{broadcast}()},
\code{\link{cache}()},
\code{\link{checkpoint}()},
\code{\link{coalesce}()},
\code{\link{collect}()},
\code{\link{colnames}()},
\code{\link{coltypes}()},
\code{\link{createOrReplaceTempView}()},
\code{\link{crossJoin}()},
\code{\link{cube}()},
\code{\link{dapply}()},
\code{\link{dapplyCollect}()},
\code{\link{describe}()},
\code{\link{dim}()},
\code{\link{distinct}()},
\code{\link{drop}()},
\code{\link{dropDuplicates}()},
\code{\link{dropna}()},
\code{\link{dtypes}()},
\code{\link{except}()},
\code{\link{exceptAll}()},
\code{\link{explain}()},
\code{\link{filter}()},
\code{\link{first}()},
\code{\link{gapply}()},
\code{\link{gapplyCollect}()},
\code{\link{getNumPartitions}()},
\code{\link{head}()},
\code{\link{hint}()},
\code{\link{histogram}()},
\code{\link{insertInto}()},
\code{\link{intersect}()},
\code{\link{intersectAll}()},
\code{\link{isLocal}()},
\code{\link{isStreaming}()},
\code{\link{join}()},
\code{\link{limit}()},
\code{\link{localCheckpoint}()},
\code{\link{merge}()},
\code{\link{mutate}()},
\code{\link{ncol}()},
\code{\link{nrow}()},
\code{\link{persist}()},
\code{\link{printSchema}()},
\code{\link{randomSplit}()},
\code{\link{rbind}()},
\code{\link{rename}()},
\code{\link{repartition}()},
\code{\link{repartitionByRange}()},
\code{\link{rollup}()},
\code{\link{sample}()},
\code{\link{saveAsTable}()},
\code{\link{schema}()},
\code{\link{select}()},
\code{\link{selectExpr}()},
\code{\link{show}()},
\code{\link{showDF}()},
\code{\link{storageLevel}()},
\code{\link{str}()},
\code{\link{subset}()},
\code{\link{summary}()},
\code{\link{take}()},
\code{\link{toJSON}()},
\code{\link{union}()},
\code{\link{unionAll}()},
\code{\link{unionByName}()},
\code{\link{unpersist}()},
\code{\link{unpivot}()},
\code{\link{with}()},
\code{\link{withColumn}()},
\code{\link{withWatermark}()},
\code{\link{write.df}()},
\code{\link{write.jdbc}()},
\code{\link{write.json}()},
\code{\link{write.orc}()},
\code{\link{write.parquet}()},
\code{\link{write.stream}()},
\code{\link{write.text}()}
}
\concept{SparkDataFrame functions}
