% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/spectrogram.R
\name{spectrogram}
\alias{spectrogram}
\title{Spectrogram}
\usage{
spectrogram(
  x,
  samplingRate = NULL,
  scale = NULL,
  from = NULL,
  to = NULL,
  dynamicRange = 80,
  windowLength = 50,
  step = windowLength/2,
  overlap = NULL,
  specType = c("spectrum", "reassigned", "spectralDerivative")[1],
  logSpec = TRUE,
  rasterize = FALSE,
  wn = "gaussian",
  zp = 0,
  normalize = TRUE,
  smoothFreq = 0,
  smoothTime = 0,
  qTime = 0,
  percentNoise = 10,
  noiseReduction = 0,
  output = c("original", "processed", "complex", "all")[1],
  specManual = NULL,
  reportEvery = NULL,
  cores = 1,
  plot = TRUE,
  savePlots = NULL,
  osc = c("none", "linear", "dB")[2],
  heights = c(3, 1),
  ylim = NULL,
  yScale = c("linear", "log", "bark", "mel", "ERB")[1],
  contrast = 0.2,
  brightness = 0,
  blur = 0,
  maxPoints = c(1e+05, 5e+05),
  padWithSilence = TRUE,
  colorTheme = c("bw", "seewave", "heat.colors", "...")[1],
  col = NULL,
  extraContour = NULL,
  xlab = NULL,
  ylab = NULL,
  xaxp = NULL,
  mar = c(5.1, 4.1, 4.1, 2),
  main = NULL,
  grid = NULL,
  width = 900,
  height = 500,
  units = "px",
  res = NA,
  ...
)
}
\arguments{
\item{x}{path to a folder, one or more wav or mp3 files c('file1.wav',
'file2.mp3'), Wave object, numeric vector, or a list of Wave objects or
numeric vectors}

\item{samplingRate}{sampling rate of \code{x} (only needed if \code{x} is a
numeric vector)}

\item{scale}{maximum possible amplitude of input used for normalization of
input vector (only needed if \code{x} is a numeric vector)}

\item{from, to}{if NULL (default), analyzes the whole sound, otherwise
from...to (s)}

\item{dynamicRange}{dynamic range, dB. All values more than one dynamicRange
under maximum are treated as zero}

\item{windowLength}{length of FFT window, ms (multiple values in a vector
produce a multi-resolution spectrogram)}

\item{step}{you can override \code{overlap} by specifying FFT step, ms - a
vector of the same length as windowLength (NB: because digital audio is
sampled at discrete time intervals of 1/samplingRate, the actual step and
thus the time stamps of STFT frames may be slightly different, eg 24.98866
instead of 25.0 ms)}

\item{overlap}{overlap between successive FFT frames, \%}

\item{specType}{plot the original FFT ('spectrum'), reassigned spectrogram
('reassigned'), or spectral derivative ('spectralDerivative')}

\item{logSpec}{if TRUE, log-transforms the spectrogram}

\item{rasterize}{(only applies if specType = 'reassigned') if TRUE, the
reassigned spectrogram is plotted after rasterizing it: that is, showing
density per time-frequency bins with the same resolution as an ordinary
spectrogram}

\item{wn}{window type accepted by \code{\link[seewave]{ftwindow}}, currently
gaussian, hanning, hamming, bartlett, blackman, flattop, rectangle}

\item{zp}{window length after zero padding, points}

\item{normalize}{if TRUE, scales input prior to FFT}

\item{smoothFreq, smoothTime}{length of the window for median smoothing in
frequency and time domains, respectively, points}

\item{qTime}{the quantile to be subtracted for each frequency bin. For
example, if qTime = 0.5, the median of each frequency bin (over the entire
sound duration) will be calculated and subtracted from each frame (see
examples)}

\item{percentNoise}{percentage of frames (0 to 100\%) used for calculating
noise spectrum}

\item{noiseReduction}{how much noise to remove (non-negative number,
recommended 0 to 2). 0 = no noise reduction, 2 = strong noise reduction:
\eqn{spectrum - (noiseReduction * noiseSpectrum)}, where noiseSpectrum is
the average spectrum of frames with entropy exceeding the quantile set by
\code{percentNoise}}

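\item{output}{specifies what to return: nothing ('none'), unmodified
spectrogram ('original'), denoised and/or smoothed spectrogram
('processed'), unmodified spectrogram with the imaginary part giving
phase ('complex'), or a list of outputs ('all'), which includes the raw
reassigned spectrogram ('reassigned') if specType = 'reassigned' (see
examples)}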

\item{specManual}{manually calculated spectrogram-like representation in the
same format as the output of spectrogram(): rows = frequency in kHz,
columns = time in ms}

\item{reportEvery}{when processing multiple inputs, report estimated time
left every ... iterations (NULL = default, NA = don't report)}

\item{cores}{number of cores for parallel processing}

\item{plot}{should a spectrogram be plotted? TRUE / FALSE}

\item{savePlots}{full path to the folder in which to save the plots (NULL =
don't save, '' = same folder as audio)}

\item{osc}{"none" = no oscillogram; "linear" = on the original scale; "dB" =
in decibels}

\item{heights}{a vector of length two specifying the relative height of the
spectrogram and the oscillogram (including time axes labels)}

\item{ylim}{frequency range to plot, kHz (defaults to 0 to Nyquist
frequency). NB: still in kHz, even if yScale = bark, mel, or ERB}

\item{yScale}{scale of the frequency axis: 'linear' = linear, 'log' =
logarithmic (musical), 'bark' = bark with \code{\link[tuneR]{hz2bark}},
'mel' = mel with \code{\link[tuneR]{hz2mel}}, 'ERB' = Equivalent
Rectangular Bandwidths with \code{\link{HzToERB}}}

\item{contrast}{controls the sharpness or contrast of the image: <0 =
decrease contrast, 0 = no change, >0 = increase contrast. Recommended range
approximately (-1, 1). The spectrogram is raised to the power of
\code{exp(3 * contrast)}}

\item{brightness}{makes the image lighter or darker: <0 = darker, 0 = no
change, >0 = lighter, range (-1, 1). The color palette is preserved, so
"brightness" works by capping an increasing proportion of image at the
lightest or darkest color. To lighten or darken the palette, just change
the colors instead}

\item{blur}{apply a Gaussian filter to blur or sharpen the image, two
numbers: frequency (Hz), time (ms). A single number is interpreted as
frequency, and a square filter is applied. NA / NULL / 0 means no blurring
in that dimension. Negative numbers mean un-blurring (sharpening) the image
by dividing instead of multiplying by the filter during convolution}

\item{maxPoints}{the maximum number of "pixels" in the oscillogram (if any)
and spectrogram; good for quickly plotting long audio files; defaults to
c(1e5, 5e5); does not affect reassigned spectrograms}

\item{padWithSilence}{if TRUE, pads the sound with just enough silence to
resolve the edges properly (only the original region is plotted, so the
apparent duration doesn't change)}

\item{colorTheme}{black and white ('bw'), as in seewave package ('seewave'),
matlab-type palette ('matlab'), or any palette from
\code{\link[grDevices]{palette}} such as 'heat.colors', 'cm.colors', etc}

\item{col}{actual colors, eg rev(rainbow(100)) - see ?hcl.colors for colors
in base R (overrides colorTheme)}

\item{extraContour}{a vector of arbitrary length scaled in Hz (regardless of
yScale, but nonlinear yScale also warps the contour) that will be plotted
over the spectrogram (eg pitch contour); can also be a list with extra
graphical parameters such as lwd, col, etc. (see examples)}

\item{xlab, ylab, main, mar, xaxp}{graphical parameters for plotting}

\item{grid}{if numeric, adds n = \code{grid} dotted lines per kHz}

\item{width, height, units, res}{graphical parameters for saving plots passed to
\code{\link[grDevices]{png}}}

\item{...}{other graphical parameters}
}
\value{
Returns nothing if output = 'none', spectral magnitudes (not power!) if
  output = 'original', denoised and/or smoothed spectrum if output =
  'processed', or spectral derivatives if specType = 'spectralDerivative'.
  The output is a matrix of real numbers with time in columns (ms) and
  frequency in rows (kHz). For multi-resolution spectrograms, the complex
  matrix corresponds to the last value of windowLength.
}
\description{
Produces the spectrogram of a sound using short-time Fourier transform.
Inspired by \code{\link[seewave]{spectro}}, this function offers added
routines for reassignment, multi-resolution spectrograms, noise reduction,
smoothing in time and frequency domains, manual control of contrast and
brightness, plotting the oscillogram on a dB scale, grid, etc. Gallery of
examples: \url{https://cogsci.se/soundgen/spectrograms.html}.
}
\details{
Many soundgen functions call \code{spectrogram}, and you can pass along most
of its graphical parameters from functions like \code{\link{soundgen}},
\code{\link{analyze}}, etc. However, in some cases this will not work (eg for
"units") or may produce unexpected results. If in doubt, omit extra graphical
parameters or save your sound first, then call spectrogram() explicitly.
Reassigned spectrograms are not affected by noise reduction or blurring.
}
\examples{
# Gallery of examples: https://cogsci.se/soundgen/spectrograms.html

# synthesize a sound 500 ms long, with gradually increasing hissing noise
sound = soundgen(sylLen = 500, temperature = 0.001, noise = list(
  time = c(0, 650), value = c(-40, 0)), formantsNoise = list(
  f1 = list(freq = 5000, width = 10000)))
# playme(sound, samplingRate = 16000)

# basic spectrogram
spectrogram(sound, samplingRate = 16000, yScale = 'bark')

# add bells and whistles
spectrogram(sound, samplingRate = 16000,
  windowLength = c(5, 40),  # multi-resolution
  osc = 'dB',  # plot oscillogram in dB
  heights = c(2, 1),  # spectro/osc height ratio
  noiseReduction = .9,  # subtract the spectrum of noisy parts
  brightness = -.5,  # reduce brightness
  # pick color theme - see ?hcl.colors
  # colorTheme = 'heat.colors',
  # ...or just specify the actual colors
  col = colorRampPalette(c('white', 'yellow', 'red'))(50),
  cex.lab = .75, cex.axis = .75,  # text size and other base graphics pars
  grid = 5,  # lines per kHz; to customize, add manually with graphics::grid()
  ylim = c(0, 5),  # always in kHz
  main = 'My spectrogram' # title
  # + axis labels, etc
)
\dontrun{
# save spectrograms of all sounds in a folder
spectrogram('~/Downloads/temp', savePlots = '', cores = 2)

# change dynamic range
spectrogram(sound, samplingRate = 16000, dynamicRange = 40)
spectrogram(sound, samplingRate = 16000, dynamicRange = 120)

# remove the oscillogram
spectrogram(sound, samplingRate = 16000, osc = 'none')  # or NULL etc

# frequencies on a logarithmic (musical) scale (mel/bark/ERB also available)
spectrogram(sound, samplingRate = 16000,
            yScale = 'log', ylim = c(.05, 8))

# broad-band instead of narrow-band
spectrogram(sound, samplingRate = 16000, windowLength = 5)

# reassigned spectrograms can be plotted without rasterizing, as a
# scatterplot instead of a contour plot
s = soundgen(sylLen = 500, pitch = c(100, 1100, 120, 1200, 90, 900, 110, 700),
  samplingRate = 22050, formants = NULL, lipRad = 0, rolloff = -20)
spectrogram(s, 22050, windowLength = 5, step = 1, yScale = 'bark')
spectrogram(s, 22050, specType = 'reassigned', windowLength = 5,
  step = 1, yScale = 'bark')
# ...or it can be rasterized, but that sacrifices frequency resolution:
sp = spectrogram(s, 22050, specType = 'reassigned', rasterize = TRUE,
                 windowLength = 5, step = 1, yScale = 'bark', output = 'all')
# The raw reassigned version is saved if output = 'all' for custom plotting
df = sp$reassigned
df$z1 = soundgen:::zeroOne(log(df$magn))
plot(df$time, df$freq, col = rgb(df$z1, df$z1, 1 - df$z1, 1),
  pch = 16, cex = 0.25, ylim = c(0, 2))

# multi-resolution spectrograms
spectrogram(s, 22050, windowLength = c(1, 10, 20, 50), yScale = 'bark')
# (works well in combination with de-blurring)
spectrogram(s, 22050, windowLength = c(1, 10, 20, 50), yScale = 'bark',
  blur = c(-50, -50))
spectrogram(s, 22050, windowLength = 1:10, yScale = 'bark',
  specType = 'reassigned', dynamicRange = 50)
spectrogram(s, 22050, windowLength = 1:10, yScale = 'bark',
  specType = 'reassigned', dynamicRange = 50, rasterize = TRUE)

# Different combinations of specType, mono/multiresolution, and rasterization
spectrogram(s, 22050, windowLength = 5)
spectrogram(s, 22050, windowLength = c(5, 10))

spectrogram(s, 22050, windowLength = 5, specType = 'reassigned',
  rasterize = FALSE)
spectrogram(s, 22050, windowLength = c(5, 10), specType = 'reassigned',
  rasterize = FALSE)

spectrogram(s, 22050, windowLength = 5, specType = 'reassigned',
  rasterize = TRUE)
spectrogram(s, 22050, windowLength = c(5, 10), specType = 'reassigned',
  rasterize = TRUE)

# focus only on values in the upper 5\% for each frequency bin
spectrogram(sound, samplingRate = 16000, qTime = 0.95)

# detect 10\% of the noisiest frames based on entropy and remove the pattern
# found in those frames (in this case, breathing)
spectrogram(sound, samplingRate = 16000,  noiseReduction = 1.1,
  brightness = -2)  # white noise attenuated

# increase contrast, reduce brightness
spectrogram(sound, samplingRate = 16000, contrast = .7, brightness = -.5)

# increase brightness (drops quiet bins with the same color palette)
spectrogram(sound, samplingRate = 16000, brightness = .5)

# another approach is to just make the palette lighter:
spectrogram(sound, samplingRate = 16000, col = gray.colors(30, 1, .5))

# apply median smoothing in both time and frequency domains
spectrogram(sound, samplingRate = 16000, smoothFreq = 5,
  smoothTime = 5)

# Gaussian filter to blur or sharpen ("unblur") the image in time and/or
# frequency domains
spectrogram(sound, samplingRate = 16000, blur = c(100, 500))
# TIP: when unblurring, set the first (frequency) parameter to the
# frequency resolution of interest, eg ~500-1000 Hz for human formants
spectrogram(sound, samplingRate = 16000, windowLength = 10, blur = c(-500, 50))

# specify location of tick marks etc - see ?par() for base graphics
spectrogram(sound, samplingRate = 16000,
            ylim = c(0, 3), yaxp = c(0, 3, 5), xaxp = c(0, .8, 10))

# Plot long audio files with reduced resolution
data(sheep, package = 'seewave')
sp = spectrogram(sheep, overlap = 0,
  maxPoints = c(1e4, 5e3),  # limit the number of pixels in osc/spec
  output = 'original')
nrow(sp) * ncol(sp) / 5e3  # spec downsampled by a factor of ~2

# Plot some arbitrary contour over the spectrogram (simply calling lines()
# will not work if osc = TRUE b/c the plot layout is modified)
s = soundgen(sylLen = 1500, pitch = c(250, 350, 320, 220),
  jitterDep = c(0, 0, 3, 2, 0, 0))
an = analyze(s, 16000, plot = TRUE, extraContour = 'dom')
spectrogram(s, 16000, extraContour = an$detailed$dom,
  ylim = c(0, 2), yScale = 'bark')
spectrogram(s, 16000, extraContour = list(x = an$detailed$dom, col = 'blue'),
  ylim = c(0, 2), yScale = 'bark')
# or simply add whatever you like to a spectrogram with points(), lines(),
# etc., (but only works without an oscillogram):
spectrogram(s, 16000, ylim = c(0, 2), yScale = 'bark', osc = FALSE)
points(an$detailed$time/1000,  # time in s
       HzToOther(an$detailed$dom, 'bark'),  # values in barks
       lwd = 2, col = 'green', lty = 2)  # any graphic pars

# For values that are not in Hz, normalize any way you like. NB: if yScale !=
# 'linear', the extra contour is by default warped to the same scale b/c it
# is assumed to be in Hz. Specify "warp = FALSE" to avoid this
spectrogram(s, 16000, yScale = 'ERB', ylim = c(0, 5), extraContour = list(
  x = an$detailed$loudness / max(an$detailed$loudness, na.rm = TRUE) * 5000,
  # because ylim[2] = 5 kHz = 5000 Hz
  type = 'b', pch = 5, lwd = 2, lty = 2, col = 'blue', warp = FALSE))
# compare:
spectrogram(s, 16000, yScale = 'ERB', ylim = c(0, 5), extraContour = list(
  x = an$detailed$loudness / max(an$detailed$loudness, na.rm = TRUE) * 5000,
  # because ylim[2] = 5 kHz = 5000 Hz
  type = 'b', pch = 5, lwd = 2, lty = 2, col = 'blue'))

# Plot a spectrogram-like matrix paired with an osc
ms = modulationSpectrum(s, 16000, msType = '1D', amRes = 10)
spectrogram(s, 16000, specManual = ms$modulation_spectrogram,
  colorTheme = 'matlab', ylab = 'Modulation frequency, kHz',
  contrast = .25, blur = c(10, 10), yScale = 'log')
}
}
\seealso{
\code{\link{osc}} \code{\link{modulationSpectrum}} \code{\link{ssm}}
}
