% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/match_pipeline.R
\name{match_companies}
\alias{match_companies}
\title{Match Company Names against a Dictionary}
\usage{
match_companies(
  queries,
  dictionary,
  query_col = "company_name",
  dict_col = "company_name",
  unique_id_col = "query_id",
  dict_id_col = "orbis_id",
  threshold_jw = 0.8,
  threshold_zoomer = 0.4,
  threshold_rarity = 1,
  n_cores = 1
)
}
\arguments{
\item{queries}{Data frame. Must contain columns specified in \code{query_col} and \code{unique_id_col}.}

\item{dictionary}{Data frame. Must contain columns specified in \code{dict_col} and \code{dict_id_col}.}

\item{query_col}{String. Column name for company names in \code{queries}.}

\item{dict_col}{String. Column name for company names in \code{dictionary}.}

\item{unique_id_col}{String. Column name for the unique query ID in \code{queries}.}

\item{dict_id_col}{String. Column name for the entry ID in \code{dictionary}.}

\item{threshold_jw}{Numeric (0-1). Minimum Jaro-Winkler similarity. Default 0.8.}

\item{threshold_zoomer}{Numeric (0-1). Jaccard threshold for blocking. Default 0.4.}

\item{threshold_rarity}{Numeric. Minimum score for rarity matching. Default 1.0.}

\item{n_cores}{Integer. Number of cores (reserved for a future parallel implementation). Default 1.}
}
\value{
A data.table containing \code{query_id}, \code{dict_id}, and \code{match_type}.
}
\description{
Runs a cascading matching pipeline: Exact -> Fuzzy (Zoomer) -> FTS5 -> Rarity.
Matches found in earlier steps are removed from subsequent steps.
}
\examples{
# Queries: three company names to look up, identified by integer ids 1 to 3.
# Note that the dictionary below has no "Deutsche Bank" entry.
queries <- data.frame(
  query_id     = seq_len(3),
  company_name = c("BMW", "Siemens AG", "Deutsche Bank")
)

# Dictionary: reference entries keyed by their orbis_id.
dictionary <- data.frame(
  orbis_id     = c("D001", "D002", "D003"),
  company_name = c("BMW AG", "Siemens Aktiengesellschaft", "Commerzbank AG")
)

# Run the matching pipeline. The four column arguments restate the
# function defaults; they are spelled out here to show which column
# plays which role.
results <- match_companies(
  queries,
  dictionary,
  query_col     = "company_name",
  dict_col      = "company_name",
  unique_id_col = "query_id",
  dict_id_col   = "orbis_id"
)

# Inspect the returned matches
print(results)
}
