#' Test whether two strings match with an LLM prompt.
#'
#' Each pair (`string1[i]`, `string2[i]`) is turned into a prompt asking
#' whether the two names refer to the same `record_type`, and the model's
#' answer is returned as a label.
#'
#' The provider is chosen from `model`:
#' * 'gpt-3.5-turbo-instruct', 'davinci-002' and 'babbage-002' are sent to the
#'   OpenAI Completions endpoint in batches. Pairs whose strings are exactly
#'   equal are labelled "Yes" without being submitted.
#' * Names containing 'mistral' or 'mixtral' use `ellmer::chat_mistral()`.
#' * Names containing 'claude' use `ellmer::chat_anthropic()`.
#' * Everything else uses `ellmer::chat_openai()`.
#'
#' @param string1 A string or vector of strings
#' @param string2 A string or vector of strings, the same length as `string1`
#' @param model Which LLM to prompt; defaults to 'gpt-5.2'. Also accepts
#'   Mistral models (e.g. 'mistral-large-latest') and Anthropic Claude models
#'   (e.g. 'claude-sonnet-4-5-20250929').
#' @param record_type A character describing what type of entity `string1`
#'   and `string2` represent. Should be a singular noun (e.g. "person",
#'   "organization", "interest group", "city").
#' @param instructions A string containing additional instructions to include
#'   in the LLM prompt.
#' @param openai_api_key Your OpenAI API key. By default, looks for a system
#'   environment variable called "OPENAI_API_KEY" (recommended option). Only
#'   required for OpenAI models; Mistral and Claude models read the
#'   "MISTRAL_API_KEY" and "ANTHROPIC_API_KEY" environment variables instead.
#' @param parallel TRUE to submit API requests in parallel. Setting to FALSE
#'   limits chat models to one active request at a time, which can reduce
#'   rate limit errors at the expense of longer runtime. Has no effect on the
#'   legacy Completions models, which are always submitted in batches.
#'
#' @return A character vector the same length as `string1` and `string2`.
#'   "Yes" if the pair of strings match, "No" otherwise. For chat models the
#'   label is the first word of the model's reply in title case. Pairs in
#'   which either string is `NA` are returned as `NA` without being submitted.
#' @export
#'
#' @examples
#' \dontrun{
#' check_match('UPS', 'United Parcel Service')
#' check_match('UPS', 'United States Postal Service')
#' check_match(c('USPS', 'USPS', 'USPS'),
#'             c('Post Office', 'United Parcel', 'US Postal Service'))
#' }
check_match <- function(string1, string2,
                        model = 'gpt-5.2', # 'gpt-4o-2024-11-20',
                        record_type = 'entity',
                        instructions = NULL,
                        openai_api_key = Sys.getenv('OPENAI_API_KEY'),
                        parallel = TRUE){

  if(length(string1) != length(string2)){
    stop('Inputs must have the same number of elements.')
  }

  if(!is.character(model) || length(model) != 1 || is.na(model)){
    stop('`model` must be a single string.')
  }

  # work out which provider serves the requested model
  legacy_models <- c('gpt-3.5-turbo-instruct', 'davinci-002', 'babbage-002')
  is_legacy <- model %in% legacy_models
  is_mistral <- !is_legacy && grepl('mistral|mixtral', model)
  is_claude <- !is_legacy && !is_mistral && grepl('claude', model)

  # only demand the credential of the provider that will actually be called
  if(is_mistral){
    if(Sys.getenv('MISTRAL_API_KEY') == ''){
      stop("No Mistral API key detected in system environment. You can add one using the 'mistral_api_key()' function.")
    }
  } else if(is_claude){
    if(Sys.getenv('ANTHROPIC_API_KEY') == ''){
      stop("No Anthropic API key detected in system environment. You can add one using the 'anthropic_api_key()' function.")
    }
  } else if(!isTRUE(nzchar(openai_api_key))){
    stop("No API key detected in system environment. You can add one using the 'openai_api_key()' function.")
  }

  # encode strings as characters
  string1 <- as.character(string1)
  string2 <- as.character(string2)

  # pairs with a missing name cannot be judged: they stay NA and are never
  # sent to a model
  labels <- rep(NA_character_, length(string1))
  observed <- !is.na(string1) & !is.na(string2)

  # use the Completions endpoint if the model is a "Legacy" model
  if(is_legacy){

    # labels="Yes" wherever the two strings match exactly
    same <- observed & string1 == string2
    labels[same] <- 'Yes'

    # don't submit prompts for exact string matches
    to_prompt <- which(observed & !same)

    if(length(to_prompt) > 0){

      # if non-NULL, pad the instructions
      if(!is.null(instructions)){
        instructions <- paste0(instructions, ' ')
      }

      # format the prompts
      p <- paste0('Decide if the following two names refer to the same ',
                  record_type, '. ', instructions,
                  'Think carefully. Respond \"Yes\" or \"No\".\n\n',
                  'Name A: ', string1[to_prompt],
                  '\nName B: ', string2[to_prompt],
                  '\n\nResponse:')

      labels[to_prompt] <- .check_match_completions(p, model, openai_api_key)
    }

    return(labels)
  }

  # chat models: nothing to ask if every pair has a missing name (or the
  # input is empty)
  if(any(observed)){

    if(is.null(instructions)){
      instructions <- ''
    }

    system_prompt <- 'Respond with "Yes" or "No".'

    if(is_mistral){
      chat <- ellmer::chat_mistral(system_prompt, model = model)
    } else if(is_claude){
      chat <- ellmer::chat_anthropic(system_prompt, model = model)
    } else{ # OpenAI chat models
      chat <- ellmer::chat_openai(system_prompt,
                                  model = model,
                                  credentials = function(){openai_api_key})
    }

    labels[observed] <- .check_match_chat(chat,
                                          string1[observed],
                                          string2[observed],
                                          record_type,
                                          instructions,
                                          parallel)
  }

  labels
}

# Submit one prompt per name pair to an ellmer chat object and return the
# first word of each reply in title case.
.check_match_chat <- function(chat, string1, string2,
                              record_type, instructions, parallel){

  # interpolate() looks the template variables up in this function's frame
  prompts <- ellmer::interpolate('Decide if the following two names refer to the same {{record_type}}. {{instructions}}\n\nName A: {{string1}}\nName B: {{string2}}')

  if(isFALSE(parallel)){
    # one active request at a time
    answers <- ellmer::parallel_chat_text(chat, prompts, max_active = 1)
  } else{
    answers <- ellmer::parallel_chat_text(chat, prompts)
  }

  stringr::str_to_title(stringr::str_extract(answers, '^\\w+'))
}

# Send prompts to the OpenAI Completions endpoint in batches and return one
# title-cased single-token answer per prompt, in prompt order.
.check_match_completions <- function(prompts, model, openai_api_key,
                                     max_prompts = 1500){

  base_url <- "https://api.openai.com/v1/completions"

  headers <- c(
    "Authorization" = paste("Bearer", openai_api_key),
    "Content-Type" = "application/json"
  )

  answers <- character(length(prompts))
  start_index <- 1

  # batch prompts to handle API rate limits
  while(start_index <= length(prompts)){

    end_index <- min(length(prompts), start_index + max_prompts - 1)
    batch <- start_index:end_index

    body <- list(model = model,
                 prompt = prompts[batch],
                 max_tokens = 1,
                 temperature = 0)

    repeat{
      response <- httr::POST(
        url = base_url,
        httr::add_headers(.headers = headers),
        body = body,
        encode = "json"
      )

      parsed <- response |>
        httr::content(as = "text", encoding = "UTF-8") |>
        jsonlite::fromJSON(flatten = TRUE)

      if(response$status_code != 429){
        break
      }

      # rate limited: wait for as long as the API asks, then resubmit
      wait_seconds <- .check_match_retry_wait(parsed$error$message)

      # a 429 without a retry hint (e.g. exhausted quota) will not resolve
      # itself, so surface it instead of retrying forever
      if(is.na(wait_seconds)){
        stop('OpenAI API request failed (HTTP 429): ',
             .check_match_api_message(parsed))
      }

      warning(paste0('Exceeded Rate Limit. Waiting ', wait_seconds,
                     ' seconds.\n\n'),
              call. = FALSE, immediate. = TRUE)
      Sys.sleep(wait_seconds)
    }

    choices <- parsed$choices

    if(response$status_code >= 400 || is.null(choices$text)){
      stop('OpenAI API request failed (HTTP ', response$status_code, '): ',
           .check_match_api_message(parsed))
    }

    # each choice carries the index of the prompt it answers; use it rather
    # than relying on the order in which choices are returned
    texts <- choices$text
    if(!is.null(choices$index)){
      texts <- texts[order(choices$index)]
    }

    if(length(texts) != length(batch)){
      stop('OpenAI API returned ', length(texts), ' completions for ',
           length(batch), ' prompts.')
    }

    answers[batch] <- stringr::str_to_title(gsub(' |\n', '', texts))

    start_index <- end_index + 1
  }

  answers
}

# Extract the error message from a parsed API response, with a placeholder
# when the response carries none.
.check_match_api_message <- function(parsed){
  api_message <- parsed$error$message
  if(is.null(api_message) || length(api_message) != 1 || is.na(api_message)){
    return('no error message returned')
  }
  api_message
}

# Convert the "Please try again in <duration>" hint of a rate-limit message
# (e.g. "20s", "6ms", "1m30s") into whole seconds, rounded up. Returns NA
# when the message contains no such hint.
.check_match_retry_wait <- function(api_message){

  if(is.null(api_message) || length(api_message) != 1 || is.na(api_message)){
    return(NA_real_)
  }

  hint <- regmatches(
    api_message,
    regexec('Please try again in\\s+((?:[0-9]*\\.?[0-9]+\\s*[A-Za-z]*\\s*)+)',
            api_message, perl = TRUE)
  )[[1]]

  if(length(hint) < 2){
    return(NA_real_)
  }

  # split the duration into <number><unit> components and add them up
  parts <- regmatches(
    hint[2],
    gregexpr('[0-9]*\\.?[0-9]+\\s*[A-Za-z]*', hint[2], perl = TRUE)
  )[[1]]

  amounts <- as.numeric(sub('\\s*[A-Za-z]*$', '', parts))
  units <- tolower(sub('^[0-9.]+\\s*', '', parts))

  # a missing or unrecognised unit is read as seconds
  seconds_per_unit <- ifelse(grepl('^(ms|milli)', units), 0.001,
                      ifelse(grepl('^h', units), 3600,
                      ifelse(grepl('^m', units), 60, 1)))

  ceiling(sum(amounts * seconds_per_unit))
}
