import uniq from '@lib/utilities/uniq'
import { getAllConditionNames } from '@modules/conditions/helpers/getAllConditionNames'
import type { TrialToTagWithFilters } from '@modules/trials/etl/updateTrialsWithFilterTags'
import extractInclusionCriteria from '@modules/trials/helpers/parseEligibilityCriteria/extractInclusionCriteria'
import { type TrialAssociation } from '@modules/trials/types/TrialAssociation'
import isRegExp from 'lodash/isRegExp'
import {
  type FilterItem,
  diseaseStageFilterOptions,
  keywordMatchers,
  treatmentStageFilterOptions,
} from './ngramData'

/**
 * Flattens the free-text fields of a trial into one lower-cased string that the
 * keyword and filter matchers can search.
 *
 * The result is the space-joined concatenation of, in order: the inclusion
 * section extracted from `eligibilityCriteria` (or an empty string when the
 * trial has no eligibility criteria), `studyPopulation`, `briefSummary`,
 * `detailedDescription`, `briefTitle`, and every name returned by
 * `getAllConditionNames` with `authoritativeOnly: true`.
 *
 * @param trial - The trial whose text fields should be combined.
 * @returns The combined lower-cased text, or `''` when no trial is given.
 */
export const parseInclusionStringFromTrial = (
  trial?: Pick<
    TrialAssociation,
    | 'eligibilityCriteria'
    | 'conditions'
    | 'studyPopulation'
    | 'briefSummary'
    | 'detailedDescription'
    | 'briefTitle'
  >,
) => {
  if (!trial) {
    return ''
  }

  const {
    eligibilityCriteria,
    conditions,
    studyPopulation,
    briefSummary,
    detailedDescription,
    briefTitle,
  } = trial

  // `join` renders null/undefined entries as empty strings, so absent fields
  // only contribute an extra separator.
  const textSegments = [
    eligibilityCriteria ? extractInclusionCriteria(eligibilityCriteria) : '',
    studyPopulation,
    briefSummary,
    detailedDescription,
    briefTitle,
    ...getAllConditionNames(conditions, { authoritativeOnly: true }),
  ]

  return textSegments.join(' ').toLowerCase()
}

/**
 * Tests whether `content` matches `matcher`.
 *
 * @param content - The text to search.
 * @param matcher - Either a `RegExp`, whose source and flags are used as given,
 *   or a string, which is compiled as a case-insensitive regular-expression
 *   pattern. The string is not escaped, so regex metacharacters in it keep
 *   their special meaning.
 * @returns `true` when the pattern matches `content`, otherwise `false`.
 */
export function hasMatch(content: string, matcher: string | RegExp): boolean {
  // Always test against a fresh RegExp. `test` on a global or sticky RegExp
  // resumes from `lastIndex` and advances it, so testing a shared matcher
  // object directly would make repeated calls alternate between true and false
  // for the same content. Copying also leaves the caller's matcher untouched.
  const pattern = isRegExp(matcher)
    ? new RegExp(matcher) // copy: same source and flags, lastIndex starts at 0
    : new RegExp(matcher, 'i')

  return pattern.test(content)
}

/**
 * Pulls keyword labels out of a trial using naive string matching.
 *
 * The trial's text content is checked against an allow-list of n-gram matchers
 * stored in the adjacent `ngramData` file. Each entry in `keywordMatchers` is a
 * group of keywords, and at most one label is taken per group: the first
 * keyword in the group that has any matcher hitting the text.
 *
 * @param trial - A clinical trial object.
 * @returns The unique matched labels, sorted with the default string ordering.
 */
export const parseKeywordsFromTrial = (trial?: TrialToTagWithFilters) => {
  const trialTextContent = parseInclusionStringFromTrial(trial)

  const nonOverlappingKeywords = new Set<string>()

  keywordMatchers.forEach((keywordMatcherGroup) => {
    // Some groups have mutually exclusive terms, so stop at the first keyword
    // in the group that matches and ignore the rest.
    const firstMatchingKeyword = keywordMatcherGroup.find((keywordMatch) =>
      keywordMatch.matches.some((match) => hasMatch(trialTextContent, match)),
    )

    if (firstMatchingKeyword) {
      nonOverlappingKeywords.add(firstMatchingKeyword.label)
    }
  })

  return Array.from(nonOverlappingKeywords).sort()
}

/**
 * Derives the disease-stage and treatment-stage filter tags for a trial.
 *
 * A filter option applies when at least one of its `matches` entries hits the
 * trial's combined text content. Labels of the applicable options are
 * de-duplicated and sorted with the default string ordering.
 *
 * @param trial - The trial to tag.
 * @returns An object with the `stageOfDisease` and `stageOfTreatment` label lists.
 */
export const extractFilterTagsFromTrial = (trial?: TrialToTagWithFilters) => {
  const searchableText = parseInclusionStringFromTrial(trial)

  const isTagPresent = ({ matches }: FilterItem) =>
    matches.some((pattern) => hasMatch(searchableText, pattern))

  const diseaseStageLabels = diseaseStageFilterOptions
    .filter(isTagPresent)
    .map(({ label }) => label)

  const treatmentStageLabels = treatmentStageFilterOptions
    .filter(isTagPresent)
    .map(({ label }) => label)

  return {
    stageOfDisease: uniq(diseaseStageLabels).sort(),
    stageOfTreatment: uniq(treatmentStageLabels).sort(),
  }
}
