"use strict";

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.defineExplainLogRateSpikesRoute = void 0;

var _lodash = require("lodash");

var _i18n = require("@kbn/i18n");

var _std = require("@kbn/std");

var _fieldTypes = require("@kbn/field-types");

var _aiopsUtils = require("@kbn/aiops-utils");

var _mlAggUtils = require("@kbn/ml-agg-utils");

var _mlStringHash = require("@kbn/ml-string-hash");

var _explain_log_rate_spikes = require("../../common/api/explain_log_rate_spikes");

var _api = require("../../common/api");

var _fetch_change_point_p_values = require("./queries/fetch_change_point_p_values");

var _fetch_field_candidates = require("./queries/fetch_field_candidates");

var _fetch_frequent_items = require("./queries/fetch_frequent_items");

var _get_simple_hierarchical_tree = require("./queries/get_simple_hierarchical_tree");

/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
// 10s ping frequency to keep the stream alive.
// The value is passed to `setTimeout` as the delay between ping messages.
const PING_FREQUENCY = 10000;

// Overall progress is a float from 0 to 1. Each constant below is the share of
// that range added to the handler's `loaded` counter by one stage of the analysis.
const LOADED_FIELD_CANDIDATES = 0.2;
const PROGRESS_STEP_P_VALUES = 0.5;
const PROGRESS_STEP_GROUPING = 0.1;
const PROGRESS_STEP_HISTOGRAMS = 0.1;
const PROGRESS_STEP_HISTOGRAMS_GROUPS = 0.1;

/**
 * Registers the POST route that runs the "Explain Log Rate Spikes" analysis and
 * streams its results back to the client.
 *
 * The handler returns the streaming response right away and runs the analysis
 * in the background, pushing actions onto the stream as results arrive:
 *   1. fetch field candidates,
 *   2. fetch p-values for the candidates in chunks of 10,
 *   3. optionally (when `request.body.grouping` is truthy) group the significant
 *      field/value pairs and fetch one histogram per group,
 *   4. fetch one histogram per significant field/value pair.
 * Failures of individual queries are logged and pushed to the client as error
 * actions; where the code says so, the analysis carries on with what it has.
 *
 * @param router  Router object; only `router.post(config, handler)` is used.
 * @param license Object exposing `isActivePlatinumLicense`; when that is falsy the
 *                request is answered with `response.forbidden()`.
 * @param logger  Logger; `debug` and `error` are called here and the logger is
 *                also handed to the stream factory and the query helpers.
 */
const defineExplainLogRateSpikesRoute = (router, license, logger) => {
  router.post({
    path: _api.API_ENDPOINT.EXPLAIN_LOG_RATE_SPIKES,
    validate: {
      body: _explain_log_rate_spikes.aiopsExplainLogRateSpikesSchema
    }
  }, async (context, request, response) => {
    if (!license.isActivePlatinumLicense) {
      return response.forbidden();
    }

    let logMessageCounter = 1;

    // Debug logging with a running per-request message number.
    function logDebugMessage(msg) {
      logger.debug(`Explain Log Rate Spikes #${logMessageCounter}: ${msg}`);
      logMessageCounter++;
    }

    logDebugMessage('Starting analysis.');
    const groupingEnabled = !!request.body.grouping;
    const client = (await context.core).elasticsearch.client.asCurrentUser;
    // NOTE(review): `controller.signal` is not handed to any query in this file, so
    // an abort only takes effect through the `shouldStop` checks below.
    const controller = new AbortController();
    let isRunning = false;
    let loaded = 0;
    let shouldStop = false;
    // Handle of the pending ping timer so it can be cancelled once the stream ends.
    let pingTimeout;

    request.events.aborted$.subscribe(() => {
      logDebugMessage('aborted$ subscription trigger.');
      shouldStop = true;
      controller.abort();
    });
    request.events.completed$.subscribe(() => {
      logDebugMessage('completed$ subscription trigger.');
      shouldStop = true;
      controller.abort();
    });
    const {
      end: streamEnd,
      push,
      responseWithHeaders
    } = (0, _aiopsUtils.streamFactory)(request.headers, logger, request.body.compressResponse, request.body.flushFix);

    // Pushes a ping action every PING_FREQUENCY ms for as long as the analysis runs.
    function pushPingWithTimeout() {
      pingTimeout = setTimeout(() => {
        if (isRunning) {
          logDebugMessage('Ping message.');
          push((0, _explain_log_rate_spikes.pingAction)());
          pushPingWithTimeout();
        }
      }, PING_FREQUENCY);
    }

    // Stops the ping loop and closes the stream.
    function end() {
      isRunning = false;
      // Without this the timer (and everything it closes over) would linger
      // until it fires, only to find `isRunning` already false.
      clearTimeout(pingTimeout);
      logDebugMessage('Ending analysis.');
      streamEnd();
    }

    // Pushes a loading state update carrying the current `loaded` progress.
    function pushLoadingState(loadingState) {
      push((0, _explain_log_rate_spikes.updateLoadingStateAction)({
        ccsWarning: false,
        loaded,
        loadingState
      }));
    }

    // Reports 100% progress with the "Done." message, then closes the stream.
    function endWithUpdatedLoadingState() {
      push((0, _explain_log_rate_spikes.updateLoadingStateAction)({
        ccsWarning: false,
        loaded: 1,
        loadingState: _i18n.i18n.translate('xpack.aiops.explainLogRateSpikes.loadingState.doneMessage', {
          defaultMessage: 'Done.'
        })
      }));
      end();
    }

    // Pushes an error action with message `m` to the client.
    function pushError(m) {
      logDebugMessage('Push error.');
      push((0, _explain_log_rate_spikes.addErrorAction)(m));
    }

    function pushHistogramDataLoadingState() {
      pushLoadingState(_i18n.i18n.translate('xpack.aiops.explainLogRateSpikes.loadingState.loadingHistogramData', {
        defaultMessage: 'Loading histogram data.'
      }));
    }

    // Fetches the date histogram of the documents matching `histogramQuery`, reusing
    // the interval and the first two `stats` entries of the overall time series as
    // `interval`/`min`/`max` of the requested field.
    async function fetchFilteredTimeSeries(histogramQuery, overallTimeSeries) {
      const histograms = await (0, _mlAggUtils.fetchHistogramsForFields)(client, request.body.index, histogramQuery,
      // fields
      [{
        fieldName: request.body.timeFieldName,
        type: _fieldTypes.KBN_FIELD_TYPES.DATE,
        interval: overallTimeSeries.interval,
        min: overallTimeSeries.stats[0],
        max: overallTimeSeries.stats[1]
      }],
      // samplerShardSize
      -1, undefined);
      return histograms[0];
    }

    // Combines the overall time series with a filtered one: for every overall bucket
    // it reports the filtered doc count (0 when the filtered series has no bucket with
    // the same `key_as_string`) and the remaining overall doc count, floored at 0.
    function buildHistogram(overallTimeSeries, filteredTimeSeries) {
      return overallTimeSeries.data.map(o => {
        const match = filteredTimeSeries.data.find(d1 => d1.key_as_string === o.key_as_string);
        const current = match !== null && match !== undefined ? match : {
          doc_count: 0
        };
        return {
          key: o.key,
          key_as_string: o.key_as_string !== null && o.key_as_string !== undefined ? o.key_as_string : '',
          doc_count_change_point: current.doc_count,
          doc_count_overall: Math.max(0, o.doc_count - current.doc_count)
        };
      });
    }

    async function runAnalysis() {
      try {
        isRunning = true;
        logDebugMessage('Reset.');
        push((0, _explain_log_rate_spikes.resetAction)());
        pushPingWithTimeout();
        logDebugMessage('Load field candidates.');
        pushLoadingState(_i18n.i18n.translate('xpack.aiops.explainLogRateSpikes.loadingState.loadingFieldCandidates', {
          defaultMessage: 'Loading field candidates.'
        }));
        let fieldCandidates;

        try {
          fieldCandidates = await (0, _fetch_field_candidates.fetchFieldCandidates)(client, request.body);
        } catch (e) {
          logger.error(`Failed to fetch field candidates, got: \n${e.toString()}`);
          pushError(`Failed to fetch field candidates.`);
          end();
          return;
        }

        loaded += LOADED_FIELD_CANDIDATES;
        pushLoadingState(_i18n.i18n.translate('xpack.aiops.explainLogRateSpikes.loadingState.identifiedFieldCandidates', {
          defaultMessage: 'Identified {fieldCandidatesCount, plural, one {# field candidate} other {# field candidates}}.',
          values: {
            fieldCandidatesCount: fieldCandidates.length
          }
        }));

        if (fieldCandidates.length === 0) {
          endWithUpdatedLoadingState();
          // Nothing left to analyse. Returning here is essential: falling through
          // would reach the "no change points" branch below and end the stream twice.
          return;
        }

        if (shouldStop) {
          end();
          return;
        }

        const changePoints = [];
        const chunkSize = 10;
        let chunkCount = 0;
        const fieldCandidatesChunks = (0, _lodash.chunk)(fieldCandidates, chunkSize);
        logDebugMessage('Fetch p-values.');

        for (const fieldCandidatesChunk of fieldCandidatesChunks) {
          chunkCount++;
          logDebugMessage(`Fetch p-values. Chunk ${chunkCount} of ${fieldCandidatesChunks.length}`);
          let pValues;

          try {
            pValues = await (0, _fetch_change_point_p_values.fetchChangePointPValues)(client, request.body, fieldCandidatesChunk, logger, pushError);
          } catch (e) {
            logger.error(`Failed to fetch p-values for ${JSON.stringify(fieldCandidatesChunk)}, got: \n${e.toString()}`);
            pushError(`Failed to fetch p-values for ${JSON.stringify(fieldCandidatesChunk)}.`);
            // Still continue the analysis even if chunks of p-value queries fail.
            continue;
          }

          loaded += 1 / fieldCandidatesChunks.length * PROGRESS_STEP_P_VALUES;

          if (pValues.length > 0) {
            changePoints.push(...pValues);
            push((0, _explain_log_rate_spikes.addChangePointsAction)(pValues));
          }

          pushLoadingState(_i18n.i18n.translate('xpack.aiops.explainLogRateSpikes.loadingState.identifiedFieldValuePairs', {
            defaultMessage: 'Identified {fieldValuePairsCount, plural, one {# significant field/value pair} other {# significant field/value pairs}}.',
            values: {
              fieldValuePairsCount: changePoints.length
            }
          }));

          if (shouldStop) {
            logDebugMessage('shouldStop fetching p-values.');
            end();
            return;
          }
        }

        if (changePoints.length === 0) {
          logDebugMessage('Stopping analysis, did not find change points.');
          endWithUpdatedLoadingState();
          return;
        }

        const histogramFields = [{
          fieldName: request.body.timeFieldName,
          type: _fieldTypes.KBN_FIELD_TYPES.DATE
        }];
        logDebugMessage('Fetch overall histogram.');
        let overallTimeSeries;

        try {
          overallTimeSeries = (await (0, _mlAggUtils.fetchHistogramsForFields)(client, request.body.index, {
            match_all: {}
          },
          // fields
          histogramFields,
          // samplerShardSize
          -1, undefined))[0];
        } catch (e) {
          logger.error(`Failed to fetch the overall histogram data, got: \n${e.toString()}`);
          // Still continue the analysis even if loading the overall histogram fails.
          pushError(`Failed to fetch overall histogram data.`);
        }

        if (groupingEnabled) {
          logDebugMessage('Group results.');
          pushLoadingState(_i18n.i18n.translate('xpack.aiops.explainLogRateSpikes.loadingState.groupingResults', {
            defaultMessage: 'Transforming significant field/value pairs into groups.'
          }));

          // To optimize the `frequent_items` query, we identify duplicate change points by count attributes.
          // Note this is a compromise and not 100% accurate because there could be change points that
          // have the exact same counts but still don't co-occur.
          const duplicateIdentifier = ['doc_count', 'bg_count', 'total_doc_count', 'total_bg_count'];

          // These are the deduplicated change points we pass to the `frequent_items` aggregation.
          const deduplicatedChangePoints = (0, _fetch_frequent_items.dropDuplicates)(changePoints, duplicateIdentifier);

          // We use the grouped change points to later repopulate
          // the `frequent_items` result with the missing duplicates.
          const groupedChangePoints = (0, _fetch_frequent_items.groupDuplicates)(changePoints, duplicateIdentifier).filter(g => g.group.length > 1);

          // Looks up the duplicates group (if any) that contains the given field/value pair.
          const findDuplicates = (fieldName, fieldValue) => groupedChangePoints.find(d => d.group.some(dg => dg.fieldName === fieldName && dg.fieldValue === fieldValue));

          try {
            const {
              fields,
              df
            } = await (0, _fetch_frequent_items.fetchFrequentItems)(client, request.body.index, JSON.parse(request.body.searchQuery), deduplicatedChangePoints, request.body.timeFieldName, request.body.deviationMin, request.body.deviationMax, logger, pushError);

            if (fields.length > 0 && df.length > 0) {
              // The way the `frequent_items` aggregations works could return item sets that include
              // field/value pairs that are not part of the original list of significant change points.
              // This cleans up groups and removes those unrelated field/value pairs.
              const filteredDf = df.map(fi => {
                fi.set = Object.entries(fi.set).reduce((set, [field, value]) => {
                  if (changePoints.some(cp => cp.fieldName === field && cp.fieldValue === value)) {
                    set[field] = value;
                  }

                  return set;
                }, {});
                fi.size = Object.keys(fi.set).length;
                return fi;
              }).filter(fi => fi.size > 1);

              // `frequent_items` returns lot of different small groups of field/value pairs that co-occur.
              // The following steps analyse these small groups, identify overlap between these groups,
              // and then summarize them in larger groups where possible.
              // Get a tree structure based on `frequent_items`.
              const {
                root
              } = (0, _get_simple_hierarchical_tree.getSimpleHierarchicalTree)(filteredDf, true, false, fields);

              // Each leaf of the tree will be a summarized group of co-occurring field/value pairs.
              const treeLeaves = (0, _get_simple_hierarchical_tree.getSimpleHierarchicalTreeLeaves)(root, []);

              // To be able to display a more cleaned up results table in the UI, we identify field/value pairs
              // that occur in multiple groups. This will allow us to highlight field/value pairs that are
              // unique to a group in a better way. This step will also re-add duplicates we identified in the
              // beginning and didn't pass on to the `frequent_items` agg.
              const fieldValuePairCounts = (0, _get_simple_hierarchical_tree.getFieldValuePairCounts)(treeLeaves);
              const changePointGroups = (0, _get_simple_hierarchical_tree.markDuplicates)(treeLeaves, fieldValuePairCounts).map(g => {
                const group = [...g.group];

                for (const groupItem of g.group) {
                  const {
                    duplicate
                  } = groupItem;
                  const duplicates = findDuplicates(groupItem.fieldName, groupItem.fieldValue);

                  if (duplicates !== undefined) {
                    group.push(...duplicates.group.map(d => ({
                      fieldName: d.fieldName,
                      fieldValue: d.fieldValue,
                      duplicate
                    })));
                  }
                }

                return { ...g,
                  group
                };
              });

              // Some field/value pairs might not be part of the `frequent_items` result set, for example
              // because they don't co-occur with other field/value pairs or because of the limits we set on the query.
              // In this next part we identify those missing pairs and add them as individual groups.
              const missingChangePoints = deduplicatedChangePoints.filter(cp => {
                return !changePointGroups.some(cpg => {
                  return cpg.group.some(d => d.fieldName === cp.fieldName && d.fieldValue === cp.fieldValue);
                });
              });
              changePointGroups.push(...missingChangePoints.map(({
                fieldName,
                fieldValue,
                doc_count: docCount,
                pValue
              }) => {
                const duplicates = findDuplicates(fieldName, fieldValue);

                if (duplicates !== undefined) {
                  return {
                    id: `${(0, _mlStringHash.stringHash)(JSON.stringify(duplicates.group.map(d => ({
                      fieldName: d.fieldName,
                      fieldValue: d.fieldValue
                    }))))}`,
                    group: duplicates.group.map(d => ({
                      fieldName: d.fieldName,
                      fieldValue: d.fieldValue,
                      duplicate: false
                    })),
                    docCount,
                    pValue
                  };
                }

                return {
                  id: `${(0, _mlStringHash.stringHash)(JSON.stringify({
                    fieldName,
                    fieldValue
                  }))}`,
                  group: [{
                    fieldName,
                    fieldValue,
                    duplicate: false
                  }],
                  docCount,
                  pValue
                };
              }));

              // Finally, we'll find out if there's at least one group with at least two items,
              // only then will we return the groups to the clients and make the grouping option available.
              // (`some` instead of `Math.max(...lengths)` avoids spreading an unbounded array into arguments.)
              if (changePointGroups.some(g => g.group.length > 1)) {
                push((0, _explain_log_rate_spikes.addChangePointsGroupAction)(changePointGroups));
              }

              loaded += PROGRESS_STEP_GROUPING;
              pushHistogramDataLoadingState();
              logDebugMessage('Fetch group histograms.');

              // Group histograms are derived from the overall one, so skip them if it failed to load.
              if (overallTimeSeries !== undefined) {
                await (0, _std.asyncForEach)(changePointGroups, async cpg => {
                  const histogramQuery = {
                    bool: {
                      filter: cpg.group.map(d => ({
                        term: {
                          [d.fieldName]: d.fieldValue
                        }
                      }))
                    }
                  };
                  let cpgTimeSeries;

                  try {
                    cpgTimeSeries = await fetchFilteredTimeSeries(histogramQuery, overallTimeSeries);
                  } catch (e) {
                    logger.error(`Failed to fetch the histogram data for group #${cpg.id}, got: \n${e.toString()}`);
                    pushError(`Failed to fetch the histogram data for group #${cpg.id}.`);
                    return;
                  }

                  const histogram = buildHistogram(overallTimeSeries, cpgTimeSeries);
                  push((0, _explain_log_rate_spikes.addChangePointsGroupHistogramAction)([{
                    id: cpg.id,
                    histogram
                  }]));
                });
              }
            }
          } catch (e) {
            logger.error(`Failed to transform field/value pairs into groups, got: \n${e.toString()}`);
            pushError(`Failed to transform field/value pairs into groups.`);
          }
        }

        loaded += PROGRESS_STEP_HISTOGRAMS_GROUPS;
        logDebugMessage('Fetch field/value histograms.');

        // Time series filtered by each significant field/value pair; these are derived
        // from the overall histogram, so they are skipped if that one failed to load.
        if (overallTimeSeries !== undefined) {
          await (0, _std.asyncForEach)(changePoints, async cp => {
            const histogramQuery = {
              bool: {
                filter: [{
                  term: {
                    [cp.fieldName]: cp.fieldValue
                  }
                }]
              }
            };
            let cpTimeSeries;

            try {
              cpTimeSeries = await fetchFilteredTimeSeries(histogramQuery, overallTimeSeries);
            } catch (e) {
              logger.error(`Failed to fetch the histogram data for field/value pair "${cp.fieldName}:${cp.fieldValue}", got: \n${e.toString()}`);
              pushError(`Failed to fetch the histogram data for field/value pair "${cp.fieldName}:${cp.fieldValue}".`);
              return;
            }

            const histogram = buildHistogram(overallTimeSeries, cpTimeSeries);
            const {
              fieldName,
              fieldValue
            } = cp;
            loaded += 1 / changePoints.length * PROGRESS_STEP_HISTOGRAMS;
            pushHistogramDataLoadingState();
            push((0, _explain_log_rate_spikes.addChangePointsHistogramAction)([{
              fieldName,
              fieldValue,
              histogram
            }]));
          });
        }

        endWithUpdatedLoadingState();
      } catch (e) {
        logger.error(`Explain log rate spikes analysis failed to finish, got: \n${e.toString()}`);
        pushError(`Explain log rate spikes analysis failed to finish.`);
        end();
      }
    }

    // Do not call this using `await` so it will run asynchronously while we return the stream already.
    runAnalysis();
    return response.ok(responseWithHeaders);
  });
};

exports.defineExplainLogRateSpikesRoute = defineExplainLogRateSpikesRoute;