import { DataArray } from "../data/DataChunk";
import { histogram } from "../util/Utils";
import { ColumnDetect, detectColumnType } from "./DetectColumnTypes";
import { NameValueParseParams, parseNameValue } from "./ParseNameValue";
import { NameValueParser } from "./ParseTabular";
import { splitTextByLine } from "./SplitTextByLine";
import { transposeRows } from "./TransposeRows";

/**
 * Line-oriented format settings for name=value text.
 *
 * NOTE(review): this interface is not referenced by name in the visible part of
 * this file; its fields mirror the `parseOptions` objects built in
 * detectNameValue — confirm that is its intended use.
 */
export interface NameValueFormat {
  /** line break string (detectNameValue takes this value from splitTextByLine) */
  lineBreak: string;
  /** number of header lines — presumably lines to skip before the data rows; TODO confirm */
  headerLines: number;
}

/**
 * Try to interpret a text as lines of name=value pairs.
 *
 * Every non-empty line is scanned for name=value pairs, the most frequent set
 * of names becomes the column labels, and a data parser is detected for the
 * values of each column.
 *
 * Consistency score:
 *   (rows with the same label set / total rows) * number of labels in that label set
 *
 * @param text - the text to examine
 * @param ignoreLastLine - forwarded unchanged to splitTextByLine
 * @returns a parser description for the detected format. When the text has no
 *   non-empty lines the result has score 0, no columns, and parse functions
 *   that return an empty array. Returns undefined when detectValueTypes
 *   reports no result.
 */
export function detectNameValue(
  text: string,
  ignoreLastLine = false
): NameValueParser | undefined {
  const split = splitTextByLine(text, ignoreLastLine);
  const lineBreak = split.lineBreak;
  const rows = split.nonEmpty;

  // nothing to inspect: hand back a parser that never produces data
  if (rows.length === 0) {
    return {
      kind: "nameValue",
      score: 0,
      columns: [],
      parse: () => [],
      parseBody: () => [],
    };
  }

  const pairsPerRow = rows.map(nameEqValue);
  const keyChoice = chooseKeys(pairsPerRow);
  const valueTypes = detectValueTypes(keyChoice.chosenKeys, pairsPerRow);
  if (valueTypes === undefined) {
    return undefined;
  }

  // one column per chosen key, paired with the data parser detected for its values
  const columns = keyChoice.chosenKeys.map((label, index) => ({
    label,
    dataParser: valueTypes.columnDetects[index].dataParser,
  }));

  // build a parse function bound to the detected columns and a header line count
  const parserWithHeaderLines = (headerLines: number) => {
    const parseOptions = { lineBreak, headerLines };
    const params: NameValueParseParams = { columns, parseOptions };
    return (input: string): DataArray[] => parseNameValue(input, params);
  };

  return {
    kind: "nameValue",
    columns,
    score: keyChoice.score,
    // parse uses the detected header line count
    parse: parserWithHeaderLines(valueTypes.headerLines),
    // parseBody uses a header line count of 0
    parseBody: parserWithHeaderLines(0),
  };
}

/** the keys found in one row, plus a single string that identifies that key set */
interface CompositeKey {
  /** the row's keys, sorted and joined with "/" (used to group rows with the same key set) */
  compositeKey: string;
  /** the row's keys, in sorted order */
  keys: string[];
}

/**
 * Return the most frequent set of keys from the key=value pairs in rows.
 *
 * Rows are grouped by their sorted key names, so two rows with the same keys
 * in a different order fall into the same group. When several groups share the
 * highest count, the first such group in histogram()'s output order is kept.
 *
 * @param kvRows - the key=value pairs found in each row
 * @returns chosenKeys: the keys of the most frequent group, in sorted order;
 *   score: (rows in that group / total rows) * number of keys in the group.
 *   For an empty `kvRows` the result is no keys and a score of 0.
 */
function chooseKeys(kvRows: KeyValue[][]): { chosenKeys: string[]; score: number } {
  // With no rows there is nothing to choose from. Without this guard reduce()
  // (called with no initial value) throws on the empty group list, and the
  // score would divide by zero.
  if (kvRows.length === 0) {
    return { chosenKeys: [], score: 0 };
  }

  // composite string for all keys in each row
  const keysRows: CompositeKey[] = kvRows.map((row) => {
    // sorting makes the composite independent of the key order within the row;
    // the sorted array is also what ends up in chosenKeys
    const keys = row.map((kv) => kv.key).sort();
    return { compositeKey: keys.join("/"), keys };
  });

  // histogram() is expected to yield [value, count] pairs
  const groups = histogram(keysRows.map((ck) => ck.compositeKey));
  const [maxKey, maxFrequency] = groups.reduce((prev, current) => {
    const [, prevCount] = prev;
    const [, curCount] = current;
    // strict comparison: an earlier group wins ties
    return curCount > prevCount ? current : prev;
  });

  const chosenKeys = keysRows.find((ck) => ck.compositeKey === maxKey)?.keys ?? [];
  const score = (maxFrequency / kvRows.length) * chosenKeys.length;

  return { chosenKeys, score };
}

/**
 * Detect a column type for the values of each chosen key.
 *
 * Builds one row of values per input row (ordered like `chosenKeys`), transposes
 * those rows into columns and runs detectColumnType on every column.
 *
 * @param chosenKeys - the keys to extract from each row
 * @param kvRows - the key=value pairs found in each row
 * @returns columnDetects: the detection result for each column;
 *   headerLines: the largest headerLines value reported by any column (0 if
 *   there are no columns). The declared return type allows undefined, but this
 *   implementation always returns an object.
 */
function detectValueTypes(
  chosenKeys: string[],
  kvRows: KeyValue[][]
): { columnDetects: ColumnDetect[]; headerLines: number } | undefined {
  const valueRows = kvRows.map((row) =>
    chosenKeys.map((key) => {
      // the first pair with a matching key wins; a missing key becomes ""
      const pair = row.find((kv) => kv.key === key);
      return pair?.value || "";
    })
  );

  const columnDetects = transposeRows(valueRows).map(detectColumnType);

  let headerLines = 0;
  for (const detect of columnDetects) {
    headerLines = Math.max(detect.headerLines, headerLines);
  }

  return { columnDetects, headerLines };
}

/**
 * Matches one name=value pair. Both the name and the value are non-empty runs
 * of letters, digits, '.', '_' or '-', with nothing between them and the '='.
 */
const nvRegex = /([a-zA-Z0-9._-]+)=([a-zA-Z0-9._-]+)/g;

/** one name=value pair found in a line of text */
interface KeyValue {
  key: string;
  value: string;
}

/**
 * Find every name=value pair in a line of text.
 *
 * @param line - the text to scan
 * @returns the pairs in the order they appear in the line; an empty array when
 *   the line contains none
 */
export function nameEqValue(line: string): KeyValue[] {
  const pairs: KeyValue[] = [];
  // capture group 1 is the name, group 2 is the value
  for (const [, key, value] of line.matchAll(nvRegex)) {
    pairs.push({ key, value });
  }
  return pairs;
}
