import { dsert } from "../util/DebugLog";
import { histogram } from "../util/Utils";
import { mergeLinePatterns } from "./MergeLinePatterns";

/**
 * Pattern of spaces and columns in a line, used for ascii table detection.
 *
 * `linePattern` produces one entry in `starts` and one in `ends` per column,
 * so entry i of each array describes column i.
 */
export interface LinePattern {
  /** index in the line at which each column starts */
  starts: number[];
  /** index in the line at which each column ends */
  ends: number[];
}

/** A column pattern selected by `choosePattern`, together with its score. */
export interface ChosenPattern {
  /** the selected pattern, possibly merged with the pattern of the line above it */
  pattern: LinePattern;
  /**
   * Ranking value for the selection: grows with the fraction of lines that
   * share the pattern and with the number of positions in it, plus a bonus
   * of 1 when a header line was merged in.
   */
  score: number;
}

/**
 * Return the most frequent pattern, possibly merged with a header pattern.
 *
 * The column starts and the column ends of all lines are ranked separately;
 * whichever set of positions scores higher decides the representative line
 * (ties favor the ends). That line's pattern is then merged with the line
 * above it when the two are compatible.
 *
 * @param patterns one pattern per line, in line order
 * @returns the chosen pattern and its score; for an empty `patterns` array an
 *   empty pattern with score 0
 */
export function choosePattern(patterns: LinePattern[]): ChosenPattern {
  // No lines means no table. Report that explicitly instead of letting the
  // frequency search fail on empty input.
  if (patterns.length === 0) {
    return { pattern: { starts: [], ends: [] }, score: 0 };
  }

  const byStarts = mostFrequentPositions(patterns.map((p) => p.starts));
  const byEnds = mostFrequentPositions(patterns.map((p) => p.ends));

  const best = byEnds.score >= byStarts.score ? byEnds : byStarts;
  return mixHeaderPattern(best.rowDex, patterns, best.score);
}

/**
 * Return the pattern of columns and spaces in a single line.
 *
 * A column is a run of text bordered by at least two consecutive spaces (or
 * by the start/end of the line). A single space does not split a column.
 * Only the space character counts as spacing; every other character,
 * including a tab, is column text.
 *
 * @param line the text of one line
 * @returns `starts[i]` is the index of the first character of column i and
 *   `ends[i]` is the index just past its last character
 */
export function linePattern(line: string): LinePattern {
  const starts: number[] = [];
  const ends: number[] = [];
  const length = line.length;
  let spaces = 0; // length of the current run of consecutive spaces
  let inColumn = false;

  for (let i = 0; i < length; i++) {
    if (line[i] === " ") {
      spaces++;
      if (spaces === 2 && inColumn) {
        // second space in a row: the column stopped just before the first one
        ends.push(i - 1);
        inColumn = false;
      }
    } else {
      if (!inColumn) {
        starts.push(i);
        inColumn = true;
      }
      spaces = 0;
    }
  }

  // Close a column that runs to the end of the line. At most one trailing
  // space can be pending here (two would already have closed the column);
  // exclude it so the end matches what a wider gap would have produced.
  if (inColumn) {
    ends.push(length - spaces);
  }
  dsert(starts.length === ends.length);

  return { starts, ends };
}

/**
 * Return the most frequently used pattern of positions.
 *
 * Rows are compared by their full list of positions, so two rows match only
 * when every position is identical.
 *
 * @param rows one list of positions per line
 * @returns `rowDex` is the index of the first row that has the most common
 *   list of positions, or -1 when `rows` is empty. `score` is the fraction of
 *   rows sharing that list multiplied by the number of positions in it, or 0
 *   when `rows` is empty.
 */
export function mostFrequentPositions(rows: number[][]): {
  score: number;
  rowDex: number;
} {
  // reduce() without an initial value throws on an empty array, and the score
  // would divide by zero, so answer the empty case directly.
  if (rows.length === 0) {
    return { score: 0, rowDex: -1 };
  }

  // a string key per row lets rows be grouped by value
  const posPatterns = rows.map((pos) => pos.join(" "));
  // NOTE(review): histogram is consumed here as [value, count] pairs — confirm
  const groups = histogram(posPatterns);
  const [maxPattern, maxFrequency] = groups.reduce((prev, current) => {
    const [, prevCount] = prev;
    const [, curCount] = current;
    // strict comparison: on a tie the earlier group is kept
    return curCount > prevCount ? current : prev;
  });
  const rowDex = posPatterns.findIndex((p) => p === maxPattern);
  const positions = rowDex >= 0 ? rows[rowDex] : [];
  const score = (maxFrequency / rows.length) * positions.length;

  return { score, rowDex };
}

/**
 * Select the pattern at `row` and, when possible, fold in the pattern of the
 * line directly above it, which may be a header whose spacing lines up well
 * enough for one merged pattern to serve both lines.
 *
 * A successful merge returns the merged pattern and raises the score by 1;
 * otherwise the pattern at `row` is returned with the score unchanged.
 */
function mixHeaderPattern(
  row: number,
  patterns: LinePattern[],
  score: number
): ChosenPattern {
  const dataPattern = patterns[row];
  const unmerged = { pattern: dataPattern, score };

  // the first line has nothing above it that could serve as a header
  if (row <= 0) {
    return unmerged;
  }

  const merged = mergeLinePatterns(dataPattern, patterns[row - 1]);
  return merged ? { pattern: merged, score: score + 1 } : unmerged;
}
