import { CsvFormat } from "../csv/CsvFormat";
import { detectCsvText } from "../csv/DetectCsvFormat";
import { ColumnChunk, constructChunks, DataArray } from "../data/DataChunk";
import { dlog } from "../util/DebugLog";
import { AsciiTableFormat, detectAsciiTable } from "./DetectAsciiTable";
import { detectNameValue } from "./DetectNameValue";
import { parseDebug } from "./ParseDebug";
import { DataType } from "../data/ColumnFrame";
import { CellParser } from "./DetectCellFormat";
import { detectJsonTable, JsonTableFormat } from "./DetectJsonTable";

/** Description of one column in a detected table (see `TabularParserBase.columns`). */
export interface TabularColumn {
  /** label for the column */
  label: string;
  /**
   * cell parser used for this column.
   * NOTE(review): presumably converts each cell's text to a value -- confirm in DetectCellFormat
   */
  dataParser: CellParser;
  displayType?: DataType; // e.g. if we parse as a number, but should interpret as epoch milliseconds
}
/** Any of the supported table parsers; the `kind` field distinguishes the variants. */
export type TabularParser =
  | CsvParser
  | NameValueParser
  | AsciiTableParser
  | JsonTableParser;

/** Fields shared by every kind of table parser. */
export interface TabularParserBase {
  /** columns of the table; passed to constructChunks along with the parsed data */
  columns: TabularColumn[];

  /**
   * how well this parser matches the text. chooseTabularParser picks the parser
   * with the highest score, and only accepts it if the score is above 0.5
   */
  score: number;

  /** parse tabular text including the header */
  parse: (text: string) => DataArray[];

  /** parse tabular text excluding the header */
  parseBody: (text: string) => DataArray[];
}

/** Table parser tagged `kind: "csv"`, carrying the CsvFormat it uses. */
export interface CsvParser extends TabularParserBase {
  kind: "csv";
  /** NOTE(review): presumably the format found by csv detection -- confirm in DetectCsvFormat */
  format: CsvFormat;
}

/** Table parser tagged `kind: "ascii"`. */
export interface AsciiTableParser extends TabularParserBase {
  kind: "ascii";
  format?: AsciiTableFormat; // exposed for testing
}
/** Table parser tagged `kind: "nameValue"`; it adds no fields beyond the shared base. */
export interface NameValueParser extends TabularParserBase {
  kind: "nameValue";
}

/** Table parser tagged `kind: "json"`, carrying the JsonTableFormat it uses. */
export interface JsonTableParser extends TabularParserBase {
  kind: "json";
  /** NOTE(review): presumably the format found by json table detection -- confirm in DetectJsonTable */
  format: JsonTableFormat;
}

/**
 * Parse fully loaded text that contains tabular data.
 *
 * @param text the complete text, including any header
 * @return column chunks built from the chosen parser's columns and parsed data,
 *   or an empty array if no parser is chosen for the text
 */
export function parseTabularText(text: string): ColumnChunk[] {
  const chosen = chooseTabularParser(text);
  if (!chosen) {
    // nothing matched well enough, so there is no table to return
    return [];
  }

  const rows = chosen.parse(text);
  return constructChunks(chosen.columns, rows);
}

/**
 * Pick the best table parser for a given text.
 *
 * Runs the name-value, ascii table, csv and json detectors on the text and keeps
 * the candidate with the highest score. On equal scores the earlier candidate
 * in the order csv, ascii, name-value, json is kept.
 *
 * @param text text to examine
 * @param ignoreLastLine passed through to the name-value, ascii table and csv
 *   detectors (the json detector does not receive it)
 * @return the best table parser for a given text, or undefined if no parser matches well
 *   (no detector produced a candidate, or the best score is not above 0.5)
 */
export function chooseTabularParser(
  text: string,
  ignoreLastLine = false
): TabularParser | undefined {
  const nv = detectNameValue(text, ignoreLastLine);
  const ascii = detectAsciiTable(text, ignoreLastLine);
  const dsv = detectCsvText(text, ignoreLastLine);
  const json = detectJsonTable(text);
  const parsers = [dsv, ascii, nv, json].filter(
    (p) => p !== undefined
  ) as TabularParser[];

  // Seed the reduce with undefined: the filter above can leave no candidates,
  // and reducing an empty array without an initial value throws a TypeError.
  const bestParser = parsers.reduce<TabularParser | undefined>(
    (best, p) => (best !== undefined && best.score >= p.score ? best : p),
    undefined
  );

  // report for debugging
  if (parseDebug.chooseTabularLog) {
    const scores = parsers.map(({ kind, score }) => ({ kind, score }));
    dlog({ scores });
  }

  // a low score means even the best candidate is not a convincing match
  if (bestParser !== undefined && bestParser.score > 0.5) {
    return bestParser;
  }
  return undefined;
}

/* scoring for the format detectors:

    roughly 1 point per consistently matched column
    penalty for mismatches is to divide by the number of different column counts to the 1.25 power

    score is penalized for likely misuse of quotes
    space separator in .dsv files is also penalized, to not interfere with e.g. name-value detector

*/

/**
 * (for testing) parse a buffer that contains tabular data text.
 *
 * The bytes are decoded with a default TextDecoder (utf-8) and the resulting
 * string is handed to parseTabularText.
 */
export function parseTabularBuffer(buffer: Uint8Array): ColumnChunk[] {
  return parseTabularText(new TextDecoder().decode(buffer));
}
