import { CsvFormat } from "../csv/CsvFormat";
import { ParsedChunk, parseDsvFile } from "../csv/PapaParsing";
import { ChunkFn, ColumnChunk, constructChunks } from "../data/DataChunk";
import { chooseStreamParser } from "./ChooseStreamParser";
import { TabularColumn } from "./ParseTabular";
import { parseColumns } from "./ParseColumns";
import { fitTableToWidth } from "./TransposeRows";

/** Load a tabular file asynchronously, automatically guessing the format and data types.
 * dsv files are loaded and parsed in chunks of a few thousand rows
 * at a time; other formats are loaded in their entirety before parsing.
 *
 * @param file the file to load.
 * @param chunkFn is called with the columns for each chunk of the file and a byte count.
 *   For dsv files the count is the one reported by the dsv parser for that chunk;
 *   for other formats chunkFn is called once with the size of the whole file.
 * @returns a promise for the completion of loading. The promise rejects if no
 *   parser can be chosen for the file.
 */
export async function loadTabularFile(file: File, chunkFn: ChunkFn): Promise<void> {
  const byteStream = file.stream();
  const parser = await chooseStreamParser(byteStream as unknown as ReadableStream);

  if (!parser) {
    throw new Error(`unable to parse ${file.name}`);
  }

  if (parser.kind === "csv") {
    // we have a streaming loader for dsv format
    const { format, columns } = parser;
    return loadDsvFile(file, format, columns, chunkFn);
  }

  // other formats we load all at once
  const fullText = await file.text();
  const dataArray = parser.parse(fullText);
  const chunks = constructChunks(parser.columns, dataArray);
  // Report progress in bytes, as the dsv path does. The length of the decoded
  // text counts UTF-16 code units and under-reports files with multi-byte characters.
  return chunkFn(chunks, file.size);
}

/** Incrementally load a dsv file via papaparse, calling chunkFn for each section as it's read.
 *
 * @param file the dsv file to load.
 * @param format the dsv format; its headerLines count of leading rows is skipped, not parsed as data.
 * @param columns descriptions of the columns used to parse each section's rows.
 * @param chunkFn is called with the parsed columns and the bytesRead value of each section.
 * @returns the promise returned by parseDsvFile.
 */
function loadDsvFile(
  file: File,
  format: CsvFormat,
  columns: TabularColumn[],
  chunkFn: ChunkFn
): Promise<void> {
  // Header rows that still need to be skipped. Tracked as a count rather than a
  // flag so a first section shorter than the header doesn't leak header rows
  // into the data of the following section.
  let headerRowsLeft = format.headerLines;

  return parseDsvFile(file, format, handleRows);

  function handleRows(parsedChunk: ParsedChunk): Promise<void> {
    const { rows, bytesRead } = parsedChunk;
    let dataRows = rows;
    if (headerRowsLeft > 0) {
      const skipCount = Math.min(headerRowsLeft, rows.length);
      dataRows = rows.slice(skipCount);
      headerRowsLeft -= skipCount;
    }
    const chunks = parseToChunks(dataRows, columns);
    return chunkFn(chunks, bytesRead);
  }
}

/** Convert a table of raw string rows into column chunks.
 *
 * @param rowTable rows of string cells; an empty table produces no chunks.
 * @param csvInfo descriptions of the columns. The number of descriptions is the
 *   width passed to fitTableToWidth before the columns are parsed.
 * @returns the chunks constructed from the parsed columns, or an empty array
 *   when there are no rows.
 */
function parseToChunks(rowTable: string[][], csvInfo: TabularColumn[]): ColumnChunk[] {
  const hasRows = rowTable.length !== 0;
  if (!hasRows) {
    return [];
  }

  const columnCount = csvInfo.length;
  const parsedColumns = parseColumns(fitTableToWidth(rowTable, columnCount), csvInfo);
  return constructChunks(csvInfo, parsedColumns);
}
