import _ from "lodash";
import { readStreamUntil } from "../data/ReadStream";
import { chooseTabularParser, TabularParser } from "./ParseTabular";

const enoughRegularLines = 10; // read at least this many non-comment lines if possible
const maxRegularLines = 100; // trim if we have more than this many non-comment lines
const maxDetectSize = 2 ** 18; // don't read more than 256K

/**
 * Choose a tabular parser by sampling text from the start of a stream.
 *
 * The stream is tee'd and the sample is read from the first branch.
 *
 * NOTE(review): tee() locks `byteStream` and the second branch is not kept here,
 * so callers presumably must not expect to read `byteStream` afterwards -- confirm.
 *
 * @param byteStream stream of bytes to sample
 * @returns the result of chooseTabularParser on the sampled text (may be undefined)
 */
export async function chooseStreamParser(
  byteStream: ReadableStream
): Promise<TabularParser | undefined> {
  // LATER fix this compile problem.
  // The cast is here because typescript sees the nodejs type for Blob instead of the
  // browser's definition. File extends Blob, and the nodejs stream() returns a node
  // type that doesn't support tee().
  const branches = (byteStream as unknown as ReadableStream).tee();

  const sample = await readBeyondComments(branches[0]);
  // branches[0].cancel();

  // always ignore the last line when detecting on a stream
  const ignoreLastLine = true;
  return chooseTabularParser(sample, ignoreLastLine);
}

/**
 * Read enough of the stream to get past a long comment section at the top of a file.
 *
 * Buffers are accumulated until at least `enoughRegularLines` non-comment lines
 * have been counted or `maxDetectSize` bytes have been read.
 *
 * @param stream byte stream to sample
 * @returns the collected lines (after trimExtraLines) joined with "\n"
 */
async function readBeyondComments(stream: ReadableStream<Uint8Array>): Promise<string> {
  let progress: ReadProgress = { bytesRead: 0, lines: [], nonComment: 0 };

  /**
   * Fold the next buffer into the running progress.
   * Resolves true once enough has been collected (presumably the signal for
   * readStreamUntil to stop reading -- confirm against its definition).
   */
  const onBuffer = async (chunk: Uint8Array): Promise<boolean> => {
    progress = combineProgress(progress, linesFromBuffer(chunk));
    if (progress.nonComment >= enoughRegularLines) {
      return true;
    }
    return progress.bytesRead >= maxDetectSize;
  };

  await readStreamUntil(stream, onBuffer);

  return trimExtraLines(progress).join("\n");
}

/**
 * Cap the number of collected lines so the detector isn't overworked.
 *
 * When more than `maxRegularLines` non-comment lines were counted, keep only the
 * first `maxRegularLines` plus the number of comment lines (total lines minus
 * the non-comment count). Otherwise the lines are returned untouched.
 */
function trimExtraLines(collected: ReadProgress): string[] {
  if (collected.nonComment <= maxRegularLines) {
    return collected.lines;
  }
  const commentCount = collected.lines.length - collected.nonComment;
  return collected.lines.slice(0, maxRegularLines + commentCount);
}

/** running totals gathered while sampling lines from the start of a stream */
interface ReadProgress {
  /** total length in bytes of the buffers processed so far */
  bytesRead: number;
  /** lines counted after the leading run of blank or '#' lines in each buffer, summed over buffers */
  nonComment: number;
  /** decoded text split on "\n"; the final entry may be a partial line awaiting the next buffer */
  lines: string[];
}

/**
 * Collect lines and count likely non-comment lines from a buffer.
 *
 * The buffer is decoded as text and split on "\n". The leading run of lines that
 * are blank (empty or whitespace-only) or that start with '#' is treated as
 * comments; every line after that run counts as non-comment.
 *
 * NOTE(review): each buffer is decoded on its own, so a multi-byte character split
 * across two buffers will not decode cleanly -- acceptable for detection, presumably.
 *
 * @param buffer bytes read from the stream
 * @returns the split lines, the non-comment count, and the buffer's byte length
 */
function linesFromBuffer(buffer: Uint8Array): ReadProgress {
  const bytesRead = buffer.length;
  const text = new TextDecoder().decode(buffer);
  const lines = text.split("\n");
  // trim() so that the "\r" left on blank lines by CRLF line endings still counts
  // as blank and doesn't end the leading comment run early
  const commentLines = _.takeWhile(lines, (l) => l.trim().length === 0 || l[0] === "#");
  const nonComment = lines.length - commentLines.length;
  return { lines, nonComment, bytesRead };
}

/**
 * Merge the progress of two consecutive reads into one.
 *
 * The last line of `a` and the first line of `b` are the two halves of the line
 * that straddles the buffer boundary, so they are joined into a single line.
 * This applies even when one half is the empty string (a buffer that ends or
 * starts exactly on a newline); otherwise a spurious blank line would appear
 * at the seam. If either side has no lines at all, the lists are concatenated.
 *
 * Counts are simply summed, so a line split across the seam may be counted in
 * both `nonComment` values.
 *
 * @param a progress accumulated so far
 * @param b progress from the next buffer
 * @returns combined lines, non-comment count and byte count
 */
function combineProgress(a: ReadProgress, b: ReadProgress): ReadProgress {
  const lastA = a.lines[a.lines.length - 1];
  const firstB = b.lines[0];

  let lines: string[];
  // compare against undefined rather than truthiness: "" is a valid half-line
  if (lastA !== undefined && firstB !== undefined) {
    lines = [...a.lines.slice(0, -1), lastA + firstB, ...b.lines.slice(1)];
  } else {
    lines = [...a.lines, ...b.lines];
  }

  return {
    lines,
    nonComment: a.nonComment + b.nonComment,
    bytesRead: a.bytesRead + b.bytesRead,
  };
}
