import Papa, { ParseResult } from "papaparse";
import { ParsedCSV, HeaderCSVRow, CSVRow, CSVError, HeaderEntry, HeaderSegmentsRow, SegmentValue } from "./csvTypes";
import { cloneDeep } from "lodash";
import { formatNoHyphensOrSpaceLowercase, formatNumberWithCommas } from "../stringUtils/stringFormatters";


/**
 * @description arguments accepted by the `parse` function returned from `useCsvParserHook`
 */
interface ParseProps {
    /**
     * @description the raw file to be parsed; its name is checked for a `.csv` extension
     *              before the contents are handed to PapaParse
     */
    rawFile: File;

    /**
     * @description the expected CSV header definition; the parser deep-copies it and fills in
     *              the column indexes on the copy, so the object passed in is left untouched
     */
    csvHeaders: HeaderCSVRow;
}

/**
 * @description Parser Object responsible for providing an unvalidated parsed csv for validation
 */
export const useCsvParserHook = () => {
    /**
     * @description the maximum number of rows an uploaded CSV may contain;
     *              larger files are rejected while parsing
     */
    const maxRows = 10_000;

    /**
     * @description top level function to parse a CSV in `csvHook`
     * @param rawFile the file to parse
     * @param csvHeaders the expected header definition, forwarded unchanged to `createParsedCSV`
     * @returns the parsedCSV
     * @throws CSVError (as a rejected promise) describing why the file could not be parsed
     */
    async function parse({ rawFile, csvHeaders }: ParseProps): Promise<ParsedCSV> {
        // No try/catch: a catch block that only rethrows adds nothing, the rejection
        // from `createParsedCSV` already propagates to the caller as-is.
        return createParsedCSV(rawFile, csvHeaders);
    }

    return {
        parse,
    }

    /**
     * @description parses an object of type File and returns a ParsedCSV or an error
     * @param rawFile the uploaded file; its name must end in `.csv` (letter case is ignored)
     * @param csvHeaders the expected header definition, passed on to `createHeaderAndIdx`
     * @returns a Promise resolving to the unvalidated ParsedCSV
     * @throws CSVError if the file uploaded is not a CSV, exceeds max row length, contains no data,
     *          or cannot be read by PapaParse
     */
    async function createParsedCSV(rawFile: File, csvHeaders: HeaderCSVRow): Promise<ParsedCSV> {
        // case 1) the file uploaded is not a CSV
        // the extension is compared case-insensitively so that `DATA.CSV` is accepted as well
        if (!rawFile.name.toLowerCase().endsWith(".csv")) {
            throw new CSVError("Invalid File Type: the file uploaded must be a '.csv'");
        }
        // case 2) the file uploaded is a CSV, parse the file
        return new Promise<ParsedCSV>((resolve, reject) => {
            Papa.parse(rawFile,
                {
                    // do not use the default header since it makes changing capitalization difficult
                    header: false,

                    // parseResult is in the following shape:
                    // [[header],[row1],[row2], ..., [rowN]]
                    complete: (parseResult: ParseResult<string[]>) => {
                        try {
                            const [headerRow, ...csvBody] = parseResult.data;

                            // count only real data rows: PapaParse reports a trailing newline as an
                            // extra `[""]` row, which must not influence the size or emptiness checks
                            const totalRows = getTotalRows(csvBody);

                            // case 1) file is too large (the header row counts toward the limit)
                            if (totalRows + 1 > maxRows) {
                                reject(new CSVError(`The file uploaded must have less than ${formatNumberWithCommas(maxRows)} rows`));
                                return;
                            }

                            // case 2) empty file, only contains header or no data
                            if (totalRows < 1) {
                                reject(new CSVError("The file must have at least 1 row of data"));
                                return;
                            }

                            // case 3) file size and type is correct, parse file
                            const updatedHeader: HeaderCSVRow = createHeaderAndIdx(headerRow, csvHeaders);
                            const segHeader: HeaderSegmentsRow = createSegmentsHeaderAndIdx(headerRow, updatedHeader);
                            const createdRowFromUpdatedHeader: CSVRow[] = createRows(csvBody, updatedHeader, segHeader);
                            resolve({
                                numRows: totalRows,
                                header: updatedHeader,
                                segmentHeader: segHeader,
                                rows: createdRowFromUpdatedHeader,
                            } as ParsedCSV)
                        } catch (error: unknown) {
                            // keep CSVErrors raised by the helpers, wrap anything else
                            if (error instanceof CSVError) {
                                reject(error);
                                return;
                            }
                            reject(new CSVError(error instanceof Error ? error.message : String(error)));
                        }
                    },

                    // without this callback a file that cannot be read would leave the promise pending forever
                    error: (error: Error) => {
                        reject(new CSVError(error.message));
                    },
                }
            );
        });
    }

    /**
     * @description populate a copy of the CSVHeaders object with the csvIndex of every field
     *              Header names are compared in the normalised form produced by
     *              `formatNoHyphensOrSpaceLowercase`, the same normalisation
     *              `createSegmentsHeaderAndIdx` uses to recognise default fields, so a column
     *              that is excluded from the segments is also found here.
     * @param headerRow is the parsed string[] containing the header field from Papaparse
     * @param csvHeaders a parameter to be deep copied which will be populated with CSVIndexes for the header
     * @returns a copy of the csvHeaders object where each entry's csvIndex is the position of its
     *          column in `headerRow`, or -1 when the column is absent
     * @throws CSVError if `headerRow` is undefined
     */
    function createHeaderAndIdx(headerRow: string[] | undefined, csvHeaders: HeaderCSVRow): HeaderCSVRow {
        // case 1) ensure that this file has a header row
        if (!headerRow) { throw new CSVError("No CSV Header Specified") }

        // step 2) put the user supplied names in a consistent format
        // note: `toLowerCase` (not `toLocaleLowerCase`) keeps the result independent of the
        //          browser locale, e.g. `ID` must never become `ıd` on a Turkish system
        // NOTE(review): assumes formatNoHyphensOrSpaceLowercase maps `Source URL` and `source_url`
        //          to the same string, as the comments in createSegmentsHeaderAndIdx describe
        const normalizedHeaderRow: string[] = headerRow.map((field: string) =>
            formatNoHyphensOrSpaceLowercase(field.trim().toLowerCase())
        );

        // step 3) create a deep copy of the defaultCSV Header from csvHook
        const csvHeadersDeepCopy: HeaderCSVRow = cloneDeep(csvHeaders);

        // step 4) update the index of each field as its index or -1
        Object.values(csvHeadersDeepCopy).forEach((entry) => {
            const expectedName: string = formatNoHyphensOrSpaceLowercase(
                entry.fieldTitle.toLowerCase().replace(/ /g, '_')
            );
            entry.csvIndex = normalizedHeaderRow.indexOf(expectedName);
        });
        return csvHeadersDeepCopy;
    }

    /**
     * @description creates a HeaderSegmentsRow object containing any header field not equal to the default HeaderCSVRow fields
     * @param headerRow the first row of the CSV
     * @param csvHeaders the default header; its keys are the field names that are NOT segments
     * @returns a header containing all the segments, each with the index of its own column
     * @throws CSVError if `headerRow` is undefined
     */
    function createSegmentsHeaderAndIdx(headerRow: string[] | undefined, csvHeaders: HeaderCSVRow): HeaderSegmentsRow {
        // case 1) ensure that this file has a header row
        if (!headerRow) { throw new CSVError("No CSV Header Specified") }

        // step 2A) collect the normalised names of all default (non segment) fields,
        //          in this case the field `source_url` becomes `sourceurl`
        const defaultFieldNames = new Set<string>(
            Object.keys(csvHeaders).map((fieldName: string) => formatNoHyphensOrSpaceLowercase(fieldName))
        );

        // step 2B) every remaining, non-empty header field is a segment
        const ret: HeaderSegmentsRow = { segHeader: [] } as HeaderSegmentsRow;
        headerRow.forEach((userUploadField: string, columnIdx: number) => {
            const segmentName: string = userUploadField.trim();

            // enforce that the segment name is not empty (i.e a comma at the end of the header row)
            if (segmentName === "") { return; }

            // note: the userUploadField is not being changed, we're just formatting
            //          the user input and header in a common format (compare user input `Source URL` CSVRow field `source_url`)
            if (defaultFieldNames.has(formatNoHyphensOrSpaceLowercase(userUploadField))) { return; }

            ret.segHeader.push(
                {
                    // note: this is where the segments header name is set
                    fieldTitle: segmentName,
                    toolTipDescription: "Segment Value: a custom metric specific to your company’s data",

                    // use the position of this very column; looking the name up with indexOf would
                    // give every duplicate-named segment the index of the first occurrence
                    csvIndex: columnIdx,

                    // segments are always optional
                    required: false,
                    charsTillEllipsis: segmentName.length,
                } as HeaderEntry
            );
        });
        return ret;
    }

    /**
     * @description converts the body of the CSV (everything but the header) into keyed CSVRow objects
     *                  Note the key order of each CSVRow must be maintained or it will not be rendered in order in the Preview Table
     * @param csvBody the parsed rows, one string[] of cells per row
     * @param header the default header whose csvIndex values say which cell belongs to which field
     * @param headerSegmentsRow optional segment header; when omitted `segments` is undefined on every row
     * @returns a CSVRow[] with trimmed cell values; rows holding exactly one cell are skipped
     */
    function createRows(csvBody: string[][], header: HeaderCSVRow, headerSegmentsRow?: HeaderSegmentsRow): CSVRow[] {
        // trimmed content of a cell, or undefined when the cell is missing or an empty string
        const cellAt = (cells: string[], csvIndex: number): string | undefined => {
            const raw = cells[csvIndex];
            return raw ? raw.trim() : undefined;
        };

        const parsedRows: CSVRow[] = [];
        for (const cells of csvBody) {
            // a row of length 1 is what Papaparse produces for the `\n` at the end of the file
            if (cells.length === 1) {
                continue;
            }

            const segments = headerSegmentsRow?.segHeader.map((segment) => ({
                name: segment.fieldTitle,
                // the segment index always points at a real column, the fallback only guards short rows
                value: cellAt(cells, segment.csvIndex) ?? '',
            } as SegmentValue));

            parsedRows.push({
                title: cellAt(cells, header.title.csvIndex) ?? '',
                details: cellAt(cells, header.details.csvIndex) ?? '',
                date: cellAt(cells, header.date.csvIndex) ?? '',
                user: cellAt(cells, header.user.csvIndex) ?? '',
                id: cellAt(cells, header.id.csvIndex) ?? '',
                // stars is the only field that stays undefined when the cell is missing or empty
                stars: cellAt(cells, header.stars.csvIndex),
                source_url: cellAt(cells, header.source_url.csvIndex) ?? '',
                segments,
            });
        }
        return parsedRows;
    }

    /**
     * @description counts the rows of the CSV body, leaving out the single-cell rows PapaParse emits
     *              for an empty line (e.g. the `\n` at the end of the file shows up as `[""]`)
     * @param csvBody the content of the csv excluding the headers and may contain an extra `\n` at the end of the file
     * @returns the number of rows whose length is not exactly 1
     */
    function getTotalRows(csvBody: string[][]): number {
        return csvBody.filter((cells) => cells.length !== 1).length;
    }
}