import { inject, Injectable } from '@angular/core';
import { OcrService } from './ocr.service';
import { CellIndices, Cut, Intersection, Interval, WordRect } from '../../models/interfaces';
import { CtPage } from '../../models/ct-batch-model';

@Injectable({
  providedIn: 'root',
})
export class OcrTableService {
  private ocrService = inject(OcrService);

  /**
   * Distributes the OCR words of a page over the cells of a grid and returns the text per cell.
   *
   * The grid is spanned by the given separator positions: `verticalOffsets` are used as
   * x-coordinates (column boundaries) and `horizontalOffsets` as y-coordinates (row boundaries).
   * Both lists may be passed in any order; they are sorted ascending internally without
   * modifying the caller's arrays.
   *
   * @param page the page whose OCR words are read via the OcrService
   * @param verticalOffsets x-positions of the column boundaries
   * @param horizontalOffsets y-positions of the row boundaries
   * @returns `result[row][column]` holding the words (or word parts) assigned to that cell,
   *   joined by a single space; there are `horizontalOffsets.length - 1` rows and
   *   `verticalOffsets.length - 1` columns
   */
  getWordsInTable(
    page: CtPage,
    verticalOffsets: number[],
    horizontalOffsets: number[],
  ): string[][] {
    const startTime = performance.now();

    // Sort copies: Array.prototype.sort works in place and would otherwise reorder
    // the arrays owned by the caller.
    const columnBoundaries = [...verticalOffsets].sort((x, y) => x - y);
    const rowBoundaries = [...horizontalOffsets].sort((x, y) => x - y);

    // Bounding box of the whole grid, used to discard words outside of the table early.
    const tableRect: WordRect = {
      x: [columnBoundaries[0], columnBoundaries[columnBoundaries.length - 1]],
      y: [rowBoundaries[0], rowBoundaries[rowBoundaries.length - 1]],
    };

    const wordRectsInTable = this.ocrService
      .getWordRects(page)
      .filter((wordRect) => this.ocrService.areRectsIntersecting(wordRect, tableRect));

    const cellRects = this.getRectsByCellIndices(rowBoundaries, columnBoundaries);

    const selectedWordsByCell = this.getSelectedWordsByCell(cellRects, wordRectsInTable, page);

    const endTime = performance.now();
    console.debug(`time taken: ${endTime - startTime}ms`);
    console.debug(selectedWordsByCell);

    const tableData: string[][] = [];
    for (let row = 0; row < rowBoundaries.length - 1; row++) {
      tableData[row] = [];
      for (let column = 0; column < columnBoundaries.length - 1; column++) {
        // Every cell key was initialised in getSelectedWordsByCell, so the lookup cannot miss.
        tableData[row][column] = selectedWordsByCell.get(`${row}|${column}`)!.join(' ');
      }
    }

    console.debug(tableData);
    return tableData;
  }

  /**
   * Builds the rectangle of every grid cell, keyed by `"{row}|{column}"`.
   *
   * Cell (row, column) spans from boundary `column` to `column + 1` on the x-axis and from
   * boundary `row` to `row + 1` on the y-axis, so both offset lists are expected to be
   * sorted ascending.
   */
  private getRectsByCellIndices(
    horizontalOffsets: number[],
    verticalOffsets: number[],
  ): Map<CellIndices, WordRect> {
    const cellRects: Map<CellIndices, WordRect> = new Map<CellIndices, WordRect>();
    for (let row = 0; row < horizontalOffsets.length - 1; row++) {
      for (let column = 0; column < verticalOffsets.length - 1; column++) {
        const cellRect: WordRect = {
          x: [verticalOffsets[column], verticalOffsets[column + 1]],
          y: [horizontalOffsets[row], horizontalOffsets[row + 1]],
        };
        cellRects.set(`${row}|${column}`, cellRect);
      }
    }
    return cellRects;
  }

  /**
   * Assigns each word (or parts of it, see calculateCuts) to grid cells.
   *
   * A word is only considered when the summed area share of all cells it intersects is at
   * least 0.5; words below that threshold are dropped.
   *
   * @returns a map containing an entry for every cell of `cellRects` (possibly an empty list),
   *   with the words appended in the iteration order of `wordRectsInTable`
   */
  private getSelectedWordsByCell(
    cellRects: Map<CellIndices, WordRect>,
    wordRectsInTable: WordRect[],
    page: CtPage,
  ): Map<CellIndices, string[]> {
    const selectedWordsByCell: Map<CellIndices, string[]> = new Map<CellIndices, string[]>();
    for (const indices of cellRects.keys()) {
      selectedWordsByCell.set(indices, []);
    }

    for (const word of wordRectsInTable) {
      const intersections = this.getIntersections(cellRects, word);

      const shareSum = intersections.reduce((sum, intersection) => sum + intersection.share, 0);
      if (shareSum < 0.5) {
        continue;
      }

      for (const cut of this.calculateCuts(intersections)) {
        selectedWordsByCell.get(cut.indices)?.push(this.getWordCut(page, word, cut.interval));
      }
    }
    return selectedWordsByCell;
  }

  /**
   * For each data-capturing-table cell intersecting with the given word rect this method returns an object
   * which stores the data-capturing-table position (as a combined string and row, column separately) together
   * with
   * - share: amount of overlapping area relative to the total area of the word rect
   *   (i.e. what "area share" of this word is this data-capturing-table cell covering)
   * - xShare: same as share, but one dimensional only considering the horizontal intersection
   *   amount on the x-axis
   *
   * The result is sorted descending by share, i.e. the cell covering most of the word comes first.
   */
  private getIntersections(cellRects: Map<CellIndices, WordRect>, word: WordRect): Intersection[] {
    const wordWidth = word.x[1] - word.x[0];

    return (
      [...cellRects.entries()]
        .filter(([, rect]) => this.ocrService.areRectsIntersecting(word, rect))
        .map(([indices, rect]): Intersection => {
          // Keys have the form "{row}|{column}" (see getRectsByCellIndices).
          const [rowPart, columnPart] = indices.split('|');
          return {
            indices,
            row: Number(rowPart),
            column: Number(columnPart),
            xShare: Math.abs(this.ocrService.getAxisIntersectionLengths(rect, word)[0] / wordWidth),
            share: this.ocrService.getIntersectionArea(rect, word) / this.ocrService.getArea(word),
          };
        })
        // sort descending by share
        .toSorted((i1, i2) => i2.share - i1.share)
    );
  }

  /**
   * Context: There are situations where a bunch of text is detected as one large string by the ocr.
   * A prominent example is the combination "{fromdate}-{todate}" in AOK tables.
   * This method, given a set of intersections for a word, returns in what cuts this word
   * should be split. The idea is that we want to be able to separate long ocr-strings by putting
   * column separators inside.
   *
   * Right now we consider only the following simplified case:
   * - Assume that one word rect is intersecting with at most 4 data-capturing-table cells
   * - Pick the cell C with the biggest intersecting area (the first entry of `intersections`,
   *   which is expected to be sorted descending by share)
   * - Pick the intersecting cell N in the same row as C with the next-biggest share
   *   (in the simplified case this is the left or right neighbour of C)
   * - If the intersection share of N is at least 30%, cut the word at c% where c is
   *   the horizontal intersection share of the left-most intersecting cell (of C and N)
   * - Otherwise the whole word goes to C
   * - By "Cutting the word by c%" we mean picking the first c% characters (and the last 1-c%).
   *   This is obviously not very accurate but enough for the first draft
   *
   *   Picture:
   *   https://miro.com/app/board/uXjVKQ6CXfY=/?moveToWidget=3458764589864696023&cot=14
   *
   * The passed array is not modified. An empty input yields no cuts.
   */
  private calculateCuts(intersections: Intersection[]): Cut[] {
    const [greatestShareIntersection, ...remainingIntersections] = intersections;
    if (!greatestShareIntersection) {
      return [];
    }

    const horizontalNeighbourIntersection = remainingIntersections.find(
      (i) => i.row === greatestShareIntersection.row,
    );

    if (!horizontalNeighbourIntersection || !(horizontalNeighbourIntersection.share >= 0.3)) {
      // No relevant neighbour: the complete word belongs to the dominant cell.
      return [{ indices: greatestShareIntersection.indices, interval: [0, 1] }];
    }

    const leftIntersection =
      horizontalNeighbourIntersection.column < greatestShareIntersection.column
        ? horizontalNeighbourIntersection
        : greatestShareIntersection;
    const rightIntersection =
      horizontalNeighbourIntersection.column > greatestShareIntersection.column
        ? horizontalNeighbourIntersection
        : greatestShareIntersection;

    // Split at the horizontal share covered by the left cell.
    return [
      { indices: leftIntersection.indices, interval: [0, leftIntersection.xShare] },
      { indices: rightIntersection.indices, interval: [leftIntersection.xShare, 1] },
    ];
  }

  /**
   * Returns the part of the word's text described by `cutInterval`.
   *
   * The interval holds relative positions (0 = start of the word, 1 = end of the word) which
   * are scaled by the text length and rounded to the nearest character index.
   */
  private getWordCut(page: CtPage, word: WordRect, cutInterval: Interval): string {
    const wordContent = this.ocrService.getWord(page, word);
    const [relativeStart, relativeEnd] = cutInterval;
    const leftBoundary = Math.round(relativeStart * wordContent.length);
    const rightBoundary = Math.round(relativeEnd * wordContent.length);
    return wordContent.slice(leftBoundary, rightBoundary);
  }
}
