import { createArray } from './utils';

/**
 * Drop the sentence starts that were marked for removal, paragraph by paragraph.
 *
 * Removal is positional: an entry in `sentenceStartIdsToRemove[p]` is the index of a
 * sentence start inside `sentenceStarts[p]`, not the value of the sentence start itself.
 *
 * @param {Array<Array>} sentenceStarts The sentence starts, one inner array per paragraph
 * @param {Array<Array<number>>} [sentenceStartIdsToRemove] Per paragraph, the indices of the
 *   sentence starts to drop. A missing outer array or a missing entry for a paragraph is
 *   treated as "remove nothing" for that paragraph.
 * @returns {Array<Array>} New arrays (the inputs are not mutated) holding the remaining sentence starts
 */
const filterRelevantSentenceStarts = (sentenceStarts, sentenceStartIdsToRemove) => {
  const removalListPerParagraph = sentenceStartIdsToRemove || [];

  return sentenceStarts.map((paragraphSentenceStarts, paragraphIdx) => {
    // A Set gives constant-time membership checks and also covers the case
    // where no removal list exists for this paragraph.
    const idsToRemove = new Set(removalListPerParagraph[paragraphIdx] || []);
    return paragraphSentenceStarts.filter((start, idx) => !idsToRemove.has(idx));
  });
};

/**
 * Build the text detection data for every page.
 *
 * `paragraphs`, `annotations` and the filtered sentence starts are all indexed by the same
 * page index. `annotation.start` / `annotation.end` are used as indices into the token list
 * of the page to look up the character offsets of an entity.
 *
 * @param {Array} paragraphs One entry per page, each with a `tokens` array
 * @param {Array<Array>} annotations One array of annotations per page
 * @param {Array<Array>} sentenceStarts The sentence starts per page
 * @param {Array<Array<number>>} sentenceStartIdsToRemove Per page, the indices of the sentence starts to drop
 * @returns {Array<Object>} One object per page with `tokens`, `ents`, `sentenceStarts` and the
 *   per-annotation lists (`pseudonyms`, `crIds`, `locationsInOriginalDocument`,
 *   `locationsInAnonymizedDocument`), each in annotation order
 */
const getTextDataPerPage = (paragraphs, annotations, sentenceStarts, sentenceStartIdsToRemove) => {
  // Remove the sentence starts the user deleted from the predicted ones.
  // TODO: once this is overwritten in the backend there is no easy way to get the original
  // (predicted) sentence starts back.
  // -> Worst case for now: all text of a page becomes a single sentence, which might be too
  //    long for the model (e.g. 1000 tokens). There are methods to artificially split overlong
  //    sentences (unsure - maybe remove the sentence start altogether in that case?).
  // To keep the predicted sentence starts we would have to either
  // -> store them separately under piis, e.g. as "predictedSentenceStarts", or
  // -> copy them from the predicted piis into the dto for the frontend each time.
  const keptSentenceStarts = filterRelevantSentenceStarts(sentenceStarts, sentenceStartIdsToRemove);

  // Reduce a token to the fields the frontend needs (`hasWhitespace` is exposed as `hasWs`).
  const toTokenData = ({ startChar, endChar, text, hasWhitespace }) => ({
    startChar,
    endChar,
    text,
    hasWs: hasWhitespace,
  });

  return Array.from(paragraphs, (paragraph, pageIdx) => {
    const tokens = paragraph.tokens.map(toTokenData);

    const pageAnnotations = annotations[pageIdx];
    // Collect a single annotation field for all annotations of this page.
    const collect = (field) => pageAnnotations.map((annotation) => annotation[field]);

    const ents = pageAnnotations.map((annotation) => {
      const { start, end } = annotation;
      return {
        // character span: from the first character of the start token to the last of the end token
        startChar: paragraph.tokens[start].startChar,
        endChar: paragraph.tokens[end].endChar,
        tag: annotation.textLabelName,
        text: annotation.text,
        score: annotation.score,
        startTok: start,
        endTok: end,
      };
    });

    return {
      tokens,
      ents,
      sentenceStarts: keptSentenceStarts[pageIdx],
      pseudonyms: collect('pseudonym'),
      crIds: collect('crId'),
      locationsInOriginalDocument: collect('locationInOriginalDocument'),
      locationsInAnonymizedDocument: collect('locationInAnonymizedDocument'),
    };
  });
};

/**
 * Group the image data of a document by page.
 *
 * `image.pageNum` is treated as 1-based: an image with `pageNum` 1 ends up in the first bucket.
 *
 * @param {number} numPages The number of pages in the document
 * @param {Array} images The images in the document, each with `pageNum`, `imageUrl`, `xref` and `detections`
 * @returns {Array} Two dimensional array with the images data per page; a page without images
 *   keeps the initial value provided by `createArray` (presumably an empty array per page -
 *   verify against `./utils`)
 */
const getImagesData = (numPages, images) => {
  const buckets = createArray(numPages, [], true);

  images.forEach(({ pageNum, imageUrl, xref, detections }) => {
    // pageNum counts from 1, the buckets are indexed from 0
    const pageBucket = buckets[pageNum - 1];
    pageBucket.push({ imageUrl, xref, detections });
  });

  return buckets;
};

/**
 * Create an updated copy of the document data with fresh text detections and images data.
 *
 * The given `sliceDocumentData` is not mutated: the document object, every page and every
 * `textData` object are shallow-copied, then `textData.detections` and `imagesData` are
 * replaced per page.
 *
 * @param {Object} sliceDocumentData The current document data; must contain a `pageData` array
 * @param {Array} paragraphs One entry per page, each with a `tokens` array
 * @param {Array<Array>} annotations One array of annotations per page
 * @param {Array<Array>} sentenceStarts The sentence starts per page
 * @param {Array<Array<number>>} sentenceStartIdsToRemove Per page, the indices of the sentence starts to drop
 * @param {Array} images The images in the document
 * @returns {Object} The document data with updated `pageData`
 */
export const getDocumentDataUpdate = (
  sliceDocumentData,
  paragraphs,
  annotations,
  sentenceStarts,
  sentenceStartIdsToRemove,
  images,
) => {
  const detectionsByPage = getTextDataPerPage(
    paragraphs,
    annotations,
    sentenceStarts,
    sentenceStartIdsToRemove,
  );

  // The page count comes from the existing document data, not from the paragraphs.
  const { pageData } = sliceDocumentData;
  const imagesByPage = getImagesData(pageData.length, images);

  // Copy a page and swap in the new detections and images for its index.
  const withUpdatedDetections = (page, pageIdx) => {
    const textData = { ...page.textData, detections: detectionsByPage[pageIdx] };
    return { ...page, textData, imagesData: imagesByPage[pageIdx] };
  };

  return {
    ...sliceDocumentData,
    pageData: pageData.map(withUpdatedDetections),
  };
};
