import { Type } from "class-transformer";
import { TDMSBase } from "../../base";
import { DataStoreFile } from "../data-store/data.store";
import { Diarization } from "./diarization";

/**
 * The available model options we have for transcription capabilities.
 *
 * Declared `as const` so the individual names are kept as literal types and
 * can be reused to derive {@link TranscriptionModelOptionsType}.
 */
export const TranscriptionModelOptions = ["tiny.en", "small.en", "medium.en", "large"] as const;
/** Union of the model names listed in {@link TranscriptionModelOptions} */
export type TranscriptionModelOptionsType = (typeof TranscriptionModelOptions)[number];

/**
 * This class defines a transcription containing transcribed data: the spoken
 * text, its timing, the model confidence, optional speaker/classification
 * details and the data store file it is associated with.
 */
export class Transcription extends TDMSBase {
  /** The date-fns format we wish to use for our display of transcription start/end times */
  static readonly CUSTOM_TIME_FORMAT = "HH:mm:ss.SSSS";
  /**
   * A regex that can be used to validate our custom time format based on {@link CUSTOM_TIME_FORMAT}.
   *
   * Each of the three colon separated parts accepts one or two digits; the decimal point and the
   * (up to four) trailing digits are both optional.
   */
  static readonly TIME_PATTERN: RegExp = /^[0-9][0-9]?:[0-9][0-9]?:[0-9][0-9]?\.?[0-9]{0,4}$/;
  /**
   * The microservice handles times in seconds. This Regex can match that expected format.
   *
   * The pattern is not anchored, so it matches a decimal number anywhere in the tested string.
   */
  static readonly MICROSERVICE_CLI_TIME_PATTERN: RegExp = /\d+\.\d+/;

  /** The prefix to name the speakers in front of their index */
  static readonly SPEAKER_PREFIX = "SPEAKER_";

  /**
   * Unique transcription identifier
   */
  id: number;

  /**
   * The start time, in seconds, from the start of the audio file to when this sentence is spoken
   */
  start: number;

  /**
   * The end time, in seconds, from the start of the audio file to the end of this spoken sentence
   */
  end: number;

  /**
   * The model confidence for a transcription
   */
  confidence: number = 0;

  /**
   * Actual transcribed text
   */
  text: string;

  /** Tracks which speaker actually spoke this content */
  speaker: string | undefined;

  /** This classification helps identify what this spoken content actually meant */
  classification: Array<string> | undefined;

  /** This value provides how confident the analysis algorithm was in the classification, if available */
  classificationConfidence: number = 0;

  /**
   * In the event this transcription is created from a multi file request, this identifier will be
   * populated with that of the multi file request.
   *
   * **Not stored in the database.**
   */
  multiFileIdentifier: string | undefined;

  /** Returns the speaker or a placeholder if one couldn't be determined */
  get prettySpeaker(): string {
    return this.guessPrettySpeaker();
  }

  /** Returns if this transcription element is a closed loop communication */
  get isCLC(): boolean {
    return this.classification?.includes("Closing loop") ?? false;
  }

  /**
   * Extracts the numeric speaker index from a speaker identifier.
   *
   * Accepted shapes are a number, a string starting with digits (`"2"`), or a string made of
   * {@link SPEAKER_PREFIX} followed by digits (`"SPEAKER_2"`).
   *
   * @returns the parsed index, or `undefined` when no number can be read from the identifier
   */
  private static parseSpeakerIndex(speaker: number | string): number | undefined {
    if (typeof speaker === "number") return Number.isNaN(speaker) ? undefined : speaker;

    const direct = parseInt(speaker, 10);
    if (!Number.isNaN(direct)) return direct;

    if (speaker.startsWith(Transcription.SPEAKER_PREFIX)) {
      const suffix = parseInt(speaker.slice(Transcription.SPEAKER_PREFIX.length), 10);
      if (!Number.isNaN(suffix)) return suffix;
    }
    return undefined;
  }

  /**
   * Attempts to guess a prettified speaker based on the identification of the given speaker.
   *
   * @param speaker the identifier to prettify, defaults to this transcription's {@link speaker}
   * @returns `"Unknown"` when no speaker is given, `"Speaker N"` (the parsed index plus one) when
   *   a number can be read from the identifier, otherwise the identifier itself as a string
   */
  guessPrettySpeaker(speaker: number | string | undefined = this.speaker): string {
    if (speaker == null) return "Unknown";

    const index = Transcription.parseSpeakerIndex(speaker);
    // An identifier we can't read a number from is shown as-is rather than as "Speaker NaN"
    return index === undefined ? String(speaker) : `Speaker ${index + 1}`;
  }

  /**
   * The data store file associated with this transcription
   */
  @Type(() => DataStoreFile)
  dataStoreFile: DataStoreFile;

  /**
   * Given an array of transcriptions, infers the diarizations
   *
   * One diarization is produced per transcription, carrying over its speaker, start and end.
   */
  static getDiarizations(transcriptions: Transcription[]) {
    return transcriptions.map((x) => Diarization.fromPlain({ speaker: x.speaker, start: x.start, end: x.end }));
  }

  /**
   * @param id the transcription identifier; assigned to {@link id} as given, so it stays
   *   `undefined` when the caller passes `undefined`
   * @param start the start time, in seconds, of the spoken content
   * @param end the end time, in seconds, of the spoken content
   * @param text the transcribed text
   * @param confidence the model confidence for this transcription
   * @param dataStoreFile the data store file associated with this transcription
   */
  constructor(
    id: number | undefined,
    start: number,
    end: number,
    text: string,
    confidence: number,
    dataStoreFile: DataStoreFile
  ) {
    super();
    this.id = id!;
    this.start = start;
    this.end = end;
    this.text = text;
    this.confidence = confidence;
    this.dataStoreFile = dataStoreFile;
  }

  /**
   * Returns the unique speakers for the given transcription content.
   *
   * Transcriptions without a speaker are skipped. The result holds the prettified labels, ordered
   * by their numeric speaker index; labels without a readable index come last, in the order they
   * were first encountered.
   */
  static getUniqueSpeakers(transcriptions: Transcription[]): string[] {
    // label -> sort rank, a Map keeps first-seen order and de-duplicates at the same time
    const rankByLabel = new Map<string, number>();
    for (const transcription of transcriptions) {
      const { speaker } = transcription;
      if (speaker == null) continue;

      const label = transcription.prettySpeaker;
      if (!rankByLabel.has(label))
        rankByLabel.set(label, Transcription.parseSpeakerIndex(speaker) ?? Number.POSITIVE_INFINITY);
    }

    return [...rankByLabel.entries()]
      // Compare rather than subtract: Infinity - Infinity would yield NaN
      .sort(([, rankA], [, rankB]) => (rankA === rankB ? 0 : rankA < rankB ? -1 : 1))
      .map(([label]) => label);
  }

  /**
   * Given a transcript, determines the session duration, in seconds. This is determined by the
   * last utterance's end time.
   *
   * @returns the end time of the last element, or `0` when the transcript is empty
   */
  static getAssumedSessionDuration(transcript: Transcription[]): number {
    const last: Transcription | undefined = transcript[transcript.length - 1];
    return last?.end ?? 0;
  }
}

/**
 * This class defines the word level structure for each transcription: a single
 * spoken word together with its timing, score and (optionally) its speaker.
 */
export class TranscriptionWordContent extends TDMSBase {
  /** The start time, in seconds, from the start of the audio file for when this word is beginning to be spoken */
  start: number;

  /** The end time, in seconds, from the start of the audio file to when this word is finished being spoken */
  end: number;

  /** The actual spoken word */
  word: string;

  /** The accuracy score of this word */
  score: number;

  /** Tracks which speaker actually spoke this content. Not set by the constructor. */
  speaker: string | undefined;

  /**
   * @param start the start time, in seconds, of the word
   * @param end the end time, in seconds, of the word
   * @param word the spoken word
   * @param score the accuracy score of the word
   */
  constructor(start: number, end: number, word: string, score: number) {
    super();
    this.start = start;
    this.end = end;
    this.word = word;
    this.score = score;
  }
}
