import * as SpeechSDK from 'microsoft-cognitiveservices-speech-sdk';

/** Maps a word to the number of times it occurs in a piece of text. */
interface WordDict {
  [word: string]: number;
}

/**
 * Wrapper around the Azure Speech SDK that performs single-utterance
 * recognition from the microphone and offers helpers for comparing the
 * recognised text against an expected script.
 */
export class MsSpeechToText {
  // SECURITY(review): the subscription key is committed in source. It should be
  // rotated and supplied through configuration or a short-lived token instead.
  // It is left in place here so existing callers keep working unchanged.
  private readonly AZURE_SUB_KEY = '3558139b21184b7cabdd4f5f928047c2';
  private readonly AZURE_SUB_REGION = 'westeurope';

  /** Locale used when the language index is not one of the known values. */
  private static readonly DEFAULT_LANGUAGE = 'en-US';
  /** Locale for each supported `recognitionLanguage` index (0, 1, 2). */
  private static readonly LANGUAGES: readonly string[] = [
    'en-US',
    'nb-NO',
    'sv-SE',
  ];
  /** How long to wait for a first result before giving up. */
  private static readonly INITIAL_SPEECH_TIMEOUT_MS = 40000;

  private readonly speechConfig: SpeechSDK.SpeechConfig;
  private speechRecognizer: SpeechSDK.SpeechRecognizer;
  private timeDelay: number;
  private isRecognizing = false;
  // `ReturnType<typeof setTimeout>` is valid in both browser and Node typings.
  private silenceTimeout: ReturnType<typeof setTimeout> | null = null;
  private initialSpeechTimeout: ReturnType<typeof setTimeout> | null = null;
  private silenceDurationMs: number;

  /**
   * @param recognitionLanguage 0 = en-US, 1 = nb-NO, 2 = sv-SE; any other
   *   value falls back to en-US.
   * @param timeDelay Delay in milliseconds used both for the SDK segmentation
   *   silence property and for the post-result silence timer.
   */
  constructor(recognitionLanguage: number, timeDelay = 1000) {
    this.speechConfig = SpeechSDK.SpeechConfig.fromSubscription(
      this.AZURE_SUB_KEY,
      this.AZURE_SUB_REGION
    );

    this.timeDelay = timeDelay;
    this.silenceDurationMs = timeDelay;

    this.speechConfig.speechRecognitionLanguage =
      MsSpeechToText.LANGUAGES[recognitionLanguage] ||
      MsSpeechToText.DEFAULT_LANGUAGE;

    this.setConfig();

    this.speechRecognizer = this.createRecognizer();
  }

  /** Builds a recognizer bound to the current config and the microphone. */
  private createRecognizer(): SpeechSDK.SpeechRecognizer {
    return new SpeechSDK.SpeechRecognizer(
      this.speechConfig,
      SpeechSDK.AudioConfig.fromMicrophoneInput()
    );
  }

  /** Pushes the silence-related settings onto the shared speech config. */
  private setConfig(): void {
    this.speechConfig.setProperty(
      SpeechSDK.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs,
      '0'
    );

    this.speechConfig.setProperty(
      SpeechSDK.PropertyId.Speech_SegmentationSilenceTimeoutMs,
      `${this.timeDelay}`
    );
  }

  /**
   * Starts a single recognition and resolves with the recognised text.
   *
   * Rejects with the string `'Recognition already started'` when a session is
   * still marked active, with the SDK error when recognition fails, and with
   * `'No speech detected within initial timeout.'` when no result arrives
   * within the initial timeout. Rejection values are kept as plain strings /
   * SDK errors for compatibility with existing callers.
   *
   * After a successful result the instance stays marked as recognizing until
   * the silence timer fires and calls `stopRecognition()`.
   */
  public startRecognition(): Promise<string> {
    return new Promise<string>((resolve, reject) => {
      if (this.isRecognizing) {
        reject('Recognition already started');
        return;
      }

      this.isRecognizing = true;

      // Arm the watchdog before starting so that every exit path below can
      // reliably cancel it.
      this.clearInitialSpeechTimeout();
      this.initialSpeechTimeout = setTimeout(() => {
        if (!this.silenceTimeout) {
          console.log(
            'No speech detected within initial timeout. Stopping recognition.'
          );
          this.stopRecognition();
          reject('No speech detected within initial timeout.');
        }
      }, MsSpeechToText.INITIAL_SPEECH_TIMEOUT_MS);

      try {
        this.speechRecognizer.recognizeOnceAsync(
          result => {
            console.log('Recognised text: ', result.text);
            resolve(result.text);

            this.resetSilenceTimeout();
            this.clearInitialSpeechTimeout();
          },
          err => {
            console.log('Recognition error: ', err);
            this.isRecognizing = false; // Reset recognition state on error
            // Without this the stale watchdog would later call
            // stopRecognition() and could tear down a newer session.
            this.clearInitialSpeechTimeout();
            reject(err);
          }
        );
      } catch (err) {
        // A synchronous failure must not leave the instance stuck in the
        // "recognizing" state or leave the watchdog running.
        this.isRecognizing = false;
        this.clearInitialSpeechTimeout();
        reject(err);
      }
    });
  }

  /**
   * Marks the session as finished, cancels both timers and asks the SDK to
   * stop recognition.
   */
  public stopRecognition(): void {
    this.isRecognizing = false;
    this.clearSilenceTimeout();
    this.clearInitialSpeechTimeout();
    // NOTE(review): sessions are started with recognizeOnceAsync, while this
    // stops *continuous* recognition - confirm this is the intended SDK call.
    this.speechRecognizer.stopContinuousRecognitionAsync(
      () => {
        console.log('Recognition stopped.');
      },
      err => {
        console.error('Error stopping recognition:', err);
      }
    );
  }

  /** (Re)starts the timer that stops recognition after `silenceDurationMs`. */
  private resetSilenceTimeout(): void {
    this.clearSilenceTimeout();

    this.silenceTimeout = setTimeout(() => {
      console.log('No speech detected. Stopping recognition.');
      this.stopRecognition();
    }, this.silenceDurationMs);
  }

  /** Cancels the silence timer, if one is pending. */
  private clearSilenceTimeout(): void {
    if (this.silenceTimeout) {
      clearTimeout(this.silenceTimeout);
      this.silenceTimeout = null;
    }
  }

  /** Cancels the initial-speech watchdog, if one is pending. */
  private clearInitialSpeechTimeout(): void {
    if (this.initialSpeechTimeout) {
      clearTimeout(this.initialSpeechTimeout);
      this.initialSpeechTimeout = null;
    }
  }

  /**
   * Splits text into lower-cased words: strips `. , ? !`, separates on any
   * run of whitespace and drops empty fragments.
   */
  private static tokenize(text: string): string[] {
    return text
      .replace(/[.,?!]/g, '')
      .split(/\s+/)
      .filter(fragment => fragment.length > 0)
      .map(fragment => fragment.toLowerCase());
  }

  /**
   * Reads a count from `dict`, looking only at own properties so that words
   * such as "constructor" never resolve to inherited Object.prototype members.
   */
  private static ownCount(dict: WordDict, word: string): number {
    const value = Object.prototype.hasOwnProperty.call(dict, word)
      ? dict[word]
      : undefined;
    return typeof value === 'number' ? value : 0;
  }

  /**
   * Counts case-insensitive word occurrences in `text`.
   *
   * @param text Text to analyse; `. , ? !` are ignored.
   * @returns A dictionary from lower-cased word to occurrence count. Empty or
   *   whitespace-only input yields an empty dictionary.
   */
  public countWords(text: string): WordDict {
    const counts: WordDict = {};

    for (const word of MsSpeechToText.tokenize(text)) {
      counts[word] = MsSpeechToText.ownCount(counts, word) + 1;
    }

    return counts;
  }

  /**
   * Computes the fraction of script words that also occur in the spoken text.
   * Each spoken occurrence can match at most one script occurrence; word
   * order is not considered.
   *
   * @param scriptText The expected text.
   * @param spokenText The recognised text.
   * @returns A ratio in [0, 1]. An empty script scores 1 when the spoken text
   *   is empty as well and 0 otherwise.
   */
  public compareText(scriptText: string, spokenText: string): number {
    const scriptWords = this.countWords(scriptText);
    const spokenWords = this.countWords(spokenText);

    let correctWords = 0;
    let total = 0;

    for (const word of Object.keys(scriptWords)) {
      const expected = MsSpeechToText.ownCount(scriptWords, word);
      const available = MsSpeechToText.ownCount(spokenWords, word);

      correctWords += Math.min(expected, available);
      total += expected;
    }

    let accuracy: number;
    if (total === 0) {
      // Avoid 0 / 0: nothing was expected, so only silence is a full match.
      accuracy = Object.keys(spokenWords).length === 0 ? 1 : 0;
    } else {
      accuracy = correctWords / total;
    }

    console.log('Accuracy is: ', accuracy);

    return accuracy;
  }

  /**
   * Updates the silence delay and rebuilds the recognizer with it.
   *
   * @param delay Delay in milliseconds as a numeric string. Values that are
   *   not finite, non-negative numbers are ignored with a warning.
   */
  public setDelay(delay: string): void {
    const parsed = +delay;

    if (!Number.isFinite(parsed) || parsed < 0) {
      console.warn('Ignoring invalid delay: ', delay);
      return;
    }

    this.timeDelay = parsed;
    // The constructor ties both values to the same delay; keep them in sync.
    this.silenceDurationMs = parsed;

    this.setConfig();

    const previous = this.speechRecognizer;
    this.speechRecognizer = this.createRecognizer();

    // Release the replaced recognizer, but only when no session is marked
    // active so that an in-flight recognition is not cut short.
    if (!this.isRecognizing) {
      previous.close();
    }
  }
}
