Creating a live transcript bot using Vosk Ai

This post has been de-listed

It is no longer included in search results and normal feeds (front page, hot posts, subreddit posts, etc). It remains visible only via the author's post history.

Post Body

I have been looking everywhere and having a lot of difficulty finding a solution, so sorry if I am coming to the wrong place.
I am trying to create a Discord bot that can transcribe conversations live. I chose Vosk because it's an offline tool, but I am unsure how to implement it in a live setting. I've seen it done in Python and discord.js, but I dunno... so to cover all bases, here is what I have so far.

 const { SlashCommandBuilder} = require('discord.js');
  const { joinVoiceChannel, createAudioPlayer, NoSubscriberBehavior } = require('@discordjs/voice');
  VoiceConnectionStatus, createAudioPlayer, createAudioResource, StreamType
  const { Readable } = require('stream');
  const vosk = require('vosk');
  //https://alphacephei.com/vosk/models
  const model = new vosk.Model('model/vosk-model-en-us-0.42-gigaspeech');
  module.exports = {
      data: new SlashCommandBuilder()
          .setName('connect')
          .setDescription('Join channel to start listening'),
          async execute(interaction) {
              let talkingUser;
              const ch = interaction.guild.channels.cache.find(c => c.name === 'transcript');
              const voiceChannel = interaction.member.voice.channel;//gets current user id
              if (!voiceChannel){
                  await interaction.reply({content: 'Error: The voice channel does not exist!', ephemeral: true});
                  return;//if they are in a voice channel return an error
              }
              const voiceConn = joinVoiceChannel({
                  channelId: voiceChannel.id,
                  guildId: interaction.guild.id,
                  adapterCreator: interaction.guild.voiceAdapterCreator,
                  selfDeaf: false,
              });

              await interaction.reply({ content: 'Ready to listen!', ephemeral: true });
              voiceConn.receiver.speaking.on('start', (userId) => {//'end' works too
                  talkingUser = (interaction.guild.members.cache.get(userId)).displayName
                  console.log(talkingUser, 'started')
              })
          }
  }

So currently I have it set up so that when someone speaks in a call, it logs a console message (or could send a message in the chat) saying that "talkingUser" is talking. It fires when a user starts talking. I am not sure if there is a better implementation; because of the switch to @discordjs/voice I can't find much on alternatives.

But back to my issue, which involves Vosk: this is my first implementation using Vosk in a live environment. I am getting a TypeError on `recognizer.setVoice(voiceConn);` saying it is not a constructor.
After that, if I comment that line out, I get an error saying `recognizer.on is not a function`.

/**
 * First attempt at wiring a Discord voice connection into a Vosk recognizer and
 * logging the recognized text.
 *
 * NOTE(review): as posted, this function does not work. The author reports a
 * TypeError at `recognizer.setVoice(voiceConn)` and, with that line commented
 * out, "recognizer.on is not a function". The inline notes below mark the
 * suspect calls; confirm each against the vosk and @discordjs/voice docs.
 *
 * @param {object} voiceConn - Voice connection; handed to `recognizer.setVoice`
 *   and subscribed to a freshly created audio player.
 * @param {object} ch - Only referenced by the commented-out `ch.send` below;
 *   presumably the transcript text channel — verify against the caller.
 */
async function performSpeechRecognition(voiceConn, ch) {
    // Recognizer is built on the module-level `model` with a 48000 sample rate.
    const recognizer = new vosk.Recognizer({ model: model, sampleRate: 48000 });
    recognizer.setWords(true);
    // NOTE(review): this is the line the author reports the TypeError on.
    recognizer.setVoice(voiceConn);

    const audioPlayer = createAudioPlayer();
    voiceConn.subscribe(audioPlayer);

    // NOTE(review): an audio player is the playback side of @discordjs/voice;
    // it presumably never emits 'data' with incoming audio — confirm. Incoming
    // audio likely has to come from `voiceConn.receiver` instead.
    audioPlayer.on('data', (audioData) => {
        recognizer.acceptWaveform(audioData);
    });

    audioPlayer.on('error', (error) => {
        console.error('Audio player error:', error);
    });

    // NOTE(review): the author reports "recognizer.on is not a function" here,
    // i.e. the recognizer does not appear to be an event emitter.
    recognizer.on('result', (result) => {
        const text = result.text;
        console.log('Recognized text:', text);
        // Do whatever you want with the recognized text
        // For example, you can send it to a transcript channel:
        // if (ch) {
        //     ch.send({ content: text });
        // }
        console.log("Said", text);
    });

    recognizer.on('error', (error) => {
        console.error('Recognition error:', error);
    });
    // There is no `await` above, so both free() calls run synchronously right
    // after the handlers are registered — before any callback could fire.
    // `model` is the shared module-level instance, so it is released for every
    // later call as well.
    recognizer.free();
    model.free();
}

This is a second implementation that I found while looking through GitHub and Stack Overflow; it gives me an error saying `voice_Connection.receiver.createStream is not a function`.

/**
 * Collects one utterance of PCM audio from a voice connection and, once the
 * stream ends, converts and transcribes it.
 *
 * Utterances shorter than 0.8 s or longer than 19 s are skipped.
 *
 * NOTE(review): the author reports "createStream is not a function" for the
 * receiver call below; the receive API of the installed @discordjs/voice
 * version presumably differs (e.g. `receiver.subscribe`) — confirm.
 * NOTE(review): `convert_audio`, `process_commands_query`, `mapKey` and `user`
 * are not defined in this snippet; they must exist elsewhere in the file or the
 * 'end' handler will only log a ReferenceError.
 *
 * @param {object} voice_Connection - Connection whose `receiver` supplies the stream.
 * @param {object} userId - Passed to `createStream`; its `displayName` is logged.
 * @param {*} voiceChannelId - Currently unused.
 */
function speak_impl(voice_Connection, userId, voiceChannelId) {
    console.log(`I'm listening to ${userId.displayName}`);
    // this creates a 16-bit signed PCM, stereo 48KHz stream
    // VOSK PCM 16khz 16bit mono
    const audioStream = voice_Connection.receiver.createStream(userId, { mode: 'pcm' });
    audioStream.on('error', (e) => {
        console.log(`audioStream: ${e}`);
    });

    const chunks = [];
    audioStream.on('data', (data) => {
        chunks.push(data);
    });

    audioStream.on('end', async () => {
        const buffer = Buffer.concat(chunks);
        // 48000 frames per second, 4 bytes per frame (2 channels x 16 bit).
        const duration = buffer.length / 48000 / 4;
        console.log(`duration: ${duration}`);

        if (duration < 0.8 || duration > 19) { // 20 seconds max dur
            console.log("TOO SHORT / TOO LONG; SKPPING");
            return;
        }

        try {
            const new_buffer = await convert_audio(buffer);
            const out = await transcribe(new_buffer);
            if (out != null) {
                //user.name
                process_commands_query(out, mapKey, user.id);
            }
        } catch (e) {
            console.log(`tmpraw rename: ${e}`);
        }
    });
}
/**
 * Transcribes an audio buffer by delegating to `transcribe_vosk`.
 *
 * @param {*} buffer - Audio data, forwarded unchanged.
 * @returns {Promise<*>} Whatever `transcribe_vosk` resolves to.
 */
async function transcribe(buffer) {
    const transcript = await transcribe_vosk(buffer);
    return transcript;
}

So I don't know if my issue comes from my lack of knowledge of @discordjs/voice or of Vosk. I guess the most important thing I need to find out is whether I am creating a proper stream for the Vosk API to capture the audio. If I can figure out how to capture an audio stream, I can probably feed that into Vosk and figure out how to use Vosk myself. But right now I can't even get close!
Thank you in advance... Sorry if this isn't the right place for this.

Author

Account Strength

100%

Account Age

6 years

Verified Email

Yes

Verified Flair

Total Karma

5,142

Link Karma

834

Comment Karma

4,280

Profile updated: 2 days ago

Posts updated: 2 months ago

Mrsnowmanmanson

Subreddit

r/Discordjs

Post Details

We try to extract some basic information from the post title. This is not always successful or accurate, please use your best judgement and compare these values to the post title and body for confirmation.

Posted: 1 year ago
Reddit URL: View post on reddit.com
External URL: reddit.com/r/Discordjs/c...