This post has been de-listed
It is no longer included in search results and normal feeds (front page, hot posts, subreddit posts, etc). It remains visible only via the author's post history.
I have been looking everywhere and having a lot of difficulties finding a solution so sorry if I am coming to the wrong place
I am trying to create a discord bot that can transcript conversations live, I chose vosk because its an offline too, l but I am unsure of how to implement it in a live setting, I've seen it done in python and disc.js but I dunno...so to cover all bases here is what I have so far.
const { SlashCommandBuilder} = require('discord.js');
const { joinVoiceChannel, createAudioPlayer, NoSubscriberBehavior } = require('@discordjs/voice');
VoiceConnectionStatus, createAudioPlayer, createAudioResource, StreamType
const { Readable } = require('stream');
const vosk = require('vosk');
//https://alphacephei.com/vosk/models
const model = new vosk.Model('model/vosk-model-en-us-0.42-gigaspeech');
module.exports = {
data: new SlashCommandBuilder()
.setName('connect')
.setDescription('Join channel to start listening'),
async execute(interaction) {
let talkingUser;
const ch = interaction.guild.channels.cache.find(c => c.name === 'transcript');
const voiceChannel = interaction.member.voice.channel;//gets current user id
if (!voiceChannel){
await interaction.reply({content: 'Error: The voice channel does not exist!', ephemeral: true});
return;//if they are in a voice channel return an error
}
const voiceConn = joinVoiceChannel({
channelId: voiceChannel.id,
guildId: interaction.guild.id,
adapterCreator: interaction.guild.voiceAdapterCreator,
selfDeaf: false,
});
await interaction.reply({ content: 'Ready to listen!', ephemeral: true });
voiceConn.receiver.speaking.on('start', (userId) => {//'end' works too
talkingUser = (interaction.guild.members.cache.get(userId)).displayName
console.log(talkingUser, 'started')
})
}
}
so currently I have it so that when someone speaks in a call it is able to send a console message (Or a message in the chat) that "talkingUser" is talking. When a user starts talking it sends, I am not sure if there is a better implantation because of the switch to discord/voice I can't find much on alternatives.
but back to my issue that involves vosk this is my first implementation of using VOSK in a live environment, I am getting an error saying TypeError recognizer.setVoice(voiceConn); is not a constructor
after that, if I comment it out I get an error on recognizer.on is not a function
async function performSpeechRecognition(voiceConn, ch) {
const recognizer = new vosk.Recognizer({ model: model, sampleRate: 48000 });
recognizer.setWords(true);
recognizer.setVoice(voiceConn);
const audioPlayer = createAudioPlayer();
voiceConn.subscribe(audioPlayer);
audioPlayer.on('data', (audioData) => {
recognizer.acceptWaveform(audioData);
});
audioPlayer.on('error', (error) => {
console.error('Audio player error:', error);
});
recognizer.on('result', (result) => {
const text = result.text;
console.log('Recognized text:', text);
// Do whatever you want with the recognized text
// For example, you can send it to a transcript channel:
// if (ch) {
// ch.send({ content: text });
// }
console.log("Said", text);
});
recognizer.on('error', (error) => {
console.error('Recognition error:', error);
});
recognizer.free();
model.free();
}
This is a second implementation that I found looking through git and stack overflow giving me an error saying voice_Connection.receiver.createStream is not a function
function speak_impl(voice_Connection, userId, voiceChannelId) {
console.log(`I'm listening to ${userId.displayName}`)
// this creates a 16-bit signed PCM, stereo 48KHz stream
//VOSK PCM 16khz 16bit mono
const audioStream = voice_Connection.receiver.createStream(userId, { mode: 'pcm' })
audioStream.on('error', (e) => {
console.log('audioStream: ' e)
});
let buffer = [];
audioStream.on('data', (data) => {
buffer.push(data)
})
audioStream.on('end', async () => {
buffer = Buffer.concat(buffer)
const duration = buffer.length / 48000 / 4;
console.log("duration: " duration)
if (duration < 0.8 || duration > 19) { // 20 seconds max dur
console.log("TOO SHORT / TOO LONG; SKPPING")
return;
}
try {
let new_buffer = await convert_audio(buffer)
let out = await transcribe(new_buffer);
if (out != null)
//user.name
process_commands_query(out, mapKey, user.id);
} catch (e) {
console.log('tmpraw rename: ' e)
}
})
}
async function transcribe(buffer) {
return transcribe_vosk(buffer)
}
So I don't know if my issue comes from my lack of knowledge of discord.js/voice or VOSK. so I guess the most important thing I need to see is if I am creating a proper stream for the Vosk API to capture the audio. if I can figure out how to capture an audio stream I can probably import that in to vosk and figure out how to use vosk myself. but right now I can't even get close!
Thank you in advance...Sorry if this isn't the right place for this
Subreddit
Post Details
- Posted
- 1 year ago
- Reddit URL
- View post on reddit.com
- External URL
- reddit.com/r/Discordjs/c...