// chat/client/src/components/Chat/Input/AudioRecorder.tsx
// (provenance from file-hosting scrape: uploaded by helloya20, "Upload 2345 files", commit f0743f4)
import { useCallback, useRef } from 'react';
import { useToastContext, TooltipAnchor, ListeningIcon, Spinner } from '@librechat/client';
import { useLocalize, useSpeechToText, useGetAudioSettings } from '~/hooks';
import { useChatFormContext } from '~/Providers';
import { globalAudioId } from '~/common';
import { cn } from '~/utils';
const isExternalSTT = (speechToTextEndpoint: string) => speechToTextEndpoint === 'external';
/**
 * Microphone button that records speech and submits the transcription as a chat message.
 *
 * Two STT modes are handled (selected by `useGetAudioSettings`):
 * - external: the transcription arrives as one complete string when recording stops;
 * - browser: the transcript streams in cumulatively while listening.
 * In both cases, text already typed into the input when recording starts is captured
 * into `existingTextRef` and prepended, so dictation appends rather than replaces.
 *
 * Renders `null` until the chat textarea is mounted.
 */
export default function AudioRecorder({
  disabled,
  ask,
  methods,
  textAreaRef,
  isSubmitting,
}: {
  disabled: boolean;
  /** Submits the final message text to the chat. */
  ask: (data: { text: string }) => void;
  /** Chat form context (react-hook-form methods: setValue/reset/getValues). */
  methods: ReturnType<typeof useChatFormContext>;
  textAreaRef: React.RefObject<HTMLTextAreaElement>;
  isSubmitting: boolean;
}) {
  const { setValue, reset, getValues } = methods;
  const localize = useLocalize();
  const { showToast } = useToastContext();
  const { speechToTextEndpoint } = useGetAudioSettings();
  /** Snapshot of the input text taken when recording starts; prepended to the transcript. */
  const existingTextRef = useRef<string>('');

  const onTranscriptionComplete = useCallback(
    (text: string) => {
      // Refuse to fire a second submission while one is in flight.
      if (isSubmitting) {
        showToast({
          message: localize('com_ui_speech_while_submitting'),
          status: 'error',
        });
        return;
      }
      if (text) {
        const globalAudio = document.getElementById(globalAudioId) as HTMLAudioElement | null;
        if (globalAudio) {
          // Re-enable global TTS audio that was muted during recording.
          globalAudio.muted = false;
        }
        /** For external STT, append existing text to the transcription */
        const finalText =
          isExternalSTT(speechToTextEndpoint) && existingTextRef.current
            ? `${existingTextRef.current} ${text}`
            : text;
        ask({ text: finalText });
        reset({ text: '' });
        existingTextRef.current = '';
      }
    },
    [ask, reset, showToast, localize, isSubmitting, speechToTextEndpoint],
  );

  const setText = useCallback(
    (text: string) => {
      /**
       * Prepend whatever was typed before recording started. This is correct for
       * both modes: external STT delivers one complete transcription, and browser
       * STT delivers a cumulative transcript, so the prefix is applied exactly once
       * either way. (The original had byte-identical if/else branches per mode —
       * collapsed here, which also drops `speechToTextEndpoint` from the deps.)
       */
      const newText = existingTextRef.current ? `${existingTextRef.current} ${text}` : text;
      setValue('text', newText, {
        shouldValidate: true,
      });
    },
    [setValue],
  );

  const { isListening, isLoading, startRecording, stopRecording } = useSpeechToText(
    setText,
    onTranscriptionComplete,
  );

  // All hooks are called above this point, so the early return is safe.
  if (!textAreaRef.current) {
    return null;
  }

  const handleStartRecording = () => {
    // Snapshot the current input so the transcription appends instead of replacing it.
    existingTextRef.current = getValues('text') || '';
    startRecording();
  };

  const handleStopRecording = () => {
    stopRecording();
    /** For browser STT, clear the reference since text was already being updated */
    if (!isExternalSTT(speechToTextEndpoint)) {
      existingTextRef.current = '';
    }
  };

  /** Icon state: red while listening, spinner while transcribing, idle otherwise. */
  const renderIcon = () => {
    if (isListening === true) {
      return <ListeningIcon className="stroke-red-500" />;
    }
    if (isLoading === true) {
      return <Spinner className="stroke-text-secondary" />;
    }
    return <ListeningIcon className="stroke-text-secondary" />;
  };

  // NOTE(review): the key 'com_ui_use_micrphone' looks misspelled ("micrphone") but must
  // match the project's localization table — confirm against the locale files before renaming.
  return (
    <TooltipAnchor
      description={localize('com_ui_use_micrphone')}
      render={
        <button
          id="audio-recorder"
          type="button"
          aria-label={localize('com_ui_use_micrphone')}
          onClick={isListening === true ? handleStopRecording : handleStartRecording}
          disabled={disabled}
          className={cn(
            'flex size-9 items-center justify-center rounded-full p-1 transition-colors hover:bg-surface-hover',
          )}
          title={localize('com_ui_use_micrphone')}
          aria-pressed={isListening}
        >
          {renderIcon()}
        </button>
      }
    />
  );
}