diff --git a/src/LabelVoice.Toolkit/LabelVoice.Toolkit.csproj b/src/LabelVoice.Toolkit/LabelVoice.Toolkit.csproj new file mode 100644 index 0000000..c6d7c8d --- /dev/null +++ b/src/LabelVoice.Toolkit/LabelVoice.Toolkit.csproj @@ -0,0 +1,13 @@ + + + + net6.0 + enable + enable + + + + + + + diff --git a/src/LabelVoice.Toolkit/Slicer/IAdaptiveAudioSlicer.cs b/src/LabelVoice.Toolkit/Slicer/IAdaptiveAudioSlicer.cs new file mode 100644 index 0000000..5c68673 --- /dev/null +++ b/src/LabelVoice.Toolkit/Slicer/IAdaptiveAudioSlicer.cs @@ -0,0 +1,14 @@ +using NAudio.Wave; + +namespace LabelVoice.Toolkit.Slicer; + +/// +/// An adaptive audio slicer slices audios with no knowledge or supervision other than the signal itself. +/// +public interface IAdaptiveAudioSlicer : IAudioSlicer +{ + /// + /// Initialize the audio slicer with signal provided by . + /// + void Init(ISampleProvider provider); +} diff --git a/src/LabelVoice.Toolkit/Slicer/IAudioSlicer.cs b/src/LabelVoice.Toolkit/Slicer/IAudioSlicer.cs new file mode 100644 index 0000000..75f5963 --- /dev/null +++ b/src/LabelVoice.Toolkit/Slicer/IAudioSlicer.cs @@ -0,0 +1,28 @@ +using NAudio.Wave; + +namespace LabelVoice.Toolkit.Slicer; + +/// +/// An audio slicer slices audios into multiple pieces in a streaming way. +/// +public interface IAudioSlicer +{ + /// + /// Try to get the next piece. + /// + /// When this method returns, contains the range of next piece in the given signal, or null if there are no more pieces to be sliced. + /// if the next valid piece is sliced, otherwise . + public bool TrySlice(out AudioRange? range); +} + +public readonly struct AudioRange +{ + public readonly double In; + public readonly double Out; + + public AudioRange(double inPoint, double outPoint) + { + In = inPoint; + Out = outPoint; + } +} diff --git a/src/LabelVoice.Toolkit/Slicer/ISupervisedAudioSlicer.cs b/src/LabelVoice.Toolkit/Slicer/ISupervisedAudioSlicer.cs new file mode 100644 index 0000000..d709b59 --- /dev/null +++ b/src/LabelVoice.Toolkit/Slicer/ISupervisedAudioSlicer.cs @@ -0,0 +1,17 @@ +using NAudio.Wave; + +namespace LabelVoice.Toolkit.Slicer; + +/// +/// A supervised audio slicer slices audios with given knowledge or supervision such as transcriptions or subtitles. +/// +/// The type of the instance carrying the supervision. +public interface ISupervisedAudioSlicer : IAudioSlicer +{ + /// + /// Initialize the audio slicer with signal provided by and knowledge carried by . + /// + /// + /// + public void Init(ISampleProvider provider, T supervision); +} diff --git a/src/LabelVoice.Toolkit/Slicer/MonoAudioSlicerBase.cs b/src/LabelVoice.Toolkit/Slicer/MonoAudioSlicerBase.cs new file mode 100644 index 0000000..f901ccc --- /dev/null +++ b/src/LabelVoice.Toolkit/Slicer/MonoAudioSlicerBase.cs @@ -0,0 +1,39 @@ +using NAudio.Wave; +using NAudio.Wave.SampleProviders; + +namespace LabelVoice.Toolkit.Slicer; + +/// +/// This abstract class represents audio slicers that slices mono audio signals and provides property and conversion to ensure the given signal is mono. +/// Any class that inherits this class gets the `Provider` property whose setter converts stereo providers into mono providers. Thus, the provider got from this property is guaranteed to be mono.
+/// Example: +/// +/// public void Init(ISampleProvider provider) +/// { +/// Provider = provider; +/// // Once assigned, the `Provider` property is guaranteed to provide mono signals. +/// } +/// +///
+public abstract class MonoAudioSlicerBase : IAudioSlicer +{ + private ISampleProvider _provider; + + /// + /// Represents the source provider which is guaranteed to provide mono signals. + /// + protected ISampleProvider Provider + { + get => _provider; + set => _provider = RequireMono(value); + } + + private static ISampleProvider RequireMono(ISampleProvider provider) + { + return provider.WaveFormat.Channels >= 2 + ? new StereoToMonoSampleProvider(provider) + : provider; + } + + public abstract bool TrySlice(out AudioRange? range); +} diff --git a/src/LabelVoice.Toolkit/Slicer/SilenceDetectionSlicer.cs b/src/LabelVoice.Toolkit/Slicer/SilenceDetectionSlicer.cs new file mode 100644 index 0000000..f798773 --- /dev/null +++ b/src/LabelVoice.Toolkit/Slicer/SilenceDetectionSlicer.cs @@ -0,0 +1,19 @@ +using NAudio.Wave; + +namespace LabelVoice.Toolkit.Slicer; + +/// +/// A silence detection slicer slices audios via silence detection, i.e. cuts off audios from detected silence parts. +/// +public class SilenceDetectionSlicer : MonoAudioSlicerBase, IAdaptiveAudioSlicer +{ + public void Init(ISampleProvider provider) + { + Provider = provider; + } + + public override bool TrySlice(out AudioRange? range) + { + throw new NotImplementedException(); + } +} diff --git a/src/LabelVoice.sln b/src/LabelVoice.sln index 5b3826f..c18b4ba 100644 --- a/src/LabelVoice.sln +++ b/src/LabelVoice.sln @@ -5,10 +5,12 @@ VisualStudioVersion = 17.4.32916.344 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LabelVoice", "LabelVoice\LabelVoice.csproj", "{C7639302-E10B-4FBC-AD48-A645E4403C67}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LabelVoice.Core", "LabelVoice.Core\LabelVoice.Core.csproj", "{340ADDD3-5B13-4328-A836-34A87EC8DEC5}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LabelVoice.Core", "LabelVoice.Core\LabelVoice.Core.csproj", "{340ADDD3-5B13-4328-A836-34A87EC8DEC5}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Test.Playback", "Test.Playback\Test.Playback.csproj", "{EFE88D45-1EFD-4E3A-879F-6A03ECFECCDB}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LabelVoice.Toolkit", "LabelVoice.Toolkit\LabelVoice.Toolkit.csproj", "{09B69A2A-6F91-4CA6-8629-87BE08E26149}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,6 +29,10 @@ Global {EFE88D45-1EFD-4E3A-879F-6A03ECFECCDB}.Debug|Any CPU.Build.0 = Debug|Any CPU {EFE88D45-1EFD-4E3A-879F-6A03ECFECCDB}.Release|Any CPU.ActiveCfg = Release|Any CPU {EFE88D45-1EFD-4E3A-879F-6A03ECFECCDB}.Release|Any CPU.Build.0 = Release|Any CPU + {09B69A2A-6F91-4CA6-8629-87BE08E26149}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {09B69A2A-6F91-4CA6-8629-87BE08E26149}.Debug|Any CPU.Build.0 = Debug|Any CPU + {09B69A2A-6F91-4CA6-8629-87BE08E26149}.Release|Any CPU.ActiveCfg = Release|Any CPU + {09B69A2A-6F91-4CA6-8629-87BE08E26149}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE