From 31453eff7bccffbf75b2f2f9f290bb261cfd1d1f Mon Sep 17 00:00:00 2001 From: David Li Date: Thu, 6 Oct 2022 04:39:47 +0000 Subject: [PATCH] feat: closes #34, fixes ffmpeg usage --- .coveragerc | 19 +++++++++++++++- .github/workflows/transcribe_video.yml | 1 + processing.py | 31 ++++++++++++++++++++++++-- transcript_manager.py | 6 ++--- 4 files changed, 51 insertions(+), 6 deletions(-) diff --git a/.coveragerc b/.coveragerc index 8e829eb..2564848 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,2 +1,19 @@ [run] -omit = tests \ No newline at end of file +omit = tests + +# Regexes for lines to exclude from consideration +exclude_lines = + # Have to re-enable the standard pragma + pragma: no cover + + # Don't complain about missing debug-only code: + def __repr__ + if self\.debug + + # Don't complain if tests don't hit defensive assertion code: + raise AssertionError + raise NotImplementedError + + # Don't complain if non-runnable code isn't run: + if 0: + if __name__ == .__main__.: \ No newline at end of file diff --git a/.github/workflows/transcribe_video.yml b/.github/workflows/transcribe_video.yml index d9ec80d..8c290ae 100644 --- a/.github/workflows/transcribe_video.yml +++ b/.github/workflows/transcribe_video.yml @@ -67,6 +67,7 @@ jobs: run: | echo "ITERATION=${{ github.event.inputs.iteration }}" >> $GITHUB_ENV echo "YOUTUBE_URL=${{ github.event.inputs.youtube_url}}" >> $GITHUB_ENV + echo ${{github.event.inputs.youtube_url}} - name: set table name if available if: "${{ github.event.inputs.table_name != '' }}" diff --git a/processing.py b/processing.py index 61d9a6f..18ca404 100644 --- a/processing.py +++ b/processing.py @@ -25,7 +25,7 @@ def get_video_length(video_path: str): return None -def get_video_from_start(url: str, config: dict): +def get_video_from_start_legacy(url: str, config: dict): """ Get video from start time. 
""" @@ -50,9 +50,22 @@ def get_video_from_start(url: str, config: dict): ic(result) return result.stdout.decode("utf-8") +def get_video_from_start(url: str, config: dict): + """ + Get video from start time. + """ + filename = config.get("filename", "livestream01.mp4") + end = config.get("end", "00:00:10") + ( + ffmpeg + .input(url, t=end) + .output(filename) + .run() + ) + # wit ai process integration -def convert_mp4_to_mp3(filename: str): +def convert_mp4_to_mp3_legacy(filename: str): """ Convert mp4 to mp3 using ffmpeg """ @@ -65,6 +78,18 @@ def convert_mp4_to_mp3(filename: str): ic(result) return result +def convert_mp4_to_mp3(filename: str): + """ + Convert mp4 to mp3 using ffmpeg; the output path is filename with ".mp4" replaced by ".mp3" + """ + ic("Converting mp4 to mp3") + mp3_filename = filename.replace(".mp4", ".mp3") + ( + ffmpeg + .input(filename, vn=None) + .output(mp3_filename) + .run() + ) # parse all the partial json responses and attempt to find the last one @@ -230,6 +255,8 @@ def split_vid_into_chunks(filename: str, is_livestream: bool = False, chunk_size ic("No chunks to process for video") # convert_mp4_to_mp3(filename) else: + # Is this conversion even needed for whispers? The library may be able to do it itself. + # TODO figure this out later convert_mp4_to_mp3(filename) yield filename t2_start = time.perf_counter() diff --git a/transcript_manager.py b/transcript_manager.py index e416919..c70f525 100644 --- a/transcript_manager.py +++ b/transcript_manager.py @@ -296,9 +296,9 @@ def main(params: dict): args = parser.parse_args() # ensure WIT_AI_TOKEN is set ic("Running main") - if os.environ.get("WIT_AI_TOKEN") is None: - print("WIT_AI_TOKEN is not set") - exit(1) + # if os.environ.get("WIT_AI_TOKEN") is None: + # print("WIT_AI_TOKEN is not set") + # exit(1) dict_args = { "url": args.url, "exit_on_video": args.exit_for_videos,