Merge branch 'main' into docs-to-mkdocs-material
audreyfeldroy authored Oct 24, 2023
2 parents 1516422 + 9a3d389 commit b5427a6
Showing 10 changed files with 132 additions and 97 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/update_contributors.yaml
@@ -0,0 +1,14 @@
on:
  push:
    branches:
      - main

jobs:
  contrib-readme-job:
    runs-on: ubuntu-latest
    name: A job to automate contrib in readme
    steps:
      - name: Contribute List
        uses: akhilmhdh/[email protected]
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3 changes: 3 additions & 0 deletions Makefile
@@ -0,0 +1,3 @@
lint:
	black .
	ruff check . --fix
5 changes: 5 additions & 0 deletions README.md
@@ -64,3 +64,8 @@ First install the project using the installation instructions in docs/source/get
```
hist --help
```

## Contributors

<!-- readme: contributors -start -->
<!-- readme: contributors -end -->
53 changes: 29 additions & 24 deletions pyproject.toml
@@ -33,7 +33,6 @@ dependencies = [
dev = [
"black", # code auto-formatting
"coverage==7.3.2", # testing
"isort", # code auto-formatting
"mypy", # linting
"pytest", # testing
"ruff==0.0.292", # linting
@@ -59,30 +58,36 @@ package-dir = {"" = "src"}
[project.scripts]
hist = "interviewkit.cli:app"


# Isort
# -----

[tool.isort]
line_length = 99
profile = "black"
default_section = "THIRDPARTY"
lines_after_imports = 2


# Mypy
# Ruff
# ----

[tool.mypy]
files = "."
[tool.ruff]
select = [
    "E", # pycodestyle
    "F", # pyflakes
    "I", # isort
]
ignore = [
    "E501", # line too long - black takes care of this for us
]

# Use strict defaults
strict = true
warn_unreachable = true
warn_no_return = true
[tool.ruff.per-file-ignores]
# Allow unused imports in __init__ files, as these are convenience imports
"**/__init__.py" = [ "F401" ]

[tool.ruff.isort]
lines-after-imports = 2
section-order = [
"future",
"standard-library",
"third-party",
"first-party",
"project",
"local-folder",
]

[[tool.mypy.overrides]]
# Don't require test functions to include types
module = "tests.*"
allow_untyped_defs = true
disable_error_code = "attr-defined"
[tool.ruff.isort.sections]
"project" = [
"src",
"tests",
]
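
For illustration only (not part of this commit), the following sketch shows how a module's imports would be grouped under the `[tool.ruff.isort]` settings above; the module and function names are hypothetical, and `typer` stands in for any third-party dependency:

```
from __future__ import annotations  # "future" section

import sys  # "standard-library" section
from pathlib import Path

import typer  # "third-party" section

# Imports from the custom "project" section (top-level packages src and tests)
# and then "local-folder" relative imports would follow here, each as its own group.


def main() -> None:
    """Two blank lines separate imports from code, per lines-after-imports = 2."""
    typer.echo(str(Path(sys.argv[0])))


if __name__ == "__main__":
    main()
```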
24 changes: 21 additions & 3 deletions src/interviewkit/cli.py
@@ -1,11 +1,10 @@
import sys
import typer

from pathlib import Path
from typing_extensions import Annotated

import typer
from slicer import audio_slicing
from transcript import transcribe_from_paths
from typing_extensions import Annotated


__version__ = '0.0.1'
@@ -41,6 +40,25 @@ def slice(
"""Slices an audio file into smaller audio files."""
audio_slicing(source, start, duration)

@app.command()
def generate_questions(source: Annotated[
        Path,
        typer.Argument(
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
            resolve_path=True,
            help="Source transcript file",
        ),
    ],
    target: Path
):
    """Generates questions from a transcript."""

    questions = generate_questions_from_transcript(source.read_text())
    target.write_text(questions)


@app.command()
def transcribe(
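As a hedged sketch only (not part of this commit), the new command could be exercised with Typer's test runner; it assumes `src/interviewkit` is on `sys.path` so `cli.py`'s flat imports resolve, that `generate_questions_from_transcript` is importable from `cli`, and that `transcript.txt` exists:

```
from typer.testing import CliRunner

from cli import app  # assumes src/interviewkit is on sys.path

runner = CliRunner()
# Typer derives the CLI name "generate-questions" from the function name by default.
result = runner.invoke(app, ["generate-questions", "transcript.txt", "questions.txt"])
print(result.exit_code, result.output)
```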
4 changes: 2 additions & 2 deletions src/interviewkit/interview.py
@@ -1,6 +1,6 @@
from random import choice
from interviewee import Interviewee
from enum import StrEnum, auto

from interviewee import Interviewee
from transcript import Transcript


50 changes: 50 additions & 0 deletions src/interviewkit/questions.py
@@ -0,0 +1,50 @@
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2

# # Securely get your credentials
# TODO: Pass in arguments or use env vars
CLARIFAI_PAT = ''
# Specify the correct user_id/app_id pairings
# Since you're making inferences outside your app's scope
CLARIFAI_USER_ID = 'meta'
CLARIFAI_APP_ID = 'Llama-2'
# Change these to whatever model and text URL you want to use
CLARIFAI_MODEL_ID = 'llama2-70b-chat'
CLARIFAI_MODEL_VERSION_ID = 'acba9c1995f8462390d7cb77d482810b'


def generate_questions_from_transcript(transcript: str):

    channel = ClarifaiChannel.get_grpc_channel()
    stub = service_pb2_grpc.V2Stub(channel)

    metadata = (('authorization', 'Key ' + CLARIFAI_PAT),)
    userDataObject = resources_pb2.UserAppIDSet(
        user_id=CLARIFAI_USER_ID, app_id=CLARIFAI_APP_ID)

    post_model_outputs_response = stub.PostModelOutputs(
        service_pb2.PostModelOutputsRequest(
            user_app_id=userDataObject,
            model_id=CLARIFAI_MODEL_ID,
            version_id=CLARIFAI_MODEL_VERSION_ID,
            inputs=[
                resources_pb2.Input(
                    data=resources_pb2.Data(
                        text=resources_pb2.Text(
                            raw=transcript
                        )
                    )
                )
            ]
        ),
        metadata=metadata
    )

    if post_model_outputs_response.status.code != status_code_pb2.SUCCESS:
        print(post_model_outputs_response.status)
        status = post_model_outputs_response.status.description
        raise Exception(f"Post model outputs failed, status: {status}")

    output = post_model_outputs_response.outputs[0]
    return output.data.text.raw
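
A usage sketch only (not in the commit): per the TODO above, the PAT can come from an environment variable instead of the hard-coded empty string; the transcript path here is hypothetical:

```
import os
from pathlib import Path

import questions  # the module added above, assuming src/interviewkit is on sys.path

# Avoid hard-coding credentials: read the Personal Access Token from the environment.
questions.CLARIFAI_PAT = os.environ["CLARIFAI_PAT"]

transcript_text = Path("data/interview_transcript.txt").read_text()  # hypothetical path
print(questions.generate_questions_from_transcript(transcript_text))
```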
3 changes: 2 additions & 1 deletion src/interviewkit/slicer.py
@@ -27,9 +27,10 @@
python interviewkit/slicer.py data/Martine+Barrat_FINAL.mp3 80:30 90:40
"""
import shutil
import sys
from pathlib import Path
import shutil


try:
    import pydub
64 changes: 0 additions & 64 deletions src/interviewkit/transcript.py
@@ -1,9 +1,6 @@
from pathlib import Path
from rich.console import Console
import sys
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2

try:
    import whisper
@@ -20,53 +17,6 @@ class Transcript(BaseModel):
"""The Transcript entity represents the transcript of an interview."""
content: str

# # Securely get your credentials
# PAT = os.getenv('CLARIFAI_PAT')
# USER_ID = os.getenv('CLARIFAI_USER_ID')
# APP_ID = os.getenv('CLARIFAI_APP_ID')
# MODEL_ID = os.getenv('CLARIFAI_MODEL_ID')
# MODEL_VERSION_ID = os.getenv('CLARIFAI_MODEL_VERSION_ID')
PAT = ''
# Specify the correct user_id/app_id pairings
# Since you're making inferences outside your app's scope
USER_ID = 'meta'
APP_ID = 'Llama-2'
# Change these to whatever model and text URL you want to use
MODEL_ID = 'llama2-70b-chat'
MODEL_VERSION_ID = 'acba9c1995f8462390d7cb77d482810b'

def generate_questions(transcript_chunk):
    channel = ClarifaiChannel.get_grpc_channel()
    stub = service_pb2_grpc.V2Stub(channel)

    metadata = (('authorization', 'Key ' + PAT),)
    userDataObject = resources_pb2.UserAppIDSet(user_id=USER_ID, app_id=APP_ID)

    post_model_outputs_response = stub.PostModelOutputs(
        service_pb2.PostModelOutputsRequest(
            user_app_id=userDataObject,
            model_id=MODEL_ID,
            version_id=MODEL_VERSION_ID,
            inputs=[
                resources_pb2.Input(
                    data=resources_pb2.Data(
                        text=resources_pb2.Text(
                            raw=transcript_chunk
                        )
                    )
                )
            ]
        ),
        metadata=metadata
    )

    if post_model_outputs_response.status.code != status_code_pb2.SUCCESS:
        print(post_model_outputs_response.status)
        raise Exception(f"Post model outputs failed, status: {post_model_outputs_response.status.description}")

    output = post_model_outputs_response.outputs[0]
    return output.data.text.raw

def transcribe_from_paths(source: Path, target: Path) -> None:
    console.print("Loading whisper base model...")
    model = whisper.load_model("base")
@@ -89,20 +39,6 @@ def transcribe_from_paths(source: Path, target: Path) -> None:
console.print("Transcript saved to:")
console.print(f" [green bold]{target / source.name}.txt[/green bold]")

    # Generate questions from the transcript
    transcript_chunk = result['text'] # Assuming 'result' contains the transcribed text
    # Debug: Print type and value of transcript_chunk
    print(f"Type of transcript_chunk: {type(transcript_chunk)}")
    print(f"Value of transcript_chunk: {transcript_chunk}")

    # Ensure transcript_chunk is a string
    if not isinstance(transcript_chunk, str):
        print("Warning: transcript_chunk is not a string. Trying to convert...")
        transcript_chunk = str(transcript_chunk)

    questions = generate_questions(transcript_chunk)
    console.print("Generated Questions:\n", questions)

if __name__ == "__main__":
    source = Path(sys.argv[1])
    target = Path(sys.argv[2])
9 changes: 6 additions & 3 deletions src/interviewkit/transcript_using_m5.py
@@ -1,16 +1,19 @@
import sys
from pathlib import Path

from rich.console import Console
import sys
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import T5ForConditionalGeneration, T5Tokenizer


try:
    import whisper
except ImportError:
    print("Please install Whisper: pip install openai-whisper")
    exit(1)

from whisper.utils import get_writer
from pydantic import BaseModel
from whisper.utils import get_writer


console = Console()

