diff --git a/.eslintrc b/.eslintrc index cb5c21b085..19620e2f43 100644 --- a/.eslintrc +++ b/.eslintrc @@ -11,6 +11,7 @@ }, "plugins": ["@typescript-eslint"], "rules": { - "semi": [2, "never"] + "semi": [2, "never"], + "@typescript-eslint/no-unnecessary-type-assertion": 0 } } diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index 647e2c5028..57905dfba7 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -30,3 +30,4 @@ jobs: run: | docker build --file packages/content-fetch/Dockerfile . --tag "ghcr.io/omnivore-app/content-fetch:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/content-fetch:latest docker push ghcr.io/omnivore-app/content-fetch:${GITHUB_SHA} + diff --git a/.github/workflows/build-self-host-docker-images.yml b/.github/workflows/build-self-host-docker-images.yml new file mode 100644 index 0000000000..0d3bc5b170 --- /dev/null +++ b/.github/workflows/build-self-host-docker-images.yml @@ -0,0 +1,63 @@ +name: Build Self-Hosting Docker Images +on: + # For testing purposes. + pull_request: + branches: + - main + push: + branches: + - main + paths-ignore: + - 'apple/**' + - 'android/**' + +jobs: + build-self-host-docker-images: + name: Build self-host docker images + permissions: + contents: read + packages: write + attestations: write + id-token: write + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: 'Login to GitHub container registry' + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{github.actor}} + password: ${{secrets.GITHUB_TOKEN}} + - name: Build the backend docker image + run: | + docker build . --file packages/api/Dockerfile --tag "ghcr.io/omnivore-app/sh-backend:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-backend:latest + docker push ghcr.io/omnivore-app/sh-backend:${GITHUB_SHA} + - name: Build the content-fetch docker image + run: | + docker build --file packages/content-fetch/Dockerfile . --tag "ghcr.io/omnivore-app/sh-content-fetch:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-content-fetch:latest + docker push ghcr.io/omnivore-app/sh-content-fetch:${GITHUB_SHA} + - name: Build the queue-processor docker image + run: | + docker build . --file packages/api/queue-processor/Dockerfile --tag "ghcr.io/omnivore-app/sh-queue-processor:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-queue-processor:latest + docker push ghcr.io/omnivore-app/sh-queue-processor:${GITHUB_SHA} + - name: Build the web docker image + run: | + docker build --file packages/web/Dockerfile . --tag "ghcr.io/omnivore-app/sh-web:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-web:latest + docker push ghcr.io/omnivore-app/sh-web:${GITHUB_SHA} + - name: Build the migrate docker image + run: | + docker build --file packages/db/Dockerfile . --tag "ghcr.io/omnivore-app/sh-migrate:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-migrate:latest + docker push ghcr.io/omnivore-app/sh-migrate:${GITHUB_SHA} + - name: Build the image-proxy docker image + run: | + docker build --file imageproxy/Dockerfile . --tag "ghcr.io/omnivore-app/sh-image-proxy:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-image-proxy:latest + docker push ghcr.io/omnivore-app/sh-image-proxy:${GITHUB_SHA} + - name: Build the mail-watch-server docker image + run: | + docker build --file packages/local-mail-watcher/Dockerfile .
--tag "ghcr.io/omnivore-app/sh-local-mail-watcher:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-local-mail-watcher:latest + docker push ghcr.io/omnivore-app/sh-local-mail-watcher:${GITHUB_SHA} + + diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index de40ff68e0..a07ff8f209 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -57,7 +57,7 @@ jobs: - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v2 with: - node-version: 18.16 + node-version: 22.12.0 - name: Get yarn cache directory path id: yarn-cache-dir-path run: echo "::set-output name=dir::$(source ~/.nvm/nvm.sh && yarn cache dir)" diff --git a/.node-version b/.node-version index b492b08635..1d9b7831ba 100644 --- a/.node-version +++ b/.node-version @@ -1 +1 @@ -18.16 +22.12.0 diff --git a/README.md b/README.md index 3a239b6498..1fa9f08bfc 100644 --- a/README.md +++ b/README.md @@ -151,24 +151,7 @@ is done fetching your content you will see it in your library. ## How to deploy to your own server -Omnivore was originally designed to be deployed on GCP and takes advantage -of some of GCP's PaaS features. We are working to make Omnivore more portable -so you can easily run the service on your own infrastructure. You can track -progress here: - -To deploy Omnivore on your own hardware you will need to deploy three -dockerized services and configure access to a postgres service. To handle -PDF documents you will need to configure access to a Google Cloud Storage -bucket. - -- `packages/api` - the backend API service -- `packages/web` - the web frontend (can easily be deployed to vercel) -- `packages/puppeteer-parse` - the content fetching service (can easily - be deployed as an AWS lambda or GCP Cloud Function) - -Additionally, you will need to run our database migrations to initialize -your database. These are dockerized and can be run with the -`packages/db` service. +A guide for running a self hosted server can be found [here](./self-hosting/GUIDE.md) ## License diff --git a/android/Omnivore/README.md b/android/Omnivore/README.md index cd631fb4fe..7f60a883fc 100644 --- a/android/Omnivore/README.md +++ b/android/Omnivore/README.md @@ -1,5 +1,12 @@ # Omnivore - Android +## Setup with gradle + +In case you do not have Android Studio and you do not want to install it, you may want to use gradlew scripts to build the application. 
+ +`./gradlew assembleDebug` should create `./app/build/outputs/apk/debug/app-debug.apk` + + ## Setup From the root directory run the following command: diff --git a/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/editinfo/EditInfoViewModel.kt b/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/editinfo/EditInfoViewModel.kt index bcd64b9fce..08c7d0db97 100644 --- a/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/editinfo/EditInfoViewModel.kt +++ b/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/editinfo/EditInfoViewModel.kt @@ -10,6 +10,7 @@ import app.omnivore.omnivore.R import app.omnivore.omnivore.core.data.DataService import app.omnivore.omnivore.core.datastore.DatastoreRepository import app.omnivore.omnivore.core.datastore.omnivoreAuthToken +import app.omnivore.omnivore.core.datastore.omnivoreSelfHostedApiServer import app.omnivore.omnivore.graphql.generated.UpdatePageMutation import app.omnivore.omnivore.graphql.generated.type.UpdatePageInput import app.omnivore.omnivore.utils.Constants @@ -48,6 +49,12 @@ class EditInfoViewModel @Inject constructor( datastoreRepo.getString(omnivoreAuthToken) } + fun baseUrl() = runBlocking { + datastoreRepo.getString(omnivoreSelfHostedApiServer) ?: Constants.apiURL + } + + private fun serverUrl() = "${baseUrl()}/api/graphql" + fun editInfo(itemId: String, title: String, author: String?, description: String?) { viewModelScope.launch { isLoading = true @@ -62,7 +69,7 @@ class EditInfoViewModel @Inject constructor( } val apolloClient = ApolloClient.Builder() - .serverUrl("${Constants.apiURL}/api/graphql") + .serverUrl(serverUrl()) .addHttpHeader("Authorization", value = authToken) .build() diff --git a/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/onboarding/OnboardingViewModel.kt b/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/onboarding/OnboardingViewModel.kt index 5887807ffb..939db54462 100644 --- a/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/onboarding/OnboardingViewModel.kt +++ b/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/onboarding/OnboardingViewModel.kt @@ -176,6 +176,12 @@ class OnboardingViewModel @Inject constructor( resetPendingEmailUserCreds() } + fun baseUrl() = runBlocking { + datastoreRepository.getString(omnivoreSelfHostedApiServer) ?: Constants.apiURL + } + + private fun serverUrl() = "${baseUrl()}/api/graphql" + fun validateUsername(potentialUsername: String) { validateUsernameJob?.cancel() @@ -209,7 +215,7 @@ class OnboardingViewModel @Inject constructor( } val apolloClient = - ApolloClient.Builder().serverUrl("${Constants.apiURL}/api/graphql").build() + ApolloClient.Builder().serverUrl(serverUrl()).build() try { val response = apolloClient.query( diff --git a/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/save/SaveURLWorker.kt b/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/save/SaveURLWorker.kt index fbf869e58e..25d077820c 100644 --- a/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/save/SaveURLWorker.kt +++ b/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/save/SaveURLWorker.kt @@ -16,6 +16,7 @@ import androidx.work.WorkerParameters import app.omnivore.omnivore.R import app.omnivore.omnivore.core.datastore.DatastoreRepository import app.omnivore.omnivore.core.datastore.omnivoreAuthToken +import app.omnivore.omnivore.core.datastore.omnivoreSelfHostedApiServer import 
app.omnivore.omnivore.graphql.generated.SaveUrlMutation import app.omnivore.omnivore.graphql.generated.type.SaveUrlInput import app.omnivore.omnivore.utils.Constants @@ -59,11 +60,16 @@ class SaveURLWorker @AssistedInject constructor( } } + suspend fun baseUrl() = + datastoreRepository.getString(omnivoreSelfHostedApiServer) ?: Constants.apiURL + + private suspend fun serverUrl() = "${baseUrl()}/api/graphql" + private suspend fun saveURL(url: String): Boolean { val authToken = datastoreRepository.getString(omnivoreAuthToken) ?: return false val apolloClient = ApolloClient.Builder() - .serverUrl("${Constants.apiURL}/api/graphql") + .serverUrl(serverUrl()) .addHttpHeader("Authorization", value = authToken) .build() diff --git a/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/save/SaveViewModel.kt b/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/save/SaveViewModel.kt index b708c8b047..2bbbb8a3db 100644 --- a/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/save/SaveViewModel.kt +++ b/android/Omnivore/app/src/main/java/app/omnivore/omnivore/feature/save/SaveViewModel.kt @@ -11,6 +11,7 @@ import androidx.lifecycle.viewModelScope import app.omnivore.omnivore.R import app.omnivore.omnivore.core.datastore.DatastoreRepository import app.omnivore.omnivore.core.datastore.omnivoreAuthToken +import app.omnivore.omnivore.core.datastore.omnivoreSelfHostedApiServer import app.omnivore.omnivore.graphql.generated.SaveUrlMutation import app.omnivore.omnivore.graphql.generated.type.SaveUrlInput import app.omnivore.omnivore.utils.Constants @@ -71,6 +72,12 @@ class SaveViewModel @Inject constructor( return null } + fun baseUrl() = runBlocking { + datastoreRepo.getString(omnivoreSelfHostedApiServer) ?: Constants.apiURL + } + + private fun serverUrl() = "${baseUrl()}/api/graphql" + fun saveURL(url: String) { viewModelScope.launch { isLoading = true @@ -86,7 +93,7 @@ class SaveViewModel @Inject constructor( } val apolloClient = ApolloClient.Builder() - .serverUrl("${Constants.apiURL}/api/graphql") + .serverUrl(serverUrl()) .addHttpHeader("Authorization", value = authToken) .build() diff --git a/docs/guides/images/cloudflare-tunnel.png b/docs/guides/images/cloudflare-tunnel.png new file mode 100644 index 0000000000..41e85097de Binary files /dev/null and b/docs/guides/images/cloudflare-tunnel.png differ diff --git a/docs/guides/images/create-new-email.png b/docs/guides/images/create-new-email.png new file mode 100644 index 0000000000..e625a2182f Binary files /dev/null and b/docs/guides/images/create-new-email.png differ diff --git a/docs/guides/images/imported-email.png b/docs/guides/images/imported-email.png new file mode 100644 index 0000000000..4663fab56a Binary files /dev/null and b/docs/guides/images/imported-email.png differ diff --git a/docs/guides/images/received-email.png b/docs/guides/images/received-email.png new file mode 100644 index 0000000000..f7db471ac4 Binary files /dev/null and b/docs/guides/images/received-email.png differ diff --git a/docs/guides/images/ses-add-domain.png b/docs/guides/images/ses-add-domain.png new file mode 100644 index 0000000000..37465f4140 Binary files /dev/null and b/docs/guides/images/ses-add-domain.png differ diff --git a/docs/guides/images/ses-verify.png b/docs/guides/images/ses-verify.png new file mode 100644 index 0000000000..b8c6e54f70 Binary files /dev/null and b/docs/guides/images/ses-verify.png differ diff --git a/docs/guides/images/sns-add-action-publish.png b/docs/guides/images/sns-add-action-publish.png new file 
mode 100644 index 0000000000..f3dd66e357 Binary files /dev/null and b/docs/guides/images/sns-add-action-publish.png differ diff --git a/docs/guides/images/sns-add-actions-sns-menu.png b/docs/guides/images/sns-add-actions-sns-menu.png new file mode 100644 index 0000000000..97fda735e7 Binary files /dev/null and b/docs/guides/images/sns-add-actions-sns-menu.png differ diff --git a/docs/guides/images/sns-create-identity.png b/docs/guides/images/sns-create-identity.png new file mode 100644 index 0000000000..cc0e91a95a Binary files /dev/null and b/docs/guides/images/sns-create-identity.png differ diff --git a/docs/guides/images/sns-create-ruleset.png b/docs/guides/images/sns-create-ruleset.png new file mode 100644 index 0000000000..2018f748b5 Binary files /dev/null and b/docs/guides/images/sns-create-ruleset.png differ diff --git a/docs/guides/images/sns-create-subscription.png b/docs/guides/images/sns-create-subscription.png new file mode 100644 index 0000000000..9521c62d04 Binary files /dev/null and b/docs/guides/images/sns-create-subscription.png differ diff --git a/docs/guides/images/sns-create-topic.png b/docs/guides/images/sns-create-topic.png new file mode 100644 index 0000000000..da557d0c1e Binary files /dev/null and b/docs/guides/images/sns-create-topic.png differ diff --git a/docs/guides/images/sns-define-incoming-rule.png b/docs/guides/images/sns-define-incoming-rule.png new file mode 100644 index 0000000000..613d0efad7 Binary files /dev/null and b/docs/guides/images/sns-define-incoming-rule.png differ diff --git a/docs/guides/images/sns-publish-menu.png b/docs/guides/images/sns-publish-menu.png new file mode 100644 index 0000000000..3b33145646 Binary files /dev/null and b/docs/guides/images/sns-publish-menu.png differ diff --git a/docs/guides/images/sns-topic-menu.png b/docs/guides/images/sns-topic-menu.png new file mode 100644 index 0000000000..1b90b71c7a Binary files /dev/null and b/docs/guides/images/sns-topic-menu.png differ diff --git a/docs/guides/images/testing-incoming-email.png b/docs/guides/images/testing-incoming-email.png new file mode 100644 index 0000000000..44152dd31e Binary files /dev/null and b/docs/guides/images/testing-incoming-email.png differ diff --git a/docs/guides/images/zapier-email-webhook.png b/docs/guides/images/zapier-email-webhook.png new file mode 100644 index 0000000000..4c4dadd6c1 Binary files /dev/null and b/docs/guides/images/zapier-email-webhook.png differ diff --git a/docs/guides/images/zapier-javascript-step.png b/docs/guides/images/zapier-javascript-step.png new file mode 100644 index 0000000000..2738b5bc6c Binary files /dev/null and b/docs/guides/images/zapier-javascript-step.png differ diff --git a/docs/guides/images/zapier-webhook-step.png b/docs/guides/images/zapier-webhook-step.png new file mode 100644 index 0000000000..9f4b27ce82 Binary files /dev/null and b/docs/guides/images/zapier-webhook-step.png differ diff --git a/imageproxy/Dockerfile b/imageproxy/Dockerfile index 57575184a6..ec0f320a0c 100644 --- a/imageproxy/Dockerfile +++ b/imageproxy/Dockerfile @@ -1,4 +1,4 @@ -FROM willnorris/imageproxy:v0.10.0 as build +FROM ghcr.io/willnorris/imageproxy:main as build # Above imageproxy image is built from scratch image and is barebones # Switching over to ubuntu base image to allow us to debug better. 
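Taken together, the four Android changes above (EditInfoViewModel, OnboardingViewModel, SaveURLWorker, SaveViewModel) apply one pattern: read an optional self-hosted API server from the datastore and fall back to the hosted default before building the GraphQL endpoint. A minimal TypeScript sketch of that resolution logic follows; the names and the default URL are illustrative stand-ins (the real clients read `omnivoreSelfHostedApiServer` from a DataStore and fall back to `Constants.apiURL`):

```typescript
// Sketch of the endpoint resolution the Android view models perform.
// DEFAULT_API_URL is an assumed stand-in for Constants.apiURL.
const DEFAULT_API_URL = 'https://api-prod.omnivore.app'

// The stored self-hosted server wins; otherwise use the hosted default.
const baseUrl = (selfHostedApiServer?: string): string =>
  selfHostedApiServer ?? DEFAULT_API_URL

// Every client then appends the same GraphQL path.
const serverUrl = (selfHostedApiServer?: string): string =>
  `${baseUrl(selfHostedApiServer)}/api/graphql`

// serverUrl('https://omnivore.example.com') === 'https://omnivore.example.com/api/graphql'
```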
diff --git a/packages/api/.eslintrc b/packages/api/.eslintrc index bc325835ae..2344556f2e 100644 --- a/packages/api/.eslintrc +++ b/packages/api/.eslintrc @@ -10,7 +10,8 @@ { "files": ["test/**/*.ts"], "rules": { - "@typescript-eslint/no-unsafe-member-access": 0 + "@typescript-eslint/no-unsafe-member-access": 0, + "@typescript-eslint/no-unnecessary-type-assertion": 0 } } ] diff --git a/packages/api/Dockerfile b/packages/api/Dockerfile index 175362ea5b..b20ffb5d05 100644 --- a/packages/api/Dockerfile +++ b/packages/api/Dockerfile @@ -1,51 +1,36 @@ -FROM node:18.16 as builder +FROM node:22.12 AS builder WORKDIR /app -ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true -RUN apt-get update && apt-get install -y g++ make python3 +ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true -COPY package.json . -COPY yarn.lock . -COPY tsconfig.json . -COPY .prettierrc . -COPY .eslintrc . +RUN apt-get update && apt-get install -y g++ make python3 && apt-get clean && rm -rf /var/lib/apt/lists/* -COPY /packages/readabilityjs/package.json ./packages/readabilityjs/package.json -COPY /packages/api/package.json ./packages/api/package.json -COPY /packages/text-to-speech/package.json ./packages/text-to-speech/package.json -COPY /packages/content-handler/package.json ./packages/content-handler/package.json -COPY /packages/liqe/package.json ./packages/liqe/package.json -COPY /packages/utils/package.json ./packages/utils/package.json +COPY package.json yarn.lock tsconfig.json .prettierrc .eslintrc ./ +COPY packages ./packages -RUN yarn install --pure-lockfile +# Remove all except needed packages +RUN find packages -mindepth 1 -type d \ + ! -regex '^packages/\(api\|readabilityjs\|text-to-speech\|content-handler\|liqe\|utils\)\(/.*\)?' \ + -exec rm -rf {} + -ADD /packages/readabilityjs ./packages/readabilityjs -ADD /packages/api ./packages/api -ADD /packages/text-to-speech ./packages/text-to-speech -ADD /packages/content-handler ./packages/content-handler -ADD /packages/liqe ./packages/liqe -ADD /packages/utils ./packages/utils +RUN yarn install --pure-lockfile && \ + yarn workspace @omnivore/utils build && \ + yarn workspace @omnivore/text-to-speech-handler build && \ + yarn workspace @omnivore/content-handler build && \ + yarn workspace @omnivore/liqe build && \ + yarn workspace @omnivore/api build && \ + rm -rf /app/packages/api/node_modules /app/node_modules && \ + yarn install --pure-lockfile --production -RUN yarn workspace @omnivore/utils build -RUN yarn workspace @omnivore/text-to-speech-handler build -RUN yarn workspace @omnivore/content-handler build -RUN yarn workspace @omnivore/liqe build -RUN yarn workspace @omnivore/api build - -# After building, fetch the production dependencies -RUN rm -rf /app/packages/api/node_modules -RUN rm -rf /app/node_modules -RUN yarn install --pure-lockfile --production - -FROM node:18.16 as runner +FROM node:22.12-alpine AS runner LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore" -RUN apt-get update && apt-get install -y netcat-openbsd +RUN apk update && apk add netcat-openbsd && rm -rf /var/cache/apk/* WORKDIR /app -ENV NODE_ENV production +ENV NODE_ENV=production ENV NODE_OPTIONS=--max-old-space-size=4096 ENV PORT=8080 @@ -59,6 +44,7 @@ COPY --from=builder /app/packages/text-to-speech/ /app/packages/text-to-speech/ COPY --from=builder /app/packages/content-handler/ /app/packages/content-handler/ COPY --from=builder /app/packages/liqe/ /app/packages/liqe/ COPY --from=builder /app/packages/utils/ /app/packages/utils/ + EXPOSE 8080 CMD ["yarn", "workspace", 
"@omnivore/api", "start"] diff --git a/packages/api/Dockerfile-test b/packages/api/Dockerfile-test index 72aa2f4334..4d6eebff39 100644 --- a/packages/api/Dockerfile-test +++ b/packages/api/Dockerfile-test @@ -1,4 +1,4 @@ -FROM node:18.16-alpine +FROM node:22.12-alpine WORKDIR /app diff --git a/packages/api/package.json b/packages/api/package.json index 96d3056300..814bb3fbb9 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -120,7 +120,10 @@ "voca": "^1.4.0", "winston": "^3.3.3", "yaml": "^2.4.1", - "youtubei": "^1.5.4" + "youtubei": "^1.5.4", + "@aws-sdk/client-s3": "^3.679.0", + "@aws-sdk/s3-request-presigner": "^3.679.0", + "@aws-sdk/lib-storage": "^3.679.0" }, "devDependencies": { "@istanbuljs/nyc-config-typescript": "^1.0.2", @@ -171,9 +174,9 @@ "ts-node-dev": "^1.1.8" }, "engines": { - "node": "18.16.1" + "node": "22.12.0" }, "volta": { "extends": "../../package.json" } -} \ No newline at end of file +} diff --git a/packages/api/queue-processor/Dockerfile b/packages/api/queue-processor/Dockerfile new file mode 100644 index 0000000000..81743e92fa --- /dev/null +++ b/packages/api/queue-processor/Dockerfile @@ -0,0 +1,61 @@ +FROM node:22.12 as builder + +WORKDIR /app + +ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true +RUN apt-get update && apt-get install -y g++ make python3 + +COPY package.json . +COPY yarn.lock . +COPY tsconfig.json . +COPY .prettierrc . +COPY .eslintrc . + +COPY /packages/readabilityjs/package.json ./packages/readabilityjs/package.json +COPY /packages/api/package.json ./packages/api/package.json +COPY /packages/text-to-speech/package.json ./packages/text-to-speech/package.json +COPY /packages/content-handler/package.json ./packages/content-handler/package.json +COPY /packages/liqe/package.json ./packages/liqe/package.json +COPY /packages/utils/package.json ./packages/utils/package.json + +RUN yarn install --pure-lockfile + +ADD /packages/readabilityjs ./packages/readabilityjs +ADD /packages/api ./packages/api +ADD /packages/text-to-speech ./packages/text-to-speech +ADD /packages/content-handler ./packages/content-handler +ADD /packages/liqe ./packages/liqe +ADD /packages/utils ./packages/utils + +RUN yarn workspace @omnivore/utils build +RUN yarn workspace @omnivore/text-to-speech-handler build +RUN yarn workspace @omnivore/content-handler build +RUN yarn workspace @omnivore/liqe build +RUN yarn workspace @omnivore/api build + +# After building, fetch the production dependencies +RUN rm -rf /app/packages/api/node_modules +RUN rm -rf /app/node_modules +RUN yarn install --pure-lockfile --production + +FROM node:22.12 as runner +LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore" + +RUN apt-get update && apt-get install -y netcat-openbsd + +WORKDIR /app + +ENV NODE_ENV production + +COPY --from=builder /app/packages/api/dist /app/packages/api/dist +COPY --from=builder /app/packages/readabilityjs/ /app/packages/readabilityjs/ +COPY --from=builder /app/packages/api/package.json /app/packages/api/package.json +COPY --from=builder /app/packages/api/node_modules /app/packages/api/node_modules +COPY --from=builder /app/node_modules /app/node_modules +COPY --from=builder /app/package.json /app/package.json +COPY --from=builder /app/packages/text-to-speech/ /app/packages/text-to-speech/ +COPY --from=builder /app/packages/content-handler/ /app/packages/content-handler/ +COPY --from=builder /app/packages/liqe/ /app/packages/liqe/ +COPY --from=builder /app/packages/utils/ /app/packages/utils/ + +CMD ["yarn", "workspace", 
"@omnivore/api", "start_queue_processor"] diff --git a/packages/api/src/data_source.ts b/packages/api/src/data_source.ts index cb3ab45496..1a06fa0d90 100644 --- a/packages/api/src/data_source.ts +++ b/packages/api/src/data_source.ts @@ -23,6 +23,7 @@ export const appDataSource = new DataSource({ max: env.pg.pool.max, idleTimeoutMillis: 10000, // 10 seconds }, + replication: env.pg.replication ? { master: { @@ -42,5 +43,15 @@ export const appDataSource = new DataSource({ }, ], } - : undefined, + : { + defaultMode: 'master', + master: { + host: env.pg.host, + port: env.pg.port, + username: env.pg.userName, + password: env.pg.password, + database: env.pg.dbName, + }, + slaves: [], + }, }) diff --git a/packages/api/src/export-processor.ts b/packages/api/src/export-processor.ts index 92b712cd1e..aad66f4fa6 100644 --- a/packages/api/src/export-processor.ts +++ b/packages/api/src/export-processor.ts @@ -73,7 +73,8 @@ import { CACHED_READING_POSITION_PREFIX } from './services/cached_reading_positi import { logger } from './utils/logger' import { getQueue } from './queue-processor' -export const EXPORT_QUEUE_NAME = 'omnivore-export-queue' +export const EXPORT_QUEUE_NAME = + process.env['EXPORT_QUEUE_NAME'] ?? 'omnivore-export-queue' export const createWorker = (connection: ConnectionOptions) => new Worker( diff --git a/packages/api/src/jobs/export.ts b/packages/api/src/jobs/export.ts index 2c6e02d8db..bbeeece561 100644 --- a/packages/api/src/jobs/export.ts +++ b/packages/api/src/jobs/export.ts @@ -1,10 +1,6 @@ import archiver, { Archiver } from 'archiver' import { v4 as uuidv4 } from 'uuid' -import { - ContentReaderType, - LibraryItem, - LibraryItemState, -} from '../entity/library_item' +import { LibraryItem, LibraryItemState } from '../entity/library_item' import { TaskState } from '../generated/graphql' import { findExportById, saveExport } from '../services/export' import { findHighlightsByLibraryItemId } from '../services/highlights' @@ -17,12 +13,11 @@ import { sendExportJobEmail } from '../services/send_emails' import { findActiveUser } from '../services/user' import { logger } from '../utils/logger' import { highlightToMarkdown } from '../utils/parser' -import { - contentFilePath, - createGCSFile, - generateUploadFilePathName, -} from '../utils/uploads' -import { batch } from 'googleapis/build/src/apis/batch' +import { env } from '../env' +import { storage } from '../repository/storage/storage' +import { File } from '../repository/storage/StorageClient' +import { Readable } from 'stream' +import { contentFilePath, generateUploadFilePathName } from '../utils/uploads' import { getRepository } from '../repository' import { UploadFile } from '../entity/upload_file' @@ -31,6 +26,12 @@ export interface ExportJobData { exportId: string } +const bucketName = env.fileUpload.gcsUploadBucket + +const createGCSFile = (filename: string): File => { + return storage.createFile(bucketName, filename) +} + export const EXPORT_JOB_NAME = 'export' const itemStateMappping = (state: LibraryItemState) => { @@ -61,7 +62,7 @@ const uploadContent = async ( const file = createGCSFile(filePath) // check if file is already uploaded - const [exists] = await file.exists() + const exists = await file.exists() if (!exists) { logger.info(`File not found: ${filePath}`) @@ -81,10 +82,14 @@ const uploadContent = async ( contentType: 'text/html', private: true, }) + archive.append(Readable.from(item.readableContent), { + name: `content/${libraryItem.slug}.html`, + }) } // append the existing file to the archive - 
archive.append(file.createReadStream(), { + const content = await file.download() + archive.append(Readable.from(content.toString()), { name: `content/${libraryItem.slug}.html`, }) } @@ -97,17 +102,19 @@ const uploadPdfContent = async ( id: libraryItem.uploadFileId, }) if (!upload || !upload.fileName) { - console.log(`upload does not have a filename: ${upload}`) + console.log( + `upload does not have a filename: ${upload?.fileName ?? 'empty'}` + ) return } const filePath = generateUploadFilePathName(upload.id, upload.fileName) const file = createGCSFile(filePath) - const [exists] = await file.exists() + const exists = await file.exists() if (exists) { console.log(`adding PDF file: ${filePath}`) // append the existing file to the archive - archive.append(file.createReadStream(), { + archive.append(await file.download(), { name: `content/${libraryItem.slug}.pdf`, }) } @@ -238,9 +245,18 @@ export const exportJob = async (jobData: ExportJobData) => { // Create a write stream const writeStream = file.createWriteStream({ - metadata: { - contentType: 'application/zip', - }, + contentType: 'application/zip', + }) + + const finishedPromise = new Promise<void>((resolve, reject) => { + if (writeStream.closed) { + resolve() + } + writeStream.on('finish', () => { + logger.info('File successfully written to GCS') + resolve() + }) + writeStream.on('error', reject) + }) // Handle any errors in the streams @@ -248,10 +264,6 @@ logger.error('Error writing to GCS:', err) }) - writeStream.on('finish', () => { - logger.info('File successfully written to GCS') - }) - // Initialize archiver for zipping files const archive = archiver('zip', { zlib: { level: 9 }, // Compression level @@ -264,7 +276,6 @@ // Pipe the archiver output to the write stream archive.pipe(writeStream) - let cursor = 0 try { // fetch data from the database @@ -305,17 +316,14 @@ } // Ensure that the writeStream has finished - await new Promise((resolve, reject) => { - writeStream.on('finish', resolve) - writeStream.on('error', reject) - }) + await finishedPromise logger.info(`export completed, exported ${cursor} items`, { userId, }) // generate a temporary signed url for the zip file - const [signedUrl] = await file.getSignedUrl({ + const signedUrl = await storage.signedUrl(bucketName, fullPath, { action: 'read', expires: Date.now() + 168 * 60 * 60 * 1000, // one week }) diff --git a/packages/api/src/jobs/process-youtube-video.ts b/packages/api/src/jobs/process-youtube-video.ts index 2d704b0e83..6870460b71 100644 --- a/packages/api/src/jobs/process-youtube-video.ts +++ b/packages/api/src/jobs/process-youtube-video.ts @@ -281,24 +281,32 @@ export const processYouTubeVideo = async ( updatedLibraryItem.publishedAt = new Date(video.uploadDate) } - // if ('getTranscript' in video && duration > 0 && duration < 1801) { - // // If the video has a transcript available, put a placehold in and - // // enqueue a job to process the full transcript - // const updatedContent = await addTranscriptToReadableContent( - // libraryItem.originalUrl, - // libraryItem.readableContent, - // TRANSCRIPT_PLACEHOLDER_TEXT - // ) - - // if (updatedContent) { - // updatedLibraryItem.readableContent = updatedContent - // } - - // await enqueueProcessYouTubeTranscript({ - // videoId, - // ...jobData, - // }) - // } + if ( + 'getTranscript' in video && + duration > 0 && + duration < + Number(
process.env['YOUTUBE_MAXIMUM_VIDEO_DURATION_TRANSCRIPT'] ?? 1801 + ) && + process.env['OPENAI_API_KEY'] + ) { + // If the video has a transcript available, put a placeholder in and + // enqueue a job to process the full transcript + const updatedContent = await addTranscriptToReadableContent( + libraryItem.originalUrl, + libraryItem.readableContent, + TRANSCRIPT_PLACEHOLDER_TEXT + ) + + if (updatedContent) { + updatedLibraryItem.readableContent = updatedContent + } + + await enqueueProcessYouTubeTranscript({ + videoId, + ...jobData, + }) + } if (updatedLibraryItem !== {}) { await updateLibraryItem( diff --git a/packages/api/src/queue-processor.ts b/packages/api/src/queue-processor.ts index 62afd000a5..ad0eff8616 100644 --- a/packages/api/src/queue-processor.ts +++ b/packages/api/src/queue-processor.ts @@ -56,7 +56,10 @@ import { PROCESS_YOUTUBE_VIDEO_JOB_NAME, } from './jobs/process-youtube-video' import { pruneTrashJob, PRUNE_TRASH_JOB } from './jobs/prune_trash' -import { refreshAllFeeds } from './jobs/rss/refreshAllFeeds' +import { + REFRESH_ALL_FEEDS_JOB_NAME, + refreshAllFeeds, +} from './jobs/rss/refreshAllFeeds' import { refreshFeed } from './jobs/rss/refreshFeed' import { savePageJob } from './jobs/save_page' import { @@ -159,25 +162,25 @@ export const createWorker = (connection: ConnectionOptions) => async (job: Job) => { const executeJob = async (job: Job) => { switch (job.name) { - // case 'refresh-all-feeds': { - // const queue = await getQueue() - // const counts = await queue?.getJobCounts('prioritized') - // if (counts && counts.wait > 1000) { - // return - // } - // return await refreshAllFeeds(appDataSource) - // } - // case 'refresh-feed': { - // return await refreshFeed(job.data) - // } + case 'refresh-all-feeds': { + const queue = await getQueue() + const counts = await queue?.getJobCounts('prioritized') + if (counts && counts.wait > 1000) { + return + } + return await refreshAllFeeds(appDataSource) + } + case 'refresh-feed': { + return await refreshFeed(job.data) + } case 'save-page': { return savePageJob(job.data, job.attemptsMade) } - // case 'update-pdf-content': { - // return updatePDFContentJob(job.data) - // } - // case THUMBNAIL_JOB: - // return findThumbnail(job.data) + case 'update-pdf-content': { + return updatePDFContentJob(job.data) + } + case THUMBNAIL_JOB: + return findThumbnail(job.data) case TRIGGER_RULE_JOB_NAME: return triggerRule(job.data) case UPDATE_LABELS_JOB: @@ -194,10 +197,13 @@ return exportItem(job.data) // case AI_SUMMARIZE_JOB_NAME: // return aiSummarize(job.data) - // case PROCESS_YOUTUBE_VIDEO_JOB_NAME: - // return processYouTubeVideo(job.data) - // case PROCESS_YOUTUBE_TRANSCRIPT_JOB_NAME: - // return processYouTubeTranscript(job.data) + case PROCESS_YOUTUBE_VIDEO_JOB_NAME: + return processYouTubeVideo(job.data) + case PROCESS_YOUTUBE_TRANSCRIPT_JOB_NAME: + if (process.env['OPENAI_API_KEY']) { + return processYouTubeTranscript(job.data) + } + break case EXPORT_ALL_ITEMS_JOB_NAME: return exportAllItems(job.data) case SEND_EMAIL_JOB: @@ -218,8 +224,8 @@ // return updateHome(job.data) // case SCORE_LIBRARY_ITEM_JOB: // return scoreLibraryItem(job.data) - // case GENERATE_PREVIEW_CONTENT_JOB: - // return generatePreviewContent(job.data) + case GENERATE_PREVIEW_CONTENT_JOB: + return generatePreviewContent(job.data) case PRUNE_TRASH_JOB: return pruneTrashJob(job.data) case EXPIRE_FOLDERS_JOB_NAME: @@ -260,6 +266,17 @@
const setupCronJobs = async () => { }, } ) + + await queue.add( + REFRESH_ALL_FEEDS_JOB_NAME, + {}, + { + priority: getJobPriority(REFRESH_ALL_FEEDS_JOB_NAME), + repeat: { + every: 14_400_000, // 4 Hours + }, + } + ) } const main = async () => { diff --git a/packages/api/src/repository/storage/GcsStorageClient.ts b/packages/api/src/repository/storage/GcsStorageClient.ts new file mode 100644 index 0000000000..37d14daf16 --- /dev/null +++ b/packages/api/src/repository/storage/GcsStorageClient.ts @@ -0,0 +1,88 @@ +import { + SignedUrlParameters, + StorageClient, + File, + SaveOptions, + SaveData, +} from './StorageClient' +import { Storage, File as GCSFile } from '@google-cloud/storage' + +export class GcsStorageClient implements StorageClient { + private storage: Storage + + constructor(keyFilename: string | undefined) { + this.storage = new Storage({ + keyFilename, + }) + } + + private convertFileToGeneric(gcsFile: GCSFile): File { + return { + isPublic: async () => { + const [isPublic] = await gcsFile.isPublic() + return isPublic + }, + exists: async () => (await gcsFile.exists())[0], + download: async () => (await gcsFile.download())[0], + bucket: gcsFile.bucket.name, + publicUrl: () => gcsFile.publicUrl(), + getMetadataMd5: async () => { + const [metadata] = await gcsFile.getMetadata() + return metadata.md5Hash + }, + createWriteStream: (saveOptions: SaveOptions) => + gcsFile.createWriteStream({ + metadata: { contentType: saveOptions.contentType }, + }), + save: (saveData: SaveData, saveOptions: SaveOptions) => + gcsFile.save(saveData, saveOptions), + key: gcsFile.name, + } + } + + downloadFile(bucket: string, filePath: string): Promise<File> { + const file = this.storage.bucket(bucket).file(filePath) + return Promise.resolve(this.convertFileToGeneric(file)) + } + + createFile(bucket: string, filePath: string): File { + return this.convertFileToGeneric(this.storage.bucket(bucket).file(filePath)) + } + + async getFilesFromPrefix(bucket: string, prefix: string): Promise<File[]> { + const [filesWithPrefix] = await this.storage + .bucket(bucket) + .getFiles({ prefix }) + + return filesWithPrefix.map((it: GCSFile) => this.convertFileToGeneric(it)) + } + + async signedUrl( + bucket: string, + filePath: string, + options: SignedUrlParameters + ): Promise<string> { + const [url] = await this.storage + .bucket(bucket) + .file(filePath) + .getSignedUrl({ ...options, version: 'v4' }) + + return url + } + + upload( + bucket: string, + filePath: string, + data: Buffer, + options: { + contentType?: string + public?: boolean + timeout?: number + } + ): Promise<void> { + return this.storage + .bucket(bucket) + .file(filePath) + .save(data, { timeout: 30000, ...options }) + } +} diff --git a/packages/api/src/repository/storage/S3StorageClient.ts b/packages/api/src/repository/storage/S3StorageClient.ts new file mode 100644 index 0000000000..ef673123f7 --- /dev/null +++ b/packages/api/src/repository/storage/S3StorageClient.ts @@ -0,0 +1,261 @@ +import { + SignedUrlParameters, + StorageClient, + File, + SaveOptions, + SaveData, +} from './StorageClient' +import { Upload } from '@aws-sdk/lib-storage' +import { + GetObjectCommand, + GetObjectCommandOutput, + S3Client, + ListObjectsV2Command, + PutObjectCommand, + HeadObjectCommand, + S3ServiceException, +} from '@aws-sdk/client-s3' +import { getSignedUrl } from '@aws-sdk/s3-request-presigner' +import { Readable } from 'stream' +import * as stream from 'node:stream' + +// While this is listed as S3, for self hosting we will use MinIO, which is
+export class S3StorageClient implements StorageClient { + BlankFile = class implements File { + bucket: string + key: string + s3Client: S3StorageClient + downloadedFile: File | undefined + + constructor(s3StorageClass: S3StorageClient, bucket: string, file: string) { + this.bucket = bucket + this.key = file + this.s3Client = s3StorageClass + } + + isPublic() { + return Promise.resolve(true) + } + + publicUrl() { + return `${this.s3Client.localUrl ?? ''}/${this.bucket}/${this.key}` + } + + async download(): Promise { + this.downloadedFile = await this.s3Client.downloadFile( + this.bucket, + this.key + ) + return this.downloadedFile.download() + } + + async exists() { + try { + await this.s3Client.s3Client.send( + new HeadObjectCommand({ + Bucket: this.bucket, + Key: this.key, + }) + ) + + return true + } catch (e) { + if ( + e instanceof S3ServiceException && + e.$metadata.httpStatusCode == 404 + ) { + return false + } + + throw e + } + } + + save(saveData: SaveData, saveOptions: SaveOptions): Promise { + return this.s3Client.upload(this.bucket, this.key, saveData, saveOptions) + } + + createWriteStream(saveOptions: SaveOptions) { + return this.s3Client.createS3UploadStream( + this.bucket, + this.key, + saveOptions + ) + } + + getMetadataMd5() { + return this.downloadedFile?.getMetadataMd5() || Promise.resolve('') + } + } + + private s3Client: S3Client + private signingS3Client: S3Client + + private urlOverride: string | undefined + private localUrl: string | undefined + + constructor(localUrl: string | undefined, urlOverride: string | undefined) { + this.localUrl = localUrl + this.urlOverride = urlOverride + this.s3Client = new S3Client({ + forcePathStyle: true, + endpoint: urlOverride, + }) + + this.signingS3Client = new S3Client({ + forcePathStyle: true, + endpoint: localUrl, + }) + } + + private createS3UploadStream = ( + bucket: string, + key: string, + saveOptions: SaveOptions + ) => { + const passThroughStream = new stream.PassThrough() + + const upload = new Upload({ + client: this.s3Client, + params: { + Bucket: bucket, + Key: key, + Body: passThroughStream, + ContentType: saveOptions.contentType, + }, + }) + + void upload.done().then((res) => { + console.log(`Successfully Uploaded File ${res.Key ?? ''}`) + }) + + return passThroughStream + } + + private convertFileToGeneric( + s3File: GetObjectCommandOutput, + bucket: string, + key: string + ): File { + return { + exists: () => { + return Promise.resolve(s3File.$metadata.httpStatusCode == 200) + }, + save: async () => Promise.resolve(), + isPublic: async () => Promise.resolve(true), + download: async () => this.getFileFromReadable(s3File.Body as Readable), + getMetadataMd5: () => Promise.resolve(s3File.ETag), + createWriteStream: (saveOptions: SaveOptions) => + this.createS3UploadStream(bucket, key, saveOptions), + publicUrl: () => `${this.localUrl ?? 
''}/${bucket}/${key}`, + bucket, + key, + } + } + + private getFileFromReadable(stream: Readable): Promise<Buffer> { + return new Promise((resolve, reject) => { + const chunks: Buffer[] = [] + stream.on('data', (chunk) => chunks.push(chunk)) + stream.once('end', () => resolve(Buffer.concat(chunks))) + stream.once('error', reject) + }) + } + + async downloadFile(bucket: string, filePath: string): Promise<File> { + const s3File = await this.s3Client.send( + new GetObjectCommand({ + Bucket: bucket, + Key: filePath, // path to the file you want to download, + }) + ) + + return this.convertFileToGeneric(s3File, bucket, filePath) + } + + createFile(bucket: string, filePath: string): File { + return new this.BlankFile(this, bucket, filePath) as unknown as File + } + + async getFilesFromPrefix(bucket: string, prefix: string): Promise<File[]> { + const s3PrefixedFiles = await this.s3Client.send( + new ListObjectsV2Command({ + Bucket: bucket, + Prefix: prefix, // path to the file you want to download, + }) + ) + + const prefixKeys = s3PrefixedFiles.CommonPrefixes || [] + + return prefixKeys + .map(({ Prefix }) => Prefix) + .map((key: string | undefined) => { + return { + key: key || '', + exists: () => Promise.resolve(true), + isPublic: async () => Promise.resolve(true), + download: async () => { + const s3File = await this.s3Client.send( + new GetObjectCommand({ + Bucket: bucket, + Key: key, // path to the file you want to download, + }) + ) + + return this.getFileFromReadable(s3File.Body as Readable) + }, + save: () => Promise.resolve(), + createWriteStream: (saveOptions: SaveOptions) => + new stream.PassThrough(), + getMetadataMd5: () => Promise.resolve(key), + bucket: bucket, + publicUrl: () => `${this.localUrl ?? ''}/${bucket}/${key ?? ''}`, + } + }) + } + + async signedUrl( + bucket: string, + filePath: string, + options: SignedUrlParameters + ): Promise<string> { + const command = + options.action == 'read' ?
new GetObjectCommand({ + Bucket: bucket, + Key: filePath, // path to the file you want to download, + }) + : new PutObjectCommand({ + Bucket: bucket, + Key: filePath, // path to the file you want to download, + }) + + // eslint-disable-next-line @typescript-eslint/no-unsafe-call + const url = await getSignedUrl(this.signingS3Client, command, { + expiresIn: 900, + }) + + return url + } + + async upload( + bucket: string, + filePath: string, + data: SaveData, + options: { + contentType?: string + public?: boolean + timeout?: number + } + ): Promise<void> { + await this.s3Client.send( + new PutObjectCommand({ + Bucket: bucket, + Key: filePath, + Body: data.toString(), + ContentType: options.contentType, + }) + ) + } +} diff --git a/packages/api/src/repository/storage/StorageClient.ts b/packages/api/src/repository/storage/StorageClient.ts new file mode 100644 index 0000000000..12257e0b81 --- /dev/null +++ b/packages/api/src/repository/storage/StorageClient.ts @@ -0,0 +1,49 @@ +import { PipelineSource, Writable } from 'stream' + +export type SignedUrlParameters = { + action: 'read' | 'write' | 'delete' | 'resumable' + expires: number +} + +export type SaveData = string | Buffer | PipelineSource<string | Buffer> +export type SaveOptions = { + contentType?: string + gzip?: string | boolean + resumable?: boolean + timeout?: number + validation?: string | boolean + private?: boolean | undefined +} + +export type File = { + isPublic: () => Promise<boolean> + publicUrl: () => string + download: () => Promise<Buffer> + exists: () => Promise<boolean> + save: (saveData: SaveData, saveOptions: SaveOptions) => Promise<void> + createWriteStream: (saveOptions: SaveOptions) => Writable + getMetadataMd5: () => Promise<string | undefined> + bucket: string + key: string +} + +export interface StorageClient { + downloadFile(bucket: string, filePath: string): Promise<File> + + createFile(bucket: string, filePath: string): File + + getFilesFromPrefix(bucket: string, filePrefix: string): Promise<File[]> + + upload( + bucket: string, + filePath: string, + data: Buffer, + options: { contentType?: string; public?: boolean; timeout?: number } + ): Promise<void> + + signedUrl( + bucket: string, + filePath: string, + options: SignedUrlParameters + ): Promise<string> +} diff --git a/packages/api/src/repository/storage/storage.ts b/packages/api/src/repository/storage/storage.ts new file mode 100644 index 0000000000..708946520b --- /dev/null +++ b/packages/api/src/repository/storage/storage.ts @@ -0,0 +1,10 @@ +import { env } from '../../env' +import { S3StorageClient } from './S3StorageClient' +import { GcsStorageClient } from './GcsStorageClient' + +export const storage = env.fileUpload.useLocalStorage + ? new S3StorageClient( + env.fileUpload.localMinioUrl, + env.fileUpload.internalMinioUrl + ) + : new GcsStorageClient(env.fileUpload?.gcsUploadSAKeyFilePath ?? undefined) diff --git a/packages/api/src/resolvers/article/index.ts b/packages/api/src/resolvers/article/index.ts index a2f6fb75f5..f557873ad1 100644 --- a/packages/api/src/resolvers/article/index.ts +++ b/packages/api/src/resolvers/article/index.ts @@ -510,12 +510,12 @@ export const saveArticleReadingProgressResolver = authorized< } } - if (env.redis.cache && env.redis.mq) { - if (force) { - // clear any cached values. - await clearCachedReadingPosition(uid, id) - } + if (env.redis.cache && force) { + // clear any cached values.
+ await clearCachedReadingPosition(uid, id) + } + + if (env.redis.cache && env.redis.mq && !force) { // If redis caching and queueing are available we delay this write const updatedProgress = await dataSources.readingProgress.updateReadingProgress(uid, id, { diff --git a/packages/api/src/routers/auth/auth_router.ts b/packages/api/src/routers/auth/auth_router.ts index ed88d92377..6bb17db28c 100644 --- a/packages/api/src/routers/auth/auth_router.ts +++ b/packages/api/src/routers/auth/auth_router.ts @@ -29,6 +29,7 @@ import { import { analytics } from '../../utils/analytics' import { comparePassword, + generateVerificationToken, hashPassword, setAuthInCookie, verifyToken, @@ -544,7 +545,7 @@ export function authRouter() { try { // hash password const hashedPassword = await hashPassword(password) - await createUser({ + const [user] = await createUser({ email: trimmedEmail, provider: 'EMAIL', sourceUserId: trimmedEmail, @@ -553,12 +554,17 @@ pictureUrl, bio, password: hashedPassword, - pendingConfirmation: true, + pendingConfirmation: !env.dev.autoVerify, }) - res.redirect( - `${env.client.url}/auth/verify-email?message=SIGNUP_SUCCESS` - ) + if (env.dev.autoVerify) { + const token = await generateVerificationToken({ id: user.id }) + res.redirect(`${env.client.url}/auth/confirm-email/${token}`) + } else { + res.redirect( + `${env.client.url}/auth/verify-email?message=SIGNUP_SUCCESS` + ) + } } catch (e) { logger.info('email-signup exception:', e) if (isErrorWithCode(e)) { diff --git a/packages/api/src/routers/auth/mobile/sign_up.ts b/packages/api/src/routers/auth/mobile/sign_up.ts index 11bf3002db..c9fb8c2f80 100644 --- a/packages/api/src/routers/auth/mobile/sign_up.ts +++ b/packages/api/src/routers/auth/mobile/sign_up.ts @@ -13,6 +13,7 @@ import { } from '../auth_types' import { decodeGoogleToken } from '../google_auth' import { createPendingUserToken, suggestedUsername } from '../jwt_helpers' +import { env } from '../../../env' export async function createMobileSignUpResponse( isAndroid: boolean, @@ -66,7 +67,7 @@ export async function createMobileEmailSignUpResponse( name: name.trim(), username: username.trim().toLowerCase(), password: hashedPassword, - pendingConfirmation: true, + pendingConfirmation: !env.dev.autoVerify, }) return { diff --git a/packages/api/src/services/newsletters.ts b/packages/api/src/services/newsletters.ts index 2812490577..9964744f0d 100644 --- a/packages/api/src/services/newsletters.ts +++ b/packages/api/src/services/newsletters.ts @@ -113,7 +113,9 @@ const createRandomEmailAddress = (userName: string, length: number): string => { when rand is sdfsdf-: jacksonh-sdfsdf-e@inbox.omnivore.app when rand is abcdef: jacksonh-abcdefe@inbox.omnivore.app */ - return `${userName}-${nanoid(length)}e@${inbox}.omnivore.app` + return `${userName}-${nanoid(length)}e@${ + env.email.domain || `${inbox}.omnivore.app` + }` } export const findNewsletterEmailById = async ( diff --git a/packages/api/src/services/upload_file.ts b/packages/api/src/services/upload_file.ts index 0b32d01ac3..bdc1bd4298 100644 --- a/packages/api/src/services/upload_file.ts +++ b/packages/api/src/services/upload_file.ts @@ -137,7 +137,7 @@ export const uploadFile = async ( itemType, uploadFile: { id: uploadFileData.id }, slug: generateSlug(uploadFilePathName), - state: LibraryItemState.Processing, + state: LibraryItemState.Succeeded, contentReader: contentReaderForLibraryItem(itemType, uploadFileId), }, uid diff --git a/packages/api/src/util.ts b/packages/api/src/util.ts index
8d0eefc5cc..db6b418756 100755 --- a/packages/api/src/util.ts +++ b/packages/api/src/util.ts @@ -73,6 +73,7 @@ export interface BackendEnv { } dev: { isLocal: boolean + autoVerify: boolean } queue: { location: string @@ -94,6 +95,12 @@ export interface BackendEnv { gcsUploadSAKeyFilePath: string gcsUploadPrivateBucket: string dailyUploadLimit: number + useLocalStorage: boolean + localMinioUrl: string + internalMinioUrl: string + } + email: { + domain: string } sender: { message: string @@ -197,10 +204,15 @@ const nullableEnvVars = [ 'PG_REPLICA_USER', 'PG_REPLICA_PASSWORD', 'PG_REPLICA_DB', + 'AUTO_VERIFY', 'INTERCOM_WEB_SECRET', 'INTERCOM_IOS_SECRET', 'INTERCOM_ANDROID_SECRET', 'EXPORT_TASK_HANDLER_URL', + 'LOCAL_MINIO_URL', + 'GCS_USE_LOCAL_HOST', + 'LOCAL_EMAIL_DOMAIN', + 'AWS_S3_ENDPOINT_URL', ] // Allow some vars to be null/empty const envParser = @@ -240,6 +252,7 @@ export function getEnv(): BackendEnv { pool: { max: parseInt(parse('PG_POOL_MAX'), 10), }, + replication: parse('PG_REPLICATION') === 'true', replica: { host: parse('PG_REPLICA_HOST'), @@ -249,6 +262,9 @@ export function getEnv(): BackendEnv { dbName: parse('PG_REPLICA_DB'), }, } + const email = { + domain: parse('LOCAL_EMAIL_DOMAIN'), + } const server = { jwtSecret: parse('JWT_SECRET'), ssoJwtSecret: parse('SSO_JWT_SECRET'), @@ -288,6 +304,7 @@ export function getEnv(): BackendEnv { } const dev = { isLocal: parse('API_ENV') == 'local', + autoVerify: parse('AUTO_VERIFY') === 'true', } const queue = { location: parse('PUPPETEER_QUEUE_LOCATION'), @@ -318,6 +335,9 @@ export function getEnv(): BackendEnv { dailyUploadLimit: parse('GCS_UPLOAD_DAILY_LIMIT') ? parseInt(parse('GCS_UPLOAD_DAILY_LIMIT'), 10) : 5, // default to 5 + useLocalStorage: parse('GCS_USE_LOCAL_HOST') == 'true', + localMinioUrl: parse('LOCAL_MINIO_URL'), + internalMinioUrl: parse('AWS_S3_ENDPOINT_URL'), } const sender = { message: parse('SENDER_MESSAGE'), @@ -374,6 +394,7 @@ export function getEnv(): BackendEnv { return { pg, client, + email, server, google, posthog, diff --git a/packages/api/src/utils/uploads.ts b/packages/api/src/utils/uploads.ts index c4b71b5cab..a9d6a0426b 100644 --- a/packages/api/src/utils/uploads.ts +++ b/packages/api/src/utils/uploads.ts @@ -1,12 +1,12 @@ /* eslint-disable @typescript-eslint/no-unsafe-member-access */ /* eslint-disable @typescript-eslint/no-unsafe-assignment */ -import { File, GetSignedUrlConfig, Storage } from '@google-cloud/storage' import axios from 'axios' import { ContentReaderType } from '../entity/library_item' import { env } from '../env' import { PageType } from '../generated/graphql' import { ContentFormat } from '../jobs/upload_content' import { logger } from './logger' +import { storage } from '../repository/storage/storage' export const contentReaderForLibraryItem = ( itemType: string, @@ -31,14 +31,12 @@ export const contentReaderForLibraryItem = ( * the default app engine service account on the IAM page. We also need to * enable IAM related APIs on the project. */ -export const storage = env.fileUpload?.gcsUploadSAKeyFilePath - ? 
new Storage({ keyFilename: env.fileUpload.gcsUploadSAKeyFilePath }) - : new Storage() + const bucketName = env.fileUpload.gcsUploadBucket const maxContentLength = 10 * 1024 * 1024 // 10MB export const countOfFilesWithPrefix = async (prefix: string) => { - const [files] = await storage.bucket(bucketName).getFiles({ prefix }) + const files = await storage.getFilesFromPrefix(bucketName, prefix) return files.length } @@ -48,40 +46,29 @@ export const generateUploadSignedUrl = async ( filePathName: string, contentType: string, selectedBucket?: string ): Promise<string> => { // These options will allow temporary uploading of file with requested content type - const options: GetSignedUrlConfig = { + const options = { version: 'v4', - action: 'write', + action: 'write' as const, expires: Date.now() + 15 * 60 * 1000, // 15 minutes contentType: contentType, } logger.info('signed url for: ', options) - // Get a v4 signed URL for uploading file - const [url] = await storage - .bucket(selectedBucket || bucketName) - .file(filePathName) - .getSignedUrl(options) - return url + return storage.signedUrl(selectedBucket || bucketName, filePathName, options) } export const generateDownloadSignedUrl = async ( filePathName: string, config?: { - bucketName?: string expires?: number } ): Promise<string> => { - const options: GetSignedUrlConfig = { - version: 'v4', - action: 'read', - expires: config?.expires ?? Date.now() + 240 * 60 * 1000, // four hours + const options = { + action: 'read' as const, + expires: Date.now() + 240 * 60 * 1000, // four hours + ...config, } - const [url] = await storage - .bucket(config?.bucketName || bucketName) - .file(filePathName) - .getSignedUrl(options) - logger.info(`generating download signed url: ${url}`) - return url + return storage.signedUrl(bucketName, filePathName, options) } export const getStorageFileDetails = async ( @@ -89,10 +76,10 @@ id: string, fileName: string ): Promise<{ md5Hash: string; fileUrl: string }> => { const filePathName = generateUploadFilePathName(id, fileName) - const file = storage.bucket(bucketName).file(filePathName) - const [metadata] = await file.getMetadata() + const file = await storage.downloadFile(bucketName, filePathName) + const metadataMd5 = await file.getMetadataMd5() // GCS returns MD5 Hash in base64 encoding, we convert it here to hex string - const md5Hash = Buffer.from(metadata.md5Hash || '', 'base64').toString('hex') + const md5Hash = Buffer.from(metadataMd5 || '', 'base64').toString('hex') return { md5Hash, fileUrl: file.publicUrl() } } @@ -110,17 +97,10 @@ export const uploadToBucket = async ( filePath: string, data: Buffer, options?: { contentType?: string; public?: boolean; timeout?: number }, selectedBucket?: string ): Promise<void> => { - await storage - .bucket(selectedBucket || bucketName) - .file(filePath) - .save(data, { timeout: 30000, ...options }) // default timeout 30s -} - -export const createGCSFile = ( - filename: string, - selectedBucket = bucketName -): File => { - return storage.bucket(selectedBucket).file(filename) + await storage.upload(selectedBucket || bucketName, filePath, data, { + timeout: 30000, + ...options, + }) } export const downloadFromUrl = async ( @@ -154,16 +134,14 @@ export const uploadToSignedUrl = async ( } export const isFileExists = async (filePath: string): Promise<boolean> => { - const [exists] = await storage.bucket(bucketName).file(filePath).exists() + const file = await storage.downloadFile(bucketName, filePath) + const exists = await file.exists() return exists } export const downloadFromBucket = async (filePath: string): Promise<Buffer> => { - const file =
storage.bucket(bucketName).file(filePath) - - // Download the file contents - const [data] = await file.download() - return data + const file = await storage.downloadFile(bucketName, filePath) + return file.download() } export const contentFilePath = ({ diff --git a/packages/api/test/hooks.ts b/packages/api/test/hooks.ts index 6ab516a718..f1799ccd28 100644 --- a/packages/api/test/hooks.ts +++ b/packages/api/test/hooks.ts @@ -1,13 +1,13 @@ -import { Storage } from '@google-cloud/storage' -import sinon from 'sinon' -import * as uploads from '../src/utils/uploads' -import { MockStorage } from './mock_storage' - -export const mochaHooks = { - beforeEach() { - // Mock cloud storage - sinon - .stub(uploads, 'storage') - .value(new MockStorage() as unknown as Storage) - }, -} +// import { Storage } from '@google-cloud/storage' +// import sinon from 'sinon' +// import * as uploads from '../src/utils/uploads' +// import { MockStorage } from './mock_storage' +// +// export const mochaHooks = { +// beforeEach() { +// // Mock cloud storage +// sinon +// .stub(uploads, 'storage') +// .value(new MockStorage() as unknown as Storage) +// }, +// } diff --git a/packages/content-fetch/.eslintrc b/packages/content-fetch/.eslintrc index 301be97955..d0558f7855 100644 --- a/packages/content-fetch/.eslintrc +++ b/packages/content-fetch/.eslintrc @@ -4,6 +4,8 @@ "project": "tsconfig.json" }, "rules": { + "@typescript-eslint/no-unsafe-assignment": 0, + "@typescript-eslint/no-unnecessary-type-assertion": 0, "@typescript-eslint/no-floating-promises": [ "error", { diff --git a/packages/content-fetch/Dockerfile b/packages/content-fetch/Dockerfile index dc43e8bb81..1a3d99f4cd 100644 --- a/packages/content-fetch/Dockerfile +++ b/packages/content-fetch/Dockerfile @@ -1,27 +1,22 @@ -FROM node:18.16 +FROM node:22.12-alpine AS build LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore" -# Installs latest Chromium package. -RUN apt-get update && apt-get install -y \ - chromium \ - ca-certificates \ - nodejs \ - yarn \ +# Installs build dependencies (Chromium itself is installed in the runtime stage). +RUN apk -U upgrade \ +&& apk add --no-cache \ g++ \ make \ - python3 + python3 \ + py3-pip && \ + rm -rf /var/cache/apk/* WORKDIR /app -ENV CHROMIUM_PATH /usr/bin/chromium +ENV CHROMIUM_PATH=/usr/bin/chromium +ENV FIREFOX_PATH=/usr/bin/firefox ENV LAUNCH_HEADLESS=true -COPY package.json . -COPY yarn.lock . -COPY tsconfig.json . -COPY .prettierrc . -COPY .eslintrc .
-
+COPY package.json yarn.lock tsconfig.json .prettierrc .eslintrc ./
 COPY /packages/content-fetch/package.json ./packages/content-fetch/package.json
 COPY /packages/content-handler/package.json ./packages/content-handler/package.json
 COPY /packages/puppeteer-parse/package.json ./packages/puppeteer-parse/package.json
@@ -29,21 +24,50 @@
 COPY /packages/utils/package.json ./packages/utils/package.json

 RUN yarn install --pure-lockfile

-ADD /packages/content-fetch ./packages/content-fetch
-ADD /packages/content-handler ./packages/content-handler
-ADD /packages/puppeteer-parse ./packages/puppeteer-parse
-ADD /packages/utils ./packages/utils
-RUN yarn workspace @omnivore/utils build
-RUN yarn workspace @omnivore/content-handler build
-RUN yarn workspace @omnivore/puppeteer-parse build
-RUN yarn workspace @omnivore/content-fetch build
+COPY /packages/content-fetch ./packages/content-fetch
+COPY /packages/content-handler ./packages/content-handler
+COPY /packages/puppeteer-parse ./packages/puppeteer-parse
+COPY /packages/utils ./packages/utils
+
+RUN yarn workspace @omnivore/utils build && \
+    yarn workspace @omnivore/content-handler build && \
+    yarn workspace @omnivore/puppeteer-parse build && \
+    yarn workspace @omnivore/content-fetch build && \
+    rm -rf /app/packages/content-fetch/node_modules /app/node_modules && \
+    yarn install --pure-lockfile --production
+
+# Running stage
+FROM node:22.12-alpine
+
+RUN echo @edge https://dl-cdn.alpinelinux.org/alpine/edge/community >> /etc/apk/repositories \
+&& echo @edge https://dl-cdn.alpinelinux.org/alpine/edge/main >> /etc/apk/repositories \
+&& echo @edge https://dl-cdn.alpinelinux.org/alpine/edge/testing >> /etc/apk/repositories \
+&& apk -U upgrade \
+&& apk add --no-cache \
+  firefox@edge \
+  freetype@edge \
+  ttf-freefont@edge \
+  nss@edge \
+  libstdc++@edge \
+  sqlite-libs@edge \
+  chromium@edge \
+  firefox-esr@edge \
+  ca-certificates@edge \
+&& rm -rf /var/cache/apk/*
+
+WORKDIR /app
+
+ENV CHROMIUM_PATH=/usr/bin/chromium
+ENV FIREFOX_PATH=/usr/bin/firefox
+ENV LAUNCH_HEADLESS=true

-# After building, fetch the production dependencies
-RUN rm -rf /app/packages/content-fetch/node_modules
-RUN rm -rf /app/node_modules
-RUN yarn install --pure-lockfile --production
+COPY --from=build /app /app

 EXPOSE 8080

-CMD ["yarn", "workspace", "@omnivore/content-fetch", "start"]
+# In Firefox we can't use the adblocking sites. Adding them to the hosts file of the docker seems to work.
+COPY /packages/content-fetch/start.sh .
+RUN wget https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts && \
+    chmod +x start.sh
+CMD ["./start.sh"]
\ No newline at end of file
diff --git a/packages/content-fetch/src/request_handler.ts b/packages/content-fetch/src/request_handler.ts
index 8ef69acbe1..34c1479246 100644
--- a/packages/content-fetch/src/request_handler.ts
+++ b/packages/content-fetch/src/request_handler.ts
@@ -305,7 +305,7 @@ export const processFetchContentJob = async (
   const savedDate = savedAt ? new Date(savedAt) : new Date()

   const { finalUrl, title, content, contentType } = fetchResult
-  if (content) {
+  if (content && process.env['SKIP_UPLOAD_ORIGINAL'] !== 'true') {
     await uploadOriginalContent(users, content, savedDate.getTime())
   }
diff --git a/packages/content-fetch/start.sh b/packages/content-fetch/start.sh
new file mode 100644
index 0000000000..783ecfa157
--- /dev/null
+++ b/packages/content-fetch/start.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+cat hosts >> /etc/hosts
+yarn workspace @omnivore/content-fetch start
\ No newline at end of file
diff --git a/packages/content-handler/src/index.ts b/packages/content-handler/src/index.ts
index a75af64df2..bf531676fb 100644
--- a/packages/content-handler/src/index.ts
+++ b/packages/content-handler/src/index.ts
@@ -39,6 +39,7 @@ import { WikipediaHandler } from './websites/wikipedia-handler'
 import { YoutubeHandler } from './websites/youtube-handler'
 import { ZhihuHandler } from './websites/zhihu-handler'
 import { TikTokHandler } from './websites/tiktok-handler'
+import { RawContentHandler } from './websites/raw-handler'

 const validateUrlString = (url: string): boolean => {
   const u = new URL(url)
@@ -66,6 +67,7 @@ const contentHandlers: ContentHandler[] = [
   new DerstandardHandler(),
   new ImageHandler(),
   new MediumHandler(),
+  new RawContentHandler(),
   new PdfHandler(),
   new ScrapingBeeHandler(),
   new TDotCoHandler(),
diff --git a/packages/content-handler/src/websites/medium-handler.ts b/packages/content-handler/src/websites/medium-handler.ts
index 111bbe787a..2c6a427506 100644
--- a/packages/content-handler/src/websites/medium-handler.ts
+++ b/packages/content-handler/src/websites/medium-handler.ts
@@ -1,4 +1,6 @@
 import { ContentHandler, PreHandleResult } from '../content-handler'
+import axios from 'axios'
+import { parseHTML } from 'linkedom'

 export class MediumHandler extends ContentHandler {
   constructor() {
@@ -11,13 +13,52 @@
     return u.hostname.endsWith('medium.com')
   }

+  addImages(document: Document): Document {
+    const pictures = document.querySelectorAll('picture')
+
+    pictures.forEach((pict) => {
+      const source = pict.querySelector('source')
+      if (source) {
+        const srcSet = source.getAttribute('srcSet')
+
+        const sources = (srcSet || '')
+          .split(', ')
+          .map((src) => src.split(' '))
+          .sort((a, b) =>
+            Number(a[1].replace('w', '')) > Number(b[1].replace('w', ''))
+              ? -1
+              : 1
+          )
+
+        // This should be the largest image in the source set.
+        if (sources && sources.length && Array.isArray(sources[0])) {
+          const url = sources[0][0]
+          const img = document.createElement('img')
+          img.src = url
+          pict.after(img)
+          pict.remove()
+        }
+      }
+    })
+
+    return document
+  }
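// A standalone sketch of the srcset selection in addImages above — each
// candidate is split into [url, width] pairs and sorted by its numeric "w"
// descriptor descending, so index 0 holds the largest rendition. Illustrative
// helper only; it assumes "w" descriptors as Medium emits them:
const largestFromSrcSet = (srcSet: string): string | undefined => {
  const sources = srcSet
    .split(', ')
    .map((candidate) => candidate.split(' '))
    .sort((a, b) =>
      Number(a[1]?.replace('w', '')) > Number(b[1]?.replace('w', '')) ? -1 : 1
    )
  // e.g. largestFromSrcSet('a.jpg 640w, b.jpg 1280w') === 'b.jpg'
  return sources[0]?.[0]
}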
   async preHandle(url: string): Promise<PreHandleResult> {
     console.log('prehandling medium url', url)

     try {
       const res = new URL(url)
       res.searchParams.delete('source')
-      return Promise.resolve({ url: res.toString() })
+
+      const response = await axios.get(res.toString())
+      const dom = parseHTML(response.data).document
+      const imageAddedDom = this.addImages(dom)
+      return {
+        title: dom.title,
+        content: imageAddedDom.body.outerHTML,
+        url: res.toString(),
+      }
     } catch (error) {
       console.error('error prehandling medium url', error)
       throw error
diff --git a/packages/content-handler/src/websites/raw-handler.ts b/packages/content-handler/src/websites/raw-handler.ts
new file mode 100644
index 0000000000..fb98decbcd
--- /dev/null
+++ b/packages/content-handler/src/websites/raw-handler.ts
@@ -0,0 +1,33 @@
+import { ContentHandler, PreHandleResult } from '../content-handler'
+import axios from 'axios'
+import { parseHTML } from 'linkedom'
+
+export class RawContentHandler extends ContentHandler {
+  constructor() {
+    super()
+    this.name = 'RawContentHandler'
+  }
+
+  shouldPreHandle(url: string): boolean {
+    const u = new URL(url)
+    const hostnames = [
+      'medium.com',
+      'fastcompany.com',
+      'fortelabs.com',
+      'theverge.com',
+    ]
+
+    return hostnames.some((h) => u.hostname.endsWith(h))
+  }
+
+  async preHandle(url: string): Promise<PreHandleResult> {
+    try {
+      const response = await axios.get(url)
+      const dom = parseHTML(response.data).document
+      return { title: dom.title, content: response.data as string, url: url }
+    } catch (error) {
+      console.error('error prehandling URL', error)
+      throw error
+    }
+  }
+}
diff --git a/packages/db/Dockerfile b/packages/db/Dockerfile
index b69e911c41..d86bb14134 100644
--- a/packages/db/Dockerfile
+++ b/packages/db/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:18.16
+FROM node:22.12

 WORKDIR /app
diff --git a/packages/discover/Dockerfile b/packages/discover/Dockerfile
index 6c1f45ed0c..bb36001edf 100644
--- a/packages/discover/Dockerfile
+++ b/packages/discover/Dockerfile
@@ -1,4 +1,4 @@
- FROM node:18.16 as builder
+ FROM node:22.12 as builder

 WORKDIR /app

@@ -17,7 +17,7 @@ COPY /packages/discover/tsconfig.json ./packages/discover/tsconfig.json
 RUN yarn install --pure-lockfile
 RUN yarn workspace @omnivore/discover build

-FROM node:18.16 as runner
+FROM node:22.12 as runner

 WORKDIR /app
diff --git a/packages/discover/src/lib/inputSources/articles/rss/rssConverters/atom.ts b/packages/discover/src/lib/inputSources/articles/rss/rssConverters/atom.ts
index 74fbe6616b..81d00d07b5 100644
--- a/packages/discover/src/lib/inputSources/articles/rss/rssConverters/atom.ts
+++ b/packages/discover/src/lib/inputSources/articles/rss/rssConverters/atom.ts
@@ -3,9 +3,7 @@
 /* eslint-disable @typescript-eslint/no-unsafe-return */
 /* eslint-disable @typescript-eslint/restrict-template-expressions */

-import { OmnivoreArticle } from '../../../../../types/OmnivoreArticle'
 import { slugify } from 'voca'
-import { Observable, tap } from 'rxjs'
 import { fromArrayLike } from 'rxjs/internal/observable/innerFrom'
 import { mapOrNull } from '../../../../utils/reactive'
 import {
diff --git a/packages/export-handler/Dockerfile b/packages/export-handler/Dockerfile
index ca84cfc008..8695c4bf0f 100644
--- a/packages/export-handler/Dockerfile
+++ b/packages/export-handler/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:18.16-alpine
+FROM
node:22.12-alpine WORKDIR /app diff --git a/packages/import-handler/Dockerfile b/packages/import-handler/Dockerfile index cafb9007ae..27e6dc3b22 100644 --- a/packages/import-handler/Dockerfile +++ b/packages/import-handler/Dockerfile @@ -1,4 +1,4 @@ -FROM node:18.16-alpine +FROM node:22.12-alpine WORKDIR /app diff --git a/packages/import-handler/Dockerfile-collector b/packages/import-handler/Dockerfile-collector index 6f3401a7b5..bc4ddd8fb7 100644 --- a/packages/import-handler/Dockerfile-collector +++ b/packages/import-handler/Dockerfile-collector @@ -1,4 +1,4 @@ -FROM node:18.16-alpine +FROM node:22.12-alpine WORKDIR /app diff --git a/packages/import-handler/package.json b/packages/import-handler/package.json index b274864c63..eee039a1f6 100644 --- a/packages/import-handler/package.json +++ b/packages/import-handler/package.json @@ -9,7 +9,6 @@ "keywords": [], "license": "Apache-2.0", "scripts": { - "test": "yarn mocha -r ts-node/register --config mocha-config.json", "test:typecheck": "tsc --noEmit", "lint": "eslint src --ext ts,js,tsx,jsx", "compile": "tsc", diff --git a/packages/inbound-email-handler/Dockerfile b/packages/inbound-email-handler/Dockerfile index dfb9b46720..cbe31ace32 100644 --- a/packages/inbound-email-handler/Dockerfile +++ b/packages/inbound-email-handler/Dockerfile @@ -1,4 +1,4 @@ -FROM node:18.16-alpine +FROM node:22.12-alpine # Run everything after as non-privileged user. WORKDIR /app diff --git a/packages/integration-handler/Dockerfile-exporter b/packages/integration-handler/Dockerfile-exporter index 60eee9d5ff..87dbf8988a 100644 --- a/packages/integration-handler/Dockerfile-exporter +++ b/packages/integration-handler/Dockerfile-exporter @@ -1,4 +1,4 @@ -FROM node:18.16-alpine +FROM node:22.12-alpine # Run everything after as non-privileged user. WORKDIR /app diff --git a/packages/integration-handler/Dockerfile-importer b/packages/integration-handler/Dockerfile-importer index e3a790b8a3..e031bd7836 100644 --- a/packages/integration-handler/Dockerfile-importer +++ b/packages/integration-handler/Dockerfile-importer @@ -1,4 +1,4 @@ -FROM node:18.16-alpine +FROM node:22.12-alpine # Run everything after as non-privileged user. 
WORKDIR /app diff --git a/packages/local-mail-watcher/.eslintrc b/packages/local-mail-watcher/.eslintrc new file mode 100644 index 0000000000..30bce838f0 --- /dev/null +++ b/packages/local-mail-watcher/.eslintrc @@ -0,0 +1,13 @@ +{ + "extends": "../../.eslintrc", + "parserOptions": { + "project": "tsconfig.json" + }, + "rules": { + "@typescript-eslint/no-unsafe-argument": "off", + "@typescript-eslint/no-explicit-any": "off", + "@typescript-eslint/strictNullChecks": "off", + "@typescript-eslint/no-unsafe-member-access": "off", + "@typescript-eslint/no-unsafe-assignment": "off" + } +} diff --git a/packages/local-mail-watcher/.gitignore b/packages/local-mail-watcher/.gitignore new file mode 100644 index 0000000000..b442f8ba9f --- /dev/null +++ b/packages/local-mail-watcher/.gitignore @@ -0,0 +1,131 @@ +.idea/ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional stylelint cache +.stylelintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next +out + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# vuepress v2.x temp and cache directory +.temp +.cache + +# Docusaurus cache and generated files +.docusaurus + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* \ No newline at end of file diff --git a/packages/local-mail-watcher/Dockerfile b/packages/local-mail-watcher/Dockerfile new file mode 100644 index 0000000000..7dfee57df5 --- /dev/null +++ b/packages/local-mail-watcher/Dockerfile @@ -0,0 +1,40 @@ + FROM node:22.12 as builder + +WORKDIR /app + +RUN apt-get update && apt-get install -y g++ make python3 + +COPY package.json . +COPY yarn.lock . +COPY tsconfig.json . +COPY .prettierrc . +COPY .eslintrc . 
+ +COPY /packages/local-mail-watcher/src ./packages/local-mail-watcher/src +COPY /packages/local-mail-watcher/package.json ./packages/local-mail-watcher/package.json +COPY /packages/local-mail-watcher/tsconfig.json ./packages/local-mail-watcher/tsconfig.json +COPY /packages/utils/package.json ./packages/utils/package.json + +RUN yarn install --pure-lockfile + +ADD /packages/utils ./packages/utils + +RUN yarn workspace @omnivore/utils build +RUN yarn workspace @omnivore/local-mail-watcher build + +FROM node:22.12 as runner + +WORKDIR /app + +ENV NODE_ENV production + +COPY --from=builder /app/packages/local-mail-watcher/dist /app/packages/local-mail-watcher/dist +COPY --from=builder /app/packages/local-mail-watcher/package.json /app/packages/local-mail-watcher/package.json +COPY --from=builder /app/packages/local-mail-watcher/node_modules /app/packages/local-mail-watcher/node_modules +COPY --from=builder /app/packages/utils/ /app/packages/utils/ + + +COPY --from=builder /app/node_modules /app/node_modules +COPY --from=builder /app/package.json /app/package.json + +CMD ["yarn", "workspace", "@omnivore/local-mail-watcher", "start"] diff --git a/packages/local-mail-watcher/Dockerfile-watcher b/packages/local-mail-watcher/Dockerfile-watcher new file mode 100644 index 0000000000..56a382d18a --- /dev/null +++ b/packages/local-mail-watcher/Dockerfile-watcher @@ -0,0 +1,39 @@ + FROM node:22.12 as builder + +WORKDIR /app + +RUN apt-get update && apt-get install -y g++ make python3 + +COPY package.json . +COPY yarn.lock . +COPY tsconfig.json . +COPY .prettierrc . +COPY .eslintrc . + +COPY /packages/local-mail-watcher/src ./packages/local-mail-watcher/src +COPY /packages/local-mail-watcher/package.json ./packages/local-mail-watcher/package.json +COPY /packages/local-mail-watcher/tsconfig.json ./packages/local-mail-watcher/tsconfig.json +COPY /packages/utils/package.json ./packages/utils/package.json + +RUN yarn install --pure-lockfile + +ADD /packages/utils ./packages/utils + +RUN yarn workspace @omnivore/utils build +RUN yarn workspace @omnivore/local-mail-watcher build + +FROM node:22.12 as runner + +WORKDIR /app + +ENV NODE_ENV production + +COPY --from=builder /app/packages/local-mail-watcher/dist /app/packages/local-mail-watcher/dist +COPY --from=builder /app/packages/local-mail-watcher/package.json /app/packages/local-mail-watcher/package.json +COPY --from=builder /app/packages/local-mail-watcher/node_modules /app/packages/local-mail-watcher/node_modules +COPY --from=builder /app/packages/utils/ /app/packages/utils/ + +COPY --from=builder /app/node_modules /app/node_modules +COPY --from=builder /app/package.json /app/package.json + +CMD ["yarn", "workspace", "@omnivore/local-mail-watcher", "start-watcher"] diff --git a/packages/local-mail-watcher/package.json b/packages/local-mail-watcher/package.json new file mode 100644 index 0000000000..de4490816e --- /dev/null +++ b/packages/local-mail-watcher/package.json @@ -0,0 +1,39 @@ +{ + "name": "@omnivore/local-mail-watcher", + "version": "0.0.1", + "scripts": { + "build": "tsc", + "dev": "ts-node-dev --files src/index.ts", + "start": "node dist/index.js", + "start-watcher": "node dist/watcher.js", + "lint": "eslint src --ext ts,js,tsx,jsx", + "lint:fix": "eslint src --fix --ext ts,js,tsx,jsx", + "test:typecheck": "tsc --noEmit" + }, + "dependencies": { + "chokidar": "^4.0.1", + "mailparser": "^3.7.1", + "axios": "^1.7.7", + "express": "^4.21.1", + "bullmq": "^5.22.0", + "@omnivore/utils": "1.0.0" + }, + "devDependencies": { + "@types/html-to-text": 
"^9.0.2", + "@types/jsdom": "^21.1.3", + "@types/mailparser": "^3.4.5", + "@types/axios" : "^0.14.4", + "@types/node": "^20.8.4", + "@types/express": "^5.0.0", + "@types/pg": "^8.10.5", + "@types/pg-format": "^1.0.3", + "@types/urlsafe-base64": "^1.0.28", + "@types/uuid": "^9.0.1", + "@types/voca": "^1.4.3", + "ts-node": "^10.9.1", + "tslib": "^2.6.2" + }, + "volta": { + "extends": "../../package.json" + } +} diff --git a/packages/local-mail-watcher/src/env.ts b/packages/local-mail-watcher/src/env.ts new file mode 100755 index 0000000000..9ff09bbc84 --- /dev/null +++ b/packages/local-mail-watcher/src/env.ts @@ -0,0 +1,66 @@ +interface redisConfig { + url?: string + cert?: string +} + +interface WatcherEnv { + filesystem: { + filePath: string + } + redis: { + mq: redisConfig + cache: redisConfig + } + sns: { + snsArn: string + } + apiKey: string + apiEndpoint: string +} + +const envParser = + (env: { [key: string]: string | undefined }) => + (varName: string, throwOnUndefined = false): string | undefined => { + const value = env[varName] + if (typeof value === 'string' && value) { + return value + } + + if (throwOnUndefined) { + throw new Error( + `Missing ${varName} with a non-empty value in process environment` + ) + } + + return + } + +export function getEnv(): WatcherEnv { + const parse = envParser(process.env) + const filesystem = { + filePath: parse('MAIL_FILE_PATH')!, + } + const redis = { + mq: { + url: parse('MQ_REDIS_URL'), + cert: parse('MQ_REDIS_CERT')?.replace(/\\n/g, '\n'), // replace \n with new line + }, + cache: { + url: parse('REDIS_URL'), + cert: parse('REDIS_CERT')?.replace(/\\n/g, '\n'), // replace \n with new line + }, + } + const sns = { + snsArn: parse('SNS_ARN') || '', + } + + return { + apiKey: parse('WATCHER_API_KEY')!, + apiEndpoint: parse('WATCHER_API_ENDPOINT')!, + sns, + filesystem, + redis, + } +} + +export const env = getEnv() diff --git a/packages/local-mail-watcher/src/index.ts b/packages/local-mail-watcher/src/index.ts new file mode 100644 index 0000000000..a0ab3b2d16 --- /dev/null +++ b/packages/local-mail-watcher/src/index.ts @@ -0,0 +1,122 @@ +import { RedisDataSource } from '@omnivore/utils' +import express, { Express, Request, Response } from 'express' + +import { env } from './env' +import { getQueue } from './lib/queue' +import { SnsMessage } from './types/SNS' +import { simpleParser } from 'mailparser' +import axios from 'axios' +import { convertToMailObject } from './lib/emailApi' + +console.log('Starting worker...') + +const app: Express = express() + +app.use(express.text({ limit: '50mb' })) +// Force JSON for SNS +app.use((req, res, next) => { + req.headers['content-type'] = 'application/json' + next() +}) + +app.use(express.json({ limit: '50mb' })) +app.use(express.urlencoded({ limit: '50mb', extended: true })) + +// create redis source +const redisDataSource = new RedisDataSource({ + cache: { + url: process.env.REDIS_URL, + cert: process.env.REDIS_CERT, + }, + mq: { + url: process.env.MQ_REDIS_URL, + cert: process.env.MQ_REDIS_CERT, + }, +}) +const queue = getQueue(redisDataSource.queueRedisClient) + +const addEmailEventToQueue = async (req: Request, res: Response) => { + const apiKey = req.headers['x-api-key'] + + if (!apiKey) { + res.status(401).send('Unauthorized: API key is missing') + return + } + + if (apiKey != env.apiKey) { + res.status(401).send('Unauthorized: Invalid API Key') + return + } + + await ( + await queue + ).add('save-newsletter', req.body, { + priority: 1, + attempts: 1, + delay: 500, + }) + res.sendStatus(200) +} + +// 
+// respond healthy to auto-scaler.
+app.get('/_ah/health', (_req: Request, res: Response) => {
+  res.sendStatus(200)
+})
+
+app.post('/mail', addEmailEventToQueue)
+
+app.post('/sns', async (req, res) => {
+  const bodyString = req.body as string
+  const snsMessage = JSON.parse(bodyString) as SnsMessage
+
+  console.log(`Received SNS Message`, snsMessage)
+  console.log(`Sns Topic ARN ${snsMessage['TopicArn']}`)
+
+  if (snsMessage.TopicArn != env.sns.snsArn) {
+    console.log(
+      `Topic ARN: ${snsMessage.TopicArn} doesn't match ${env.sns.snsArn}, failing...`
+    )
+    res.status(401).send()
+    return
+  }
+
+  if (snsMessage.Type == 'SubscriptionConfirmation') {
+    console.log('Subscribing to topic')
+    await axios.get(snsMessage.SubscribeURL)
+    res.status(200).send()
+    return
+  }
+
+  if (snsMessage.Type == 'Notification') {
+    const message = JSON.parse(snsMessage.Message) as {
+      notificationType: string
+      content: string
+    }
+    if (message.notificationType != 'Received') {
+      console.log('Not an email, failing...')
+      res.status(400).send()
+      return
+    }
+
+    const mailContent = await simpleParser(message.content)
+    const mail = convertToMailObject(mailContent)
+    console.log(mail)
+    await (
+      await queue
+    ).add('save-newsletter', mail, {
+      priority: 1,
+      attempts: 1,
+      delay: 500,
+    })
+    res.sendStatus(200)
+    return
+  }
+
+  res.status(400).send()
+})
+
+const port = process.env.PORT || 8080
+const server = app.listen(port, () => {
+  console.log('Mail Server started')
+})
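// The routes above only enqueue 'save-newsletter' jobs; a consumer on the same
// queue is assumed to live in the backend. A minimal BullMQ worker sketch of
// that contract (connection options and payload shape are assumptions drawn
// from this package, not the backend's actual worker):
import { Worker } from 'bullmq'
import IORedis from 'ioredis'

const connection = new IORedis(process.env.MQ_REDIS_URL || 'redis://localhost:6379', {
  maxRetriesPerRequest: null, // BullMQ workers require this setting
})

const worker = new Worker(
  'omnivore-backend-queue', // QUEUE constant from ./lib/queue
  async (job) => {
    if (job.name === 'save-newsletter') {
      // payload shape produced by convertToMailObject / the /mail route
      const mail = job.data as { from: string; to: string; subject: string }
      console.log('processing newsletter email:', mail.subject)
    }
  },
  { connection }
)

worker.on('failed', (job, err) => console.error('job failed', job?.id, err))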
diff --git a/packages/local-mail-watcher/src/lib/emailApi.ts b/packages/local-mail-watcher/src/lib/emailApi.ts
new file mode 100644
index 0000000000..c460e5067e
--- /dev/null
+++ b/packages/local-mail-watcher/src/lib/emailApi.ts
@@ -0,0 +1,25 @@
+import { EmailContents } from '../types/EmailContents'
+import axios from 'axios'
+import { env } from '../env'
+import { ParsedMail } from 'mailparser'
+
+export const sendToEmailApi = (data: EmailContents) => {
+  return axios.post(env.apiEndpoint, data, {
+    headers: {
+      ['x-api-key']: env.apiKey,
+      'Content-Type': 'application/json',
+    },
+    timeout: 5000,
+  })
+}
+
+export const convertToMailObject = (it: ParsedMail): EmailContents => {
+  return {
+    from: it.from?.value[0]?.address || '',
+    to: (Array.isArray(it.to) ? it.to[0].text : it.to?.text) || '',
+    subject: it.subject || '',
+    html: it.html || '',
+    text: it.text || '',
+    headers: it.headers,
+  }
+}
diff --git a/packages/local-mail-watcher/src/lib/queue.ts b/packages/local-mail-watcher/src/lib/queue.ts
new file mode 100644
index 0000000000..93a5b0034f
--- /dev/null
+++ b/packages/local-mail-watcher/src/lib/queue.ts
@@ -0,0 +1,27 @@
+import { RedisDataSource } from '@omnivore/utils'
+import { Queue, RedisClient } from 'bullmq'
+
+export const QUEUE = 'omnivore-backend-queue'
+
+export const getQueue = async (
+  connection: RedisClient,
+  queueName = QUEUE
+): Promise<Queue> => {
+  const queue = new Queue(queueName, {
+    connection,
+    defaultJobOptions: {
+      backoff: {
+        type: 'exponential',
+        delay: 2000, // 2 seconds
+      },
+      removeOnComplete: {
+        age: 3600, // keep up to 1 hour
+      },
+      removeOnFail: {
+        age: 24 * 3600, // keep up to 1 day
+      },
+    },
+  })
+  await queue.waitUntilReady()
+  return queue
+}
diff --git a/packages/local-mail-watcher/src/types/EmailContents.ts b/packages/local-mail-watcher/src/types/EmailContents.ts
new file mode 100644
index 0000000000..1ddef34f28
--- /dev/null
+++ b/packages/local-mail-watcher/src/types/EmailContents.ts
@@ -0,0 +1,20 @@
+import { HeaderValue } from 'mailparser'
+
+export type EmailContents = {
+  from: string
+  to: string
+  subject: string
+  html: string
+  text: string
+  headers: Map<string, HeaderValue>
+  unsubMailTo?: string
+  unsubHttpUrl?: string
+  forwardedFrom?: string
+  replyTo?: string
+  confirmationCode?: string
+  uploadFile?: {
+    fileName: string
+    contentType: string
+    id: string
+  }
+}
diff --git a/packages/local-mail-watcher/src/types/SNS.ts b/packages/local-mail-watcher/src/types/SNS.ts
new file mode 100644
index 0000000000..be2551984e
--- /dev/null
+++ b/packages/local-mail-watcher/src/types/SNS.ts
@@ -0,0 +1,7 @@
+export type SnsMessage = {
+  Type: string
+  TopicArn: string
+  SubscribeURL: string
+  content: string
+  Message: string
+}
diff --git a/packages/local-mail-watcher/src/watcher.ts b/packages/local-mail-watcher/src/watcher.ts
new file mode 100644
index 0000000000..4c924cef8a
--- /dev/null
+++ b/packages/local-mail-watcher/src/watcher.ts
@@ -0,0 +1,22 @@
+import chokidar from 'chokidar'
+import { simpleParser } from 'mailparser'
+import * as fs from 'node:fs'
+import { convertToMailObject, sendToEmailApi } from './lib/emailApi'
+import { env } from './env'
+
+chokidar.watch(env.filesystem.filePath).on('add', (path, _event) => {
+  console.log(path)
+  const contents = fs.readFileSync(path).toString()
+  void simpleParser(contents)
+    .then(convertToMailObject)
+    .then(async (emailData) => {
+      await sendToEmailApi(emailData)
+      console.log('Sent to email API')
+    })
+    .then(() => {
+      if (process.env['DELETE_FILE'] == 'true') {
+        fs.unlinkSync(path)
+        console.log('Deleted file')
+      }
+    })
+})
diff --git a/packages/local-mail-watcher/tsconfig.json b/packages/local-mail-watcher/tsconfig.json
new file mode 100644
index 0000000000..8d6ee874ea
--- /dev/null
+++ b/packages/local-mail-watcher/tsconfig.json
@@ -0,0 +1,9 @@
+{
+  "extends": "./../../tsconfig.json",
+  "compileOnSave": false,
+  "include": ["./src/**/*"],
+  "compilerOptions": {
+    "outDir": "dist",
+    "typeRoots": ["./../../node_modules/pgvector/types"]
+  }
+}
diff --git a/packages/local-mail-watcher/tslint.json b/packages/local-mail-watcher/tslint.json
new file mode 100644
index 0000000000..db7169cd90
--- /dev/null
+++ b/packages/local-mail-watcher/tslint.json
@@ -0,0 +1,51 @@
+{
+  "extends": "tslint:recommended",
+  "rulesDirectory": ["codelyzer"],
+  "rules": {
"array-type": false, + "arrow-parens": false, + "deprecation": { + "severity": "warn" + }, + "import-blacklist": [true, "rxjs/Rx"], + "interface-name": false, + "max-classes-per-file": false, + "max-line-length": [true, 140], + "member-access": false, + "member-ordering": [ + true, + { + "order": [ + "static-field", + "instance-field", + "static-method", + "instance-method" + ] + } + ], + "no-consecutive-blank-lines": false, + "no-console": [true, "debug", "info", "time", "timeEnd", "trace"], + "no-empty": false, + "no-inferrable-types": [true, "ignore-params"], + "no-non-null-assertion": true, + "no-redundant-jsdoc": true, + "no-switch-case-fall-through": true, + "no-use-before-declare": true, + "no-var-requires": false, + "object-literal-key-quotes": [true, "as-needed"], + "object-literal-sort-keys": false, + "ordered-imports": false, + "quotemark": [true, "single"], + "trailing-comma": false, + "no-output-on-prefix": true, + "no-inputs-metadata-property": true, + "no-outputs-metadata-property": true, + "no-host-metadata-property": true, + "no-input-rename": true, + "no-output-rename": true, + "use-life-cycle-interface": true, + "use-pipe-transform-interface": true, + "component-class-suffix": true, + "directive-class-suffix": true + } +} diff --git a/packages/pdf-handler/.eslintrc b/packages/pdf-handler/.eslintrc index e006282a6e..422c9cd8ff 100644 --- a/packages/pdf-handler/.eslintrc +++ b/packages/pdf-handler/.eslintrc @@ -2,5 +2,10 @@ "extends": "../../.eslintrc", "parserOptions": { "project": "tsconfig.json" + }, + "rules": { + "@typescript-eslint/no-unsafe-assignment": 0, + "@typescript-eslint/no-unsafe-argument": 0, + "@typescript-eslint/restrict-template-expressions": 0 } -} \ No newline at end of file +} diff --git a/packages/pdf-handler/Dockerfile b/packages/pdf-handler/Dockerfile index e3d44404ca..980bfaf849 100644 --- a/packages/pdf-handler/Dockerfile +++ b/packages/pdf-handler/Dockerfile @@ -1,4 +1,4 @@ -FROM node:18.16-alpine +FROM node:22.12-alpine # Run everything after as non-privileged user. 
WORKDIR /app diff --git a/packages/pdf-handler/package.json b/packages/pdf-handler/package.json index 12e20bcc38..eb001daac4 100644 --- a/packages/pdf-handler/package.json +++ b/packages/pdf-handler/package.json @@ -36,7 +36,7 @@ "bullmq": "^5.1.4", "concurrently": "^7.0.0", "dotenv": "^8.2.0", - "pdfjs-dist": "^2.9.359" + "pdfjs-dist": "^2.16.105" }, "volta": { "extends": "../../package.json" diff --git a/packages/pdf-handler/src/pdf.ts b/packages/pdf-handler/src/pdf.ts index 0585b742bf..b3087a2e74 100644 --- a/packages/pdf-handler/src/pdf.ts +++ b/packages/pdf-handler/src/pdf.ts @@ -3,12 +3,13 @@ /* eslint-disable @typescript-eslint/no-unsafe-member-access */ /* eslint-disable @typescript-eslint/no-unsafe-assignment */ /* eslint-disable @typescript-eslint/no-unsafe-argument */ -import { getDocument as _getDocument } from 'pdfjs-dist/legacy/build/pdf' + import { - TextItem, - PDFPageProxy, + getDocument as _getDocument, PDFDocumentProxy, -} from 'pdfjs-dist/types/display/api' + PDFPageProxy, +} from 'pdfjs-dist/legacy/build/pdf' +import { TextItem } from 'pdfjs-dist/types/src/display/api' interface Page { lines: string[] @@ -72,7 +73,7 @@ const getMetadataItem = async ( .getMetadata() .then((metadata) => metadata.info as MetadataInfo) .then((info) => { - return info[key] + return info[key] as string }) } @@ -122,7 +123,7 @@ export const readPdfText = async ( const parsePage = async (pdfPage: PDFPageProxy): Promise => { const rawContent = await pdfPage.getTextContent() return parsePageItems( - rawContent.items.filter((item): item is TextItem => 'str' in item) + rawContent.items.filter((item: any): item is TextItem => 'str' in item) ) } @@ -156,6 +157,7 @@ const parsePageItems = (pdfItems: TextItem[]): Page => { if (nextY != undefined) { const currentLineHeight: number = lineData[currentY].reduce( (finalValue, current) => + // eslint-disable-next-line @typescript-eslint/no-unsafe-return finalValue > current.height ? 
finalValue : current.height,
       -1
     )
diff --git a/packages/pdf-handler/test/pdf/pdf.test.ts b/packages/pdf-handler/test/pdf/pdf.test.ts
index 0019584226..f4b3c7d05e 100644
--- a/packages/pdf-handler/test/pdf/pdf.test.ts
+++ b/packages/pdf-handler/test/pdf/pdf.test.ts
@@ -21,7 +21,7 @@ describe('open a simple PDF with a set title', () => {
     const doc = await getDocument('./test/pdf/data/pdf-simple-test.pdf')
     const result = await getDocumentText(doc)
     expect(result).to.equal(
-      'This is the page title \n \nThis is some more text \n'
+      'This is the page title\n\nThis is some more text\n'
     )
   })
 })

@@ -30,8 +30,9 @@ describe('open a complex PDF with no title', () => {
   it('should return some initial content as the title', async () => {
     const doc = await getDocument('./test/pdf/data/pdf-complex-test.pdf')
     const result = await getDocumentTitle(doc)
+    console.log(result)
     expect(result).to.startWith(
-      'Improving communications around vaccine breakthrough and vaccine effectiveness'
+      'Improving communications'
     )
   })

@@ -47,6 +48,7 @@ describe('open a PDF with metadata set', () => {
     const parsed = await parsePdf(
       new URL('file://' + __dirname + '/data/welcome_to_your_library.pdf')
     )
+    expect(parsed.title).to.eq('Welcome to your Omnivore Library')
     expect(parsed.author).to.eq('Jackson Harper')
     expect(parsed.description).to.eq('This is the description of my PDF')
diff --git a/packages/puppeteer-parse/package.json b/packages/puppeteer-parse/package.json
index 0683a93ee4..13dca1ca56 100644
--- a/packages/puppeteer-parse/package.json
+++ b/packages/puppeteer-parse/package.json
@@ -9,7 +9,7 @@
   ],
   "dependencies": {
     "@omnivore/content-handler": "1.0.0",
-    "puppeteer-core": "^22.12.1",
+    "puppeteer-core": "^23.6.1",
     "puppeteer-extra": "^3.3.6",
     "puppeteer-extra-plugin-adblocker": "^2.13.6",
     "puppeteer-extra-plugin-stealth": "^2.11.2"
diff --git a/packages/puppeteer-parse/src/browser.ts b/packages/puppeteer-parse/src/browser.ts
index c89e92fa00..5145da236c 100644
--- a/packages/puppeteer-parse/src/browser.ts
+++ b/packages/puppeteer-parse/src/browser.ts
@@ -3,8 +3,10 @@ import puppeteer from 'puppeteer-extra'
 import AdblockerPlugin from 'puppeteer-extra-plugin-adblocker'
 import StealthPlugin from 'puppeteer-extra-plugin-stealth'

-puppeteer.use(StealthPlugin())
-puppeteer.use(AdblockerPlugin({ blockTrackers: true }))
+if (process.env['USE_FIREFOX'] != 'true') {
+  puppeteer.use(StealthPlugin())
+  puppeteer.use(AdblockerPlugin({ blockTrackers: true }))
+}

 let browserInstance: Browser | null = null

@@ -51,15 +53,22 @@
       isMobile: false,
       width: 1920,
     },
-    executablePath: process.env.CHROMIUM_PATH,
+    ignoreHTTPSErrors: true,
+    executablePath:
+      process.env.USE_FIREFOX == 'true'
+        ? process.env.FIREFOX_PATH
+        : process.env.CHROMIUM_PATH,
     // run in shell mode if headless
-    headless: process.env.LAUNCH_HEADLESS === 'true' ? 'shell' : false,
-    timeout: 10_000, // 10 seconds
-    dumpio: true, // show console logs in the terminal
+    headless: true,
+    browser: process.env['USE_FIREFOX'] == 'true' ? 'firefox' : 'chrome',
+    product: process.env['USE_FIREFOX'] == 'true' ? 'firefox' : 'chrome',
+    timeout: 30000,
+    dumpio: true,
+
+    // filter out targets
     targetFilter: (target: Target) =>
       target.type() !== 'other' || !!target.url(),
-  })) as Browser
+  })) as unknown as Browser

   const version = await browserInstance.version()
   console.log('Browser started', version)
diff --git a/packages/puppeteer-parse/src/index.ts b/packages/puppeteer-parse/src/index.ts
index 853c9a6ef3..c50ee74736 100644
--- a/packages/puppeteer-parse/src/index.ts
+++ b/packages/puppeteer-parse/src/index.ts
@@ -144,6 +144,52 @@ function getUrl(urlStr: string) {
   return parsed.href
 }

+const waitForDOMToSettle = (page: Page, timeoutMs = 5000, debounceMs = 1000) =>
+  page.evaluate(
+    (timeoutMs, debounceMs) => {
+      const debounce = (func: (...args: unknown[]) => void, ms = 1000) => {
+        let timeout: NodeJS.Timeout
+        console.log(`Debouncing in ${ms}`)
+        return (...args: unknown[]) => {
+          console.log('in debounce, clearing timeout again')
+          clearTimeout(timeout)
+          timeout = setTimeout(() => {
+            func.apply(this, args)
+          }, ms)
+        }
+      }
+      return new Promise<void>((resolve) => {
+        const mainTimeout = setTimeout(() => {
+          observer.disconnect()
+          console.log(
+            'Timed out whilst waiting for DOM to settle. Using what we have.'
+          )
+          resolve()
+        }, timeoutMs)
+
+        const debouncedResolve = debounce(() => {
+          observer.disconnect()
+          clearTimeout(mainTimeout)
+          resolve()
+        }, debounceMs)
+
+        const observer = new MutationObserver(() => {
+          debouncedResolve()
+        })
+
+        const config = {
+          attributes: true,
+          childList: true,
+          subtree: true,
+        }
+
+        observer.observe(document.body, config)
+      })
+    },
+    timeoutMs,
+    debounceMs
+  )
+
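// waitForDOMToSettle resolves once the MutationObserver sees no DOM mutations
// for `debounceMs`, or unconditionally after `timeoutMs`. A minimal usage
// sketch, assuming a live Puppeteer `page` (the values are illustrative; the
// actual call site in retrievePage below uses the defaults):
//
//   await page.goto(url, { timeout: 30 * 1000, waitUntil: ['load'] })
//   await waitForDOMToSettle(page, 10_000, 500) // allow 10s, require 500ms of quiet
//   const html = await page.content()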
 async function retrievePage(
   url: string,
   logRecord: Record,
@@ -177,105 +223,128 @@
   }

   // set timezone for the page
-  if (timezone) {
-    await page.emulateTimezone(timezone)
-  }
+  if (process.env['USE_FIREFOX'] !== 'true') {
+    if (timezone) {
+      await page.emulateTimezone(timezone)
+    }

-  const client = await page.createCDPSession()
+    const client = await page.createCDPSession()

-  const downloadPath = path.resolve('./download_dir/')
-  await client.send('Page.setDownloadBehavior', {
-    behavior: 'allow',
-    downloadPath,
-  })
+    const downloadPath = path.resolve('./download_dir/')
+    await client.send('Page.setDownloadBehavior', {
+      behavior: 'allow',
+      downloadPath,
+    })

-  // intercept request when response headers was received
-  await client.send('Network.setRequestInterception', {
-    patterns: [
-      {
-        urlPattern: '*',
-        resourceType: 'Document',
-        interceptionStage: 'HeadersReceived',
-      },
-    ],
-  })
+    // intercept request when response headers were received
+    await client.send('Network.setRequestInterception', {
+      patterns: [
+        {
+          urlPattern: '*',
+          resourceType: 'Document',
+          interceptionStage: 'HeadersReceived',
+        },
+      ],
+    })

-  client.on(
-    'Network.requestIntercepted',
-    (e: Protocol.Network.RequestInterceptedEvent) => {
-      ;(async () => {
-        const headers = e.responseHeaders || {}
+    client.on(
+      'Network.requestIntercepted',
+      (e: Protocol.Network.RequestInterceptedEvent) => {
+        ;(async () => {
+          const headers = e.responseHeaders || {}

-        const [contentType] = (
-          headers['content-type'] ||
-          headers['Content-Type'] ||
-          ''
-        )
-          .toLowerCase()
-          .split(';')
-        const obj: Protocol.Network.ContinueInterceptedRequestRequest = {
-          interceptionId: e.interceptionId,
-        }
+          const [contentType] = (
+            headers['content-type'] ||
+            headers['Content-Type'] ||
+            ''
+          )
+            .toLowerCase()
+            .split(';')
+          const obj: Protocol.Network.ContinueInterceptedRequestRequest = {
+            interceptionId: e.interceptionId,
+          }

-        if (
-          e.responseStatusCode &&
-          e.responseStatusCode >= 200 &&
-          e.responseStatusCode < 300
-        ) {
-          // We only check content-type on success responses
-          // as it doesn't matter what the content type is for things
-          // like redirects
-          if (contentType && !ALLOWED_CONTENT_TYPES.includes(contentType)) {
-            obj['errorReason'] = 'BlockedByClient'
+          if (
+            e.responseStatusCode &&
+            e.responseStatusCode >= 200 &&
+            e.responseStatusCode < 300
+          ) {
+            // We only check content-type on success responses
+            // as it doesn't matter what the content type is for things
+            // like redirects
+            if (contentType && !ALLOWED_CONTENT_TYPES.includes(contentType)) {
+              obj['errorReason'] = 'BlockedByClient'
+            }
           }
-        }

-        try {
-          await client.send('Network.continueInterceptedRequest', obj)
-        } catch {
-          // ignore
-        }
-      })()
-    }
-  )
+          try {
+            await client.send('Network.continueInterceptedRequest', obj)
+          } catch {
+            // ignore
+          }
+        })()
+      }
+    )
+  }

   /*
    * Disallow MathJax from running in Puppeteer and modifying the document,
    * we shall instead run it in our frontend application to transform any
    * mathjax content when present.
    */
-  await page.setRequestInterception(true)
+  let requestCount = 0
+  const failedRequests = new Set<string>()
+  page.removeAllListeners('request')
   page.on('request', (request) => {
     ;(async () => {
-      if (request.resourceType() === 'font') {
+      if (request.isInterceptResolutionHandled()) return
+      // since .resourceType() is not FF compatible, look for font files.
+      if (request.url().toLowerCase().includes('.woff2')) {
        // Disallow fonts from loading
        return request.abort()
       }
+
       if (requestCount++ > 100) {
         return request.abort()
       }
-      if (
-        request.resourceType() === 'script' &&
-        request.url().toLowerCase().indexOf('mathjax') > -1
-      ) {
+
+      if (failedRequests.has(request.url())) {
+        return request.abort()
+      }
+
+      if (request.url().toLowerCase().indexOf('mathjax') > -1) {
        return request.abort()
       }

       await request.continue()
     })()
   })
+  await page.setRequestInterception(true)

   page.on('response', (response) => {
+    if (!response.ok()) {
+      console.log('Failed request', response.url())
+      failedRequests.add(response.url())
+    }
+
     if (response.headers()['content-type'] === 'application/pdf') {
       lastPdfUrl = response.url()
     }
   })

+  console.log('Trying to load page, for 30 seconds')
+
   const response = await page.goto(url, {
     timeout: 30 * 1000,
-    waitUntil: ['networkidle0'],
+    waitUntil: ['load'],
   })
+
+  console.log('Waited for content to load, waiting for DOM to settle.')
+  await waitForDOMToSettle(page)
+  // Just wait for a few seconds to allow the dom to resolve.
+  // await new Promise((r) => setTimeout(r, 2500))
+
   if (!response) {
     throw new Error('No response from page')
   }
diff --git a/packages/rss-handler/Dockerfile b/packages/rss-handler/Dockerfile
index d7f222ba07..8b3c10b385 100644
--- a/packages/rss-handler/Dockerfile
+++ b/packages/rss-handler/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:18.16-alpine
+FROM node:22.12-alpine

 # Run everything after as non-privileged user.
 WORKDIR /app
diff --git a/packages/rule-handler/Dockerfile b/packages/rule-handler/Dockerfile
index a4d84a92bd..b5559026be 100644
--- a/packages/rule-handler/Dockerfile
+++ b/packages/rule-handler/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:18.16-alpine
+FROM node:22.12-alpine

 # Run everything after as non-privileged user.
 WORKDIR /app
diff --git a/packages/text-to-speech/Dockerfile b/packages/text-to-speech/Dockerfile
index 17a501d5c5..a075211d06 100644
--- a/packages/text-to-speech/Dockerfile
+++ b/packages/text-to-speech/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:18.16
+FROM node:22.12

 # Run everything after as non-privileged user.
 WORKDIR /app
diff --git a/packages/thumbnail-handler/Dockerfile b/packages/thumbnail-handler/Dockerfile
index f28b55ea7d..c2c642af7a 100644
--- a/packages/thumbnail-handler/Dockerfile
+++ b/packages/thumbnail-handler/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:18.16-alpine
+FROM node:22.12-alpine

 # Run everything after as non-privileged user.
 WORKDIR /app
diff --git a/packages/web/.eslintrc b/packages/web/.eslintrc
index c5a8fdcd83..30287e0a1f 100644
--- a/packages/web/.eslintrc
+++ b/packages/web/.eslintrc
@@ -21,6 +21,7 @@
   "ignorePatterns": ["next.config.js", "jest.config.js"],
   "rules": {
     "functional/no-mixed-type": 0,
-    "react/react-in-jsx-scope": 0
+    "react/react-in-jsx-scope": 0,
+    "@typescript-eslint/ban-ts-comment": 0
   }
 }
diff --git a/packages/web/Dockerfile b/packages/web/Dockerfile
index 5846e937fa..d9ea7d133e 100644
--- a/packages/web/Dockerfile
+++ b/packages/web/Dockerfile
@@ -1,7 +1,7 @@
 # Note this docker file is meant for local testing
 # and not for production.

-FROM node:18.16-alpine as builder
+FROM node:22.12-alpine as builder
 ENV NODE_OPTIONS=--max-old-space-size=8192
 ARG APP_ENV
 ARG BASE_URL
@@ -12,7 +12,7 @@
 ENV NEXT_PUBLIC_BASE_URL=$BASE_URL
 ENV NEXT_PUBLIC_SERVER_BASE_URL=$SERVER_BASE_URL
 ENV NEXT_PUBLIC_HIGHLIGHTS_BASE_URL=$HIGHLIGHTS_BASE_URL

-RUN apk add g++ make python3
+RUN apk add g++ make python3 py3-setuptools

 WORKDIR /app

@@ -32,7 +32,7 @@ RUN echo "module.exports = {}" > ./packages/web/next.config.js

 RUN yarn workspace @omnivore/web build

-FROM node:18.16-alpine as runner
+FROM node:22.12-alpine as runner
 LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore"

 ENV NODE_ENV production
diff --git a/packages/web/Dockerfile-self b/packages/web/Dockerfile-self
new file mode 100644
index 0000000000..c98c72f866
--- /dev/null
+++ b/packages/web/Dockerfile-self
@@ -0,0 +1,52 @@
+# Note this docker file is meant for local testing
+# and not for production.
+
+FROM node:22.12-alpine as builder
+ENV NODE_OPTIONS=--max-old-space-size=8192
+ARG APP_ENV
+ARG BASE_URL
+ARG SERVER_BASE_URL
+ARG HIGHLIGHTS_BASE_URL
+ENV NEXT_PUBLIC_APP_ENV=$APP_ENV
+ENV NEXT_PUBLIC_BASE_URL=$BASE_URL
+ENV NEXT_PUBLIC_SERVER_BASE_URL=$SERVER_BASE_URL
+ENV NEXT_PUBLIC_HIGHLIGHTS_BASE_URL=$HIGHLIGHTS_BASE_URL
+
+RUN apk add g++ make python3 py3-setuptools
+
+WORKDIR /app
+
+COPY package.json .
+COPY yarn.lock .
+COPY tsconfig.json .
+COPY .prettierrc .
+COPY .eslintrc .
+ +COPY /packages/web/package.json ./packages/web/package.json + +RUN yarn install --pure-lockfile +ADD /packages/web ./packages/web + +COPY ./packages/web/next.config.self.js ./packages/web/next.config.js +RUN yarn workspace @omnivore/web build + + +FROM node:22.12-alpine as runner +LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore" + +ENV NODE_ENV production +ENV PORT=8080 +ENV NEXT_TELEMETRY_DISABLED 1 + +WORKDIR /app + +COPY --from=builder /app/packages/web/next.config.js /app/packages/web/next.config.js +COPY --from=builder /app/packages/web/public/ /app/packages/web/public/ +COPY --from=builder /app/packages/web/.next/ /app/packages/web/.next/ +COPY --from=builder /app/packages/web/package.json /app/packages/web/package.json +COPY --from=builder /app/packages/web/node_modules /app/packages/web/node_modules +COPY --from=builder /app/node_modules /app/node_modules +COPY --from=builder /app/package.json /app/package.json + +EXPOSE 8080 +CMD ["yarn", "workspace", "@omnivore/web", "start"] diff --git a/packages/web/components/elements/ShutdownBanner.tsx b/packages/web/components/elements/ShutdownBanner.tsx deleted file mode 100644 index ea7f3ea2ef..0000000000 --- a/packages/web/components/elements/ShutdownBanner.tsx +++ /dev/null @@ -1,52 +0,0 @@ -import { usePersistedState } from '../../lib/hooks/usePersistedState' -import { CloseButton } from './CloseButton' -import { HStack, SpanBox } from './LayoutPrimitives' - -export const ShutdownBanner = () => { - const [ - showMaintenanceMode, - setShowMaintenanceMode, - isLoadingShowMaintenanceMode, - ] = usePersistedState({ - key: 'show-shutdown-mode', - isSessionStorage: true, - initialValue: true, - }) - return ( - <> - {!isLoadingShowMaintenanceMode && showMaintenanceMode && ( - - Omnivore is shutting down on Nov. 30th. - - Read More - - - { - setShowMaintenanceMode(false) - }} - /> - - )} - - ) -} diff --git a/packages/web/components/patterns/HighlightBar.tsx b/packages/web/components/patterns/HighlightBar.tsx index 4fc42080db..5e21a393ee 100644 --- a/packages/web/components/patterns/HighlightBar.tsx +++ b/packages/web/components/patterns/HighlightBar.tsx @@ -45,9 +45,10 @@ export function HighlightBar(props: HighlightBarProps): JSX.Element { borderRadius: '5px', border: '1px solid $thHighlightBar', boxShadow: `0px 4px 4px 0px rgba(0, 0, 0, 0.15)`, - + zIndex: 999, ...(props.displayAtBottom && { bottom: 'calc(38px + env(safe-area-inset-bottom, 40px))', + left: `calc(50% - ${(props.isNewHighlight ? 
295 : 323) / 2}px)` }), ...(props.displayAtBottom && { '@smDown': { diff --git a/packages/web/components/templates/AddLinkModal.tsx b/packages/web/components/templates/AddLinkModal.tsx index 309bf2024e..0eec8a252e 100644 --- a/packages/web/components/templates/AddLinkModal.tsx +++ b/packages/web/components/templates/AddLinkModal.tsx @@ -62,17 +62,16 @@ export function AddLinkModal(props: AddLinkModalProps): JSX.Element { }} > - {/* */} + /> {selectedTab == 'link' && } - {/* {selectedTab == 'feed' && } + {selectedTab == 'feed' && } {selectedTab == 'opml' && } {selectedTab == 'pdf' && } - {selectedTab == 'import' && } */} @@ -550,6 +549,7 @@ const UploadPad = (props: UploadPadProps): JSX.Element => { withCredentials: false, headers: { 'Content-Type': file.file.type, + 'origin': 'http://localhost:3000' }, onUploadProgress: (p) => { if (!p.total) { diff --git a/packages/web/components/templates/AuthLayout.tsx b/packages/web/components/templates/AuthLayout.tsx index b64d3cfb61..bc93888946 100644 --- a/packages/web/components/templates/AuthLayout.tsx +++ b/packages/web/components/templates/AuthLayout.tsx @@ -1,5 +1,4 @@ import { Box, VStack, HStack } from '../elements/LayoutPrimitives' -import { ShutdownBanner } from '../elements/ShutdownBanner' import { OmnivoreNameLogo } from '../elements/images/OmnivoreNameLogo' import { theme } from '../tokens/stitches.config' import { GoogleReCaptchaProvider } from '@google-recaptcha/react' diff --git a/packages/web/components/templates/LoginLayout.tsx b/packages/web/components/templates/LoginLayout.tsx index 7c27b390d2..468de57771 100644 --- a/packages/web/components/templates/LoginLayout.tsx +++ b/packages/web/components/templates/LoginLayout.tsx @@ -10,12 +10,10 @@ import type { LoginFormProps } from './LoginForm' import { OmnivoreNameLogo } from '../elements/images/OmnivoreNameLogo' import featureFullWidthImage from '../../public/static/images/login/login-feature-image-full.png' -import { ShutdownBanner } from '../elements/ShutdownBanner' export function LoginLayout(props: LoginFormProps): JSX.Element { return ( <> - } largerLayoutNode={} diff --git a/packages/web/components/templates/NavigationLayout.tsx b/packages/web/components/templates/NavigationLayout.tsx index d5bbbf4a2f..df5c1a4e46 100644 --- a/packages/web/components/templates/NavigationLayout.tsx +++ b/packages/web/components/templates/NavigationLayout.tsx @@ -21,9 +21,6 @@ import useWindowDimensions from '../../lib/hooks/useGetWindowDimensions' import { useHandleAddUrl } from '../../lib/hooks/useHandleAddUrl' import { useGetViewer } from '../../lib/networking/viewer/useGetViewer' import { useQueryClient } from '@tanstack/react-query' -import { usePersistedState } from '../../lib/hooks/usePersistedState' -import { CloseButton } from '../elements/CloseButton' -import { ShutdownBanner } from '../elements/ShutdownBanner' export type NavigationSection = | 'home' @@ -207,7 +204,6 @@ const Header = (props: HeaderProps): JSX.Element => { height: '58px', }} > -