Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 1922986 - Create script that exports BMO data as JSON suitable for import into a BigQuery instance in GCP #2370

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ jobs:
name: run bmo specific tests
command: |
[[ -f build_info/only_version_changed.txt ]] && exit 0
docker-compose -f docker-compose.test.yml run --build bmo.test test_bmo -q -f t/bmo/*.t
docker-compose -f docker-compose.test.yml run --build bmo.test test_bmo -q -f t/bmo/*.t extensions/*/t/bmo/*.t
- *store_log

workflows:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
- name: Build Docker test images
run: docker-compose -f docker-compose.test.yml build
- name: Run bmo specific tests
run: docker-compose -f docker-compose.test.yml run -e CI=1 bmo.test test_bmo -q -f t/bmo/*.t
run: docker-compose -f docker-compose.test.yml run -e CI=1 bmo.test test_bmo -q -f t/bmo/*.t extensions/*/t/bmo/*.t

test_selenium_1:
runs-on: ubuntu-latest
Expand Down
6 changes: 6 additions & 0 deletions conf/checksetup_answers.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ $answer{'sitemapindex_google_host'} = 'gcs';
$answer{'sitemapindex_google_bucket'} = 'sitemapindex';
$answer{'sitemapindex_google_service_account'} = 'test';

# BMO ETL export settings for the docker-compose stacks.
# The base URL targets the `bq` compose service, which listens on port 9050.
$answer{'bmo_etl_enabled'} = 1;
$answer{'bmo_etl_base_url'} = 'http://bq:9050';
# Mirrors the 'test' value used for sitemapindex_google_service_account above.
$answer{'bmo_etl_service_account'} = 'test';
# Project and dataset ids match the ones seeded by docker/bigquery/data.yaml
# (project `test`, dataset `bugzilla`).
$answer{'bmo_etl_project_id'} = 'test';
$answer{'bmo_etl_dataset_id'} = 'bugzilla';

$answer{'duo_uri'} = 'http://localhost:8001';
$answer{'duo_client_id'} = '6rZ3KnrL04uyGjLd8foO';
$answer{'duo_client_secret'} = '3vg6cm0Gj0DpC6ZJACXdZ1NrVRi1AhkwjfXnlFaJ';
11 changes: 11 additions & 0 deletions docker-compose.test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ services:
- memcached
- s3
- gcs
- bq

externalapi.test:
build: *build_bmo
Expand Down Expand Up @@ -69,3 +70,13 @@ services:
- ./docker/gcs/attachments:/data/attachments
- ./docker/gcs/sitemapindex:/data/sitemapindex
- ./docker/gcs/mining:/data/mining

# BigQuery emulator for the test stack. The image is built from
# docker/bigquery/Dockerfile, which copies the seed schema to /data.yaml.
bq:
  build:
    dockerfile: Dockerfile
    context: ./docker/bigquery
  working_dir: /work
  ports:
    - "9050:9050"
  # Arguments appended to the image's entrypoint, one per list item.
  command:
    - "--project=test"
    - "--data-from-yaml=/data.yaml"
28 changes: 28 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ services:
- memcached
- s3
- gcs
- bigquery
- externalapi.test
- bq
ports:
- 8000:8000

Expand Down Expand Up @@ -136,17 +138,43 @@ services:
- bmo-gcs-sitemapindex:/data/sitemapindex
- bmo-gcs-mining:/data/mining

# BigQuery emulator run directly from the upstream image, with the seed
# schema bind-mounted from the working tree instead of baked into an image.
bigquery:
  platform: linux/x86_64
  # Same emulator release as docker/bigquery/Dockerfile, instead of the
  # moving `latest` tag, so both emulator services behave alike.
  image: ghcr.io/goccy/bigquery-emulator:0.6.5
  ports:
    # The `bq` service already publishes host port 9050 and two services
    # cannot bind the same host port, so this one is exposed on 9051.
    # The container-side port is unchanged.
    - "9051:9050"
  volumes:
    - bmo-bigquery-data:/work
    - ./docker/bigquery/data.yaml:/work/data.yaml
  working_dir: /work
  command: |
    --project=test --data-from-yaml=/work/data.yaml --log-level=debug

# Runs external_test_api.pl as a daemon listening on port 8001, using the
# shared bmo build definition.
externalapi.test:
  build: *bmo_build
  platform: linux/x86_64
  ports:
    - "8001:8001"
  # Exec-form entrypoint: one argument per list item.
  entrypoint:
    - perl
    - /app/external_test_api.pl
    - daemon
    - "-l"
    - "http://*:8001"

# BigQuery emulator for local development. The image is built from
# docker/bigquery/Dockerfile, which copies the seed schema to /data.yaml.
bq:
  platform: linux/x86_64
  build:
    dockerfile: Dockerfile
    context: ./docker/bigquery
  working_dir: /work
  volumes:
    - bmo-bq-data:/work
  ports:
    - "9050:9050"
  # Arguments appended to the image's entrypoint, one per list item.
  command:
    - "--project=test"
    - "--data-from-yaml=/data.yaml"
    - "--log-level=debug"

dklawren marked this conversation as resolved.
Show resolved Hide resolved
# Named volumes used by the services in this file. Every named volume that a
# service mounts must be declared here, or Compose rejects the project.
volumes:
  bmo-mysql-db:
  bmo-data-dir:
  bmo-s3-data:
  bmo-gcs-attachments:
  bmo-gcs-sitemapindex:
  bmo-gcs-mining:
  # Mounted at /work by the `bigquery` service.
  bmo-bigquery-data:
  # Mounted at /work by the `bq` service.
  bmo-bq-data:
3 changes: 3 additions & 0 deletions docker/bigquery/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# BigQuery emulator image, pinned to release 0.6.5.
FROM ghcr.io/goccy/bigquery-emulator:0.6.5

# Bake the seed schema into the image at /data.yaml, the path the compose
# `bq` services pass via --data-from-yaml.
COPY data.yaml /data.yaml
185 changes: 185 additions & 0 deletions docker/bigquery/data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
---
# Seed schema for the BigQuery emulator, loaded through --data-from-yaml.
# Declares project `test` with dataset `bugzilla`; these ids match
# bmo_etl_project_id / bmo_etl_dataset_id in conf/checksetup_answers.txt.
# Every table carries a `snapshot_date` DATE column.
# Column types use one spelling throughout (INT64, BOOL) rather than mixing
# them with the INTEGER / BOOLEAN aliases.
projects:
  - id: test
    datasets:
      - id: bugzilla
        tables:
          - id: bugs
            columns:
              - name: id
                type: INT64
              - name: assignee_id
                type: INT64
              - name: url
                type: STRING
              - name: severity
                type: STRING
              - name: status
                type: STRING
              - name: type
                type: STRING
              - name: crash_signature
                type: STRING
              - name: component
                type: STRING
              - name: creation_ts
                type: TIMESTAMP
              - name: updated_ts
                type: TIMESTAMP
              - name: op_sys
                type: STRING
              - name: priority
                type: STRING
              - name: product
                type: STRING
              - name: platform
                type: STRING
              - name: reporter_id
                type: INT64
              - name: resolution
                type: STRING
              - name: summary
                type: STRING
              - name: whiteboard
                type: STRING
              - name: milestone
                type: STRING
              - name: version
                type: STRING
              - name: team_name
                type: STRING
              - name: group
                type: STRING
              - name: classification
                type: STRING
              - name: is_public
                type: BOOL
              - name: comment_count
                type: INT64
              - name: cc_count
                type: INT64
              - name: vote_count
                type: INT64
              - name: snapshot_date
                type: DATE
          - id: attachments
            columns:
              - name: id
                type: INT64
              - name: bug_id
                type: INT64
              - name: creation_ts
                type: TIMESTAMP
              - name: description
                type: STRING
              - name: filename
                type: STRING
              - name: is_obsolete
                type: BOOL
              - name: content_type
                type: STRING
              - name: updated_ts
                type: TIMESTAMP
              - name: submitter_id
                type: INT64
              - name: snapshot_date
                type: DATE
          - id: flags
            columns:
              - name: attachment_id
                type: INT64
              - name: bug_id
                type: INT64
              - name: creation_ts
                type: TIMESTAMP
              - name: updated_ts
                type: TIMESTAMP
              - name: requestee_id
                type: INT64
              - name: setter_id
                type: INT64
              - name: name
                type: STRING
              - name: value
                type: STRING
              - name: snapshot_date
                type: DATE
          - id: tracking_flags
            columns:
              - name: bug_id
                type: INT64
              - name: name
                type: STRING
              - name: value
                type: STRING
              - name: snapshot_date
                type: DATE
          - id: keywords
            columns:
              - name: bug_id
                type: INT64
              - name: keyword
                type: STRING
              - name: snapshot_date
                type: DATE
          - id: see_also
            columns:
              - name: bug_id
                type: INT64
              - name: url
                type: STRING
              - name: snapshot_date
                type: DATE
          - id: bug_mentors
            columns:
              - name: bug_id
                type: INT64
              - name: user_id
                type: INT64
              - name: snapshot_date
                type: DATE
          - id: bug_dependencies
            columns:
              - name: bug_id
                type: INT64
              - name: depends_on_id
                type: INT64
              - name: snapshot_date
                type: DATE
          - id: bug_regressions
            columns:
              - name: bug_id
                type: INT64
              - name: regresses_id
                type: INT64
              - name: snapshot_date
                type: DATE
          - id: bug_duplicates
            columns:
              - name: bug_id
                type: INT64
              - name: duplicate_of_id
                type: INT64
              - name: snapshot_date
                type: DATE
          - id: users
            columns:
              - name: id
                type: INT64
              - name: last_seen
                type: TIMESTAMP
              - name: email
                type: STRING
              - name: nick
                type: STRING
              - name: name
                type: STRING
              - name: is_staff
                type: BOOL
              - name: is_trusted
                type: BOOL
              - name: ldap_email
                type: STRING
              - name: is_new
                type: BOOL
              - name: snapshot_date
                type: DATE
42 changes: 42 additions & 0 deletions extensions/BMO/Extension.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1384,6 +1384,21 @@ sub db_schema_abstract_schema {
],
INDEXES => [job_last_run_name_idx => {FIELDS => ['name'], TYPE => 'UNIQUE',},],
};
$args->{schema}->{bmo_etl_cache} = {
FIELDS => [
id => {TYPE => 'INT3', NOTNULL => 1,},
snapshot_date => {TYPE => 'DATETIME', NOTNULL => 1,},
table_name => {TYPE => 'VARCHAR(100)', NOTNULL => 1,},
data => {TYPE => 'LONGBLOB', NOTNULL => 1,},
],
INDEXES =>
[bmo_etl_cache_idx => {FIELDS => ['id', 'snapshot_date', 'table_name']}],
};
$args->{schema}->{bmo_etl_locked} = {
FIELDS => [
value => {TYPE => 'VARCHAR(20)', NOTNULL => 1,},
],
};
}

sub install_update_db {
Expand Down Expand Up @@ -2588,6 +2603,33 @@ sub config_modify_panels {
name => 'enable_triaged_keyword',
type => 'b',
};
push @{$args->{panels}->{reports}->{params}},
{
name => 'bmo_etl_enabled',
type => 'b',
default => 0,
};
push @{$args->{panels}->{reports}->{params}},
{
name => 'bmo_etl_base_url',
type => 't',
};
push @{$args->{panels}->{reports}->{params}},
{
name => 'bmo_etl_service_account',
type => 't',
};
push @{$args->{panels}->{reports}->{params}},
{
name => 'bmo_etl_project_id',
type => 't',
};
push @{$args->{panels}->{reports}->{params}},
{
name => 'bmo_etl_dataset_id',
type => 't',
};

}

sub comment_after_add_tag {
Expand Down
Loading
Loading