-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdgaml_build_domain_corpus.yml
118 lines (104 loc) · 3.43 KB
/
dgaml_build_domain_corpus.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
---
# The purpose of this Ansible playbook is to create and populate an Elasticsearch
# index with "normal" and "dga" domains, to use as a corpus for Machine Learning.
#
# All this was inspired by a blog series from @Winterflower
# See: https://github.com/elastic/examples/tree/master/Machine%20Learning/DGA%20Detection
#
# Play definition: every task uses a local connection and the Python
# interpreter that is running ansible-playbook itself.
- hosts: all
  vars:
    # Scratch directory: $TMPDIR with trailing slashes removed, else /var/tmp.
    # The env lookup yields an empty string (not an undefined value) when
    # TMPDIR is unset, so default() needs its second argument set to true to
    # treat that empty string as missing; without it workdir would collapse
    # to "/dgaml-demo".
    workdir: "{{ lookup('env', 'TMPDIR') | default('/var/tmp', true) | regex_replace('/+$', '') }}/dgaml-demo"
    corpus_index_prefix: "domain_corpus"
    # The three values below are not referenced in this file; presumably they
    # are consumed by the imported task files -- verify there.
    # Days stop at 28 so that every value is valid in every month.
    day_range: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
    month_range: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    # 2^32 - 1
    num_max: 4294967295
    ansible_connection: local
    ansible_python_interpreter: "{{ ansible_playbook_python }}"
  tasks:
# Abort early with a readable message unless both Elastic Cloud credentials
# are supplied. default('') keeps the condition evaluable when a variable is
# not defined at all; a bare "not el_cloud_id" would instead stop the run
# with an undefined-variable error and never show msg.
- name: Fail check
  fail:
    msg: Requires el_cloud_id and el_cloud_pw
  when: not (el_cloud_id | default('')) or not (el_cloud_pw | default(''))
  tags:
    - always
# Derive the Elasticsearch/Kibana endpoints and the target index name.
# el_cloud_id is reduced to the part after its first ':', base64-decoded and
# split on '$'. Element 0 is used as the shared host suffix, element 1 as the
# Elasticsearch host prefix and element 2 as the Kibana host prefix.
- name: Set variables
  vars:
    el_cloud_info: "{{ (el_cloud_id | regex_replace('^[^:]*:', '') | b64decode).split('$') | list }}"
  set_fact:
    elasticsearch_url: "https://{{ el_cloud_info[1] }}.{{ el_cloud_info[0] }}"
    kibana_url: "https://{{ el_cloud_info[2] }}.{{ el_cloud_info[0] }}"
    # An already defined corpus_index wins; otherwise "<prefix>-<date>".
    corpus_index: "{{ corpus_index | default(corpus_index_prefix ~ '-' ~ ansible_date_time.date) }}"
  # Mark the task failed when the decoded id has fewer than three parts.
  failed_when: el_cloud_info | length < 3
  tags: [always]
# Make sure the scratch directory exists.
# Native YAML module arguments replace the free-form key=value string; the
# mode is quoted so it stays the octal string "0775" instead of being parsed
# as a number by YAML.
- name: "Check working dir: {{ workdir }}"
  file:
    path: "{{ workdir }}"
    state: directory
    mode: "0775"
  tags:
    - always
# Delete any previous copy of the corpus index. 404 is listed as an accepted
# status, and the task only reports "changed" when the response status was
# something other than 404.
- name: Remove old index
  uri:
    method: DELETE
    url: "{{ elasticsearch_url }}/{{ corpus_index }}"
    url_username: elastic
    url_password: "{{ el_cloud_pw }}"
    force_basic_auth: true
    headers:
      Accept: "application/json"
    status_code:
      - 200
      - 201
      - 404
    timeout: 60
    return_content: false
  register: result
  changed_when: result.status != 404
  tags: [rem_old]
# Print the registered result of the preceding request; only shown when
# Ansible runs at verbosity 1 (-v) or higher.
# Native YAML module arguments replace the free-form key=value string.
- name: Dump result
  debug:
    var: result
    verbosity: 1
  tags:
    - rem_old
# Run the shared Top-1M task file, parameterised for the Alexa list.
# NOTE(review): the archive URL is plain HTTP -- consider HTTPS if the
# endpoint supports it.
- name: Ingest Alexa Top 1M
  import_tasks: tasks/ingest_top1m.yml
  vars:
    top1m_name: Alexa
    top1m_url: "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
  tags: [alexa]
# Run the shared Top-1M task file, parameterised for the Umbrella list.
# NOTE(review): the archive URL is plain HTTP -- consider HTTPS if the
# endpoint supports it.
- name: Ingest Umbrella Top 1M
  import_tasks: tasks/ingest_top1m.yml
  vars:
    top1m_name: Umbrella
    top1m_url: "http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"
  tags: [umbrella]
# NOTE: keep this as the last ingest step, so that DGA domains which also
# appear in the Top-1M lists end up marked as DGA.
- name: Ingest DGAs
  import_tasks: tasks/ingest_dga_domains.yml
  vars:
    # Generators courtesy of @baderj:
    # https://github.com/baderj/domain_generation_algorithms
    dga_gen_url: "https://github.com/baderj/domain_generation_algorithms/archive/master.zip"
  tags: [dga]
# Create a Kibana index-pattern saved object whose id and title are both the
# corpus index name. 409 (HTTP Conflict -- presumably "pattern already
# exists", confirm against Kibana) is an accepted status, and in that case
# the task reports no change.
- name: "Add Kibana index pattern for {{ corpus_index }}"
  uri:
    url: "{{ kibana_url }}/api/saved_objects/index-pattern/{{ corpus_index }}"
    method: POST
    body_format: json
    body:
      attributes:
        title: "{{ corpus_index }}"
    headers:
      # Quoted so the header value is the string "true" rather than a YAML
      # boolean that would have to be stringified on the way out.
      kbn-xsrf: "true"
      Accept: "application/json"
    url_username: elastic
    url_password: "{{ el_cloud_pw }}"
    force_basic_auth: true
    timeout: 60
    status_code: [200, 201, 409]
    return_content: false
  register: result
  changed_when: result.status != 409
  tags:
    - index_pattern
# Print the registered result of the preceding request; only shown when
# Ansible runs at verbosity 1 (-v) or higher.
# Native YAML module arguments replace the free-form key=value string.
- name: Dump result
  debug:
    var: result
    verbosity: 1
  tags:
    - index_pattern