From 08028aab98e5906f56f7a9eabdd4f399910f8f61 Mon Sep 17 00:00:00 2001 From: Thomas Dupas Date: Fri, 30 Sep 2022 19:20:59 +0200 Subject: [PATCH] Fixes #191: add ability to auto-discover nameservers --- .../sidnlabs/entrada/ScheduledExecution.java | 33 ++++++++++++++++--- .../nl/sidnlabs/entrada/file/FileManager.java | 2 ++ .../entrada/file/HDFSFileManagerImpl.java | 6 ++++ .../entrada/file/LocalFileManagerImpl.java | 16 +++++++++ .../entrada/file/S3FileManagerImpl.java | 26 +++++++++++++++ src/main/resources/application.properties | 1 + 6 files changed, 80 insertions(+), 4 deletions(-) diff --git a/src/main/java/nl/sidnlabs/entrada/ScheduledExecution.java b/src/main/java/nl/sidnlabs/entrada/ScheduledExecution.java index 3626f23..baeffaa 100644 --- a/src/main/java/nl/sidnlabs/entrada/ScheduledExecution.java +++ b/src/main/java/nl/sidnlabs/entrada/ScheduledExecution.java @@ -2,6 +2,8 @@ import java.util.Arrays; import java.util.List; +import java.util.Objects; +import nl.sidnlabs.entrada.file.FileManagerFactory; import org.apache.commons.lang3.StringUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; @@ -27,17 +29,23 @@ public class ScheduledExecution { @Value("${entrada.nameservers}") private String servers; + @Value("${entrada.location.input}") + private String inputLocation; + @Autowired private GeoIPService geoIPService; private Timer processTimer; + private FileManagerFactory fileManagerFactory; + public ScheduledExecution(ServerContext serverCtx, ApplicationContext ctx, MeterRegistry registry, - SharedContext sharedContext, List fileManagers) { + SharedContext sharedContext, FileManagerFactory fileManagerFactory, List fileManagers) { this.serverCtx = serverCtx; this.ctx = ctx; this.sharedContext = sharedContext; + this.fileManagerFactory = fileManagerFactory; this.fileManagers = fileManagers; processTimer = registry.timer("processor.execution.time"); } @@ -64,11 +72,24 @@ public void 
run() { if (StringUtils.isBlank(servers)) { // no individual servers configured, assume the pcap data is in the input location root dir runForServer("", ctx.getBean(PacketProcessor.class)); + } else if (Objects.equals(servers, "auto")) { + // auto scanning for folder configured, deducing server name from folder names + FileManager fm = fileManagerFactory.getFor(inputLocation); + + log.info("Scan for directories in: {}", inputLocation); + + inputLocation = StringUtils + .appendIfMissing(inputLocation, System.getProperty("file.separator"), + System.getProperty("file.separator")); + + List folders = fm.folders(inputLocation); + + log.info("Server directories found to process: {}", folders); + runForServer(folders.stream()); + } else { // individual servers configured, process each server directory - Arrays - .stream(StringUtils.split(servers, ",")) - .forEach(s -> runForServer(s, ctx.getBean(PacketProcessor.class))); + runForServer(Arrays.stream(StringUtils.split(servers, ","))); } // cleanup filesystems, make sure all cached data and locked files are cleanup up @@ -79,6 +100,10 @@ public void run() { log.info("Completed loading name server data"); } + private void runForServer(Stream servers) { + servers.forEach(s -> runForServer(s, ctx.getBean(PacketProcessor.class))); + } + private void runForServer(String server, PacketProcessor processor) { log.info("Start loading data for: {}", server); diff --git a/src/main/java/nl/sidnlabs/entrada/file/FileManager.java b/src/main/java/nl/sidnlabs/entrada/file/FileManager.java index deb3522..10763a5 100644 --- a/src/main/java/nl/sidnlabs/entrada/file/FileManager.java +++ b/src/main/java/nl/sidnlabs/entrada/file/FileManager.java @@ -22,6 +22,8 @@ public interface FileManager { List files(String location, boolean recursive, String... 
filter); + List folders(String location); + Optional open(String location); /** diff --git a/src/main/java/nl/sidnlabs/entrada/file/HDFSFileManagerImpl.java b/src/main/java/nl/sidnlabs/entrada/file/HDFSFileManagerImpl.java index 1f3d45c..1c3dfbe 100644 --- a/src/main/java/nl/sidnlabs/entrada/file/HDFSFileManagerImpl.java +++ b/src/main/java/nl/sidnlabs/entrada/file/HDFSFileManagerImpl.java @@ -141,6 +141,12 @@ public List files(String dir, boolean recursive, String... filter) { return Collections.emptyList(); } + @Override + public List folders(String dir) { +// TODO: see how to list first level directories on hadoop; since we don't run on hadoop we can't test it + return Collections.emptyList(); + } + private boolean checkFilter(String file, List filters) { if (filters.isEmpty()) { return true; diff --git a/src/main/java/nl/sidnlabs/entrada/file/LocalFileManagerImpl.java b/src/main/java/nl/sidnlabs/entrada/file/LocalFileManagerImpl.java index 3ae3aec..d25c96a 100644 --- a/src/main/java/nl/sidnlabs/entrada/file/LocalFileManagerImpl.java +++ b/src/main/java/nl/sidnlabs/entrada/file/LocalFileManagerImpl.java @@ -70,6 +70,22 @@ public List files(String dir, boolean recursive, String... 
filter) { .collect(Collectors.toList()); } + @Override + public List folders(String dir) { + + File fDir = new File(dir); + if (!fDir.isDirectory()) { + log.error("{} is not a valid directory", dir); + return Collections.emptyList(); + } + + return Arrays + .stream(fDir.listFiles()) + .filter(File::isDirectory) + .map(File::getName) + .collect(Collectors.toList()); + } + private boolean checkFilter(String file, List filters) { if (filters.isEmpty()) { return true; diff --git a/src/main/java/nl/sidnlabs/entrada/file/S3FileManagerImpl.java b/src/main/java/nl/sidnlabs/entrada/file/S3FileManagerImpl.java index 6f80654..7cd9961 100644 --- a/src/main/java/nl/sidnlabs/entrada/file/S3FileManagerImpl.java +++ b/src/main/java/nl/sidnlabs/entrada/file/S3FileManagerImpl.java @@ -112,6 +112,32 @@ public List files(String location, boolean recursive, String... filter) .collect(Collectors.toList()); } + @Override + public List folders(String location) { + List folders = new ArrayList<>(); + + Optional details = S3Details.from(location); + if (!details.isPresent()) { + return folders; + } + + ListObjectsV2Request lor = new ListObjectsV2Request() + .withBucketName(details.get().getBucket()) + .withPrefix(details.get().getKey()) + .withDelimiter("/"); + + ListObjectsV2Result listing = amazonS3.listObjectsV2(lor); + listing.getCommonPrefixes().stream().forEach(os -> folders.add( + StringUtils.replace( + StringUtils.removeEnd(os, "/"), + details.get().getKey(), + "" + ) + )); + + return folders; + } + private boolean checkFilter(String file, List filters) { if (filters.isEmpty()) { return true; diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 41b6ad1..6e82ca0 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -152,6 +152,7 @@ entrada.engine=local # List of name server sub-directories in the inout directory # each server sub-directories can have format _ # the ns and 
anycast_site parts will be extracted and save with the DNS data +# if set to auto, every folder directly under the input directory is processed and the server name is deduced from the folder name entrada.nameservers= # name of the entrada database and tables that should be created entrada.database.name=entrada