"""Build YOLO-style training list files from Pascal-VOC style XML annotations.

Provenance: this script (``voc_07_12_xml_to_txt_wh.py``) was contributed as a
six-commit patch series by dailiwei (19-20 Aug 2019), described as "txt data
preparation for VOC, consistent with author's code".

Expected directory layout, relative to the directory the script is run from::

    my_imgs&labels/
        2019/
            imgs/            {train, val, test}        <- put the .jpg images here
            labels/          {train_l, val_l, test_l}  <- put the .xml labels here
            final_datas_wh/  {train.txt, val.txt, test.txt}  <- generated output

Every image must have a label file with the same base name
(``imgs/train/foo.jpg`` <-> ``labels/train_l/foo.xml``).

Each line of the generated ``final_datas_wh/<set>.txt`` has the form::

    <line_index> <absolute_image_path> <width> <height> [<class_id> <xmin> <ymin> <xmax> <ymax>]...

As a side product, ``imgs/<set>/<set>_list.txt`` is written with one image id
(file name without ``.jpg``) per line.

More info: https://blog.csdn.net/qq_43322615/article/details/94567969
"""
import os
import xml.etree.ElementTree as ET

# (year, image sub-directory, name of the id-list file, label sub-directory)
sets = [('2019', 'train', 'train_list', 'train_l'), ('2019', 'val', 'val_list', 'val_l'),
        ('2019', 'test', 'test_list', 'test_l')]

# Position in this list is the class id written to the output files.
classes = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
           'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']

# Output lines contain absolute image paths rooted at the start-up directory.
wd = os.getcwd()

txt_path_train = './my_imgs&labels/2019/final_datas_wh/train.txt'
txt_path_val = './my_imgs&labels/2019/final_datas_wh/val.txt'
txt_path_test = './my_imgs&labels/2019/final_datas_wh/test.txt'


def _to_int(text):
    """Parse a numeric XML field, also accepting float notation such as "48.0"."""
    return int(float(text))


def convert_annotation(year, image_id, list_file, jpg_label):
    """Append the image size and all usable boxes of one annotation to *list_file*.

    Reads ``my_imgs&labels/<year>/labels/<jpg_label>/<image_id>.xml`` and writes
    `` <width> <height>`` followed by `` <class_id> <xmin> <ymin> <xmax> <ymax>``
    for every object. Nothing else (no index, path or newline) is written.

    Objects are skipped when their class name is not in ``classes`` or when
    they are flagged ``<difficult>1</difficult>``. A missing or empty
    ``<difficult>`` tag is treated as "not difficult".

    Args:
        year: Dataset year directory name, e.g. ``'2019'``.
        image_id: Annotation file name without the ``.xml`` extension.
        list_file: Writable text file object receiving the output.
        jpg_label: Label sub-directory name, e.g. ``'train_l'``.
    """
    xml_path = 'my_imgs&labels/%s/labels/%s/%s.xml' % (year, jpg_label, image_id)
    # Given a path, ET.parse opens and closes the file itself, so no handle leaks.
    root = ET.parse(xml_path).getroot()

    size = root.find('size')
    dims = (_to_int(size.find('width').text), _to_int(size.find('height').text))
    list_file.write(" " + " ".join(str(value) for value in dims))

    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in classes:
            continue
        difficult_text = (obj.findtext('difficult') or '').strip()
        if difficult_text and int(difficult_text) == 1:
            continue
        box = obj.find('bndbox')
        coords = [_to_int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        list_file.write(" " + str(classes.index(cls)) + " " + " ".join(str(value) for value in coords))


def _list_image_ids(image_dir):
    """Return the sorted base names (without ``.jpg``) of the JPEG files in *image_dir*."""
    suffix = '.jpg'
    # Sorted so that line indices are reproducible; os.listdir order is arbitrary.
    return sorted(name[:-len(suffix)] for name in os.listdir(image_dir) if name.endswith(suffix))


def _report_lines_without_boxes(txt_path):
    """Print the leading index of every line in *txt_path* that has fewer than 5 fields.

    A line holding only index, path, width and height has no bounding box;
    the original author suggests deleting such lines from the list file.
    """
    with open(txt_path, 'r') as fileread:
        for line in fileread:
            fields = line.strip().split(' ')
            if len(fields) < 5:
                print(fields[0])


def main():
    """Generate the id lists and final list files for every entry of ``sets``."""
    for year, image_set, name_list, jpg_label in sets:
        image_dir = 'my_imgs&labels/%s/imgs/%s' % (year, image_set)
        image_ids = _list_image_ids(image_dir)

        # Still written for users who rely on it; the ids are no longer re-read
        # from it, since whitespace splitting broke file names containing spaces.
        with open('%s/%s.txt' % (image_dir, name_list), 'w') as names_file:
            names_file.writelines(image_id + '\n' for image_id in image_ids)

        output_dir = 'my_imgs&labels/%s/final_datas_wh' % year
        os.makedirs(output_dir, exist_ok=True)
        with open('%s/%s.txt' % (output_dir, image_set), 'w') as list_file:
            for line_ind, image_id in enumerate(image_ids):
                list_file.write('%d %s/my_imgs&labels/%s/imgs/%s/%s.jpg'
                                % (line_ind, wd, year, image_set, image_id))
                convert_annotation(year, image_id, list_file, jpg_label)
                list_file.write('\n')

    # Post-processing aid for images without usable boxes.
    for txt_path in (txt_path_train, txt_path_val, txt_path_test):
        _report_lines_without_boxes(txt_path)


if __name__ == '__main__':
    main()