From 4252501dc506699be75b26fdda52fabd46aa1b5a Mon Sep 17 00:00:00 2001 From: Archie Wood <58074498+archiewood@users.noreply.github.com> Date: Tue, 29 Oct 2024 02:11:30 -0400 Subject: [PATCH 1/4] WIP types --- src/gsheets_read.cpp | 18 ++++++++++++++++-- src/gsheets_requests.cpp | 2 +- test/sql/types.test | 22 ++++++++++++++++++++++ 3 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 test/sql/types.test diff --git a/src/gsheets_read.cpp b/src/gsheets_read.cpp index 1002b9c..408a644 100644 --- a/src/gsheets_read.cpp +++ b/src/gsheets_read.cpp @@ -5,6 +5,7 @@ #include "duckdb/main/secret/secret_manager.hpp" #include "gsheets_requests.hpp" #include +#include namespace duckdb { @@ -40,9 +41,11 @@ void ReadSheetFunction(ClientContext &context, TableFunctionInput &data_p, DataC const string& value = first_data_row[col]; if (value == "true" || value == "false") { column_types[col] = LogicalType::BOOLEAN; + } else if (value.find_first_not_of("0123456789") == string::npos) { + column_types[col] = LogicalType::INTEGER; } else if (value.find_first_not_of("0123456789.+-eE") == string::npos) { column_types[col] = LogicalType::DOUBLE; - } + } } } @@ -50,11 +53,20 @@ void ReadSheetFunction(ClientContext &context, TableFunctionInput &data_p, DataC const auto& row = sheet_data.values[i]; for (idx_t col = 0; col < column_count; col++) { if (col < row.size()) { - const string& value = row[col]; + string value = row[col]; + + // Remove commas from the value for numeric types + if (column_types[col].id() != LogicalTypeId::VARCHAR) { + value.erase(std::remove(value.begin(), value.end(), ','), value.end()); + } + switch (column_types[col].id()) { case LogicalTypeId::BOOLEAN: output.SetValue(col, row_count, Value::BOOLEAN(value == "true")); break; + case LogicalTypeId::INTEGER: + output.SetValue(col, row_count, Value::INTEGER(std::stoi(value))); + break; case LogicalTypeId::DOUBLE: output.SetValue(col, row_count, Value::DOUBLE(std::stod(value))); break; @@ -147,6 +159,8 @@ unique_ptr ReadSheetBind(ClientContext &context, TableFunctionBind const string& value = first_data_row[i]; if (value == "true" || value == "false") { return_types.push_back(LogicalType::BOOLEAN); + } else if (value.find_first_not_of("0123456789") == string::npos) { + return_types.push_back(LogicalType::INTEGER); } else if (value.find_first_not_of("0123456789.+-eE") == string::npos) { return_types.push_back(LogicalType::DOUBLE); } else { diff --git a/src/gsheets_requests.cpp b/src/gsheets_requests.cpp index 2ac154a..e5e0ce1 100644 --- a/src/gsheets_requests.cpp +++ b/src/gsheets_requests.cpp @@ -98,7 +98,7 @@ namespace duckdb if (method == HttpMethod::POST) { path += ":append"; - path += "?valueInputOption=RAW"; + path += "?valueInputOption=USER_ENTERED"; } return perform_https_request(host, path, token, method, body); diff --git a/test/sql/types.test b/test/sql/types.test new file mode 100644 index 0000000..f1e1337 --- /dev/null +++ b/test/sql/types.test @@ -0,0 +1,22 @@ +# name: test/sql/types.test +# description: test types +# group: [gsheets] + +require-env TOKEN + +require gsheets + +# Create a secret NB must substitute a token, do not commit! +statement ok +create secret test_secret (type gsheet, token '${TOKEN}'); + +# Read a Google Sheet with different types +query IIIIII +from 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=12288084#gid=12288084'; +---- +Text Integers Decimal Precision Boolean Date/Time Error +SELECT * FROM users; 1 0.0 TRUE 9/26/2016 0:00:00 #DIV/0! +"🦆" 456789876 -6152000.452 TRUE 9/26/2017 0:00:00 #NAME? +DROP TABLE users; 123 1.123 FALSE 9/26/2017 12:00:00 #N/A +"SELECT * FROM tables WHERE name = 'DuckDB'" -129812490 119812123.0 FALSE 9/26/0100 0:00:00 #DIV/0! +Hello DuckDB 56 0.0 FALSE 10/28/2024 9:17:23 #REF! From 216ba12e934b82eef251434a89335707cfe70f42 Mon Sep 17 00:00:00 2001 From: Archie Wood <58074498+archiewood@users.noreply.github.com> Date: Tue, 29 Oct 2024 02:19:50 -0400 Subject: [PATCH 2/4] fix types result --- test/sql/types.test | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/sql/types.test b/test/sql/types.test index f1e1337..a94ed0c 100644 --- a/test/sql/types.test +++ b/test/sql/types.test @@ -14,9 +14,8 @@ create secret test_secret (type gsheet, token '${TOKEN}'); query IIIIII from 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=12288084#gid=12288084'; ---- -Text Integers Decimal Precision Boolean Date/Time Error SELECT * FROM users; 1 0.0 TRUE 9/26/2016 0:00:00 #DIV/0! -"🦆" 456789876 -6152000.452 TRUE 9/26/2017 0:00:00 #NAME? +🦆 456789876 -6152000.452 TRUE 9/26/2017 0:00:00 #NAME? DROP TABLE users; 123 1.123 FALSE 9/26/2017 12:00:00 #N/A -"SELECT * FROM tables WHERE name = 'DuckDB'" -129812490 119812123.0 FALSE 9/26/0100 0:00:00 #DIV/0! -Hello DuckDB 56 0.0 FALSE 10/28/2024 9:17:23 #REF! +SELECT * FROM tables WHERE name = 'DuckDB' -129812490 119812123.0 FALSE 9/26/0100 0:00:00 #DIV/0! +Hello DuckDB 56 0.0 FALSE 10/28/2024 9:17:23 #REF! \ No newline at end of file From b1b7497f16fc4aaaa97a1442533289a6b3c9edf5 Mon Sep 17 00:00:00 2001 From: Archie Wood <58074498+archiewood@users.noreply.github.com> Date: Tue, 29 Oct 2024 02:34:05 -0400 Subject: [PATCH 3/4] various integer types --- src/gsheets_copy.cpp | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/gsheets_copy.cpp b/src/gsheets_copy.cpp index 1eda39a..5a6ed7f 100644 --- a/src/gsheets_copy.cpp +++ b/src/gsheets_copy.cpp @@ -110,9 +110,27 @@ namespace duckdb case LogicalTypeId::INTEGER: row.push_back(to_string(FlatVector::GetData(col)[r])); break; + case LogicalTypeId::TINYINT: + row.push_back(to_string(FlatVector::GetData(col)[r])); + break; + case LogicalTypeId::SMALLINT: + row.push_back(to_string(FlatVector::GetData(col)[r])); + break; case LogicalTypeId::BIGINT: row.push_back(to_string(FlatVector::GetData(col)[r])); break; + case LogicalTypeId::UTINYINT: + row.push_back(to_string(FlatVector::GetData(col)[r])); + break; + case LogicalTypeId::USMALLINT: + row.push_back(to_string(FlatVector::GetData(col)[r])); + break; + case LogicalTypeId::UINTEGER: + row.push_back(to_string(FlatVector::GetData(col)[r])); + break; + case LogicalTypeId::UBIGINT: + row.push_back(to_string(FlatVector::GetData(col)[r])); + break; case LogicalTypeId::DOUBLE: row.push_back(to_string(FlatVector::GetData(col)[r])); break; @@ -120,7 +138,7 @@ namespace duckdb row.push_back(FlatVector::GetData(col)[r] ? "TRUE" : "FALSE"); break; default: - row.push_back("Type not implemented"); + row.push_back("Type " + col.GetType().ToString() + " not implemented"); break; } } From 7badf16d24734289ea0d03548688b05dbe4f09c6 Mon Sep 17 00:00:00 2001 From: Archie Wood <58074498+archiewood@users.noreply.github.com> Date: Tue, 29 Oct 2024 20:18:25 -0400 Subject: [PATCH 4/4] timestamps --- src/gsheets_copy.cpp | 49 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/gsheets_copy.cpp b/src/gsheets_copy.cpp index 5a6ed7f..edb77ec 100644 --- a/src/gsheets_copy.cpp +++ b/src/gsheets_copy.cpp @@ -96,6 +96,40 @@ namespace duckdb vector> values; values.push_back(headers); + // TODO: This is a bit of a hack to get the types to work. + // BOOLEAN + // TINYINT + // SMALLINT + // INTEGER + // BIGINT + // DATE + // TIME + // TIMESTAMP_SEC + // TIMESTAMP_MS + // TIMESTAMP + // TIMESTAMP_NS + // DECIMAL + // FLOAT + // DOUBLE + // CHAR + // VARCHAR + // BLOB + // INTERVAL + // UTINYINT + // USMALLINT + // UINTEGER + // UBIGINT + // TIMESTAMP_TZ + // TIME_TZ + // BIT + // STRING_LITERAL + // INTEGER_LITERAL + // VARINT + // UHUGEINT + // HUGEINT + // POINTER + // VALIDITY + // UUID for (idx_t r = 0; r < input.size(); r++) { @@ -137,6 +171,21 @@ namespace duckdb case LogicalTypeId::BOOLEAN: row.push_back(FlatVector::GetData(col)[r] ? "TRUE" : "FALSE"); break; + case LogicalTypeId::TIMESTAMP: + row.push_back(Timestamp::ToString(FlatVector::GetData(col)[r])); + break; + case LogicalTypeId::TIMESTAMP_SEC: + row.push_back(Timestamp::ToString(FlatVector::GetData(col)[r])); + break; + case LogicalTypeId::TIMESTAMP_MS: + row.push_back(Timestamp::ToString(FlatVector::GetData(col)[r])); + break; + case LogicalTypeId::TIMESTAMP_NS: + row.push_back(Timestamp::ToString(FlatVector::GetData(col)[r])); + break; + case LogicalTypeId::TIMESTAMP_TZ: + row.push_back(Timestamp::ToString(FlatVector::GetData(col)[r])); + break; default: row.push_back("Type " + col.GetType().ToString() + " not implemented"); break;