Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP types #33

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 68 additions & 1 deletion src/gsheets_copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,40 @@ namespace duckdb

vector<vector<string>> values;
values.push_back(headers);
// TODO: This is a bit of a hack to get the types to work.
// BOOLEAN
// TINYINT
// SMALLINT
// INTEGER
// BIGINT
// DATE
// TIME
// TIMESTAMP_SEC
// TIMESTAMP_MS
// TIMESTAMP
// TIMESTAMP_NS
// DECIMAL
// FLOAT
// DOUBLE
// CHAR
// VARCHAR
// BLOB
// INTERVAL
// UTINYINT
// USMALLINT
// UINTEGER
// UBIGINT
// TIMESTAMP_TZ
// TIME_TZ
// BIT
// STRING_LITERAL
// INTEGER_LITERAL
// VARINT
// UHUGEINT
// HUGEINT
// POINTER
// VALIDITY
// UUID

for (idx_t r = 0; r < input.size(); r++)
{
Expand All @@ -110,17 +144,50 @@ namespace duckdb
case LogicalTypeId::INTEGER:
row.push_back(to_string(FlatVector::GetData<int32_t>(col)[r]));
break;
case LogicalTypeId::TINYINT:
row.push_back(to_string(FlatVector::GetData<int8_t>(col)[r]));
break;
case LogicalTypeId::SMALLINT:
row.push_back(to_string(FlatVector::GetData<int16_t>(col)[r]));
break;
case LogicalTypeId::BIGINT:
row.push_back(to_string(FlatVector::GetData<int64_t>(col)[r]));
break;
case LogicalTypeId::UTINYINT:
row.push_back(to_string(FlatVector::GetData<uint8_t>(col)[r]));
break;
case LogicalTypeId::USMALLINT:
row.push_back(to_string(FlatVector::GetData<uint16_t>(col)[r]));
break;
case LogicalTypeId::UINTEGER:
row.push_back(to_string(FlatVector::GetData<uint32_t>(col)[r]));
break;
case LogicalTypeId::UBIGINT:
row.push_back(to_string(FlatVector::GetData<uint64_t>(col)[r]));
break;
case LogicalTypeId::DOUBLE:
row.push_back(to_string(FlatVector::GetData<double>(col)[r]));
break;
case LogicalTypeId::BOOLEAN:
row.push_back(FlatVector::GetData<bool>(col)[r] ? "TRUE" : "FALSE");
break;
case LogicalTypeId::TIMESTAMP:
row.push_back(Timestamp::ToString(FlatVector::GetData<timestamp_t>(col)[r]));
break;
case LogicalTypeId::TIMESTAMP_SEC:
row.push_back(Timestamp::ToString(FlatVector::GetData<timestamp_sec_t>(col)[r]));
break;
case LogicalTypeId::TIMESTAMP_MS:
row.push_back(Timestamp::ToString(FlatVector::GetData<timestamp_ms_t>(col)[r]));
break;
case LogicalTypeId::TIMESTAMP_NS:
row.push_back(Timestamp::ToString(FlatVector::GetData<timestamp_ns_t>(col)[r]));
break;
case LogicalTypeId::TIMESTAMP_TZ:
row.push_back(Timestamp::ToString(FlatVector::GetData<timestamp_tz_t>(col)[r]));
break;
default:
row.push_back("Type not implemented");
row.push_back("Type " + col.GetType().ToString() + " not implemented");
break;
}
}
Expand Down
18 changes: 16 additions & 2 deletions src/gsheets_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "duckdb/main/secret/secret_manager.hpp"
#include "gsheets_requests.hpp"
#include <json.hpp>
#include <algorithm>

namespace duckdb {

Expand Down Expand Up @@ -40,21 +41,32 @@ void ReadSheetFunction(ClientContext &context, TableFunctionInput &data_p, DataC
const string& value = first_data_row[col];
if (value == "true" || value == "false") {
column_types[col] = LogicalType::BOOLEAN;
} else if (value.find_first_not_of("0123456789") == string::npos) {
column_types[col] = LogicalType::INTEGER;
} else if (value.find_first_not_of("0123456789.+-eE") == string::npos) {
column_types[col] = LogicalType::DOUBLE;
}
}
}
}

for (idx_t i = start_index; i < sheet_data.values.size() && row_count < STANDARD_VECTOR_SIZE; i++) {
const auto& row = sheet_data.values[i];
for (idx_t col = 0; col < column_count; col++) {
if (col < row.size()) {
const string& value = row[col];
string value = row[col];

// Remove commas from the value for numeric types
if (column_types[col].id() != LogicalTypeId::VARCHAR) {
value.erase(std::remove(value.begin(), value.end(), ','), value.end());
}

switch (column_types[col].id()) {
case LogicalTypeId::BOOLEAN:
output.SetValue(col, row_count, Value::BOOLEAN(value == "true"));
break;
case LogicalTypeId::INTEGER:
output.SetValue(col, row_count, Value::INTEGER(std::stoi(value)));
break;
case LogicalTypeId::DOUBLE:
output.SetValue(col, row_count, Value::DOUBLE(std::stod(value)));
break;
Expand Down Expand Up @@ -147,6 +159,8 @@ unique_ptr<FunctionData> ReadSheetBind(ClientContext &context, TableFunctionBind
const string& value = first_data_row[i];
if (value == "true" || value == "false") {
return_types.push_back(LogicalType::BOOLEAN);
} else if (value.find_first_not_of("0123456789") == string::npos) {
return_types.push_back(LogicalType::INTEGER);
} else if (value.find_first_not_of("0123456789.+-eE") == string::npos) {
return_types.push_back(LogicalType::DOUBLE);
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/gsheets_requests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ namespace duckdb

if (method == HttpMethod::POST) {
path += ":append";
path += "?valueInputOption=RAW";
path += "?valueInputOption=USER_ENTERED";
}

return perform_https_request(host, path, token, method, body);
Expand Down
21 changes: 21 additions & 0 deletions test/sql/types.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# name: test/sql/types.test
# description: test types
# group: [gsheets]

require-env TOKEN

require gsheets

# Create a secret. NB: a real token must be substituted for ${TOKEN}; do not commit it!
statement ok
create secret test_secret (type gsheet, token '${TOKEN}');

# Read a Google Sheet with different types
query IIIIII
from 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=12288084#gid=12288084';
----
SELECT * FROM users; 1 0.0 TRUE 9/26/2016 0:00:00 #DIV/0!
🦆 456789876 -6152000.452 TRUE 9/26/2017 0:00:00 #NAME?
DROP TABLE users; 123 1.123 FALSE 9/26/2017 12:00:00 #N/A
SELECT * FROM tables WHERE name = 'DuckDB' -129812490 119812123.0 FALSE 9/26/0100 0:00:00 #DIV/0!
Hello DuckDB 56 0.0 FALSE 10/28/2024 9:17:23 #REF!
Loading