Skip to content

Commit 65f88c6

Browse files
authored
Fix conversion bug when string data comes from binlog (#1571)
* Fix conversion bug when string data comes from binlog

  Fixes #1568

  When using gh-ost to migrate a table from the latin1 to the utf8mb3 character set, the initial data copy works correctly, but rows containing special characters that are inserted during the migration and applied via binlog replication fail with "Incorrect string value" errors.

  The cause is that values read from the binlog arrive as a binary byte array ([]uint8) rather than as a string, so the character set conversion is not applied to them.

  This fix updates the character set conversion logic to apply to both string and []uint8 values when the column has a character set conversion.

  I added a new test that passes latin1 input to convertArg and confirmed that the reproduction from the linked issue is fixed with this change.

* Update whitespace formatting
1 parent 70b65bd commit 65f88c6

File tree

2 files changed

+27
-3
lines changed

2 files changed

+27
-3
lines changed

go/sql/types.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,22 @@ type Column struct {
5353
}
5454

5555
func (this *Column) convertArg(arg interface{}, isUniqueKeyColumn bool) interface{} {
56+
var arg2Bytes []byte
5657
if s, ok := arg.(string); ok {
57-
arg2Bytes := []byte(s)
58-
// convert to bytes if character string without charsetConversion.
58+
arg2Bytes = []byte(s)
59+
} else if b, ok := arg.([]uint8); ok {
60+
arg2Bytes = b
61+
} else {
62+
arg2Bytes = nil
63+
}
64+
65+
if arg2Bytes != nil {
5966
if this.Charset != "" && this.charsetConversion == nil {
6067
arg = arg2Bytes
6168
} else {
6269
if encoding, ok := charsetEncodingMap[this.Charset]; ok {
63-
arg, _ = encoding.NewDecoder().String(s)
70+
decodedBytes, _ := encoding.NewDecoder().Bytes(arg2Bytes)
71+
arg = string(decodedBytes)
6472
}
6573
}
6674

go/sql/types_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,19 @@ func TestBinaryToString(t *testing.T) {
4949

5050
require.Equal(t, "1b99", cv.StringColumn(0))
5151
}
52+
53+
func TestConvertArgCharsetDecoding(t *testing.T) {
54+
latin1Bytes := []uint8{0x47, 0x61, 0x72, 0xe7, 0x6f, 0x6e, 0x20, 0x21}
55+
56+
col := Column{
57+
Charset: "latin1",
58+
charsetConversion: &CharacterSetConversion{
59+
FromCharset: "latin1",
60+
ToCharset: "utf8mb4",
61+
},
62+
}
63+
64+
// Should decode []uint8
65+
str := col.convertArg(latin1Bytes, false)
66+
require.Equal(t, "Garçon !", str)
67+
}

0 commit comments

Comments
 (0)