Skip to content

Commit 11ed9c9

Browse files
committed
Fixed GC bug when escaping strings
1 parent a406d56 commit 11ed9c9

File tree

3 files changed

+101
-93
lines changed

3 files changed

+101
-93
lines changed

lib/postgresql.ml

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -518,17 +518,8 @@ end
518518

519519
(* Escaping *)
520520

521-
external unescape_bytea_stub : string -> string = "PQunescapeBytea_stub"
522-
external unescape_bytea_9x_stub : string -> string = "PQunescapeBytea9x_stub"
523-
524-
let unescape_bytea str =
525-
let str_len = String.length str in
526-
if str_len < 2 || str.[0] <> '\\' || str.[1] <> 'x' then
527-
unescape_bytea_stub str
528-
else
529-
(* This is the new Postgresql 9.0 hex format for encoding bytea. Older
530-
clients do not recognize this format, but servers send it by default. *)
531-
unescape_bytea_9x_stub str
521+
external unescape_bytea : string -> string = "PQunescapeBytea_stub"
522+
532523

533524
(* Query results *)
534525

lib/postgresql.mli

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ exception Oid of oid
173173

174174
(** {6 Utility functions}*)
175175

176-
val unescape_bytea : string -> string
176+
external unescape_bytea : string -> string = "PQunescapeBytea_stub"
177177
(** [unescape_bytea str] unescapes binary string [str]. This function
178178
supports the new hex format for encoding bytea strings (introduced
179179
in Postgresql 9.0) even if the local libpq library is from an

lib/postgresql_stubs.c

Lines changed: 98 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343

4444
#include <string.h>
4545
#include <ctype.h>
46+
#include <stdbool.h>
4647

4748
#include <caml/mlvalues.h>
4849
#include <caml/alloc.h>
@@ -142,9 +143,6 @@ CAMLprim value PQocaml_init(value __unused v_unit)
142143
return Val_unit;
143144
}
144145

145-
static inline value unescape_bytea_9x(const char *s);
146-
static inline value unescape_bytea(const char *s);
147-
148146

149147
/* Conversion functions */
150148

@@ -689,6 +687,83 @@ CAMLprim value PQgetvalue_stub(value v_res, value v_tup_num, value v_field_num)
689687
CAMLreturn(v_str);
690688
}
691689

690+
691+
/* Unescaping - auxiliary routines */
692+
693+
static inline bool is_bytea_hex_protocol(const char * str)
694+
{
695+
return (str[0] == '\\' && str[1] == 'x');
696+
}
697+
698+
static inline int is_hex_digit(char c)
699+
{
700+
return (
701+
(c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
702+
}
703+
704+
static inline void raise_invalid_hex_encoding()
705+
{
706+
caml_failwith("Postgresql: invalid hex encoding");
707+
}
708+
709+
static size_t bytea_hex_pairs(const char *str)
710+
{
711+
size_t n_hex_pairs = 0;
712+
713+
/* Length calculation and encoding verification */
714+
while (*str != '\0') {
715+
if (isspace(*str)) str++;
716+
else if (is_hex_digit(*str)) {
717+
str++;
718+
if (is_hex_digit(*str)) { str++; n_hex_pairs++; }
719+
else raise_invalid_hex_encoding();
720+
}
721+
else raise_invalid_hex_encoding();
722+
}
723+
724+
return n_hex_pairs;
725+
}
726+
727+
static value unescape_bytea(const char *str)
728+
{
729+
/* Old protocol */
730+
size_t res_len;
731+
char *buf = (char *) PQunescapeBytea((unsigned char*) str, &res_len);
732+
if (buf == NULL) caml_failwith("Postgresql: illegal bytea string");
733+
else {
734+
value v_res = caml_alloc_string(res_len);
735+
memcpy(String_val(v_res), buf, res_len);
736+
PQfreemem(buf);
737+
return v_res;
738+
}
739+
}
740+
741+
static inline int unhexdigit(char c)
742+
{
743+
if (c >= '0' && c <= '9') return (c - '0');
744+
else if (c >= 'a' && c <= 'f') return (c - 'a' + 10);
745+
else if (c >= 'A' && c <= 'F') return (c - 'A' + 10);
746+
else
747+
/* This should never happen at this point */
748+
caml_failwith("Postgresql: internal error in unhexdigit");
749+
}
750+
751+
static void decode_bytea_hex(const char *src, char *dst, size_t dst_len)
752+
{
753+
char *end = dst + dst_len;
754+
while (dst < end) {
755+
if (isspace(*src)) src++;
756+
else {
757+
*dst = (char) ((unhexdigit(*src) << 4) | unhexdigit(src[1]));
758+
src += 2;
759+
dst++;
760+
}
761+
}
762+
}
763+
764+
765+
/* */
766+
692767
CAMLprim value PQgetescval_stub(value v_res, value v_tup_num, value v_field_num)
693768
{
694769
CAMLparam1(v_res);
@@ -698,9 +773,15 @@ CAMLprim value PQgetescval_stub(value v_res, value v_tup_num, value v_field_num)
698773
size_t tup_num = Long_val(v_tup_num);
699774
char *str = PQgetvalue(res, tup_num, field_num);
700775
if (PQfformat(res, field_num) == 0) {
701-
if (str != NULL && str[0] == '\\' && str[1] == 'x')
702-
v_str = unescape_bytea_9x(str + 2);
703-
else v_str = unescape_bytea(str);
776+
if (str == NULL || strlen(str) < 2 || !is_bytea_hex_protocol(str))
777+
CAMLreturn(unescape_bytea(str));
778+
else {
779+
size_t n_hex_pairs;
780+
str += 2;
781+
n_hex_pairs = bytea_hex_pairs(str);
782+
v_str = caml_alloc_string(n_hex_pairs);
783+
decode_bytea_hex(str, String_val(v_str), n_hex_pairs);
784+
}
704785
} else {
705786
/* Assume binary format! */
706787
size_t len = PQgetlength(res, tup_num, field_num);
@@ -901,85 +982,21 @@ CAMLprim value PQescapeByteaConn_stub(
901982
return v_res;
902983
}
903984

904-
static inline value unescape_bytea(const char *s)
905-
{
906-
size_t len;
907-
value v_res;
908-
char *buf = (char *) PQunescapeBytea((unsigned char*) s, &len);
909-
if (buf == NULL) {
910-
caml_failwith("Postgresql.unescape_bytea: illegal bytea string");
911-
return Val_unit;
912-
}
913-
v_res = caml_alloc_string(len);
914-
memcpy(String_val(v_res), buf, len);
915-
PQfreemem(buf);
916-
return v_res;
917-
}
985+
/* Unescaping */
918986

919987
CAMLprim value PQunescapeBytea_stub(value v_from)
920988
{
921-
return unescape_bytea(String_val(v_from));
922-
}
923-
924-
static inline value raise_invalid_hex_encoding()
925-
{
926-
caml_failwith("Postgresql.unescape_bytea_9x: invalid hex encoding");
927-
return Val_unit;
928-
}
929-
930-
static inline int unhexdigit(char c)
931-
{
932-
if (c >= '0' && c <= '9') return (c - '0');
933-
else if (c >= 'a' && c <= 'f') return (c - 'a' + 10);
934-
else if (c >= 'A' && c <= 'F') return (c - 'A' + 10);
935-
else return raise_invalid_hex_encoding();
936-
}
937-
938-
static inline int is_hex_digit(char c)
939-
{
940-
return (
941-
(c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
942-
}
943-
944-
static inline value unescape_bytea_9x(const char *str)
945-
{
946-
value v_res;
947-
char *res;
948-
size_t n_hex_pairs = 0;
949-
const char *end = str;
950-
951-
/* Length calculation and encoding verification */
952-
while (*end != '\0') {
953-
if (isspace(*end)) end++;
954-
else if (is_hex_digit(*end)) {
955-
end++;
956-
if (is_hex_digit(*end)) { end++; n_hex_pairs++; }
957-
else return raise_invalid_hex_encoding();
958-
}
959-
else return raise_invalid_hex_encoding();
960-
}
961-
962-
/* Assumption: string has not changed since length calculation above! */
963-
v_res = caml_alloc_string(n_hex_pairs);
964-
res = String_val(v_res);
965-
while (str < end) {
966-
if (isspace(*str)) str++;
967-
else {
968-
*res = (char) ((unhexdigit(*str) << 4) | unhexdigit(str[1]));
969-
str += 2;
970-
res++;
971-
}
972-
}
973-
return v_res;
974-
}
975-
976-
CAMLprim value PQunescapeBytea9x_stub(value v_from)
977-
{
978-
const char *s = String_val(v_from);
979-
if (s != NULL && s[0] == '\\' && s[1] == 'x') return unescape_bytea_9x(s + 2);
989+
const char *from = String_val(v_from);
990+
size_t from_len = caml_string_length(v_from);
991+
if (from_len < 2 || !is_bytea_hex_protocol(from)) return unescape_bytea(from);
980992
else {
981-
caml_failwith("Postgresql.unescape_bytea_9x: hex prefix not found");
982-
return Val_unit;
993+
/* New protocol */
994+
size_t res_len = bytea_hex_pairs(from + 2);
995+
CAMLparam1(v_from);
996+
value v_res = caml_alloc_string(res_len);
997+
/* GC may have happened, have to use String_val on v_from again */
998+
decode_bytea_hex(String_val(v_from) + 2, String_val(v_res), res_len);
999+
CAMLreturn(v_res);
9831000
}
9841001
}
9851002

0 commit comments

Comments
 (0)