/* -*- C -*- * * Copyright (c) 1998 Jochen Wiedmann. All rights reserved. * This program is free software; you can redistribute it and/or * modify it under the same terms as Perl itself. * * **************************************************************************/ #include #include #include #include "ppport.h" #define CSV_XS_TYPE_PV 0 #define CSV_XS_TYPE_IV 1 #define CSV_XS_TYPE_NV 2 #define CSV_XS_SELF \ if (!self || !SvOK(self) || !SvROK(self) \ || SvTYPE(SvRV(self)) != SVt_PVHV) { \ croak("self is not a hash ref"); \ } \ hv = (HV*) SvRV(self); typedef struct { HV* self; unsigned char quoteChar; unsigned char escapeChar; unsigned char sepChar; int binary; int alwaysQuote; char buffer[1024]; STRLEN used; STRLEN size; char* bptr; int useIO; SV* tmp; char* types; STRLEN types_len; } csv_t; static void SetupCsv(csv_t* csv, HV* self) { SV** svp; STRLEN len; char* ptr; csv->quoteChar = '"'; if ((svp = hv_fetch(self, "quote_char", 10, 0)) && *svp) { if (!SvOK(*svp)) { csv->quoteChar = '\0'; } else { ptr = SvPV(*svp, len); csv->quoteChar = len ? *ptr : '\0'; } } csv->escapeChar = '"'; if ((svp = hv_fetch(self, "escape_char", 11, 0)) && *svp) { if (!SvOK(*svp)) { csv->escapeChar = '\0'; } else { ptr = SvPV(*svp, len); csv->escapeChar = len ? *ptr : '\0'; } } csv->sepChar = ','; if ((svp = hv_fetch(self, "sep_char", 8, 0)) && *svp && SvOK(*svp)) { ptr = SvPV(*svp, len); if (len) { csv->sepChar = *ptr; } } csv->types = NULL; if ((svp = hv_fetch(self, "_types", 6, 0)) && *svp && SvOK(*svp)) { STRLEN len; csv->types = SvPV(*svp, len); csv->types_len = len; } csv->binary = 0; if ((svp = hv_fetch(self, "binary", 6, 0)) && *svp) { csv->binary = SvTRUE(*svp); } csv->alwaysQuote = 0; if ((svp = hv_fetch(self, "always_quote", 12, 0)) && *svp) { csv->alwaysQuote = SvTRUE(*svp); } csv->self = self; csv->used = 0; } static int Print(csv_t* csv, SV* dst) { int result; if (csv->useIO) { SV* tmp = newSVpv(csv->buffer, csv->used); dSP; PUSHMARK(sp); EXTEND(sp, 2); PUSHs((dst)); PUSHs(tmp); PUTBACK; result = perl_call_method("print", G_SCALAR); SPAGAIN; if (result) { result = POPi; } PUTBACK; SvREFCNT_dec(tmp); } else { sv_catpvn(SvRV(dst), csv->buffer, csv->used); result = TRUE; } csv->used = 0; return result; } #define CSV_PUT(csv, dst, c) \ if ((csv)->used == sizeof((csv)->buffer)-1) { \ Print((csv), (dst)); \ } \ (csv)->buffer[(csv)->used++] = (c); static int Encode(csv_t* csv, SV* dst, AV* fields, SV* eol) { int i; for (i = 0; i <= av_len(fields); i++) { SV** svp; if (i > 0) { CSV_PUT(csv, dst, csv->sepChar); } if ((svp = av_fetch(fields, i, 0)) && *svp && SvOK(*svp)) { STRLEN len; char* ptr = SvPV(*svp, len); int quoteMe = csv->alwaysQuote; /* * Do we need quoting? We do quote, if the user requested * (alwaysQuote), if binary or blank characters are found * and if the string contains quote or escape characters. */ if (!quoteMe && (quoteMe = (!SvIOK(*svp) && !SvNOK(*svp) && csv->quoteChar))) { char* ptr2, *ptr3; STRLEN l; for (ptr2 = ptr, l = len; l; ++ptr2, --l) { unsigned char c = *ptr2; if (c <= 0x20 || (c >= 0x7f && c <= 0xa0) || (csv->quoteChar && c == csv->quoteChar) || (csv->sepChar && c == csv->sepChar) || (csv->escapeChar && c == csv->escapeChar)) { /* Binary character */ break; } } quoteMe = (l>0); } if (quoteMe) { CSV_PUT(csv, dst, csv->quoteChar); } while (len-- > 0) { char c = *ptr++; int e = 0; if (!csv->binary && (c != '\t' && (c < '\040' || c > '\176'))) { SvREFCNT_inc(*svp); if (!hv_store(csv->self, "_ERROR_INPUT", 12, *svp, 0)) { SvREFCNT_dec(*svp); } return FALSE; } if (csv->quoteChar && c == csv->quoteChar) { e = 1; } else if (csv->escapeChar && c == csv->escapeChar) { e = 1; } else if (c == '\0') { e = 1; c = '0'; } if (e && csv->escapeChar) { CSV_PUT(csv, dst, csv->escapeChar); } CSV_PUT(csv, dst, c); } if (quoteMe) { CSV_PUT(csv, dst, csv->quoteChar); } } } if (eol && SvOK(eol)) { STRLEN len; char* ptr = SvPV(eol, len); while (len--) { CSV_PUT(csv, dst, *ptr++); } } if (csv->used) { Print(csv, dst); } return TRUE; } static void DecodeError(csv_t* csv) { if(csv->tmp) { if (hv_store(csv->self, "_ERROR_INPUT", 12, csv->tmp, 0)) { SvREFCNT_inc(csv->tmp); } } } static int CsvGet(csv_t* csv, SV* src) { if (!csv->useIO) { return EOF; } { int result; dSP; PUSHMARK(sp); EXTEND(sp, 1); PUSHs(src); PUTBACK; result = perl_call_method("getline", G_SCALAR); SPAGAIN; if (result) { csv->tmp = POPs; } else { csv->tmp = NULL; } PUTBACK; } if (csv->tmp && SvOK(csv->tmp)) { csv->bptr = SvPV(csv->tmp, csv->size); csv->used = 0; if (csv->size) { return ((unsigned char) csv->bptr[csv->used++]); } } return EOF; } #define ERROR_INSIDE_QUOTES \ SvREFCNT_dec(insideQuotes); \ DecodeError(csv); \ return FALSE; #define ERROR_INSIDE_FIELD \ SvREFCNT_dec(insideField); \ DecodeError(csv); \ return FALSE; #define CSV_PUT_SV(sv, c) \ len = SvCUR((sv)); \ SvGROW((sv), len+2); \ *SvEND((sv)) = c; \ SvCUR_set((sv), len+1) #define CSV_GET \ ((c_ungetc != EOF) ? c_ungetc : \ ((csv->used < csv->size) ? \ ((unsigned char) csv->bptr[(csv)->used++]) : CsvGet(csv, src))) #define AV_PUSH(fields, sv) \ *SvEND(sv) = '\0'; \ av_push(fields, sv); static int Decode(csv_t* csv, SV* src, AV* fields) { int c; int c_ungetc = EOF; int waitingForField = 1; SV* insideQuotes = NULL; SV* insideField = NULL; STRLEN len; int seenSomething = FALSE; while ((c = CSV_GET) != EOF) { seenSomething = TRUE; restart: if (c == csv->sepChar) { if (waitingForField) { av_push(fields, newSVpv("", 0)); } else if (insideQuotes) { CSV_PUT_SV(insideQuotes, c); } else { AV_PUSH(fields, insideField); insideField = NULL; waitingForField = 1; } } else if (c == '\012') { if (waitingForField) { av_push(fields, newSVpv("", 0)); return TRUE; } else if (insideQuotes) { if (!csv->binary) { ERROR_INSIDE_QUOTES; } CSV_PUT_SV(insideQuotes, c); } else { AV_PUSH(fields, insideField); return TRUE; } } else if (c == '\015') { if (waitingForField) { int c2 = CSV_GET; if (c2 == EOF) { insideField = newSVpv("", 0); waitingForField = 0; goto restart; } else if (c2 == '\012') { c = '\012'; goto restart; } else { c_ungetc = c2; insideField = newSVpv("", 0); waitingForField = 0; goto restart; } } else if (insideQuotes) { if (!csv->binary) { ERROR_INSIDE_QUOTES; } CSV_PUT_SV(insideQuotes, c); } else { int c2 = CSV_GET; if (c2 == '\012') { AV_PUSH(fields, insideField); return TRUE; } else { ERROR_INSIDE_FIELD; } } } else if (c == csv->quoteChar) { if (waitingForField) { insideQuotes = newSVpv("", 0); waitingForField = 0; } else if (insideQuotes) { int c2; if (!csv->escapeChar || c != csv->escapeChar) { /* Field is terminated */ AV_PUSH(fields, insideQuotes); insideQuotes = NULL; waitingForField = 1; c2 = CSV_GET; if (c2 == csv->sepChar) { continue; } else if (c2 == EOF) { return TRUE; } else if (c2 == '\015') { int c3 = CSV_GET; if (c3 == '\012') { return TRUE; } DecodeError(csv); return FALSE; } else if (c2 == '\012') { return TRUE; } else { DecodeError(csv); return FALSE; } } c2 = CSV_GET; if (c2 == EOF) { AV_PUSH(fields, insideQuotes); return TRUE; } else if (c2 == csv->sepChar) { AV_PUSH(fields, insideQuotes); insideQuotes = NULL; waitingForField = 1; } else if (c2 == '0') { CSV_PUT_SV(insideQuotes, (int) '\0'); } else if (c2 == csv->quoteChar || c2 == csv->sepChar) { CSV_PUT_SV(insideQuotes, c2); } else if (c2 == '\012') { AV_PUSH(fields, insideQuotes); return TRUE; } else if (c2 == '\015') { int c3 = CSV_GET; if (c3 == '\012') { AV_PUSH(fields, insideQuotes); return TRUE; } ERROR_INSIDE_QUOTES; } else { ERROR_INSIDE_QUOTES; } } else if (csv->quoteChar && csv->quoteChar != csv->escapeChar) { if (!csv->binary && (c != '\011' && (c < '\040' || c > '\176'))) { ERROR_INSIDE_FIELD; } CSV_PUT_SV(insideField, c); } else { ERROR_INSIDE_FIELD; } } else if (csv->escapeChar && c == csv->escapeChar) { /* This means quoteChar != escapeChar */ if (waitingForField) { insideField = newSVpv("", 0); waitingForField = 0; } else if (insideQuotes) { int c2 = CSV_GET; if (c2 == EOF) { ERROR_INSIDE_QUOTES; } else if (c2 == '0') { CSV_PUT_SV(insideQuotes, (int) '\0'); } else if (c2 == csv->quoteChar || c2 == csv->sepChar || c2 == csv->escapeChar) { /* c2 == csv->escapeChar added 28-06-1999, * Pavel Kotala */ CSV_PUT_SV(insideQuotes, c2); } else { ERROR_INSIDE_QUOTES; } } else if (insideField) { int c2 = CSV_GET; if (c2 == EOF) { ERROR_INSIDE_FIELD; } else { CSV_PUT_SV(insideField, c2); } } else { ERROR_INSIDE_FIELD; } } else { if (waitingForField) { insideField = newSVpv("", 0); waitingForField = 0; goto restart; } else if (insideQuotes) { if (!csv->binary && (c != '\011' && (c < '\040' || c > '\176'))) { ERROR_INSIDE_QUOTES; } CSV_PUT_SV(insideQuotes, c); } else { if (!csv->binary && (c != '\011' && (c < '\040' || c > '\176'))) { ERROR_INSIDE_FIELD; } CSV_PUT_SV(insideField, c); } } } if (waitingForField) { if (seenSomething) { av_push(fields, newSVpv("", 0)); } } else if (insideQuotes) { ERROR_INSIDE_QUOTES; } else if (insideField) { AV_PUSH(fields, insideField); } return TRUE; } static int xsDecode(HV* hv, AV* av, SV* src, bool useIO) { csv_t csv; int result; SetupCsv(&csv, hv); if ((csv.useIO = useIO)) { csv.tmp = NULL; csv.size = 0; } else { STRLEN size; csv.tmp = src; csv.bptr = SvPV(src, size); csv.size = size; } result = Decode(&csv, src, av); if (result && csv.types) { I32 i, len = av_len(av); SV** svp; for (i = 0; i <= len && i <= csv.types_len; i++) { if ((svp = av_fetch(av, i, 0)) && *svp && SvOK(*svp)) { switch (csv.types[i]) { case CSV_XS_TYPE_IV: sv_setiv(*svp, SvIV(*svp)); break; case CSV_XS_TYPE_NV: sv_setnv(*svp, SvIV(*svp)); break; } } } } return result; } static int xsEncode(HV* hv, AV* av, SV* io, bool useIO, SV* eol) { csv_t csv; SetupCsv(&csv, hv); csv.useIO = useIO; return Encode(&csv, io, av, eol); } MODULE = Text::CSV_XS PACKAGE = Text::CSV_XS PROTOTYPES: ENABLE SV* Encode(self, dst, fields, useIO, eol) SV* self SV* dst SV* fields bool useIO SV* eol PROTOTYPE: $$$$ PPCODE: { HV* hv; AV* av; CSV_XS_SELF; if (!fields || !SvOK(fields) || !SvROK(fields) || SvTYPE(SvRV(fields)) != SVt_PVAV) { croak("fields is not an array ref"); } else { av = (AV*) SvRV(fields); } ST(0) = xsEncode(hv, av, dst, useIO, eol) ? &PL_sv_yes : &PL_sv_undef; XSRETURN(1); } SV* Decode(self, src, fields, useIO) SV* self SV* src SV* fields bool useIO PROTOTYPE: $$$$ PPCODE: { HV* hv; AV* av; int result; CSV_XS_SELF; if (!fields || !SvOK(fields) || !SvROK(fields) || SvTYPE(SvRV(fields)) != SVt_PVAV) { croak("fields is not an array ref"); } else { av = (AV*) SvRV(fields); } ST(0) = xsDecode(hv, av, src, useIO) ? &PL_sv_yes : &PL_sv_no; XSRETURN(1); } void print(self, io, fields) SV* self SV* io SV* fields PROTOTYPE: $$$ PPCODE: { HV* hv; AV* av; SV* eol; SV** svp; CSV_XS_SELF; if (!fields || !SvOK(fields) || !SvROK(fields) || SvTYPE(SvRV(fields)) != SVt_PVAV) { croak("Expected fields to be an array ref"); } av = (AV*) SvRV(fields); if ((svp = hv_fetch(hv, "eol", 3, FALSE))) { eol = *svp; } else { eol = &PL_sv_undef; } ST(0) = xsEncode(hv, av, io, 1, eol) ? &PL_sv_yes : &PL_sv_no; XSRETURN(1); } void getline(self, io) SV* self SV* io PROTOTYPE: $;$ PPCODE: { HV* hv; AV* av; SV* rv; CSV_XS_SELF; hv_delete(hv, "_ERROR_INPUT", 12, G_DISCARD); av = newAV(); ST(0) = xsDecode(hv, av, io, 1) ? sv_2mortal(newRV_noinc((SV*) av)) : &PL_sv_undef; XSRETURN(1); }