blob: 46430d4bb8f3262ab1ff0711b71e627e6d064c27 [file] [log] [blame]
rjw6c1fd8f2022-11-30 14:33:01 +08001package Text::CSV_PP;
2
3################################################################################
4#
5# Text::CSV_PP - Text::CSV_XS compatible pure-Perl module
6#
7################################################################################
8require 5.005;
9
10use strict;
11use Exporter ();
12use vars qw($VERSION @ISA @EXPORT_OK);
13use Carp;
14
15$VERSION = '1.95';
16@ISA = qw(Exporter);
17@EXPORT_OK = qw(csv);
18
19sub PV { 0 }
20sub IV { 1 }
21sub NV { 2 }
22
23sub IS_QUOTED () { 0x0001; }
24sub IS_BINARY () { 0x0002; }
25sub IS_ERROR () { 0x0004; }
26sub IS_MISSING () { 0x0010; }
27
28sub HOOK_ERROR () { 0x0001; }
29sub HOOK_AFTER_PARSE () { 0x0002; }
30sub HOOK_BEFORE_PRINT () { 0x0004; }
31
32sub useIO_EOF () { 0x0010; }
33
34my $ERRORS = {
35 # Generic errors
36 1000 => "INI - constructor failed",
37 1001 => "INI - sep_char is equal to quote_char or escape_char",
38 1002 => "INI - allow_whitespace with escape_char or quote_char SP or TAB",
39 1003 => "INI - \\r or \\n in main attr not allowed",
40 1004 => "INI - callbacks should be undef or a hashref",
41 1005 => "INI - EOL too long",
42 1006 => "INI - SEP too long",
43 1007 => "INI - QUOTE too long",
44 1008 => "INI - SEP undefined",
45
46 1010 => "INI - the header is empty",
47 1011 => "INI - the header contains more than one valid separator",
48 1012 => "INI - the header contains an empty field",
49 1013 => "INI - the header contains nun-unique fields",
50 1014 => "INI - header called on undefined stream",
51
52 # Syntax errors
53 1500 => "PRM - Invalid/unsupported arguments(s)",
54
55 # Parse errors
56 2010 => "ECR - QUO char inside quotes followed by CR not part of EOL",
57 2011 => "ECR - Characters after end of quoted field",
58 2012 => "EOF - End of data in parsing input stream",
59 2013 => "ESP - Specification error for fragments RFC7111",
60 2014 => "ENF - Inconsistent number of fields",
61
62 # EIQ - Error Inside Quotes
63 2021 => "EIQ - NL char inside quotes, binary off",
64 2022 => "EIQ - CR char inside quotes, binary off",
65 2023 => "EIQ - QUO character not allowed",
66 2024 => "EIQ - EOF cannot be escaped, not even inside quotes",
67 2025 => "EIQ - Loose unescaped escape",
68 2026 => "EIQ - Binary character inside quoted field, binary off",
69 2027 => "EIQ - Quoted field not terminated",
70
71 # EIF - Error Inside Field
72 2030 => "EIF - NL char inside unquoted verbatim, binary off",
73 2031 => "EIF - CR char is first char of field, not part of EOL",
74 2032 => "EIF - CR char inside unquoted, not part of EOL",
75 2034 => "EIF - Loose unescaped quote",
76 2035 => "EIF - Escaped EOF in unquoted field",
77 2036 => "EIF - ESC error",
78 2037 => "EIF - Binary character in unquoted field, binary off",
79
80 # Combine errors
81 2110 => "ECB - Binary character in Combine, binary off",
82
83 # IO errors
84 2200 => "EIO - print to IO failed. See errno",
85
86 # Hash-Ref errors
87 3001 => "EHR - Unsupported syntax for column_names ()",
88 3002 => "EHR - getline_hr () called before column_names ()",
89 3003 => "EHR - bind_columns () and column_names () fields count mismatch",
90 3004 => "EHR - bind_columns () only accepts refs to scalars",
91 3006 => "EHR - bind_columns () did not pass enough refs for parsed fields",
92 3007 => "EHR - bind_columns needs refs to writable scalars",
93 3008 => "EHR - unexpected error in bound fields",
94 3009 => "EHR - print_hr () called before column_names ()",
95 3010 => "EHR - print_hr () called with invalid arguments",
96
97 # PP Only Error
98 4002 => "EIQ - Unescaped ESC in quoted field",
99 4003 => "EIF - ESC CR",
100 4004 => "EUF - Field is terminated by the escape character (escape_char)",
101
102 0 => "",
103};
104
105BEGIN {
106 if ( $] < 5.006 ) {
107 $INC{'bytes.pm'} = 1 unless $INC{'bytes.pm'}; # dummy
108 no strict 'refs';
109 *{"utf8::is_utf8"} = sub { 0; };
110 *{"utf8::decode"} = sub { };
111 }
112 elsif ( $] < 5.008 ) {
113 no strict 'refs';
114 *{"utf8::is_utf8"} = sub { 0; };
115 *{"utf8::decode"} = sub { };
116 *{"utf8::encode"} = sub { };
117 }
118 elsif ( !defined &utf8::is_utf8 ) {
119 require Encode;
120 *utf8::is_utf8 = *Encode::is_utf8;
121 }
122
123 eval q| require Scalar::Util |;
124 if ( $@ ) {
125 eval q| require B |;
126 if ( $@ ) {
127 Carp::croak $@;
128 }
129 else {
130 my %tmap = qw(
131 B::NULL SCALAR
132 B::HV HASH
133 B::AV ARRAY
134 B::CV CODE
135 B::IO IO
136 B::GV GLOB
137 B::REGEXP REGEXP
138 );
139 *Scalar::Util::reftype = sub (\$) {
140 my $r = shift;
141 return undef unless length(ref($r));
142 my $t = ref(B::svref_2object($r));
143 return
144 exists $tmap{$t} ? $tmap{$t}
145 : length(ref($$r)) ? 'REF'
146 : 'SCALAR';
147 };
148 *Scalar::Util::readonly = sub (\$) {
149 my $b = B::svref_2object( $_[0] );
150 $b->FLAGS & 0x00800000; # SVf_READONLY?
151 };
152 }
153 }
154}
155
156################################################################################
157#
158# Common pure perl methods, taken almost directly from Text::CSV_XS.
159# (These should be moved into a common class eventually, so that
160# both XS and PP don't need to apply the same changes.)
161#
162################################################################################
163
164################################################################################
165# version
166################################################################################
167
168sub version {
169 return $VERSION;
170}
171
172################################################################################
173# new
174################################################################################
175
176my %def_attr = (
177 eol => '',
178 sep_char => ',',
179 quote_char => '"',
180 escape_char => '"',
181 binary => 0,
182 decode_utf8 => 1,
183 auto_diag => 0,
184 diag_verbose => 0,
185 strict => 0,
186 blank_is_undef => 0,
187 empty_is_undef => 0,
188 allow_whitespace => 0,
189 allow_loose_quotes => 0,
190 allow_loose_escapes => 0,
191 allow_unquoted_escape => 0,
192 always_quote => 0,
193 quote_empty => 0,
194 quote_space => 1,
195 quote_binary => 1,
196 escape_null => 1,
197 keep_meta_info => 0,
198 verbatim => 0,
199 types => undef,
200 callbacks => undef,
201
202 _EOF => 0,
203 _RECNO => 0,
204 _STATUS => undef,
205 _FIELDS => undef,
206 _FFLAGS => undef,
207 _STRING => undef,
208 _ERROR_INPUT => undef,
209 _COLUMN_NAMES => undef,
210 _BOUND_COLUMNS => undef,
211 _AHEAD => undef,
212);
213
214my %attr_alias = (
215 quote_always => "always_quote",
216 verbose_diag => "diag_verbose",
217 quote_null => "escape_null",
218 );
219
220my $last_new_error = Text::CSV_PP->SetDiag(0);
221my $last_error;
222
223# NOT a method: is also used before bless
224sub _unhealthy_whitespace {
225 my $self = shift;
226 $_[0] or return 0; # no checks needed without allow_whitespace
227
228 my $quo = $self->{quote};
229 defined $quo && length ($quo) or $quo = $self->{quote_char};
230 my $esc = $self->{escape_char};
231
232 (defined $quo && $quo =~ m/^[ \t]/) || (defined $esc && $esc =~ m/^[ \t]/) and
233 return 1002;
234
235 return 0;
236 }
237
238sub _check_sanity {
239 my $self = shift;
240
241 my $eol = $self->{eol};
242 my $sep = $self->{sep};
243 defined $sep && length ($sep) or $sep = $self->{sep_char};
244 my $quo = $self->{quote};
245 defined $quo && length ($quo) or $quo = $self->{quote_char};
246 my $esc = $self->{escape_char};
247
248# use DP;::diag ("SEP: '", DPeek ($sep),
249# "', QUO: '", DPeek ($quo),
250# "', ESC: '", DPeek ($esc),"'");
251
252 # sep_char should not be undefined
253 if (defined $sep && $sep ne "") {
254 length ($sep) > 16 and return 1006;
255 $sep =~ m/[\r\n]/ and return 1003;
256 }
257 else {
258 return 1008;
259 }
260 if (defined $quo) {
261 defined $sep && $quo eq $sep and return 1001;
262 length ($quo) > 16 and return 1007;
263 $quo =~ m/[\r\n]/ and return 1003;
264 }
265 if (defined $esc) {
266 defined $sep && $esc eq $sep and return 1001;
267 $esc =~ m/[\r\n]/ and return 1003;
268 }
269 if (defined $eol) {
270 length ($eol) > 16 and return 1005;
271 }
272
273 return _unhealthy_whitespace ($self, $self->{allow_whitespace});
274 }
275
276sub known_attributes {
277 sort grep !m/^_/ => "sep", "quote", keys %def_attr;
278 }
279
280sub new {
281 $last_new_error = Text::CSV_PP->SetDiag(1000,
282 'usage: my $csv = Text::CSV_PP->new ([{ option => value, ... }]);');
283
284 my $proto = shift;
285 my $class = ref ($proto) || $proto or return;
286 @_ > 0 && ref $_[0] ne "HASH" and return;
287 my $attr = shift || {};
288 my %attr = map {
289 my $k = m/^[a-zA-Z]\w+$/ ? lc $_ : $_;
290 exists $attr_alias{$k} and $k = $attr_alias{$k};
291 $k => $attr->{$_};
292 } keys %$attr;
293
294 my $sep_aliased = 0;
295 if (exists $attr{sep}) {
296 $attr{sep_char} = delete $attr{sep};
297 $sep_aliased = 1;
298 }
299 my $quote_aliased = 0;
300 if (exists $attr{quote}) {
301 $attr{quote_char} = delete $attr{quote};
302 $quote_aliased = 1;
303 }
304 for (keys %attr) {
305 if (m/^[a-z]/ && exists $def_attr{$_}) {
306 # uncoverable condition false
307 defined $attr{$_} && m/_char$/ and utf8::decode ($attr{$_});
308 next;
309 }
310# croak?
311 $last_new_error = Text::CSV_PP->SetDiag(1000, "INI - Unknown attribute '$_'");
312 $attr{auto_diag} and error_diag ();
313 return;
314 }
315 if ($sep_aliased and defined $attr{sep_char}) {
316 my @b = unpack "U0C*", $attr{sep_char};
317 if (@b > 1) {
318 $attr{sep} = $attr{sep_char};
319 $attr{sep_char} = "\0";
320 }
321 else {
322 $attr{sep} = undef;
323 }
324 }
325 if ($quote_aliased and defined $attr{quote_char}) {
326 my @b = unpack "U0C*", $attr{quote_char};
327 if (@b > 1) {
328 $attr{quote} = $attr{quote_char};
329 $attr{quote_char} = "\0";
330 }
331 else {
332 $attr{quote} = undef;
333 }
334 }
335
336 my $self = { %def_attr, %attr };
337 if (my $ec = _check_sanity ($self)) {
338 $last_new_error = Text::CSV_PP->SetDiag($ec);
339 $attr{auto_diag} and error_diag ();
340 return;
341 }
342 if (defined $self->{callbacks} && ref $self->{callbacks} ne "HASH") {
343 Carp::carp "The 'callbacks' attribute is set but is not a hash: ignored\n";
344 $self->{callbacks} = undef;
345 }
346
347 $last_new_error = Text::CSV_PP->SetDiag(0);
348 defined $\ && !exists $attr{eol} and $self->{eol} = $\;
349 bless $self, $class;
350 defined $self->{types} and $self->types ($self->{types});
351 $self;
352}
353
354# Keep in sync with XS!
355my %_cache_id = ( # Only expose what is accessed from within PM
356 quote_char => 0,
357 escape_char => 1,
358 sep_char => 2,
359 sep => 39, # 39 .. 55
360 binary => 3,
361 keep_meta_info => 4,
362 always_quote => 5,
363 allow_loose_quotes => 6,
364 allow_loose_escapes => 7,
365 allow_unquoted_escape => 8,
366 allow_whitespace => 9,
367 blank_is_undef => 10,
368 eol => 11,
369 quote => 15,
370 verbatim => 22,
371 empty_is_undef => 23,
372 auto_diag => 24,
373 diag_verbose => 33,
374 quote_space => 25,
375 quote_empty => 37,
376 quote_binary => 32,
377 escape_null => 31,
378 decode_utf8 => 35,
379 _has_hooks => 36,
380 _is_bound => 26, # 26 .. 29
381 strict => 58,
382 );
383
384my %_hidden_cache_id = qw(
385 sep_len 38
386 eol_len 12
387 eol_is_cr 13
388 quo_len 16
389 _has_ahead 30
390 has_error_input 34
391);
392
393my %_reverse_cache_id = (
394 map({$_cache_id{$_} => $_} keys %_cache_id),
395 map({$_hidden_cache_id{$_} => $_} keys %_hidden_cache_id),
396);
397
398# A `character'
399sub _set_attr_C {
400 my ($self, $name, $val, $ec) = @_;
401 defined $val or $val = 0;
402 utf8::decode ($val);
403 $self->{$name} = $val;
404 $ec = _check_sanity ($self) and
405 croak ($self->SetDiag ($ec));
406 $self->_cache_set ($_cache_id{$name}, $val);
407 }
408
409# A flag
410sub _set_attr_X {
411 my ($self, $name, $val) = @_;
412 defined $val or $val = 0;
413 $self->{$name} = $val;
414 $self->_cache_set ($_cache_id{$name}, 0 + $val);
415 }
416
417# A number
418sub _set_attr_N {
419 my ($self, $name, $val) = @_;
420 $self->{$name} = $val;
421 $self->_cache_set ($_cache_id{$name}, 0 + $val);
422 }
423
424# Accessor methods.
425# It is unwise to change them halfway through a single file!
426sub quote_char {
427 my $self = shift;
428 if (@_) {
429 $self->_set_attr_C ("quote_char", shift);
430 $self->_cache_set ($_cache_id{quote}, "");
431 }
432 $self->{quote_char};
433 }
434
435sub quote {
436 my $self = shift;
437 if (@_) {
438 my $quote = shift;
439 defined $quote or $quote = "";
440 utf8::decode ($quote);
441 my @b = unpack "U0C*", $quote;
442 if (@b > 1) {
443 @b > 16 and croak ($self->SetDiag (1007));
444 $self->quote_char ("\0");
445 }
446 else {
447 $self->quote_char ($quote);
448 $quote = "";
449 }
450 $self->{quote} = $quote;
451
452 my $ec = _check_sanity ($self);
453 $ec and croak ($self->SetDiag ($ec));
454
455 $self->_cache_set ($_cache_id{quote}, $quote);
456 }
457 my $quote = $self->{quote};
458 defined $quote && length ($quote) ? $quote : $self->{quote_char};
459 }
460
461sub escape_char {
462 my $self = shift;
463 @_ and $self->_set_attr_C ("escape_char", shift);
464 $self->{escape_char};
465 }
466
467sub sep_char {
468 my $self = shift;
469 if (@_) {
470 $self->_set_attr_C ("sep_char", shift);
471 $self->_cache_set ($_cache_id{sep}, "");
472 }
473 $self->{sep_char};
474}
475
476sub sep {
477 my $self = shift;
478 if (@_) {
479 my $sep = shift;
480 defined $sep or $sep = "";
481 utf8::decode ($sep);
482 my @b = unpack "U0C*", $sep;
483 if (@b > 1) {
484 @b > 16 and croak ($self->SetDiag (1006));
485 $self->sep_char ("\0");
486 }
487 else {
488 $self->sep_char ($sep);
489 $sep = "";
490 }
491 $self->{sep} = $sep;
492
493 my $ec = _check_sanity ($self);
494 $ec and croak ($self->SetDiag ($ec));
495
496 $self->_cache_set ($_cache_id{sep}, $sep);
497 }
498 my $sep = $self->{sep};
499 defined $sep && length ($sep) ? $sep : $self->{sep_char};
500 }
501
502sub eol {
503 my $self = shift;
504 if (@_) {
505 my $eol = shift;
506 defined $eol or $eol = "";
507 length ($eol) > 16 and croak ($self->SetDiag (1005));
508 $self->{eol} = $eol;
509 $self->_cache_set ($_cache_id{eol}, $eol);
510 }
511 $self->{eol};
512 }
513
514sub always_quote {
515 my $self = shift;
516 @_ and $self->_set_attr_X ("always_quote", shift);
517 $self->{always_quote};
518 }
519
520sub quote_space {
521 my $self = shift;
522 @_ and $self->_set_attr_X ("quote_space", shift);
523 $self->{quote_space};
524 }
525
526sub quote_empty {
527 my $self = shift;
528 @_ and $self->_set_attr_X ("quote_empty", shift);
529 $self->{quote_empty};
530 }
531
532sub escape_null {
533 my $self = shift;
534 @_ and $self->_set_attr_X ("escape_null", shift);
535 $self->{escape_null};
536 }
537
538sub quote_null { goto &escape_null; }
539
540sub quote_binary {
541 my $self = shift;
542 @_ and $self->_set_attr_X ("quote_binary", shift);
543 $self->{quote_binary};
544 }
545
546sub binary {
547 my $self = shift;
548 @_ and $self->_set_attr_X ("binary", shift);
549 $self->{binary};
550 }
551
552sub strict {
553 my $self = shift;
554 @_ and $self->_set_attr_X ("strict", shift);
555 $self->{strict};
556 }
557
558sub decode_utf8 {
559 my $self = shift;
560 @_ and $self->_set_attr_X ("decode_utf8", shift);
561 $self->{decode_utf8};
562}
563
564sub keep_meta_info {
565 my $self = shift;
566 if (@_) {
567 my $v = shift;
568 !defined $v || $v eq "" and $v = 0;
569 $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
570 $self->_set_attr_X ("keep_meta_info", $v);
571 }
572 $self->{keep_meta_info};
573 }
574
575sub allow_loose_quotes {
576 my $self = shift;
577 @_ and $self->_set_attr_X ("allow_loose_quotes", shift);
578 $self->{allow_loose_quotes};
579 }
580
581sub allow_loose_escapes {
582 my $self = shift;
583 @_ and $self->_set_attr_X ("allow_loose_escapes", shift);
584 $self->{allow_loose_escapes};
585 }
586
587sub allow_whitespace {
588 my $self = shift;
589 if (@_) {
590 my $aw = shift;
591 _unhealthy_whitespace ($self, $aw) and
592 croak ($self->SetDiag (1002));
593 $self->_set_attr_X ("allow_whitespace", $aw);
594 }
595 $self->{allow_whitespace};
596 }
597
598sub allow_unquoted_escape {
599 my $self = shift;
600 @_ and $self->_set_attr_X ("allow_unquoted_escape", shift);
601 $self->{allow_unquoted_escape};
602 }
603
604sub blank_is_undef {
605 my $self = shift;
606 @_ and $self->_set_attr_X ("blank_is_undef", shift);
607 $self->{blank_is_undef};
608 }
609
610sub empty_is_undef {
611 my $self = shift;
612 @_ and $self->_set_attr_X ("empty_is_undef", shift);
613 $self->{empty_is_undef};
614 }
615
616sub verbatim {
617 my $self = shift;
618 @_ and $self->_set_attr_X ("verbatim", shift);
619 $self->{verbatim};
620 }
621
622sub auto_diag {
623 my $self = shift;
624 if (@_) {
625 my $v = shift;
626 !defined $v || $v eq "" and $v = 0;
627 $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
628 $self->_set_attr_X ("auto_diag", $v);
629 }
630 $self->{auto_diag};
631 }
632
633sub diag_verbose {
634 my $self = shift;
635 if (@_) {
636 my $v = shift;
637 !defined $v || $v eq "" and $v = 0;
638 $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
639 $self->_set_attr_X ("diag_verbose", $v);
640 }
641 $self->{diag_verbose};
642 }
643
644################################################################################
645# status
646################################################################################
647
648sub status {
649 $_[0]->{_STATUS};
650}
651
652sub eof {
653 $_[0]->{_EOF};
654}
655
656sub types {
657 my $self = shift;
658
659 if (@_) {
660 if (my $types = shift) {
661 $self->{'_types'} = join("", map{ chr($_) } @$types);
662 $self->{'types'} = $types;
663 }
664 else {
665 delete $self->{'types'};
666 delete $self->{'_types'};
667 undef;
668 }
669 }
670 else {
671 $self->{'types'};
672 }
673}
674
675sub callbacks {
676 my $self = shift;
677 if (@_) {
678 my $cb;
679 my $hf = 0x00;
680 if (defined $_[0]) {
681 grep { !defined } @_ and croak ($self->SetDiag (1004));
682 $cb = @_ == 1 && ref $_[0] eq "HASH" ? shift
683 : @_ % 2 == 0 ? { @_ }
684 : croak ($self->SetDiag (1004));
685 foreach my $cbk (keys %$cb) {
686 (!ref $cbk && $cbk =~ m/^[\w.]+$/) && ref $cb->{$cbk} eq "CODE" or
687 croak ($self->SetDiag (1004));
688 }
689 exists $cb->{error} and $hf |= 0x01;
690 exists $cb->{after_parse} and $hf |= 0x02;
691 exists $cb->{before_print} and $hf |= 0x04;
692 }
693 elsif (@_ > 1) {
694 # (undef, whatever)
695 croak ($self->SetDiag (1004));
696 }
697 $self->_set_attr_X ("_has_hooks", $hf);
698 $self->{callbacks} = $cb;
699 }
700 $self->{callbacks};
701 }
702
703################################################################################
704# error_diag
705################################################################################
706
707sub error_diag {
708 my $self = shift;
709 my @diag = (0 + $last_new_error, $last_new_error, 0, 0, 0);
710
711 if ($self && ref $self && # Not a class method or direct call
712 $self->isa (__PACKAGE__) && defined $self->{_ERROR_DIAG}) {
713 $diag[0] = 0 + $self->{_ERROR_DIAG};
714 $diag[1] = $self->{_ERROR_DIAG};
715 $diag[2] = 1 + $self->{_ERROR_POS} if exists $self->{_ERROR_POS};
716 $diag[3] = $self->{_RECNO};
717 $diag[4] = $self->{_ERROR_FLD} if exists $self->{_ERROR_FLD};
718
719 $diag[0] && $self && $self->{callbacks} && $self->{callbacks}{error} and
720 return $self->{callbacks}{error}->(@diag);
721 }
722
723 my $context = wantarray;
724
725 unless (defined $context) { # Void context, auto-diag
726 if ($diag[0] && $diag[0] != 2012) {
727 my $msg = "# CSV_PP ERROR: $diag[0] - $diag[1] \@ rec $diag[3] pos $diag[2]\n";
728 $diag[4] and $msg =~ s/$/ field $diag[4]/;
729
730 unless ($self && ref $self) { # auto_diag
731 # called without args in void context
732 warn $msg;
733 return;
734 }
735
736 if ($self->{diag_verbose} and $self->{_ERROR_INPUT}) {
737 $msg .= "$self->{_ERROR_INPUT}'\n";
738 $msg .= " " x ($diag[2] - 1);
739 $msg .= "^\n";
740 }
741
742 my $lvl = $self->{auto_diag};
743 if ($lvl < 2) {
744 my @c = caller (2);
745 if (@c >= 11 && $c[10] && ref $c[10] eq "HASH") {
746 my $hints = $c[10];
747 (exists $hints->{autodie} && $hints->{autodie} or
748 exists $hints->{"guard Fatal"} &&
749 !exists $hints->{"no Fatal"}) and
750 $lvl++;
751 # Future releases of autodie will probably set $^H{autodie}
752 # to "autodie @args", like "autodie :all" or "autodie open"
753 # so we can/should check for "open" or "new"
754 }
755 }
756 $lvl > 1 ? die $msg : warn $msg;
757 }
758 return;
759 }
760
761 return $context ? @diag : $diag[1];
762}
763
764sub record_number {
765 return shift->{_RECNO};
766}
767
768################################################################################
769# string
770################################################################################
771
772*string = \&_string;
773sub _string {
774 defined $_[0]->{_STRING} ? ${ $_[0]->{_STRING} } : undef;
775}
776
777################################################################################
778# fields
779################################################################################
780
781*fields = \&_fields;
782sub _fields {
783 ref($_[0]->{_FIELDS}) ? @{$_[0]->{_FIELDS}} : undef;
784}
785
786################################################################################
787# meta_info
788################################################################################
789
790sub meta_info {
791 $_[0]->{_FFLAGS} ? @{ $_[0]->{_FFLAGS} } : undef;
792}
793
794sub is_quoted {
795 return unless (defined $_[0]->{_FFLAGS});
796 return if( $_[1] =~ /\D/ or $_[1] < 0 or $_[1] > $#{ $_[0]->{_FFLAGS} } );
797
798 $_[0]->{_FFLAGS}->[$_[1]] & IS_QUOTED ? 1 : 0;
799}
800
801sub is_binary {
802 return unless (defined $_[0]->{_FFLAGS});
803 return if( $_[1] =~ /\D/ or $_[1] < 0 or $_[1] > $#{ $_[0]->{_FFLAGS} } );
804 $_[0]->{_FFLAGS}->[$_[1]] & IS_BINARY ? 1 : 0;
805}
806
807sub is_missing {
808 my ($self, $idx, $val) = @_;
809 return unless $self->{keep_meta_info}; # FIXME
810 $idx < 0 || !ref $self->{_FFLAGS} and return;
811 $idx >= @{$self->{_FFLAGS}} and return 1;
812 $self->{_FFLAGS}[$idx] & IS_MISSING ? 1 : 0;
813}
814
815################################################################################
816# combine
817################################################################################
818*combine = \&_combine;
819sub _combine {
820 my ($self, @fields) = @_;
821 my $str = "";
822 $self->{_FIELDS} = \@fields;
823 $self->{_STATUS} = (@fields > 0) && $self->__combine(\$str, \@fields, 0);
824 $self->{_STRING} = \$str;
825 $self->{_STATUS};
826 }
827
828################################################################################
829# parse
830################################################################################
831*parse = \&_parse;
832sub _parse {
833 my ($self, $str) = @_;
834
835 ref $str and croak ($self->SetDiag (1500));
836
837 my $fields = [];
838 my $fflags = [];
839 $self->{_STRING} = \$str;
840 if (defined $str && $self->__parse ($fields, $fflags, $str, 0)) {
841 $self->{_FIELDS} = $fields;
842 $self->{_FFLAGS} = $fflags;
843 $self->{_STATUS} = 1;
844 }
845 else {
846 $self->{_FIELDS} = undef;
847 $self->{_FFLAGS} = undef;
848 $self->{_STATUS} = 0;
849 }
850 $self->{_STATUS};
851 }
852
853sub column_names {
854 my ( $self, @columns ) = @_;
855
856 @columns or return defined $self->{_COLUMN_NAMES} ? @{$self->{_COLUMN_NAMES}} : ();
857 @columns == 1 && ! defined $columns[0] and return $self->{_COLUMN_NAMES} = undef;
858
859 if ( @columns == 1 && ref $columns[0] eq "ARRAY" ) {
860 @columns = @{ $columns[0] };
861 }
862 elsif ( join "", map { defined $_ ? ref $_ : "" } @columns ) {
863 croak $self->SetDiag( 3001 );
864 }
865
866 if ( $self->{_BOUND_COLUMNS} && @columns != @{$self->{_BOUND_COLUMNS}} ) {
867 croak $self->SetDiag( 3003 );
868 }
869
870 $self->{_COLUMN_NAMES} = [ map { defined $_ ? $_ : "\cAUNDEF\cA" } @columns ];
871 @{ $self->{_COLUMN_NAMES} };
872}
873
874sub header {
875 my ($self, $fh, @args) = @_;
876
877 $fh or croak ($self->SetDiag (1014));
878
879 my (@seps, %args);
880 for (@args) {
881 if (ref $_ eq "ARRAY") {
882 push @seps, @$_;
883 next;
884 }
885 if (ref $_ eq "HASH") {
886 %args = %$_;
887 next;
888 }
889 croak (q{usage: $csv->header ($fh, [ seps ], { options })});
890 }
891
892 defined $args{detect_bom} or $args{detect_bom} = 1;
893 defined $args{munge_column_names} or $args{munge_column_names} = "lc";
894 defined $args{set_column_names} or $args{set_column_names} = 1;
895
896 defined $args{sep_set} && ref $args{sep_set} eq "ARRAY" and
897 @seps = @{$args{sep_set}};
898
899 my $hdr = <$fh>;
900 defined $hdr && $hdr ne "" or croak ($self->SetDiag (1010));
901
902 my %sep;
903 @seps or @seps = (",", ";");
904 foreach my $sep (@seps) {
905 index ($hdr, $sep) >= 0 and $sep{$sep}++;
906 }
907
908 keys %sep >= 2 and croak ($self->SetDiag (1011));
909
910 $self->sep (keys %sep);
911 my $enc = "";
912 if ($args{detect_bom}) { # UTF-7 is not supported
913 if ($hdr =~ s/^\x00\x00\xfe\xff//) { $enc = "utf-32be" }
914 elsif ($hdr =~ s/^\xff\xfe\x00\x00//) { $enc = "utf-32le" }
915 elsif ($hdr =~ s/^\xfe\xff//) { $enc = "utf-16be" }
916 elsif ($hdr =~ s/^\xff\xfe//) { $enc = "utf-16le" }
917 elsif ($hdr =~ s/^\xef\xbb\xbf//) { $enc = "utf-8" }
918 elsif ($hdr =~ s/^\xf7\x64\x4c//) { $enc = "utf-1" }
919 elsif ($hdr =~ s/^\xdd\x73\x66\x73//) { $enc = "utf-ebcdic" }
920 elsif ($hdr =~ s/^\x0e\xfe\xff//) { $enc = "scsu" }
921 elsif ($hdr =~ s/^\xfb\xee\x28//) { $enc = "bocu-1" }
922 elsif ($hdr =~ s/^\x84\x31\x95\x33//) { $enc = "gb-18030" }
923
924 if ($enc) {
925 if ($enc =~ m/([13]).le$/) {
926 my $l = 0 + $1;
927 my $x;
928 $hdr .= "\0" x $l;
929 read $fh, $x, $l;
930 }
931 $enc = ":encoding($enc)";
932 binmode $fh, $enc;
933 }
934 }
935
936 $args{munge_column_names} eq "lc" and $hdr = lc $hdr;
937 $args{munge_column_names} eq "uc" and $hdr = uc $hdr;
938
939 my $hr = \$hdr; # Will cause croak on perl-5.6.x
940 open my $h, "<$enc", $hr;
941 my $row = $self->getline ($h) or croak;
942 close $h;
943
944 my @hdr = @$row or croak ($self->SetDiag (1010));
945 ref $args{munge_column_names} eq "CODE" and
946 @hdr = map { $args{munge_column_names}->($_) } @hdr;
947 my %hdr = map { $_ => 1 } @hdr;
948 exists $hdr{""} and croak ($self->SetDiag (1012));
949 keys %hdr == @hdr or croak ($self->SetDiag (1013));
950 $args{set_column_names} and $self->column_names (@hdr);
951 wantarray ? @hdr : $self;
952 }
953
954sub bind_columns {
955 my ( $self, @refs ) = @_;
956
957 @refs or return defined $self->{_BOUND_COLUMNS} ? @{$self->{_BOUND_COLUMNS}} : undef;
958 @refs == 1 && ! defined $refs[0] and return $self->{_BOUND_COLUMNS} = undef;
959
960 if ( $self->{_COLUMN_NAMES} && @refs != @{$self->{_COLUMN_NAMES}} ) {
961 croak $self->SetDiag( 3003 );
962 }
963
964 if ( grep { ref $_ ne "SCALAR" } @refs ) { # why don't use grep?
965 croak $self->SetDiag( 3004 );
966 }
967
968 $self->_set_attr_N("_is_bound", scalar @refs);
969 $self->{_BOUND_COLUMNS} = [ @refs ];
970 @refs;
971}
972
973sub getline_hr {
974 my ($self, @args, %hr) = @_;
975 $self->{_COLUMN_NAMES} or croak ($self->SetDiag (3002));
976 my $fr = $self->getline (@args) or return;
977 if (ref $self->{_FFLAGS}) { # missing
978 $self->{_FFLAGS}[$_] = IS_MISSING
979 for (@$fr ? $#{$fr} + 1 : 0) .. $#{$self->{_COLUMN_NAMES}};
980 @$fr == 1 && (!defined $fr->[0] || $fr->[0] eq "") and
981 $self->{_FFLAGS}[0] ||= IS_MISSING;
982 }
983 @hr{@{$self->{_COLUMN_NAMES}}} = @$fr;
984 \%hr;
985}
986
987sub getline_hr_all {
988 my ( $self, $io, @args ) = @_;
989 my %hr;
990
991 unless ( $self->{_COLUMN_NAMES} ) {
992 croak $self->SetDiag( 3002 );
993 }
994
995 my @cn = @{$self->{_COLUMN_NAMES}};
996
997 return [ map { my %h; @h{ @cn } = @$_; \%h } @{ $self->getline_all( $io, @args ) } ];
998}
999
1000sub say {
1001 my ($self, $io, @f) = @_;
1002 my $eol = $self->eol;
1003 defined $eol && $eol ne "" or $self->eol ($\ || $/);
1004 my $state = $self->print ($io, @f);
1005 $self->eol ($eol);
1006 return $state;
1007 }
1008
1009sub print_hr {
1010 my ($self, $io, $hr) = @_;
1011 $self->{_COLUMN_NAMES} or croak($self->SetDiag(3009));
1012 ref $hr eq "HASH" or croak($self->SetDiag(3010));
1013 $self->print ($io, [ map { $hr->{$_} } $self->column_names ]);
1014}
1015
1016sub fragment {
1017 my ($self, $io, $spec) = @_;
1018
1019 my $qd = qr{\s* [0-9]+ \s* }x; # digit
1020 my $qs = qr{\s* (?: [0-9]+ | \* ) \s*}x; # digit or star
1021 my $qr = qr{$qd (?: - $qs )?}x; # range
1022 my $qc = qr{$qr (?: ; $qr )*}x; # list
1023 defined $spec && $spec =~ m{^ \s*
1024 \x23 ? \s* # optional leading #
1025 ( row | col | cell ) \s* =
1026 ( $qc # for row and col
1027 | $qd , $qd (?: - $qs , $qs)? # for cell (ranges)
1028 (?: ; $qd , $qd (?: - $qs , $qs)? )* # and cell (range) lists
1029 ) \s* $}xi or croak ($self->SetDiag (2013));
1030 my ($type, $range) = (lc $1, $2);
1031
1032 my @h = $self->column_names ();
1033
1034 my @c;
1035 if ($type eq "cell") {
1036 my @spec;
1037 my $min_row;
1038 my $max_row = 0;
1039 for (split m/\s*;\s*/ => $range) {
1040 my ($tlr, $tlc, $brr, $brc) = (m{
1041 ^ \s* ([0-9]+ ) \s* , \s* ([0-9]+ ) \s*
1042 (?: - \s* ([0-9]+ | \*) \s* , \s* ([0-9]+ | \*) \s* )?
1043 $}x) or croak ($self->SetDiag (2013));
1044 defined $brr or ($brr, $brc) = ($tlr, $tlc);
1045 $tlr == 0 || $tlc == 0 ||
1046 ($brr ne "*" && ($brr == 0 || $brr < $tlr)) ||
1047 ($brc ne "*" && ($brc == 0 || $brc < $tlc))
1048 and croak ($self->SetDiag (2013));
1049 $tlc--;
1050 $brc-- unless $brc eq "*";
1051 defined $min_row or $min_row = $tlr;
1052 $tlr < $min_row and $min_row = $tlr;
1053 $brr eq "*" || $brr > $max_row and
1054 $max_row = $brr;
1055 push @spec, [ $tlr, $tlc, $brr, $brc ];
1056 }
1057 my $r = 0;
1058 while (my $row = $self->getline ($io)) {
1059 ++$r < $min_row and next;
1060 my %row;
1061 my $lc;
1062 foreach my $s (@spec) {
1063 my ($tlr, $tlc, $brr, $brc) = @$s;
1064 $r < $tlr || ($brr ne "*" && $r > $brr) and next;
1065 !defined $lc || $tlc < $lc and $lc = $tlc;
1066 my $rr = $brc eq "*" ? $#$row : $brc;
1067 $row{$_} = $row->[$_] for $tlc .. $rr;
1068 }
1069 push @c, [ @row{sort { $a <=> $b } keys %row } ];
1070 if (@h) {
1071 my %h; @h{@h} = @{$c[-1]};
1072 $c[-1] = \%h;
1073 }
1074 $max_row ne "*" && $r == $max_row and last;
1075 }
1076 return \@c;
1077 }
1078
1079 # row or col
1080 my @r;
1081 my $eod = 0;
1082 for (split m/\s*;\s*/ => $range) {
1083 my ($from, $to) = m/^\s* ([0-9]+) (?: \s* - \s* ([0-9]+ | \* ))? \s* $/x
1084 or croak ($self->SetDiag (2013));
1085 $to ||= $from;
1086 $to eq "*" and ($to, $eod) = ($from, 1);
1087 $from <= 0 || $to <= 0 || $to < $from and croak ($self->SetDiag (2013));
1088 $r[$_] = 1 for $from .. $to;
1089 }
1090
1091 my $r = 0;
1092 $type eq "col" and shift @r;
1093 $_ ||= 0 for @r;
1094 while (my $row = $self->getline ($io)) {
1095 $r++;
1096 if ($type eq "row") {
1097 if (($r > $#r && $eod) || $r[$r]) {
1098 push @c, $row;
1099 if (@h) {
1100 my %h; @h{@h} = @{$c[-1]};
1101 $c[-1] = \%h;
1102 }
1103 }
1104 next;
1105 }
1106 push @c, [ map { ($_ > $#r && $eod) || $r[$_] ? $row->[$_] : () } 0..$#$row ];
1107 if (@h) {
1108 my %h; @h{@h} = @{$c[-1]};
1109 $c[-1] = \%h;
1110 }
1111 }
1112
1113 return \@c;
1114 }
1115
1116my $csv_usage = q{usage: my $aoa = csv (in => $file);};
1117
1118sub _csv_attr {
1119 my %attr = (@_ == 1 && ref $_[0] eq "HASH" ? %{$_[0]} : @_) or croak;
1120
1121 $attr{binary} = 1;
1122
1123 my $enc = delete $attr{enc} || delete $attr{encoding} || "";
1124 $enc eq "auto" and ($attr{detect_bom}, $enc) = (1, "");
1125 $enc =~ m/^[-\w.]+$/ and $enc = ":encoding($enc)";
1126
1127 my $fh;
1128 my $cls = 0; # If I open a file, I have to close it
1129 my $in = delete $attr{in} || delete $attr{file} or croak $csv_usage;
1130 my $out = delete $attr{out} || delete $attr{file};
1131
1132 ref $in eq "CODE" || ref $in eq "ARRAY" and $out ||= \*STDOUT;
1133
1134 if ($out) {
1135 $in or croak $csv_usage; # No out without in
1136 if ((ref $out and ref $out ne "SCALAR") or "GLOB" eq ref \$out) {
1137 $fh = $out;
1138 }
1139 else {
1140 open $fh, ">", $out or croak "$out: $!";
1141 $cls = 1;
1142 }
1143 $enc and binmode $fh, $enc;
1144 unless (defined $attr{eol}) {
1145 my @layers = eval { PerlIO::get_layers ($fh) };
1146 $attr{eol} = (grep m/crlf/ => @layers) ? "\n" : "\r\n";
1147 }
1148 }
1149
1150 if ( ref $in eq "CODE" or ref $in eq "ARRAY") {
1151 # All done
1152 }
1153 elsif (ref $in eq "SCALAR") {
1154 # Strings with code points over 0xFF may not be mapped into in-memory file handles
1155 # "<$enc" does not change that :(
1156 open $fh, "<", $in or croak "Cannot open from SCALAR using PerlIO";
1157 $cls = 1;
1158 }
1159 elsif (ref $in or "GLOB" eq ref \$in) {
1160 if (!ref $in && $] < 5.008005) {
1161 $fh = \*$in; # uncoverable statement ancient perl version required
1162 }
1163 else {
1164 $fh = $in;
1165 }
1166 }
1167 else {
1168 open $fh, "<$enc", $in or croak "$in: $!";
1169 $cls = 1;
1170 }
1171 $fh or croak qq{No valid source passed. "in" is required};
1172
1173 my $hdrs = delete $attr{headers};
1174 my $frag = delete $attr{fragment};
1175 my $key = delete $attr{key};
1176
1177 my $cbai = delete $attr{callbacks}{after_in} ||
1178 delete $attr{after_in} ||
1179 delete $attr{callbacks}{after_parse} ||
1180 delete $attr{after_parse};
1181 my $cbbo = delete $attr{callbacks}{before_out} ||
1182 delete $attr{before_out};
1183 my $cboi = delete $attr{callbacks}{on_in} ||
1184 delete $attr{on_in};
1185
1186 my $hd_s = delete $attr{sep_set} ||
1187 delete $attr{seps};
1188 my $hd_b = delete $attr{detect_bom} ||
1189 delete $attr{bom};
1190 my $hd_m = delete $attr{munge} ||
1191 delete $attr{munge_column_names};
1192 my $hd_c = delete $attr{set_column_names};
1193
1194 for ([ quo => "quote" ],
1195 [ esc => "escape" ],
1196 [ escape => "escape_char" ],
1197 ) {
1198 my ($f, $t) = @$_;
1199 exists $attr{$f} and !exists $attr{$t} and $attr{$t} = delete $attr{$f};
1200 }
1201
1202 my $fltr = delete $attr{filter};
1203 my %fltr = (
1204 not_blank => sub { @{$_[1]} > 1 or defined $_[1][0] && $_[1][0] ne "" },
1205 not_empty => sub { grep { defined && $_ ne "" } @{$_[1]} },
1206 filled => sub { grep { defined && m/\S/ } @{$_[1]} },
1207 );
1208 defined $fltr && !ref $fltr && exists $fltr{$fltr} and
1209 $fltr = { 0 => $fltr{$fltr} };
1210 ref $fltr eq "HASH" or $fltr = undef;
1211
1212 defined $attr{auto_diag} or $attr{auto_diag} = 1;
1213 defined $attr{escape_null} or $attr{escape_null} = 0;
1214 my $csv = delete $attr{csv} || Text::CSV_PP->new (\%attr)
1215 or croak $last_new_error;
1216
1217 return {
1218 csv => $csv,
1219 attr => { %attr },
1220 fh => $fh,
1221 cls => $cls,
1222 in => $in,
1223 out => $out,
1224 enc => $enc,
1225 hdrs => $hdrs,
1226 key => $key,
1227 frag => $frag,
1228 fltr => $fltr,
1229 cbai => $cbai,
1230 cbbo => $cbbo,
1231 cboi => $cboi,
1232 hd_s => $hd_s,
1233 hd_b => $hd_b,
1234 hd_m => $hd_m,
1235 hd_c => $hd_c,
1236 };
1237 }
1238
1239sub csv {
1240 @_ && (ref $_[0] eq __PACKAGE__ or ref $_[0] eq 'Text::CSV') and splice @_, 0, 0, "csv";
1241 @_ or croak $csv_usage;
1242
1243 my $c = _csv_attr (@_);
1244
1245 my ($csv, $in, $fh, $hdrs) = @{$c}{"csv", "in", "fh", "hdrs"};
1246 my %hdr;
1247 if (ref $hdrs eq "HASH") {
1248 %hdr = %$hdrs;
1249 $hdrs = "auto";
1250 }
1251
1252 if ($c->{out}) {
1253 if (ref $in eq "CODE") {
1254 my $hdr = 1;
1255 while (my $row = $in->($csv)) {
1256 if (ref $row eq "ARRAY") {
1257 $csv->print ($fh, $row);
1258 next;
1259 }
1260 if (ref $row eq "HASH") {
1261 if ($hdr) {
1262 $hdrs ||= [ map { $hdr{$_} || $_ } keys %$row ];
1263 $csv->print ($fh, $hdrs);
1264 $hdr = 0;
1265 }
1266 $csv->print ($fh, [ @{$row}{@$hdrs} ]);
1267 }
1268 }
1269 }
1270 elsif (ref $in->[0] eq "ARRAY") { # aoa
1271 ref $hdrs and $csv->print ($fh, $hdrs);
1272 for (@{$in}) {
1273 $c->{cboi} and $c->{cboi}->($csv, $_);
1274 $c->{cbbo} and $c->{cbbo}->($csv, $_);
1275 $csv->print ($fh, $_);
1276 }
1277 }
1278 else { # aoh
1279 my @hdrs = ref $hdrs ? @{$hdrs} : keys %{$in->[0]};
1280 defined $hdrs or $hdrs = "auto";
1281 ref $hdrs || $hdrs eq "auto" and
1282 $csv->print ($fh, [ map { $hdr{$_} || $_ } @hdrs ]);
1283 for (@{$in}) {
1284 local %_;
1285 *_ = $_;
1286 $c->{cboi} and $c->{cboi}->($csv, $_);
1287 $c->{cbbo} and $c->{cbbo}->($csv, $_);
1288 $csv->print ($fh, [ @{$_}{@hdrs} ]);
1289 }
1290 }
1291
1292 $c->{cls} and close $fh;
1293 return 1;
1294 }
1295
1296 if (defined $c->{hd_s} || defined $c->{hd_b} || defined $c->{hd_m} || defined $c->{hd_c}) {
1297 my %harg;
1298 defined $c->{hd_s} and $harg{set_set} = $c->{hd_s};
1299 defined $c->{hd_d} and $harg{detect_bom} = $c->{hd_b};
1300 defined $c->{hd_m} and $harg{munge_column_names} = $hdrs ? "none" : $c->{hd_m};
1301 defined $c->{hd_c} and $harg{set_column_names} = $hdrs ? 0 : $c->{hd_c};
1302 $csv->header ($fh, \%harg);
1303 my @hdr = $csv->column_names;
1304 @hdr and $hdrs ||= \@hdr;
1305 }
1306
1307 my $key = $c->{key} and $hdrs ||= "auto";
1308 $c->{fltr} && grep m/\D/ => keys %{$c->{fltr}} and $hdrs ||= "auto";
1309 if (defined $hdrs) {
1310 if (!ref $hdrs) {
1311 if ($hdrs eq "skip") {
1312 $csv->getline ($fh); # discard;
1313 }
1314 elsif ($hdrs eq "auto") {
1315 my $h = $csv->getline ($fh) or return;
1316 $hdrs = [ map { $hdr{$_} || $_ } @$h ];
1317 }
1318 elsif ($hdrs eq "lc") {
1319 my $h = $csv->getline ($fh) or return;
1320 $hdrs = [ map { lc ($hdr{$_} || $_) } @$h ];
1321 }
1322 elsif ($hdrs eq "uc") {
1323 my $h = $csv->getline ($fh) or return;
1324 $hdrs = [ map { uc ($hdr{$_} || $_) } @$h ];
1325 }
1326 }
1327 elsif (ref $hdrs eq "CODE") {
1328 my $h = $csv->getline ($fh) or return;
1329 my $cr = $hdrs;
1330 $hdrs = [ map { $cr->($hdr{$_} || $_) } @$h ];
1331 }
1332 }
1333
1334 if ($c->{fltr}) {
1335 my %f = %{$c->{fltr}};
1336 # convert headers to index
1337 my @hdr;
1338 if (ref $hdrs) {
1339 @hdr = @{$hdrs};
1340 for (0 .. $#hdr) {
1341 exists $f{$hdr[$_]} and $f{$_ + 1} = delete $f{$hdr[$_]};
1342 }
1343 }
1344 $csv->callbacks (after_parse => sub {
1345 my ($CSV, $ROW) = @_; # lexical sub-variables in caps
1346 foreach my $FLD (sort keys %f) {
1347 local $_ = $ROW->[$FLD - 1];
1348 local %_;
1349 @hdr and @_{@hdr} = @$ROW;
1350 $f{$FLD}->($CSV, $ROW) or return \"skip";
1351 $ROW->[$FLD - 1] = $_;
1352 }
1353 });
1354 }
1355
1356 my $frag = $c->{frag};
1357 my $ref = ref $hdrs
1358 ? # aoh
1359 do {
1360 $csv->column_names ($hdrs);
1361 $frag ? $csv->fragment ($fh, $frag) :
1362 $key ? { map { $_->{$key} => $_ } @{$csv->getline_hr_all ($fh)} }
1363 : $csv->getline_hr_all ($fh);
1364 }
1365 : # aoa
1366 $frag ? $csv->fragment ($fh, $frag)
1367 : $csv->getline_all ($fh);
1368 $ref or Text::CSV_PP->auto_diag;
1369 $c->{cls} and close $fh;
1370 if ($ref and $c->{cbai} || $c->{cboi}) {
1371 foreach my $r (@{$ref}) {
1372 local %_;
1373 ref $r eq "HASH" and *_ = $r;
1374 $c->{cbai} and $c->{cbai}->($csv, $r);
1375 $c->{cboi} and $c->{cboi}->($csv, $r);
1376 }
1377 }
1378
1379 defined wantarray or
1380 return csv (%{$c->{attr}}, in => $ref, headers => $hdrs, %{$c->{attr}});
1381
1382 return $ref;
1383 }
1384
1385# The end of the common pure perl part.
1386
1387################################################################################
1388#
1389# The following are methods implemented in XS in Text::CSV_XS or
1390# helper methods for Text::CSV_PP only
1391#
1392################################################################################
1393
1394sub _setup_ctx {
1395 my $self = shift;
1396
1397 $last_error = undef;
1398
1399 my $ctx;
1400 if ($self->{_CACHE}) {
1401 $ctx = $self->{_CACHE};
1402 } else {
1403 $ctx ||= {};
1404 # $ctx->{self} = $self;
1405 $ctx->{pself} = ref $self || $self;
1406
1407 $ctx->{sep} = ',';
1408 if (defined $self->{sep_char}) {
1409 $ctx->{sep} = $self->{sep_char};
1410 }
1411 if (defined $self->{sep} and $self->{sep} ne '') {
1412 use bytes;
1413 $ctx->{sep} = $self->{sep};
1414 my $sep_len = length($ctx->{sep});
1415 $ctx->{sep_len} = $sep_len if $sep_len > 1;
1416 }
1417
1418 $ctx->{quo} = '"';
1419 if (exists $self->{quote_char}) {
1420 my $quote_char = $self->{quote_char};
1421 if (defined $quote_char and length $quote_char) {
1422 $ctx->{quo} = $quote_char;
1423 } else {
1424 $ctx->{quo} = "\0";
1425 }
1426 }
1427 if (defined $self->{quote} and $self->{quote} ne '') {
1428 use bytes;
1429 $ctx->{quo} = $self->{quote};
1430 my $quote_len = length($ctx->{quo});
1431 $ctx->{quo_len} = $quote_len if $quote_len > 1;
1432 }
1433
1434 $ctx->{escape_char} = '"';
1435 if (exists $self->{escape_char}) {
1436 my $escape_char = $self->{escape_char};
1437 if (defined $escape_char and length $escape_char) {
1438 $ctx->{escape_char} = $escape_char;
1439 } else {
1440 $ctx->{escape_char} = "\0";
1441 }
1442 }
1443
1444 if (defined $self->{eol}) {
1445 my $eol = $self->{eol};
1446 my $eol_len = length($eol);
1447 $ctx->{eol} = $eol;
1448 $ctx->{eol_len} = $eol_len;
1449 if ($eol_len == 1 and $eol eq "\015") {
1450 $ctx->{eol_is_cr} = 1;
1451 }
1452 }
1453
1454 if (defined $self->{_types}) {
1455 $ctx->{types} = $self->{_types};
1456 $ctx->{types_len} = length($ctx->{types});
1457 }
1458
1459 if (defined $self->{_is_bound}) {
1460 $ctx->{is_bound} = $self->{_is_bound};
1461 }
1462
1463 if (defined $self->{callbacks}) {
1464 my $cb = $self->{callbacks};
1465 $ctx->{has_hooks} = 0;
1466 if (defined $cb->{after_parse} and ref $cb->{after_parse} eq 'CODE') {
1467 $ctx->{has_hooks} |= HOOK_AFTER_PARSE;
1468 }
1469 if (defined $cb->{before_print} and ref $cb->{before_print} eq 'CODE') {
1470 $ctx->{has_hooks} |= HOOK_BEFORE_PRINT;
1471 }
1472 }
1473
1474 for (qw/
1475 binary decode_utf8 always_quote strict quote_empty
1476 allow_loose_quotes allow_loose_escapes
1477 allow_unquoted_escape allow_whitespace blank_is_undef
1478 empty_is_undef verbatim auto_diag diag_verbose
1479 keep_meta_info
1480 /) {
1481 $ctx->{$_} = defined $self->{$_} ? $self->{$_} : 0;
1482 }
1483 for (qw/quote_space escape_null quote_binary/) {
1484 $ctx->{$_} = defined $self->{$_} ? $self->{$_} : 1;
1485 }
1486 # FIXME: readonly
1487 $self->{_CACHE} = $ctx;
1488 }
1489
1490 $ctx->{utf8} = 0;
1491 $ctx->{size} = 0;
1492 $ctx->{used} = 0;
1493
1494 if ($ctx->{is_bound}) {
1495 my $bound = $self->{_BOUND_COLUMNS};
1496 if ($bound and ref $bound eq 'ARRAY') {
1497 $ctx->{bound} = $bound;
1498 } else {
1499 $ctx->{is_bound} = 0;
1500 }
1501 }
1502
1503 $ctx->{eol_pos} = -1;
1504 $ctx->{eolx} = $ctx->{eol_len}
1505 ? $ctx->{verbatim} || $ctx->{eol_len} >= 2
1506 ? 1
1507 : $ctx->{eol} =~ /\A[\015|\012]/ ? 0 : 1
1508 : 0;
1509
1510 if ($ctx->{sep_len} and _is_valid_utf8($ctx->{sep})) {
1511 $ctx->{utf8} = 1;
1512 }
1513 if ($ctx->{quo_len} and _is_valid_utf8($ctx->{quo})) {
1514 $ctx->{utf8} = 1;
1515 }
1516
1517 $ctx;
1518}
1519
1520sub _cache_set {
1521 my ($self, $idx, $value) = @_;
1522 return unless exists $self->{_CACHE};
1523 my $cache = $self->{_CACHE};
1524
1525 my $key = $_reverse_cache_id{$idx};
1526 if (!defined $key) {
1527 warn (sprintf "Unknown cache index %d ignored\n", $idx);
1528 } elsif ($key eq 'sep_char') {
1529 $cache->{sep} = $value;
1530 $cache->{sep_len} = 0;
1531 }
1532 elsif ($key eq 'quote_char') {
1533 $cache->{quo} = $value;
1534 $cache->{quo_len} = 0;
1535 }
1536 elsif ($key eq '_has_hooks') {
1537 $cache->{has_hooks} = $value;
1538 }
1539 elsif ($key eq '_is_bound') {
1540 $cache->{is_bound} = $value;
1541 }
1542 elsif ($key eq 'sep') {
1543 use bytes;
1544 my $len = bytes::length($value);
1545 $cache->{sep} = $value if $len;
1546 $cache->{sep_len} = $len == 1 ? 0 : $len;
1547 }
1548 elsif ($key eq 'quote') {
1549 use bytes;
1550 my $len = bytes::length($value);
1551 $cache->{quo} = $value if $len;
1552 $cache->{quo_len} = $len == 1 ? 0 : $len;
1553 }
1554 elsif ($key eq 'eol') {
1555 $cache->{eol} = $value if length($value);
1556 $cache->{eol_is_cr} = $value eq "\015" ? 1 : 0;
1557 }
1558 else {
1559 $cache->{$key} = $value;
1560 }
1561 return 1;
1562}
1563
1564sub _cache_diag {
1565 my $self = shift;
1566 unless (exists $self->{_CACHE}) {
1567 warn ("CACHE: invalid\n");
1568 return;
1569 }
1570
1571 my $cache = $self->{_CACHE};
1572 warn ("CACHE:\n");
1573 $self->__cache_show_char(quote_char => $cache->{quo});
1574 $self->__cache_show_char(escape_char => $cache->{escape_char});
1575 $self->__cache_show_char(sep_char => $cache->{sep});
1576 for (qw/
1577 binary decode_utf8 allow_loose_escapes allow_loose_quotes
1578 allow_whitespace always_quote quote_empty quote_space
1579 escape_null quote_binary auto_diag diag_verbose strict
1580 has_error_input blank_is_undef empty_is_undef has_ahead
1581 keep_meta_info verbatim has_hooks eol_is_cr eol_len
1582 /) {
1583 $self->__cache_show_byte($_ => $cache->{$_});
1584 }
1585 $self->__cache_show_str(eol => $cache->{eol_len}, $cache->{eol});
1586 $self->__cache_show_byte(sep_len => $cache->{sep_len});
1587 if ($cache->{sep_len} and $cache->{sep_len} > 1) {
1588 $self->__cache_show_str(sep => $cache->{sep_len}, $cache->{sep});
1589 }
1590 $self->__cache_show_byte(quo_len => $cache->{quo_len});
1591 if ($cache->{quo_len} and $cache->{quo_len} > 1) {
1592 $self->__cache_show_str(quote => $cache->{quo_len}, $cache->{quo});
1593 }
1594}
1595
1596sub __cache_show_byte {
1597 my ($self, $key, $value) = @_;
1598 warn (sprintf " %-21s %02x:%3d\n", $key, defined $value ? ord($value) : 0, defined $value ? $value : 0);
1599}
1600
1601sub __cache_show_char {
1602 my ($self, $key, $value) = @_;
1603 my $v = $value;
1604 if (defined $value) {
1605 my @b = unpack "U0C*", $value;
1606 $v = pack "U*", $b[0];
1607 }
1608 warn (sprintf " %-21s %02x:%s\n", $key, defined $v ? ord($v) : 0, $self->__pretty_str($v, 1));
1609}
1610
1611sub __cache_show_str {
1612 my ($self, $key, $len, $value) = @_;
1613 warn (sprintf " %-21s %02d:%s\n", $key, $len, $self->__pretty_str($value, $len));
1614}
1615
1616sub __pretty_str { # FIXME
1617 my ($self, $str, $len) = @_;
1618 return '' unless defined $str;
1619 $str = substr($str, 0, $len);
1620 $str =~ s/"/\\"/g;
1621 $str =~ s/([^\x09\x20-\x7e])/sprintf '\\x{%x}', ord($1)/eg;
1622 qq{"$str"};
1623}
1624
1625sub _hook {
1626 my ($self, $name, $fields) = @_;
1627 return 0 unless $self->{callbacks};
1628
1629 my $cb = $self->{callbacks}{$name};
1630 return 0 unless $cb && ref $cb eq 'CODE';
1631
1632 my (@res) = $cb->($self, $fields);
1633 if (@res) {
1634 return 0 if ref $res[0] eq 'SCALAR' and ${$res[0]} eq "skip";
1635 }
1636 scalar @res;
1637}
1638
1639################################################################################
1640# methods for combine
1641################################################################################
1642
1643sub __combine {
1644 my ($self, $dst, $fields, $useIO) = @_;
1645
1646 my $ctx = $self->_setup_ctx;
1647
1648 my ($binary, $quot, $sep, $esc, $quote_space) = @{$ctx}{qw/binary quo sep escape_char quote_space/};
1649
1650 if(!defined $quot or $quot eq "\0"){ $quot = ''; }
1651
1652 my $re_esc;
1653 if ($quot ne '') {
1654 $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$quot\E|\Q$esc\E)/;
1655 } else {
1656 $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$esc\E)/;
1657 }
1658
1659 my $re_sp = $self->{_re_comb_sp}->{$sep}->{$quote_space} ||= ( $quote_space ? qr/[\s\Q$sep\E]/ : qr/[\Q$sep\E]/ );
1660
1661 my $bound = 0;
1662 my $n = @$fields - 1;
1663 if ($n < 0 and $ctx->{is_bound}) {
1664 $n = $ctx->{is_bound} - 1;
1665 $bound = 1;
1666 }
1667
1668 my $check_meta = ($ctx->{keep_meta_info} >= 10 and @{$self->{_FFLAGS} || []} >= $n) ? 1 : 0;
1669
1670 my $must_be_quoted;
1671 my @results;
1672 for(my $i = 0; $i <= $n; $i++) {
1673 my $v_ref;
1674 if ($bound) {
1675 $v_ref = $self->__bound_field($ctx, $i, 1);
1676 } else {
1677 if (@$fields > $i) {
1678 $v_ref = \($fields->[$i]);
1679 }
1680 }
1681 next unless $v_ref;
1682
1683 my $value = $$v_ref;
1684
1685 unless (defined $value) {
1686 push @results, '';
1687 next;
1688 }
1689 elsif ( !$binary ) {
1690 $binary = 1 if utf8::is_utf8 $value;
1691 }
1692
1693 if (!$binary and $value =~ /[^\x09\x20-\x7E]/) {
1694 # an argument contained an invalid character...
1695 $self->{_ERROR_INPUT} = $value;
1696 $self->SetDiag(2110);
1697 return 0;
1698 }
1699
1700 $must_be_quoted = 0;
1701 if ($value eq '') {
1702 $must_be_quoted++ if $ctx->{quote_empty} or ($check_meta && $self->is_quoted($i));
1703 }
1704 else {
1705 if($value =~ s/$re_esc/$esc$1/g and $quot ne ''){
1706 $must_be_quoted++;
1707 }
1708 if($value =~ /$re_sp/){
1709 $must_be_quoted++;
1710 }
1711
1712 if( $binary and $ctx->{escape_null} ){
1713 use bytes;
1714 $must_be_quoted++ if ( $value =~ s/\0/${esc}0/g || ($ctx->{quote_binary} && $value =~ /[\x00-\x1f\x7f-\xa0]/) );
1715 }
1716 }
1717
1718 if($ctx->{always_quote} or $must_be_quoted or ($check_meta && $self->is_quoted($i))){
1719 $value = $quot . $value . $quot;
1720 }
1721 push @results, $value;
1722 }
1723
1724 $$dst = join($sep, @results) . ( defined $ctx->{eol} ? $ctx->{eol} : '' );
1725
1726 return 1;
1727}
1728
1729sub print {
1730 my ($self, $io, $fields) = @_;
1731
1732 require IO::Handle;
1733
1734 if (!defined $fields) {
1735 $fields = [];
1736 } elsif(ref($fields) ne 'ARRAY'){
1737 Carp::croak("Expected fields to be an array ref");
1738 }
1739
1740 $self->_hook(before_print => $fields);
1741
1742 my $str = "";
1743 $self->__combine(\$str, $fields, 1) or return '';
1744
1745 local $\ = '';
1746
1747 $io->print( $str ) or $self->_set_error_diag(2200);
1748}
1749
1750################################################################################
1751# methods for parse
1752################################################################################
1753
1754
1755sub __parse { # cx_xsParse
1756 my ($self, $fields, $fflags, $src, $useIO) = @_;
1757
1758 my $ctx = $self->_setup_ctx;
1759 my $state = $self->___parse($ctx, $fields, $fflags, $src, $useIO);
1760 if ($state and ($ctx->{has_hooks} || 0) & HOOK_AFTER_PARSE) {
1761 $self->_hook(after_parse => $fields);
1762 }
1763 return $state || !$last_error;
1764}
1765
1766sub ___parse { # cx_c_xsParse
1767 my ($self, $ctx, $fields, $fflags, $src, $useIO) = @_;
1768
1769 local $/ = $ctx->{eol} if $ctx->{eolx} or $ctx->{eol_is_cr};
1770
1771 if ($ctx->{useIO} = $useIO) {
1772 require IO::Handle;
1773
1774 $ctx->{tmp} = undef;
1775 if ($ctx->{has_ahead} and defined $self->{_AHEAD}) {
1776 $ctx->{tmp} = $self->{_AHEAD};
1777 $ctx->{size} = length $ctx->{tmp};
1778 $ctx->{used} = 0;
1779 }
1780 } else {
1781 $ctx->{tmp} = $src;
1782 $ctx->{size} = length $src;
1783 $ctx->{used} = 0;
1784 $ctx->{utf8} = utf8::is_utf8($src);
1785 }
1786 if ($ctx->{has_error_input}) {
1787 $self->{_ERROR_INPUT} = undef;
1788 $ctx->{has_error_input} = 0;
1789 }
1790
1791 my $result = $self->____parse($ctx, $src, $fields, $fflags);
1792 $self->{_RECNO} = ++($ctx->{recno});
1793 $self->{_EOF} = '';
1794
1795 if ($ctx->{strict}) {
1796 $ctx->{strict_n} ||= $ctx->{fld_idx};
1797 if ($ctx->{strict_n} != $ctx->{fld_idx}) {
1798 $self->__parse_error($ctx, 2014, $ctx->{used});
1799 return;
1800 }
1801 }
1802
1803 if ($ctx->{useIO}) {
1804 if (defined $ctx->{tmp} and $ctx->{used} < $ctx->{size} and $ctx->{has_ahead}) {
1805 $self->{_AHEAD} = substr($ctx->{tmp}, $ctx->{used}, $ctx->{size} - $ctx->{used});
1806 } else {
1807 $ctx->{has_ahead} = 0;
1808 if ($ctx->{useIO} & useIO_EOF) {
1809 $self->{_EOF} = 1;
1810 }
1811 }
1812
1813 if ($fflags) {
1814 if ($ctx->{keep_meta_info}) {
1815 $self->{_FFLAGS} = $fflags;
1816 } else {
1817 undef $fflags;
1818 }
1819 }
1820 }
1821
1822 if ($result and $ctx->{types}) {
1823 my $len = @$fields;
1824 for(my $i = 0; $i <= $len && $i <= $ctx->{types_len}; $i++) {
1825 my $value = $fields->[$i];
1826 next unless defined $value;
1827 my $type = ord(substr($ctx->{types}, $i, 1));
1828 if ($type == IV) {
1829 $fields->[$i] = int($value);
1830 } elsif ($type == NV) {
1831 $fields->[$i] = $value + 0.0;
1832 }
1833 }
1834 }
1835
1836 $result;
1837}
1838
1839sub ____parse { # cx_Parse
1840 my ($self, $ctx, $src, $fields, $fflags) = @_;
1841
1842 my ($quot, $sep, $esc, $eol) = @{$ctx}{qw/quo sep escape_char eol/};
1843
1844 utf8::encode($sep) if !$ctx->{utf8} and $ctx->{sep_len};
1845 utf8::encode($quot) if !$ctx->{utf8} and $ctx->{quo_len};
1846 utf8::encode($eol) if !$ctx->{utf8} and $ctx->{eol_len};
1847
1848 my $seenSomething = 0;
1849 my $waitingForField = 1;
1850 my ($value, $v_ref);
1851 $ctx->{fld_idx} = my $fnum = 0;
1852 $ctx->{flag} = 0;
1853
1854 my $re_str = join '|', map({$_ eq "\0" ? '[\\0]' : quotemeta($_)} sort {length $b <=> length $a} grep {defined $_ and $_ ne ''} $sep, $quot, $esc, $eol), "\015", "\012", "\x09", " ";
1855 $ctx->{_re} = qr/$re_str/;
1856 my $re = qr/$re_str|[^\x09\x20-\x7E]|$/;
1857
1858LOOP:
1859 while($self->__get_from_src($ctx, $src)) {
1860 while($ctx->{tmp} =~ /\G(.*?)($re)/gs) {
1861 my ($hit, $c) = ($1, $2);
1862 $ctx->{used} = pos($ctx->{tmp});
1863 if (!$waitingForField and $c eq '' and $hit ne '' and $ctx->{useIO} and !($ctx->{useIO} & useIO_EOF)) {
1864 $self->{_AHEAD} = $hit;
1865 $ctx->{has_ahead} = 1;
1866 $ctx->{has_leftover} = 1;
1867 last;
1868 }
1869 last if $seenSomething and $hit eq '' and $c eq ''; # EOF
1870
1871 # new field
1872 if (!$v_ref) {
1873 if ($ctx->{is_bound}) {
1874 $v_ref = $self->__bound_field($ctx, $fnum++, 0);
1875 } else {
1876 $value = '';
1877 $v_ref = \$value;
1878 }
1879 return unless $v_ref;
1880 $ctx->{flag} = 0;
1881 $ctx->{fld_idx}++;
1882 }
1883
1884 $seenSomething = 1;
1885
1886 if (defined $hit and $hit ne '') {
1887 if ($waitingForField) {
1888 $waitingForField = 0;
1889 }
1890 if ($hit =~ /[^\x09\x20-\x7E]/) {
1891 $ctx->{flag} |= IS_BINARY;
1892 }
1893 $$v_ref .= $hit;
1894 }
1895
1896RESTART:
1897 if (defined $c and defined $sep and $c eq $sep) {
1898 if ($waitingForField) {
1899 # ,1,"foo, 3",,bar,
1900 # ^ ^
1901 if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
1902 $$v_ref = undef;
1903 } else {
1904 $$v_ref = "";
1905 }
1906 unless ($ctx->{is_bound}) {
1907 push @$fields, $$v_ref;
1908 }
1909 $v_ref = undef;
1910 if ($ctx->{keep_meta_info} and $fflags) {
1911 push @$fflags, $ctx->{flag};
1912 }
1913 } elsif ($ctx->{flag} & IS_QUOTED) {
1914 # ,1,"foo, 3",,bar,
1915 # ^
1916 $$v_ref .= $c;
1917 } else {
1918 # ,1,"foo, 3",,bar,
1919 # ^ ^ ^
1920 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
1921 $v_ref = undef;
1922 $waitingForField = 1;
1923 }
1924 }
1925 elsif (defined $c and defined $quot and $quot ne "\0" and $c eq $quot) {
1926 if ($waitingForField) {
1927 # ,1,"foo, 3",,bar,\r\n
1928 # ^
1929 $ctx->{flag} |= IS_QUOTED;
1930 $waitingForField = 0;
1931 next;
1932 }
1933 if ($ctx->{flag} & IS_QUOTED) {
1934 # ,1,"foo, 3",,bar,\r\n
1935 # ^
1936 my $quoesc = 0;
1937 my $c2 = $self->__get($ctx);
1938
1939 if ($ctx->{allow_whitespace}) {
1940 # , 1 , "foo, 3" , , bar , \r\n
1941 # ^
1942 while($self->__is_whitespace($ctx, $c2)) {
1943 if ($ctx->{allow_loose_quotes} and !(defined $esc and $c2 eq $esc)) {
1944 $$v_ref .= $c;
1945 $c = $c2;
1946 }
1947 $c2 = $self->__get($ctx);
1948 }
1949 }
1950
1951 if (!defined $c2) { # EOF
1952 # ,1,"foo, 3"
1953 # ^
1954 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
1955 return 1;
1956 }
1957
1958 if (defined $c2 and defined $sep and $c2 eq $sep) {
1959 # ,1,"foo, 3",,bar,\r\n
1960 # ^
1961 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
1962 $v_ref = undef;
1963 $waitingForField = 1;
1964 next;
1965 }
1966 if (defined $c2 and ($c2 eq "\012" or (defined $eol and $c2 eq $eol))) { # FIXME: EOLX
1967 # ,1,"foo, 3",,"bar"\n
1968 # ^
1969 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
1970 return 1;
1971 }
1972
1973 if (defined $esc and $c eq $esc) {
1974 $quoesc = 1;
1975 if (defined $c2 and $c2 eq '0') {
1976 # ,1,"foo, 3"056",,bar,\r\n
1977 # ^
1978 $$v_ref .= "\0";
1979 next;
1980 }
1981 if (defined $c2 and defined $quot and $c2 eq $quot) {
1982 # ,1,"foo, 3""56",,bar,\r\n
1983 # ^
1984 if ($ctx->{utf8}) {
1985 $ctx->{flag} |= IS_BINARY;
1986 }
1987 $$v_ref .= $c2;
1988 next;
1989 }
1990 if ($ctx->{allow_loose_escapes} and defined $c2 and $c2 ne "\015") {
1991 # ,1,"foo, 3"56",,bar,\r\n
1992 # ^
1993 $$v_ref .= $c;
1994 $c = $c2;
1995 goto RESTART;
1996 }
1997 }
1998 if (defined $c2 and $c2 eq "\015") {
1999 if ($ctx->{eol_is_cr}) {
2000 # ,1,"foo, 3"\r
2001 # ^
2002 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
2003 return 1;
2004 }
2005
2006 my $c3 = $self->__get($ctx);
2007 if (defined $c3 and $c3 eq "\012") {
2008 # ,1,"foo, 3"\r\n
2009 # ^
2010 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
2011 return 1;
2012 }
2013
2014 if ($ctx->{useIO} and !$ctx->{eol_len} and $c3 !~ /[^\x09\x20-\x7E]/) {
2015 # ,1,"foo\n 3",,"bar"\r
2016 # baz,4
2017 # ^
2018 $self->__set_eol_is_cr($ctx);
2019 $ctx->{used}--;
2020 $ctx->{has_ahead} = 1;
2021 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
2022 return 1;
2023 }
2024
2025 $self->__parse_error($ctx, $quoesc ? 2023 : 2010, $ctx->{used} - 2);
2026 return;
2027 }
2028
2029 if ($ctx->{allow_loose_quotes} and !$quoesc) {
2030 # ,1,"foo, 3"456",,bar,\r\n
2031 # ^
2032 $$v_ref .= $c;
2033 $c = $c2;
2034 goto RESTART;
2035 }
2036 # 1,"foo" ",3
2037 # ^
2038 if ($quoesc) {
2039 $ctx->{used}--;
2040 $self->__error_inside_quotes($ctx, 2023);
2041 return;
2042 }
2043 $self->__error_inside_quotes($ctx, 2011);
2044 return;
2045 }
2046 # !waitingForField, !InsideQuotes
2047 if ($ctx->{allow_loose_quotes}) { # 1,foo "boo" d'uh,1
2048 $ctx->{flag} |= IS_ERROR;
2049 $$v_ref .= $c;
2050 } else {
2051 $self->__error_inside_field($ctx, 2034);
2052 return;
2053 }
2054 }
2055 elsif (defined $c and defined $esc and $esc ne "\0" and $c eq $esc) {
2056 # This means quote_char != escape_char
2057 if ($waitingForField) {
2058 $waitingForField = 0;
2059 if ($ctx->{allow_unquoted_escape}) {
2060 # The escape character is the first character of an
2061 # unquoted field
2062 # ... get and store next character
2063 my $c2 = $self->__get($ctx);
2064 $$v_ref = "";
2065
2066 if (!defined $c2) { # EOF
2067 $ctx->{used}--;
2068 $self->__error_inside_field($ctx, 2035);
2069 return;
2070 }
2071 if ($c2 eq '0') {
2072 $$v_ref .= "\0";
2073 }
2074 elsif (
2075 (defined $quot and $c2 eq $quot) or
2076 (defined $sep and $c2 eq $sep) or
2077 (defined $esc and $c2 eq $esc) or
2078 $ctx->{allow_loose_escapes}
2079 ) {
2080 if ($ctx->{utf8}) {
2081 $ctx->{flag} |= IS_BINARY;
2082 }
2083 $$v_ref .= $c2;
2084 } else {
2085 $self->__parse_inside_quotes($ctx, 2025);
2086 return;
2087 }
2088 }
2089 }
2090 elsif ($ctx->{flag} & IS_QUOTED) {
2091 my $c2 = $self->__get($ctx);
2092 if (!defined $c2) { # EOF
2093 $ctx->{used}--;
2094 $self->__error_inside_quotes($ctx, 2024);
2095 return;
2096 }
2097 if ($c2 eq '0') {
2098 $$v_ref .= "\0";
2099 }
2100 elsif (
2101 (defined $quot and $c2 eq $quot) or
2102 (defined $sep and $c2 eq $sep) or
2103 (defined $esc and $c2 eq $esc) or
2104 $ctx->{allow_loose_escapes}
2105 ) {
2106 if ($ctx->{utf8}) {
2107 $ctx->{flag} |= IS_BINARY;
2108 }
2109 $$v_ref .= $c2;
2110 } else {
2111 $ctx->{used}--;
2112 $self->__error_inside_quotes($ctx, 2025);
2113 return;
2114 }
2115 }
2116 elsif ($v_ref) {
2117 my $c2 = $self->__get($ctx);
2118 if (!defined $c2) { # EOF
2119 $ctx->{used}--;
2120 $self->__error_inside_field($ctx, 2035);
2121 return;
2122 }
2123 $$v_ref .= $c2;
2124 }
2125 else {
2126 $self->__error_inside_field($ctx, 2036);
2127 return;
2128 }
2129 }
2130 elsif (defined $c and ($c eq "\012" or $c eq '' or (defined $eol and $c eq $eol and $eol ne "\015"))) { # EOL
2131 EOLX:
2132 if ($waitingForField) {
2133 # ,1,"foo, 3",,bar,
2134 # ^
2135 if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
2136 $$v_ref = undef;
2137 } else {
2138 $$v_ref = "";
2139 }
2140 unless ($ctx->{is_bound}) {
2141 push @$fields, $$v_ref;
2142 }
2143 if ($ctx->{keep_meta_info} and $fflags) {
2144 push @$fflags, $ctx->{flag};
2145 }
2146 return 1;
2147 }
2148 if ($ctx->{flag} & IS_QUOTED) {
2149 # ,1,"foo\n 3",,bar,
2150 # ^
2151 $ctx->{flag} |= IS_BINARY;
2152 unless ($ctx->{binary}) {
2153 $self->__error_inside_quotes($ctx, 2021);
2154 return;
2155 }
2156 $$v_ref .= $c;
2157 }
2158 elsif ($ctx->{verbatim}) {
2159 # ,1,foo\n 3,,bar,
2160 # This feature should be deprecated
2161 $ctx->{flag} |= IS_BINARY;
2162 unless ($ctx->{binary}) {
2163 $self->__error_inside_field($ctx, 2030);
2164 return;
2165 }
2166 $$v_ref .= $c unless $ctx->{eol} eq $c and $ctx->{useIO};
2167 }
2168 else {
2169 # sep=,
2170 # ^
2171 if (!$ctx->{recno} and $ctx->{fld_idx} == 1 and $ctx->{useIO} and $hit =~ /^sep=(.{1,16})$/i) {
2172 $ctx->{sep} = $1;
2173 use bytes;
2174 my $len = length $ctx->{sep};
2175 if ($len <= 16) {
2176 $ctx->{sep_len} = $len == 1 ? 0 : $len;
2177 return $self->____parse($ctx, $src, $fields, $fflags);
2178 }
2179 }
2180
2181 # ,1,"foo\n 3",,bar
2182 # ^
2183 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
2184 return 1;
2185 }
2186 }
2187 elsif (defined $c and $c eq "\015" and !$ctx->{verbatim}) {
2188 if ($waitingForField) {
2189 $waitingForField = 0;
2190 if ($ctx->{eol_is_cr}) {
2191 # ,1,"foo\n 3",,bar,\r
2192 # ^
2193 $c = "\012";
2194 goto RESTART;
2195 }
2196
2197 my $c2 = $self->__get($ctx);
2198 if (!defined $c2) { # EOF
2199 # ,1,"foo\n 3",,bar,\r
2200 # ^
2201 $c = undef;
2202 goto RESTART;
2203 }
2204 if ($c2 eq "\012") { # \r is not optional before EOLX!
2205 # ,1,"foo\n 3",,bar,\r\n
2206 # ^
2207 $c = $c2;
2208 goto RESTART;
2209 }
2210
2211 if ($ctx->{useIO} and !$ctx->{eol_len} and $c2 !~ /[^\x09\x20-\x7E]/) {
2212 # ,1,"foo\n 3",,bar,\r
2213 # baz,4
2214 # ^
2215 $self->__set_eol_is_cr($ctx);
2216 $ctx->{used}--;
2217 $ctx->{has_ahead} = 1;
2218 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
2219 return 1;
2220 }
2221
2222 # ,1,"foo\n 3",,bar,\r\t
2223 # ^
2224 $ctx->{used}--;
2225 $self->__error_inside_field($ctx, 2031);
2226 return;
2227 }
2228 if ($ctx->{flag} & IS_QUOTED) {
2229 # ,1,"foo\r 3",,bar,\r\t
2230 # ^
2231 $ctx->{flag} |= IS_BINARY;
2232 unless ($ctx->{binary}) {
2233 $self->__error_inside_quotes($ctx, 2022);
2234 return;
2235 }
2236 $$v_ref .= $c;
2237 }
2238 else {
2239 if ($ctx->{eol_is_cr}) {
2240 # ,1,"foo\n 3",,bar\r
2241 # ^
2242 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
2243 return 1;
2244 }
2245
2246 my $c2 = $self->__get($ctx);
2247 if (defined $c2 and $c2 eq "\012") { # \r is not optional before EOLX!
2248 # ,1,"foo\n 3",,bar\r\n
2249 # ^
2250 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
2251 return 1;
2252 }
2253
2254 if ($ctx->{useIO} and !$ctx->{eol_len} and $c2 !~ /[^\x09\x20-\x7E]/) {
2255 # ,1,"foo\n 3",,bar\r
2256 # baz,4
2257 # ^
2258 $self->__set_eol_is_cr($ctx);
2259 $ctx->{used}--;
2260 $ctx->{has_ahead} = 1;
2261 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
2262 return 1;
2263 }
2264
2265 # ,1,"foo\n 3",,bar\r\t
2266 # ^
2267 $self->__error_inside_field($ctx, 2032);
2268 return;
2269 }
2270 }
2271 else {
2272 if ($ctx->{eolx} and $c eq $eol) {
2273 $c = '';
2274 goto EOLX;
2275 }
2276
2277 if ($waitingForField) {
2278 if ($ctx->{allow_whitespace} and $self->__is_whitespace($ctx, $c)) {
2279 do {
2280 $c = $self->__get($ctx);
2281 last if !defined $c;
2282 } while $self->__is_whitespace($ctx, $c);
2283 goto RESTART;
2284 }
2285 $waitingForField = 0;
2286 goto RESTART;
2287 }
2288 if ($ctx->{flag} & IS_QUOTED) {
2289 if (!defined $c or $c =~ /[^\x09\x20-\x7E]/) {
2290 $ctx->{flag} |= IS_BINARY;
2291 unless ($ctx->{binary} or $ctx->{utf8}) {
2292 $self->__error_inside_quotes($ctx, 2026);
2293 return;
2294 }
2295 }
2296 $$v_ref .= $c;
2297 } else {
2298 if (!defined $c or $c =~ /[^\x09\x20-\x7E]/) {
2299 $ctx->{flag} |= IS_BINARY;
2300 unless ($ctx->{binary} or $ctx->{utf8}) {
2301 $self->__error_inside_field($ctx, 2037);
2302 return;
2303 }
2304 }
2305 $$v_ref .= $c;
2306 }
2307 }
2308 last LOOP if $ctx->{useIO} and $ctx->{verbatim} and $ctx->{used} == $ctx->{size};
2309 }
2310 }
2311
2312 if ($waitingForField) {
2313 if ($seenSomething or !$ctx->{useIO}) {
2314 # new field
2315 if (!$v_ref) {
2316 if ($ctx->{is_bound}) {
2317 $v_ref = $self->__bound_field($ctx, $fnum++, 0);
2318 } else {
2319 $value = '';
2320 $v_ref = \$value;
2321 }
2322 return unless $v_ref;
2323 $ctx->{flag} = 0;
2324 $ctx->{fld_idx}++;
2325 }
2326 if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
2327 $$v_ref = undef;
2328 } else {
2329 $$v_ref = "";
2330 }
2331 unless ($ctx->{is_bound}) {
2332 push @$fields, $$v_ref;
2333 }
2334 if ($ctx->{keep_meta_info} and $fflags) {
2335 push @$fflags, $ctx->{flag};
2336 }
2337 return 1;
2338 }
2339 $self->SetDiag(2012);
2340 return;
2341 }
2342
2343 if ($ctx->{flag} & IS_QUOTED) {
2344 $self->__error_inside_quotes($ctx, 2027);
2345 return;
2346 }
2347
2348 if ($v_ref) {
2349 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag});
2350 }
2351 return 1;
2352}
2353
2354sub __get_from_src {
2355 my ($self, $ctx, $src) = @_;
2356 return 1 if defined $ctx->{tmp} and $ctx->{used} <= 0;
2357 return 1 if $ctx->{used} < $ctx->{size};
2358 return unless $ctx->{useIO};
2359 my $res = $src->getline;
2360 if (defined $res) {
2361 if ($ctx->{has_ahead}) {
2362 $ctx->{tmp} = $self->{_AHEAD};
2363 $ctx->{tmp} .= $ctx->{eol} if $ctx->{eol_len};
2364 $ctx->{tmp} .= $res;
2365 $ctx->{has_ahead} = 0;
2366 } else {
2367 $ctx->{tmp} = $res;
2368 }
2369 if ($ctx->{size} = length $ctx->{tmp}) {
2370 $ctx->{used} = -1;
2371 $ctx->{utf8} = 1 if utf8::is_utf8($ctx->{tmp});
2372 pos($ctx->{tmp}) = 0;
2373 return 1;
2374 }
2375 } elsif (delete $ctx->{has_leftover}) {
2376 $ctx->{tmp} = $self->{_AHEAD};
2377 $ctx->{has_ahead} = 0;
2378 $ctx->{useIO} |= useIO_EOF;
2379 if ($ctx->{size} = length $ctx->{tmp}) {
2380 $ctx->{used} = -1;
2381 $ctx->{utf8} = 1 if utf8::is_utf8($ctx->{tmp});
2382 pos($ctx->{tmp}) = 0;
2383 return 1;
2384 }
2385 }
2386 $ctx->{tmp} = '' unless defined $ctx->{tmp};
2387 $ctx->{useIO} |= useIO_EOF;
2388 return;
2389}
2390
2391sub __set_eol_is_cr {
2392 my ($self, $ctx) = @_;
2393 $ctx->{eol} = "\015";
2394 $ctx->{eol_is_cr} = 1;
2395 $ctx->{eol_len} = 1;
2396
2397 $self->{eol} = $ctx->{eol};
2398}
2399
2400sub __bound_field {
2401 my ($self, $ctx, $i, $keep) = @_;
2402 if ($i >= $ctx->{is_bound}) {
2403 $self->SetDiag(3006);
2404 return;
2405 }
2406 if (ref $ctx->{bound} eq 'ARRAY') {
2407 my $ref = $ctx->{bound}[$i];
2408 if (ref $ref) {
2409 if ($keep) {
2410 return $ref;
2411 }
2412 unless (Scalar::Util::readonly($$ref)) {
2413 $$ref = "";
2414 return $ref;
2415 }
2416 }
2417 }
2418 $self->SetDiag(3008);
2419 return;
2420}
2421
2422sub __get {
2423 my ($self, $ctx) = @_;
2424 return unless defined $ctx->{used};
2425 return if $ctx->{used} >= $ctx->{size};
2426 my $pos = pos($ctx->{tmp});
2427 if ($ctx->{tmp} =~ /\G($ctx->{_re}|.)/gs) {
2428 my $c = $1;
2429 if ($c =~ /[^\x09\x20-\x7e]/) {
2430 $ctx->{flag} |= IS_BINARY;
2431 }
2432 $ctx->{used} = pos($ctx->{tmp});
2433 return $c;
2434 } else {
2435 pos($ctx->{tmp}) = $pos;
2436 return;
2437 }
2438}
2439
2440sub __error_inside_quotes {
2441 my ($self, $ctx, $error) = @_;
2442 $self->__parse_error($ctx, $error, $ctx->{used} - 1);
2443}
2444
2445sub __error_inside_field {
2446 my ($self, $ctx, $error) = @_;
2447 $self->__parse_error($ctx, $error, $ctx->{used} - 1);
2448}
2449
2450sub __parse_error {
2451 my ($self, $ctx, $error, $pos) = @_;
2452 $self->{_ERROR_POS} = $pos;
2453 $self->{_ERROR_FLD} = $ctx->{fld_idx};
2454 $self->{_ERROR_INPUT} = $ctx->{tmp} if $ctx->{tmp};
2455 $self->SetDiag($error);
2456 return;
2457}
2458
2459sub __is_whitespace {
2460 my ($self, $ctx, $c) = @_;
2461 return unless defined $c;
2462 return (
2463 (!defined $ctx->{sep} or $c ne $ctx->{sep}) &&
2464 (!defined $ctx->{quo} or $c ne $ctx->{quo}) &&
2465 (!defined $ctx->{escape_char} or $c ne $ctx->{escape_char}) &&
2466 ($c eq " " or $c eq "\t")
2467 );
2468}
2469
2470sub __push_value { # AV_PUSH (part of)
2471 my ($self, $ctx, $v_ref, $fields, $fflags, $flag) = @_;
2472 utf8::encode($$v_ref) if $ctx->{utf8};
2473 if (
2474 (!defined $$v_ref or $$v_ref eq '') and
2475 ($ctx->{empty_is_undef} or (!($flag & IS_QUOTED) and $ctx->{blank_is_undef}))
2476 ) {
2477 $$v_ref = undef;
2478 } else {
2479 if ($ctx->{allow_whitespace} && !($flag & IS_QUOTED)) {
2480 $$v_ref =~ s/[ \t]+$//;
2481 }
2482 if ($flag & IS_BINARY and $ctx->{decode_utf8} and ($ctx->{utf8} || _is_valid_utf8($$v_ref))) {
2483 utf8::decode($$v_ref);
2484 }
2485 }
2486 unless ($ctx->{is_bound}) {
2487 push @$fields, $$v_ref;
2488 }
2489 if ($ctx->{keep_meta_info} and $fflags) {
2490 push @$fflags, $flag;
2491 }
2492}
2493
2494sub getline {
2495 my ($self, $io) = @_;
2496
2497 my (@fields, @fflags);
2498 my $res = $self->__parse(\@fields, \@fflags, $io, 1);
2499 $res ? \@fields : undef;
2500}
2501
2502sub getline_all {
2503 my ( $self, $io, $offset, $len ) = @_;
2504
2505 my $ctx = $self->_setup_ctx;
2506
2507 my $tail = 0;
2508 my $n = 0;
2509 $offset ||= 0;
2510
2511 if ( $offset < 0 ) {
2512 $tail = -$offset;
2513 $offset = -1;
2514 }
2515
2516 my (@row, @list);
2517 while ($self->___parse($ctx, \@row, undef, $io, 1)) {
2518 $ctx = $self->_setup_ctx;
2519
2520 if ($offset > 0) {
2521 $offset--;
2522 @row = ();
2523 next;
2524 }
2525 if ($n++ >= $tail and $tail) {
2526 shift @list;
2527 $n--;
2528 }
2529 if (($ctx->{has_hooks} || 0) & HOOK_AFTER_PARSE) {
2530 unless ($self->_hook(after_parse => \@row)) {
2531 @row = ();
2532 next;
2533 }
2534 }
2535 push @list, [@row];
2536 @row = ();
2537
2538 last if defined $len && $n >= $len and $offset >= 0; # exceeds limit size
2539 }
2540
2541 if ( defined $len && $n > $len ) {
2542 @list = splice( @list, 0, $len);
2543 }
2544
2545 return \@list;
2546}
2547
2548sub _is_valid_utf8 {
2549 return ( $_[0] =~ /^(?:
2550 [\x00-\x7F]
2551 |[\xC2-\xDF][\x80-\xBF]
2552 |[\xE0][\xA0-\xBF][\x80-\xBF]
2553 |[\xE1-\xEC][\x80-\xBF][\x80-\xBF]
2554 |[\xED][\x80-\x9F][\x80-\xBF]
2555 |[\xEE-\xEF][\x80-\xBF][\x80-\xBF]
2556 |[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF]
2557 |[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
2558 |[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF]
2559 )+$/x ) ? 1 : 0;
2560}
2561
2562################################################################################
2563# methods for errors
2564################################################################################
2565
2566sub _set_error_diag {
2567 my ( $self, $error, $pos ) = @_;
2568
2569 $self->SetDiag($error);
2570
2571 if (defined $pos) {
2572 $_[0]->{_ERROR_POS} = $pos;
2573 }
2574
2575 return;
2576}
2577
2578sub error_input {
2579 my $self = shift;
2580 if ($self and ((Scalar::Util::reftype($self) || '') eq 'HASH' or (ref $self) =~ /^Text::CSV/)) {
2581 return $self->{_ERROR_INPUT};
2582 }
2583 return;
2584}
2585
2586sub _sv_diag {
2587 my ($self, $error) = @_;
2588 bless [$error, $ERRORS->{$error}], 'Text::CSV::ErrorDiag';
2589}
2590
2591sub _set_diag {
2592 my ($self, $ctx, $error) = @_;
2593
2594 $last_error = $self->_sv_diag($error);
2595 $self->{_ERROR_DIAG} = $last_error;
2596 if ($error == 0) {
2597 $self->{_ERROR_POS} = 0;
2598 $self->{_ERROR_FLD} = 0;
2599 $self->{_ERROR_INPUT} = undef;
2600 $ctx->{has_error_input} = 0;
2601 }
2602 if ($error == 2012) { # EOF
2603 $self->{_EOF} = 1;
2604 }
2605 if ($ctx->{auto_diag}) {
2606 $self->error_diag;
2607 }
2608 return $last_error;
2609}
2610
2611sub SetDiag {
2612 my ($self, $error, $errstr) = @_;
2613 my $res;
2614 if (ref $self) {
2615 my $ctx = $self->_setup_ctx;
2616 $res = $self->_set_diag($ctx, $error);
2617
2618 } else {
2619 $res = $self->_sv_diag($error);
2620 }
2621 if (defined $errstr) {
2622 $res->[1] = $errstr;
2623 }
2624 $res;
2625}
2626
2627################################################################################
2628package Text::CSV::ErrorDiag;
2629
2630use strict;
2631use overload (
2632 '""' => \&stringify,
2633 '+' => \&numeric,
2634 '-' => \&numeric,
2635 '*' => \&numeric,
2636 '/' => \&numeric,
2637 fallback => 1,
2638);
2639
2640
2641sub numeric {
2642 my ($left, $right) = @_;
2643 return ref $left ? $left->[0] : $right->[0];
2644}
2645
2646
2647sub stringify {
2648 $_[0]->[1];
2649}
2650################################################################################
26511;
2652__END__
2653
2654=head1 NAME
2655
2656Text::CSV_PP - Text::CSV_XS compatible pure-Perl module
2657
2658
2659=head1 SYNOPSIS
2660
2661 use Text::CSV_PP;
2662
2663 $csv = Text::CSV_PP->new(); # create a new object
2664 # If you want to handle non-ascii char.
2665 $csv = Text::CSV_PP->new({binary => 1});
2666
2667 $status = $csv->combine(@columns); # combine columns into a string
2668 $line = $csv->string(); # get the combined string
2669
2670 $status = $csv->parse($line); # parse a CSV string into fields
2671 @columns = $csv->fields(); # get the parsed fields
2672
2673 $status = $csv->status (); # get the most recent status
2674 $bad_argument = $csv->error_input (); # get the most recent bad argument
2675 $diag = $csv->error_diag (); # if an error occurred, explains WHY
2676
2677 $status = $csv->print ($io, $colref); # Write an array of fields
2678 # immediately to a file $io
2679 $colref = $csv->getline ($io); # Read a line from file $io,
2680 # parse it and return an array
2681 # ref of fields
2682 $csv->column_names (@names); # Set column names for getline_hr ()
2683 $ref = $csv->getline_hr ($io); # getline (), but returns a hashref
2684 $eof = $csv->eof (); # Indicate if last parse or
2685 # getline () hit End Of File
2686
2687 $csv->types(\@t_array); # Set column types
2688
2689=head1 DESCRIPTION
2690
2691Text::CSV_PP is a pure-perl module that provides facilities for the
2692composition and decomposition of comma-separated values. This is
2693(almost) compatible with much faster L<Text::CSV_XS>, and mainly
2694used as its fallback module when you use L<Text::CSV> module without
2695having installed Text::CSV_XS. If you don't have any reason to use
2696this module directly, use Text::CSV for speed boost and portability
2697(or maybe Text::CSV_XS when you write an one-off script and don't need
2698to care about portability).
2699
2700The following caveats are taken from the doc of Text::CSV_XS.
2701
2702=head2 Embedded newlines
2703
2704B<Important Note>: The default behavior is to accept only ASCII characters
2705in the range from C<0x20> (space) to C<0x7E> (tilde). This means that the
2706fields can not contain newlines. If your data contains newlines embedded in
2707fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>>
2708set C<< binary => 1 >> in the call to L</new>. To cover the widest range of
2709parsing options, you will always want to set binary.
2710
2711But you still have the problem that you have to pass a correct line to the
2712L</parse> method, which is more complicated from the usual point of usage:
2713
2714 my $csv = Text::CSV_PP->new ({ binary => 1, eol => $/ });
2715 while (<>) { # WRONG!
2716 $csv->parse ($_);
2717 my @fields = $csv->fields ();
2718 }
2719
2720this will break, as the C<while> might read broken lines: it does not care
2721about the quoting. If you need to support embedded newlines, the way to go
2722is to B<not> pass L<C<eol>|/eol> in the parser (it accepts C<\n>, C<\r>,
2723B<and> C<\r\n> by default) and then
2724
2725 my $csv = Text::CSV_PP->new ({ binary => 1 });
2726 open my $io, "<", $file or die "$file: $!";
2727 while (my $row = $csv->getline ($io)) {
2728 my @fields = @$row;
2729 }
2730
2731The old(er) way of using global file handles is still supported
2732
2733 while (my $row = $csv->getline (*ARGV)) { ... }
2734
2735=head2 Unicode
2736
2737Unicode is only tested to work with perl-5.8.2 and up.
2738
2739The simplest way to ensure the correct encoding is used for in- and output
2740is by either setting layers on the filehandles, or setting the L</encoding>
2741argument for L</csv>.
2742
2743 open my $fh, "<:encoding(UTF-8)", "in.csv" or die "in.csv: $!";
2744or
2745 my $aoa = csv (in => "in.csv", encoding => "UTF-8");
2746
2747 open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!";
2748or
2749 csv (in => $aoa, out => "out.csv", encoding => "UTF-8");
2750
2751On parsing (both for L</getline> and L</parse>), if the source is marked
2752being UTF8, then all fields that are marked binary will also be marked UTF8.
2753
2754On combining (L</print> and L</combine>): if any of the combining fields
2755was marked UTF8, the resulting string will be marked as UTF8. Note however
2756that all fields I<before> the first field marked UTF8 and contained 8-bit
2757characters that were not upgraded to UTF8, these will be C<bytes> in the
2758resulting string too, possibly causing unexpected errors. If you pass data
2759of different encoding, or you don't know if there is different encoding,
2760force it to be upgraded before you pass them on:
2761
2762 $csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]);
2763
2764For complete control over encoding, please use L<Text::CSV::Encoded>:
2765
2766 use Text::CSV::Encoded;
2767 my $csv = Text::CSV::Encoded->new ({
2768 encoding_in => "iso-8859-1", # the encoding comes into Perl
2769 encoding_out => "cp1252", # the encoding comes out of Perl
2770 });
2771
2772 $csv = Text::CSV::Encoded->new ({ encoding => "utf8" });
2773 # combine () and print () accept *literally* utf8 encoded data
2774 # parse () and getline () return *literally* utf8 encoded data
2775
2776 $csv = Text::CSV::Encoded->new ({ encoding => undef }); # default
2777 # combine () and print () accept UTF8 marked data
2778 # parse () and getline () return UTF8 marked data
2779
2780=head1 METHODS
2781
2782This whole section is also taken from Text::CSV_XS.
2783
2784=head2 version ()
2785
2786(Class method) Returns the current module version.
2787
2788=head2 new (\%attr)
2789
2790(Class method) Returns a new instance of Text::CSV_PP. The attributes
2791are described by the (optional) hash ref C<\%attr>.
2792
2793 my $csv = Text::CSV_PP->new ({ attributes ... });
2794
2795The following attributes are available:
2796
2797=head3 eol
2798
2799 my $csv = Text::CSV_PP->new ({ eol => $/ });
2800 $csv->eol (undef);
2801 my $eol = $csv->eol;
2802
2803The end-of-line string to add to rows for L</print> or the record separator
2804for L</getline>.
2805
2806When not passed in a B<parser> instance, the default behavior is to accept
2807C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at
2808all. Passing C<undef> or the empty string behave the same.
2809
2810When not passed in a B<generating> instance, records are not terminated at
2811all, so it is probably wise to pass something you expect. A safe choice for
2812C<eol> on output is either C<$/> or C<\r\n>.
2813
2814Common values for C<eol> are C<"\012"> (C<\n> or Line Feed), C<"\015\012">
2815(C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage
2816Return). The L<C<eol>|/eol> attribute cannot exceed 7 (ASCII) characters.
2817
2818If both C<$/> and L<C<eol>|/eol> equal C<"\015">, parsing lines that end on
2819only a Carriage Return without Line Feed, will be L</parse>d correct.
2820
2821=head3 sep_char
2822
2823 my $csv = Text::CSV_PP->new ({ sep_char => ";" });
2824 $csv->sep_char (";");
2825 my $c = $csv->sep_char;
2826
2827The char used to separate fields, by default a comma. (C<,>). Limited to a
2828single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
2829(tilde). When longer sequences are required, use L<C<sep>|/sep>.
2830
2831The separation character can not be equal to the quote character or to the
2832escape character.
2833
2834=head3 sep
2835
2836 my $csv = Text::CSV_PP->new ({ sep => "\N{FULLWIDTH COMMA}" });
2837 $csv->sep (";");
2838 my $sep = $csv->sep;
2839
2840The chars used to separate fields, by default undefined. Limited to 8 bytes.
2841
2842When set, overrules L<C<sep_char>|/sep_char>. If its length is one byte it
2843acts as an alias to L<C<sep_char>|/sep_char>.
2844
2845=head3 quote_char
2846
2847 my $csv = Text::CSV_PP->new ({ quote_char => "'" });
2848 $csv->quote_char (undef);
2849 my $c = $csv->quote_char;
2850
2851The character to quote fields containing blanks or binary data, by default
2852the double quote character (C<">). A value of undef suppresses quote chars
2853(for simple cases only). Limited to a single-byte character, usually in the
2854range from C<0x20> (space) to C<0x7E> (tilde). When longer sequences are
2855required, use L<C<quote>|/quote>.
2856
2857C<quote_char> can not be equal to L<C<sep_char>|/sep_char>.
2858
2859=head3 quote
2860
2861 my $csv = Text::CSV_PP->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" });
2862 $csv->quote ("'");
2863 my $quote = $csv->quote;
2864
2865The chars used to quote fields, by default undefined. Limited to 8 bytes.
2866
2867When set, overrules L<C<quote_char>|/quote_char>. If its length is one byte
2868it acts as an alias to L<C<quote_char>|/quote_char>.
2869
2870=head3 escape_char
2871
2872 my $csv = Text::CSV_PP->new ({ escape_char => "\\" });
2873 $csv->escape_char (undef);
2874 my $c = $csv->escape_char;
2875
2876The character to escape certain characters inside quoted fields. This is
2877limited to a single-byte character, usually in the range from C<0x20>
2878(space) to C<0x7E> (tilde).
2879
2880The C<escape_char> defaults to being the double-quote mark (C<">). In other
2881words the same as the default L<C<quote_char>|/quote_char>. This means that
2882doubling the quote mark in a field escapes it:
2883
2884 "foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
2885
2886If you change the L<C<quote_char>|/quote_char> without changing the
2887C<escape_char>, the C<escape_char> will still be the double-quote (C<">).
2888If instead you want to escape the L<C<quote_char>|/quote_char> by doubling
2889it you will need to also change the C<escape_char> to be the same as what
2890you have changed the L<C<quote_char>|/quote_char> to.
2891
2892The escape character can not be equal to the separation character.
2893
2894=head3 binary
2895
2896 my $csv = Text::CSV_PP->new ({ binary => 1 });
2897 $csv->binary (0);
2898 my $f = $csv->binary;
2899
2900If this attribute is C<1>, you may use binary characters in quoted fields,
2901including line feeds, carriage returns and C<NULL> bytes. (The latter could
2902be escaped as C<"0>.) By default this feature is off.
2903
2904If a string is marked UTF8, C<binary> will be turned on automatically when
2905binary characters other than C<CR> and C<NL> are encountered. Note that a
2906simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
2907so setting C<< { binary => 1 } >> is still a wise option.
2908
2909=head3 strict
2910
2911 my $csv = Text::CSV_PP->new ({ strict => 1 });
2912 $csv->strict (0);
2913 my $f = $csv->strict;
2914
2915If this attribute is set to C<1>, any row that parses to a different number
2916of fields than the previous row will cause the parser to throw error 2014.
2917
2918=head3 decode_utf8
2919
2920 my $csv = Text::CSV_PP->new ({ decode_utf8 => 1 });
2921 $csv->decode_utf8 (0);
2922 my $f = $csv->decode_utf8;
2923
2924This attributes defaults to TRUE.
2925
2926While I<parsing>, fields that are valid UTF-8, are automatically set to be
2927UTF-8, so that
2928
2929 $csv->parse ("\xC4\xA8\n");
2930
2931results in
2932
2933 PV("\304\250"\0) [UTF8 "\x{128}"]
2934
2935Sometimes it might not be a desired action. To prevent those upgrades, set
2936this attribute to false, and the result will be
2937
2938 PV("\304\250"\0)
2939
2940=head3 auto_diag
2941
2942 my $csv = Text::CSV_PP->new ({ auto_diag => 1 });
2943 $csv->auto_diag (2);
2944 my $l = $csv->auto_diag;
2945
2946Set this attribute to a number between C<1> and C<9> causes L</error_diag>
2947to be automatically called in void context upon errors.
2948
2949In case of error C<2012 - EOF>, this call will be void.
2950
2951If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die>
2952on errors instead of C<warn>. If set to anything unrecognized, it will be
2953silently ignored.
2954
2955Future extensions to this feature will include more reliable auto-detection
2956of C<autodie> being active in the scope of which the error occurred which
2957will increment the value of C<auto_diag> with C<1> the moment the error is
2958detected.
2959
2960=head3 diag_verbose
2961
2962 my $csv = Text::CSV_PP->new ({ diag_verbose => 1 });
2963 $csv->diag_verbose (2);
2964 my $l = $csv->diag_verbose;
2965
2966Set the verbosity of the output triggered by C<auto_diag>. Currently only
2967adds the current input-record-number (if known) to the diagnostic output
2968with an indication of the position of the error.
2969
2970=head3 blank_is_undef
2971
2972 my $csv = Text::CSV_PP->new ({ blank_is_undef => 1 });
2973 $csv->blank_is_undef (0);
2974 my $f = $csv->blank_is_undef;
2975
2976Under normal circumstances, C<CSV> data makes no distinction between quoted-
2977and unquoted empty fields. These both end up in an empty string field once
2978read, thus
2979
2980 1,"",," ",2
2981
2982is read as
2983
2984 ("1", "", "", " ", "2")
2985
2986When I<writing> C<CSV> files with either L<C<always_quote>|/always_quote>
2987or L<C<quote_empty>|/quote_empty> set, the unquoted I<empty> field is the
2988result of an undefined value. To enable this distinction when I<reading>
2989C<CSV> data, the C<blank_is_undef> attribute will cause unquoted empty
2990fields to be set to C<undef>, causing the above to be parsed as
2991
2992 ("1", "", undef, " ", "2")
2993
2994note that this is specifically important when loading C<CSV> fields into a
2995database that allows C<NULL> values, as the perl equivalent for C<NULL> is
2996C<undef> in L<DBI> land.
2997
2998=head3 empty_is_undef
2999
3000 my $csv = Text::CSV_PP->new ({ empty_is_undef => 1 });
3001 $csv->empty_is_undef (0);
3002 my $f = $csv->empty_is_undef;
3003
3004Going one step further than L<C<blank_is_undef>|/blank_is_undef>, this
3005attribute converts all empty fields to C<undef>, so
3006
3007 1,"",," ",2
3008
3009is read as
3010
3011 (1, undef, undef, " ", 2)
3012
3013Note that this effects only fields that are originally empty, not fields
3014that are empty after stripping allowed whitespace. YMMV.
3015
3016=head3 allow_whitespace
3017
3018 my $csv = Text::CSV_PP->new ({ allow_whitespace => 1 });
3019 $csv->allow_whitespace (0);
3020 my $f = $csv->allow_whitespace;
3021
3022When this option is set to true, the whitespace (C<TAB>'s and C<SPACE>'s)
3023surrounding the separation character is removed when parsing. If either
3024C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>|/sep_char>,
3025L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> it will not
3026be considered whitespace.
3027
3028Now lines like:
3029
3030 1 , "foo" , bar , 3 , zapp
3031
3032are parsed as valid C<CSV>, even though it violates the C<CSV> specs.
3033
3034Note that B<all> whitespace is stripped from both start and end of each
3035field. That would make it I<more> than a I<feature> to enable parsing bad
3036C<CSV> lines, as
3037
3038 1, 2.0, 3, ape , monkey
3039
3040will now be parsed as
3041
3042 ("1", "2.0", "3", "ape", "monkey")
3043
3044even if the original line was perfectly acceptable C<CSV>.
3045
3046=head3 allow_loose_quotes
3047
3048 my $csv = Text::CSV_PP->new ({ allow_loose_quotes => 1 });
3049 $csv->allow_loose_quotes (0);
3050 my $f = $csv->allow_loose_quotes;
3051
3052By default, parsing unquoted fields containing L<C<quote_char>|/quote_char>
3053characters like
3054
3055 1,foo "bar" baz,42
3056
3057would result in parse error 2034. Though it is still bad practice to allow
3058this format, we cannot help the fact that some vendors make their
3059applications spit out lines styled this way.
3060
3061If there is B<really> bad C<CSV> data, like
3062
3063 1,"foo "bar" baz",42
3064
3065or
3066
3067 1,""foo bar baz"",42
3068
3069there is a way to get this data-line parsed and leave the quotes inside the
3070quoted field as-is. This can be achieved by setting C<allow_loose_quotes>
3071B<AND> making sure that the L<C<escape_char>|/escape_char> is I<not> equal
3072to L<C<quote_char>|/quote_char>.
3073
3074=head3 allow_loose_escapes
3075
3076 my $csv = Text::CSV_PP->new ({ allow_loose_escapes => 1 });
3077 $csv->allow_loose_escapes (0);
3078 my $f = $csv->allow_loose_escapes;
3079
3080Parsing fields that have L<C<escape_char>|/escape_char> characters that
3081escape characters that do not need to be escaped, like:
3082
3083 my $csv = Text::CSV_PP->new ({ escape_char => "\\" });
3084 $csv->parse (qq{1,"my bar\'s",baz,42});
3085
3086would result in parse error 2025. Though it is bad practice to allow this
3087format, this attribute enables you to treat all escape character sequences
3088equal.
3089
3090=head3 allow_unquoted_escape
3091
3092 my $csv = Text::CSV_PP->new ({ allow_unquoted_escape => 1 });
3093 $csv->allow_unquoted_escape (0);
3094 my $f = $csv->allow_unquoted_escape;
3095
3096A backward compatibility issue where L<C<escape_char>|/escape_char> differs
3097from L<C<quote_char>|/quote_char> prevents L<C<escape_char>|/escape_char>
3098to be in the first position of a field. If L<C<quote_char>|/quote_char> is
3099equal to the default C<"> and L<C<escape_char>|/escape_char> is set to C<\>,
3100this would be illegal:
3101
3102 1,\0,2
3103
3104Setting this attribute to C<1> might help to overcome issues with backward
3105compatibility and allow this style.
3106
3107=head3 always_quote
3108
3109 my $csv = Text::CSV_PP->new ({ always_quote => 1 });
3110 $csv->always_quote (0);
3111 my $f = $csv->always_quote;
3112
3113By default the generated fields are quoted only if they I<need> to be. For
3114example, if they contain the separator character. If you set this attribute
3115to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not
3116quoted, see L</blank_is_undef>). This makes it quite often easier to handle
3117exported data in external applications.
3118
3119=head3 quote_space
3120
3121 my $csv = Text::CSV_PP->new ({ quote_space => 1 });
3122 $csv->quote_space (0);
3123 my $f = $csv->quote_space;
3124
3125By default, a space in a field would trigger quotation. As no rule exists
3126this to be forced in C<CSV>, nor any for the opposite, the default is true
3127for safety. You can exclude the space from this trigger by setting this
3128attribute to 0.
3129
3130=head3 quote_empty
3131
3132 my $csv = Text::CSV_PP->new ({ quote_empty => 1 });
3133 $csv->quote_empty (0);
3134 my $f = $csv->quote_empty;
3135
3136By default the generated fields are quoted only if they I<need> to be. An
3137empty (defined) field does not need quotation. If you set this attribute to
3138C<1> then I<empty> defined fields will be quoted. (C<undef> fields are not
3139quoted, see L</blank_is_undef>). See also L<C<always_quote>|/always_quote>.
3140
3141=head3 quote_binary
3142
3143 my $csv = Text::CSV_PP->new ({ quote_binary => 1 });
3144 $csv->quote_binary (0);
3145 my $f = $csv->quote_binary;
3146
3147By default, all "unsafe" bytes inside a string cause the combined field to
3148be quoted. By setting this attribute to C<0>, you can disable that trigger
3149for bytes >= C<0x7F>.
3150
3151=head3 escape_null or quote_null (deprecated)
3152
3153 my $csv = Text::CSV_PP->new ({ escape_null => 1 });
3154 $csv->escape_null (0);
3155 my $f = $csv->escape_null;
3156
3157By default, a C<NULL> byte in a field would be escaped. This option enables
3158you to treat the C<NULL> byte as a simple binary character in binary mode
3159(the C<< { binary => 1 } >> is set). The default is true. You can prevent
3160C<NULL> escapes by setting this attribute to C<0>.
3161
3162The default when using the C<csv> function is C<false>.
3163
3164=head3 keep_meta_info
3165
3166 my $csv = Text::CSV_PP->new ({ keep_meta_info => 1 });
3167 $csv->keep_meta_info (0);
3168 my $f = $csv->keep_meta_info;
3169
3170By default, the parsing of input records is as simple and fast as possible.
3171However, some parsing information - like quotation of the original field -
3172is lost in that process. Setting this flag to true enables retrieving that
3173information after parsing with the methods L</meta_info>, L</is_quoted>,
3174and L</is_binary> described below. Default is false for performance.
3175
3176If you set this attribute to a value greater than 9, than you can control
3177output quotation style like it was used in the input of the the last parsed
3178record (unless quotation was added because of other reasons).
3179
3180 my $csv = Text::CSV_PP->new ({
3181 binary => 1,
3182 keep_meta_info => 1,
3183 quote_space => 0,
3184 });
3185
3186 my $row = $csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"});
3187
3188 $csv->print (*STDOUT, \@row);
3189 # 1,,, , ,f,g,"h""h",help,help
3190 $csv->keep_meta_info (11);
3191 $csv->print (*STDOUT, \@row);
3192 # 1,,"", ," ",f,"g","h""h",help,"help"
3193
3194=head3 verbatim
3195
3196 my $csv = Text::CSV_PP->new ({ verbatim => 1 });
3197 $csv->verbatim (0);
3198 my $f = $csv->verbatim;
3199
3200This is a quite controversial attribute to set, but makes some hard things
3201possible.
3202
3203The rationale behind this attribute is to tell the parser that the normally
3204special characters newline (C<NL>) and Carriage Return (C<CR>) will not be
3205special when this flag is set, and be dealt with as being ordinary binary
3206characters. This will ease working with data with embedded newlines.
3207
3208When C<verbatim> is used with L</getline>, L</getline> auto-C<chomp>'s
3209every line.
3210
3211Imagine a file format like
3212
3213 M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n
3214
3215where, the line ending is a very specific C<"#\r\n">, and the sep_char is a
3216C<^> (caret). None of the fields is quoted, but embedded binary data is
3217likely to be present. With the specific line ending, this should not be too
3218hard to detect.
3219
3220By default, Text::CSV_PP' parse function is instructed to only know about
3221C<"\n"> and C<"\r"> to be legal line endings, and so has to deal with the
3222embedded newline as a real C<end-of-line>, so it can scan the next line if
3223binary is true, and the newline is inside a quoted field. With this option,
3224we tell L</parse> to parse the line as if C<"\n"> is just nothing more than
3225a binary character.
3226
3227For L</parse> this means that the parser has no more idea about line ending
3228and L</getline> C<chomp>s line endings on reading.
3229
3230=head3 types
3231
3232A set of column types; the attribute is immediately passed to the L</types>
3233method.
3234
3235=head3 callbacks
3236
3237See the L</Callbacks> section below.
3238
3239=head3 accessors
3240
3241To sum it up,
3242
3243 $csv = Text::CSV_PP->new ();
3244
3245is equivalent to
3246
3247 $csv = Text::CSV_PP->new ({
3248 eol => undef, # \r, \n, or \r\n
3249 sep_char => ',',
3250 sep => undef,
3251 quote_char => '"',
3252 quote => undef,
3253 escape_char => '"',
3254 binary => 0,
3255 decode_utf8 => 1,
3256 auto_diag => 0,
3257 diag_verbose => 0,
3258 blank_is_undef => 0,
3259 empty_is_undef => 0,
3260 allow_whitespace => 0,
3261 allow_loose_quotes => 0,
3262 allow_loose_escapes => 0,
3263 allow_unquoted_escape => 0,
3264 always_quote => 0,
3265 quote_empty => 0,
3266 quote_space => 1,
3267 escape_null => 1,
3268 quote_binary => 1,
3269 keep_meta_info => 0,
3270 verbatim => 0,
3271 types => undef,
3272 callbacks => undef,
3273 });
3274
3275For all of the above mentioned flags, an accessor method is available where
3276you can inquire the current value, or change the value
3277
3278 my $quote = $csv->quote_char;
3279 $csv->binary (1);
3280
3281It is not wise to change these settings halfway through writing C<CSV> data
3282to a stream. If however you want to create a new stream using the available
3283C<CSV> object, there is no harm in changing them.
3284
3285If the L</new> constructor call fails, it returns C<undef>, and makes the
3286fail reason available through the L</error_diag> method.
3287
3288 $csv = Text::CSV_PP->new ({ ecs_char => 1 }) or
3289 die "".Text::CSV_PP->error_diag ();
3290
3291L</error_diag> will return a string like
3292
3293 "INI - Unknown attribute 'ecs_char'"
3294
3295=head2 known_attributes
3296
3297 @attr = Text::CSV_PP->known_attributes;
3298 @attr = Text::CSV_PP::known_attributes;
3299 @attr = $csv->known_attributes;
3300
3301This method will return an ordered list of all the supported attributes as
3302described above. This can be useful for knowing what attributes are valid
3303in classes that use or extend Text::CSV_PP.
3304
3305=head2 print
3306
3307 $status = $csv->print ($io, $colref);
3308
3309Similar to L</combine> + L</string> + L</print>, but much more efficient.
3310It expects an array ref as input (not an array!) and the resulting string
3311is not really created, but immediately written to the C<$io> object,
3312typically an IO handle or any other object that offers a L</print> method.
3313
3314For performance reasons C<print> does not create a result string, so all
3315L</string>, L</status>, L</fields>, and L</error_input> methods will return
3316undefined information after executing this method.
3317
3318If C<$colref> is C<undef> (explicit, not through a variable argument) and
3319L</bind_columns> was used to specify fields to be printed, it is possible
3320to make performance improvements, as otherwise data would have to be copied
3321as arguments to the method call:
3322
3323 $csv->bind_columns (\($foo, $bar));
3324 $status = $csv->print ($fh, undef);
3325
3326=head2 say
3327
3328 $status = $csv->say ($io, $colref);
3329
3330Like L<C<print>|/print>, but L<C<eol>|/eol> defaults to C<$\>.
3331
3332=head2 print_hr
3333
3334 $csv->print_hr ($io, $ref);
3335
3336Provides an easy way to print a C<$ref> (as fetched with L</getline_hr>)
3337provided the column names are set with L</column_names>.
3338
3339It is just a wrapper method with basic parameter checks over
3340
3341 $csv->print ($io, [ map { $ref->{$_} } $csv->column_names ]);
3342
3343=head2 combine
3344
3345 $status = $csv->combine (@fields);
3346
3347This method constructs a C<CSV> record from C<@fields>, returning success
3348or failure. Failure can result from lack of arguments or an argument that
3349contains an invalid character. Upon success, L</string> can be called to
3350retrieve the resultant C<CSV> string. Upon failure, the value returned by
3351L</string> is undefined and L</error_input> could be called to retrieve the
3352invalid argument.
3353
3354=head2 string
3355
3356 $line = $csv->string ();
3357
3358This method returns the input to L</parse> or the resultant C<CSV> string
3359of L</combine>, whichever was called more recently.
3360
3361=head2 getline
3362
3363 $colref = $csv->getline ($io);
3364
3365This is the counterpart to L</print>, as L</parse> is the counterpart to
3366L</combine>: it parses a row from the C<$io> handle using the L</getline>
3367method associated with C<$io> and parses this row into an array ref. This
3368array ref is returned by the function or C<undef> for failure. When C<$io>
3369does not support C<getline>, you are likely to hit errors.
3370
3371When fields are bound with L</bind_columns> the return value is a reference
3372to an empty list.
3373
3374The L</string>, L</fields>, and L</status> methods are meaningless again.
3375
3376=head2 getline_all
3377
3378 $arrayref = $csv->getline_all ($io);
3379 $arrayref = $csv->getline_all ($io, $offset);
3380 $arrayref = $csv->getline_all ($io, $offset, $length);
3381
3382This will return a reference to a list of L<getline ($io)|/getline> results.
3383In this call, C<keep_meta_info> is disabled. If C<$offset> is negative, as
3384with C<splice>, only the last C<abs ($offset)> records of C<$io> are taken
3385into consideration.
3386
3387Given a CSV file with 10 lines:
3388
3389 lines call
3390 ----- ---------------------------------------------------------
3391 0..9 $csv->getline_all ($io) # all
3392 0..9 $csv->getline_all ($io, 0) # all
3393 8..9 $csv->getline_all ($io, 8) # start at 8
3394 - $csv->getline_all ($io, 0, 0) # start at 0 first 0 rows
3395 0..4 $csv->getline_all ($io, 0, 5) # start at 0 first 5 rows
3396 4..5 $csv->getline_all ($io, 4, 2) # start at 4 first 2 rows
3397 8..9 $csv->getline_all ($io, -2) # last 2 rows
3398 6..7 $csv->getline_all ($io, -4, 2) # first 2 of last 4 rows
3399
3400=head2 getline_hr
3401
3402The L</getline_hr> and L</column_names> methods work together to allow you
3403to have rows returned as hashrefs. You must call L</column_names> first to
3404declare your column names.
3405
3406 $csv->column_names (qw( code name price description ));
3407 $hr = $csv->getline_hr ($io);
3408 print "Price for $hr->{name} is $hr->{price} EUR\n";
3409
3410L</getline_hr> will croak if called before L</column_names>.
3411
3412Note that L</getline_hr> creates a hashref for every row and will be much
3413slower than the combined use of L</bind_columns> and L</getline> but still
3414offering the same ease of use hashref inside the loop:
3415
3416 my @cols = @{$csv->getline ($io)};
3417 $csv->column_names (@cols);
3418 while (my $row = $csv->getline_hr ($io)) {
3419 print $row->{price};
3420 }
3421
3422Could easily be rewritten to the much faster:
3423
3424 my @cols = @{$csv->getline ($io)};
3425 my $row = {};
3426 $csv->bind_columns (\@{$row}{@cols});
3427 while ($csv->getline ($io)) {
3428 print $row->{price};
3429 }
3430
3431Your mileage may vary for the size of the data and the number of rows.
3432
3433=head2 getline_hr_all
3434
3435 $arrayref = $csv->getline_hr_all ($io);
3436 $arrayref = $csv->getline_hr_all ($io, $offset);
3437 $arrayref = $csv->getline_hr_all ($io, $offset, $length);
3438
3439This will return a reference to a list of L<getline_hr ($io)|/getline_hr>
3440results. In this call, L<C<keep_meta_info>|/keep_meta_info> is disabled.
3441
3442=head2 parse
3443
3444 $status = $csv->parse ($line);
3445
3446This method decomposes a C<CSV> string into fields, returning success or
3447failure. Failure can result from a lack of argument or the given C<CSV>
3448string is improperly formatted. Upon success, L</fields> can be called to
3449retrieve the decomposed fields. Upon failure calling L</fields> will return
3450undefined data and L</error_input> can be called to retrieve the invalid
3451argument.
3452
3453You may use the L</types> method for setting column types. See L</types>'
3454description below.
3455
3456The C<$line> argument is supposed to be a simple scalar. Everything else is
3457supposed to croak and set error 1500.
3458
3459=head2 fragment
3460
3461This function tries to implement RFC7111 (URI Fragment Identifiers for the
3462text/csv Media Type) - http://tools.ietf.org/html/rfc7111
3463
3464 my $AoA = $csv->fragment ($io, $spec);
3465
3466In specifications, C<*> is used to specify the I<last> item, a dash (C<->)
3467to indicate a range. All indices are C<1>-based: the first row or column
3468has index C<1>. Selections can be combined with the semi-colon (C<;>).
3469
3470When using this method in combination with L</column_names>, the returned
3471reference will point to a list of hashes instead of a list of lists. A
3472disjointed cell-based combined selection might return rows with different
3473number of columns making the use of hashes unpredictable.
3474
3475 $csv->column_names ("Name", "Age");
3476 my $AoH = $csv->fragment ($io, "col=3;8");
3477
3478If the L</after_parse> callback is active, it is also called on every line
3479parsed and skipped before the fragment.
3480
3481=over 2
3482
3483=item row
3484
3485 row=4
3486 row=5-7
3487 row=6-*
3488 row=1-2;4;6-*
3489
3490=item col
3491
3492 col=2
3493 col=1-3
3494 col=4-*
3495 col=1-2;4;7-*
3496
3497=item cell
3498
3499In cell-based selection, the comma (C<,>) is used to pair row and column
3500
3501 cell=4,1
3502
3503The range operator (C<->) using C<cell>s can be used to define top-left and
3504bottom-right C<cell> location
3505
3506 cell=3,1-4,6
3507
3508The C<*> is only allowed in the second part of a pair
3509
3510 cell=3,2-*,2 # row 3 till end, only column 2
3511 cell=3,2-3,* # column 2 till end, only row 3
3512 cell=3,2-*,* # strip row 1 and 2, and column 1
3513
3514Cells and cell ranges may be combined with C<;>, possibly resulting in rows
3515with different number of columns
3516
3517 cell=1,1-2,2;3,3-4,4;1,4;4,1
3518
3519Disjointed selections will only return selected cells. The cells that are
3520not specified will not be included in the returned set, not even as
3521C<undef>. As an example given a C<CSV> like
3522
3523 11,12,13,...19
3524 21,22,...28,29
3525 : :
3526 91,...97,98,99
3527
3528with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return:
3529
3530 11,12,14
3531 21,22
3532 33,34
3533 41,43,44
3534
3535Overlapping cell-specs will return those cells only once, So
3536C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return:
3537
3538 11,12,13
3539 21,22,23,24
3540 31,32,33,34
3541 42,43,44
3542
3543=back
3544
3545L<RFC7111|http://tools.ietf.org/html/rfc7111> does B<not> allow different
3546types of specs to be combined (either C<row> I<or> C<col> I<or> C<cell>).
3547Passing an invalid fragment specification will croak and set error 2013.
3548
3549=head2 column_names
3550
3551Set the "keys" that will be used in the L</getline_hr> calls. If no keys
3552(column names) are passed, it will return the current setting as a list.
3553
3554L</column_names> accepts a list of scalars (the column names) or a single
3555array_ref, so you can pass the return value from L</getline> too:
3556
3557 $csv->column_names ($csv->getline ($io));
3558
3559L</column_names> does B<no> checking on duplicates at all, which might lead
3560to unexpected results. Undefined entries will be replaced with the string
3561C<"\cAUNDEF\cA">, so
3562
3563 $csv->column_names (undef, "", "name", "name");
3564 $hr = $csv->getline_hr ($io);
3565
3566Will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to
3567the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd
3568field.
3569
3570L</column_names> croaks on invalid arguments.
3571
3572=head2 header
3573
3574This method does NOT work in perl-5.6.x
3575
3576Parse the CSV header and set L<C<sep>|/sep>, column_names and encoding.
3577
3578 my @hdr = $csv->header ($fh);
3579 $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
3580 $csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" });
3581
3582The first argument should be a file handle.
3583
3584Assuming that the file opened for parsing has a header, and the header does
3585not contain problematic characters like embedded newlines, read the first
3586line from the open handle then auto-detect whether the header separates the
3587column names with a character from the allowed separator list.
3588
3589If any of the allowed separators matches, and none of the I<other> allowed
3590separators match, set L<C<sep>|/sep> to that separator for the current
3591CSV_PP instance and use it to parse the first line, map those to lowercase,
3592and use that to set the instance L</column_names>:
3593
3594 my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
3595 open my $fh, "<", "file.csv";
3596 binmode $fh; # for Windows
3597 $csv->header ($fh);
3598 while (my $row = $csv->getline_hr ($fh)) {
3599 ...
3600 }
3601
3602If the header is empty, contains more than one unique separator out of the
3603allowed set, contains empty fields, or contains identical fields (after
3604folding), it will croak with error 1010, 1011, 1012, or 1013 respectively.
3605
3606If the header contains embedded newlines or is not valid CSV in any other
3607way, this method will croak and leave the parse error untouched.
3608
3609A successful call to C<header> will always set the L<C<sep>|/sep> of the
3610C<$csv> object. This behavior can not be disabled.
3611
3612=head3 return value
3613
3614On error this method will croak.
3615
3616In list context, the headers will be returned whether they are used to set
3617L</column_names> or not.
3618
3619In scalar context, the instance itself is returned. B<Note>: the values as
3620found in the header will effectively be B<lost> if C<set_column_names> is
3621false.
3622
3623=head3 Options
3624
3625=over 2
3626
3627=item sep_set
3628
3629 $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
3630
3631The list of legal separators defaults to C<[ ";", "," ]> and can be changed
3632by this option. As this is probably the most often used option, it can be
3633passed on its own as an unnamed argument:
3634
3635 $csv->header ($fh, [ ";", ",", "|", "\t", "::", "\x{2063}" ]);
3636
3637Multi-byte sequences are allowed, both multi-character and Unicode. See
3638L<C<sep>|/sep>.
3639
3640=item detect_bom
3641
3642 $csv->header ($fh, { detect_bom => 1 });
3643
3644The default behavior is to detect if the header line starts with a BOM. If
3645the header has a BOM, use that to set the encoding of C<$fh>. This default
3646behavior can be disabled by passing a false value to C<detect_bom>.
3647
3648Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, and
3649UTF-32LE. BOM's also support UTF-1, UTF-EBCDIC, SCSU, BOCU-1, and GB-18030
3650but L<Encode> does not (yet). UTF-7 is not supported.
3651
3652The encoding is set using C<binmode> on C<$fh>.
3653
3654If the handle was opened in a (correct) encoding, this method will B<not>
3655alter the encoding, as it checks the leading B<bytes> of the first line.
3656
3657=item munge_column_names
3658
3659This option offers the means to modify the column names into something that
3660is most useful to the application. The default is to map all column names
3661to lower case.
3662
3663 $csv->header ($fh, { munge_column_names => "lc" });
3664
3665The following values are available:
3666
3667 lc - lower case
3668 uc - upper case
3669 none - do not change
3670 \&cb - supply a callback
3671
3672 $csv->header ($fh, { munge_column_names => sub { fc } });
3673 $csv->header ($fh, { munge_column_names => sub { "column_".$col++ } });
3674 $csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } });
3675
3676As this callback is called in a C<map>, you can use C<$_> directly.
3677
3678=item set_column_names
3679
3680 $csv->header ($fh, { set_column_names => 1 });
3681
3682The default is to set the instances column names using L</column_names> if
3683the method is successful, so subsequent calls to L</getline_hr> can return
3684a hash. Disable setting the header can be forced by using a false value for
3685this option.
3686
3687=back
3688
3689=head3 Validation
3690
3691When receiving CSV files from external sources, this method can be used to
3692protect against changes in the layout by restricting to known headers (and
3693typos in the header fields).
3694
3695 my %known = (
3696 "record key" => "c_rec",
3697 "rec id" => "c_rec",
3698 "id_rec" => "c_rec",
3699 "kode" => "code",
3700 "code" => "code",
3701 "vaule" => "value",
3702 "value" => "value",
3703 );
3704 my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
3705 open my $fh, "<", $source or die "$source: $!";
3706 $csv->header ($fh, { munge_column_names => sub {
3707 s/\s+$//;
3708 s/^\s+//;
3709 $known{lc $_} or die "Unknown column '$_' in $source";
3710 }});
3711 while (my $row = $csv->getline_hr ($fh)) {
3712 say join "\t", $row->{c_rec}, $row->{code}, $row->{value};
3713 }
3714
3715=head2 bind_columns
3716
3717Takes a list of scalar references to be used for output with L</print> or
3718to store in the fields fetched by L</getline>. When you do not pass enough
3719references to store the fetched fields in, L</getline> will fail with error
3720C<3006>. If you pass more than there are fields to return, the content of
3721the remaining references is left untouched.
3722
3723 $csv->bind_columns (\$code, \$name, \$price, \$description);
3724 while ($csv->getline ($io)) {
3725 print "The price of a $name is \x{20ac} $price\n";
3726 }
3727
3728To reset or clear all column binding, call L</bind_columns> with the single
3729argument C<undef>. This will also clear column names.
3730
3731 $csv->bind_columns (undef);
3732
3733If no arguments are passed at all, L</bind_columns> will return the list of
3734current bindings or C<undef> if no binds are active.
3735
3736Note that in parsing with C<bind_columns>, the fields are set on the fly.
3737That implies that if the third field of a row causes an error, the first
3738two fields already have been assigned the values of the current row, while
3739the rest of the fields will still hold the values of the previous row.
3740If you want the parser to fail in these cases, use the L<C<strict>|/strict> attribute.
3741
3742=head2 eof
3743
3744 $eof = $csv->eof ();
3745
3746If L</parse> or L</getline> was used with an IO stream, this method will
3747return true (1) if the last call hit end of file, otherwise it will return
3748false (''). This is useful to see the difference between a failure and end
3749of file.
3750
3751Note that if the parsing of the last line caused an error, C<eof> is still
3752true. That means that if you are I<not> using L</auto_diag>, an idiom like
3753
3754 while (my $row = $csv->getline ($fh)) {
3755 # ...
3756 }
3757 $csv->eof or $csv->error_diag;
3758
3759will I<not> report the error. You would have to change that to
3760
3761 while (my $row = $csv->getline ($fh)) {
3762 # ...
3763 }
3764 +$csv->error_diag and $csv->error_diag;
3765
3766=head2 types
3767
3768 $csv->types (\@tref);
3769
3770This method is used to force that (all) columns are of a given type. For
3771example, if you have an integer column, two columns with doubles and a
3772string column, then you might do a
3773
3774 $csv->types ([Text::CSV_PP::IV (),
3775 Text::CSV_PP::NV (),
3776 Text::CSV_PP::NV (),
3777 Text::CSV_PP::PV ()]);
3778
3779Column types are used only for I<decoding> columns while parsing, in other
3780words by the L</parse> and L</getline> methods.
3781
3782You can unset column types by doing a
3783
3784 $csv->types (undef);
3785
3786or fetch the current type settings with
3787
3788 $types = $csv->types ();
3789
3790=over 4
3791
3792=item IV
3793
3794Set field type to integer.
3795
3796=item NV
3797
3798Set field type to numeric/float.
3799
3800=item PV
3801
3802Set field type to string.
3803
3804=back
3805
3806=head2 fields
3807
3808 @columns = $csv->fields ();
3809
3810This method returns the input to L</combine> or the resultant decomposed
3811fields of a successful L</parse>, whichever was called more recently.
3812
3813Note that the return value is undefined after using L</getline>, which does
3814not fill the data structures returned by L</parse>.
3815
3816=head2 meta_info
3817
3818 @flags = $csv->meta_info ();
3819
3820This method returns the "flags" of the input to L</combine> or the flags of
3821the resultant decomposed fields of L</parse>, whichever was called more
3822recently.
3823
3824For each field, a meta_info field will hold flags that inform something
3825about the field returned by the L</fields> method or passed to the
3826L</combine> method. The flags are bit-wise-C<or>'d like:
3827
3828=over 2
3829
3830=item C< >0x0001
3831
3832The field was quoted.
3833
3834=item C< >0x0002
3835
3836The field was binary.
3837
3838=back
3839
3840See the C<is_***> methods below.
3841
3842=head2 is_quoted
3843
3844 my $quoted = $csv->is_quoted ($column_idx);
3845
3846Where C<$column_idx> is the (zero-based) index of the column in the last
3847result of L</parse>.
3848
3849This returns a true value if the data in the indicated column was enclosed
3850in L<C<quote_char>|/quote_char> quotes. This might be important for fields
3851where content C<,20070108,> is to be treated as a numeric value, and where
3852C<,"20070108",> is explicitly marked as character string data.
3853
3854This method is only valid when L</keep_meta_info> is set to a true value.
3855
3856=head2 is_binary
3857
3858 my $binary = $csv->is_binary ($column_idx);
3859
3860Where C<$column_idx> is the (zero-based) index of the column in the last
3861result of L</parse>.
3862
3863This returns a true value if the data in the indicated column contained any
3864byte in the range C<[\x00-\x08,\x10-\x1F,\x7F-\xFF]>.
3865
3866This method is only valid when L</keep_meta_info> is set to a true value.
3867
3868=head2 is_missing
3869
3870 my $missing = $csv->is_missing ($column_idx);
3871
3872Where C<$column_idx> is the (zero-based) index of the column in the last
3873result of L</getline_hr>.
3874
3875 $csv->keep_meta_info (1);
3876 while (my $hr = $csv->getline_hr ($fh)) {
3877 $csv->is_missing (0) and next; # This was an empty line
3878 }
3879
3880When using L</getline_hr>, it is impossible to tell if the parsed fields
3881are C<undef> because they where not filled in the C<CSV> stream or because
3882they were not read at all, as B<all> the fields defined by L</column_names>
3883are set in the hash-ref. If you still need to know if all fields in each
3884row are provided, you should enable L<C<keep_meta_info>|/keep_meta_info> so
3885you can check the flags.
3886
3887If L<C<keep_meta_info>|/keep_meta_info> is C<false>, C<is_missing> will
3888always return C<undef>, regardless of C<$column_idx> being valid or not. If
3889this attribute is C<true> it will return either C<0> (the field is present)
3890or C<1> (the field is missing).
3891
3892A special case is the empty line. If the line is completely empty - after
3893dealing with the flags - this is still a valid CSV line: it is a record of
3894just one single empty field. However, if C<keep_meta_info> is set, invoking
3895C<is_missing> with index C<0> will now return true.
3896
3897=head2 status
3898
3899 $status = $csv->status ();
3900
3901This method returns the status of the last invoked L</combine> or L</parse>
3902call. Status is success (true: C<1>) or failure (false: C<undef> or C<0>).
3903
3904=head2 error_input
3905
3906 $bad_argument = $csv->error_input ();
3907
3908This method returns the erroneous argument (if it exists) of L</combine> or
3909L</parse>, whichever was called more recently. If the last invocation was
3910successful, C<error_input> will return C<undef>.
3911
3912=head2 error_diag
3913
3914 Text::CSV_PP->error_diag ();
3915 $csv->error_diag ();
3916 $error_code = 0 + $csv->error_diag ();
3917 $error_str = "" . $csv->error_diag ();
3918 ($cde, $str, $pos, $rec, $fld) = $csv->error_diag ();
3919
3920If (and only if) an error occurred, this function returns the diagnostics
3921of that error.
3922
3923If called in void context, this will print the internal error code and the
3924associated error message to STDERR.
3925
3926If called in list context, this will return the error code and the error
3927message in that order. If the last error was from parsing, the rest of the
3928values returned are a best guess at the location within the line that was
3929being parsed. Their values are 1-based. The position currently is index of
3930the byte at which the parsing failed in the current record. It might change
3931to be the index of the current character in a later release. The records is
3932the index of the record parsed by the csv instance. The field number is the
3933index of the field the parser thinks it is currently trying to parse. See
3934F<examples/csv-check> for how this can be used.
3935
3936If called in scalar context, it will return the diagnostics in a single
3937scalar, a-la C<$!>. It will contain the error code in numeric context, and
3938the diagnostics message in string context.
3939
3940When called as a class method or a direct function call, the diagnostics
3941are that of the last L</new> call.
3942
3943=head2 record_number
3944
3945 $recno = $csv->record_number ();
3946
3947Returns the records parsed by this csv instance. This value should be more
3948accurate than C<$.> when embedded newlines come in play. Records written by
3949this instance are not counted.
3950
3951=head2 SetDiag
3952
3953 $csv->SetDiag (0);
3954
3955Use to reset the diagnostics if you are dealing with errors.
3956
3957=head1 FUNCTIONS
3958
3959This whole section is also taken from Text::CSV_XS.
3960
3961=head2 csv
3962
3963This function is not exported by default and should be explicitly requested:
3964
3965 use Text::CSV_PP qw( csv );
3966
3967This is an high-level function that aims at simple (user) interfaces. This
3968can be used to read/parse a C<CSV> file or stream (the default behavior) or
3969to produce a file or write to a stream (define the C<out> attribute). It
3970returns an array- or hash-reference on parsing (or C<undef> on fail) or the
3971numeric value of L</error_diag> on writing. When this function fails you
3972can get to the error using the class call to L</error_diag>
3973
3974 my $aoa = csv (in => "test.csv") or
3975 die Text::CSV_PP->error_diag;
3976
3977This function takes the arguments as key-value pairs. This can be passed as
3978a list or as an anonymous hash:
3979
3980 my $aoa = csv ( in => "test.csv", sep_char => ";");
3981 my $aoh = csv ({ in => $fh, headers => "auto" });
3982
3983The arguments passed consist of two parts: the arguments to L</csv> itself
3984and the optional attributes to the C<CSV> object used inside the function
3985as enumerated and explained in L</new>.
3986
3987If not overridden, the default option used for CSV is
3988
3989 auto_diag => 1
3990 escape_null => 0
3991
3992The option that is always set and cannot be altered is
3993
3994 binary => 1
3995
3996As this function will likely be used in one-liners, it allows C<quote> to
3997be abbreviated as C<quo>, and C<escape_char> to be abbreviated as C<esc>
3998or C<escape>.
3999
4000Alternative invocations:
4001
4002 my $aoa = Text::CSV_PP::csv (in => "file.csv");
4003
4004 my $csv = Text::CSV_PP->new ();
4005 my $aoa = $csv->csv (in => "file.csv");
4006
4007In the latter case, the object attributes are used from the existing object
4008and the attribute arguments in the function call are ignored:
4009
4010 my $csv = Text::CSV_PP->new ({ sep_char => ";" });
4011 my $aoh = $csv->csv (in => "file.csv", bom => 1);
4012
4013will parse using C<;> as C<sep_char>, not C<,>.
4014
4015=head3 in
4016
4017Used to specify the source. C<in> can be a file name (e.g. C<"file.csv">),
4018which will be opened for reading and closed when finished, a file handle
4019(e.g. C<$fh> or C<FH>), a reference to a glob (e.g. C<\*ARGV>), the glob
4020itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>).
4021
4022When used with L</out>, C<in> should be a reference to a CSV structure (AoA
4023or AoH) or a CODE-ref that returns an array-reference or a hash-reference.
4024The code-ref will be invoked with no arguments.
4025
4026 my $aoa = csv (in => "file.csv");
4027
4028 open my $fh, "<", "file.csv";
4029 my $aoa = csv (in => $fh);
4030
4031 my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]];
4032 my $err = csv (in => $csv, out => "file.csv");
4033
4034If called in void context without the L</out> attribute, the resulting ref
4035will be used as input to a subsequent call to csv:
4036
4037 csv (in => "file.csv", filter => { 2 => sub { length > 2 }})
4038
4039will be a shortcut to
4040
4041 csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}))
4042
4043where, in the absence of the C<out> attribute, this is a shortcut to
4044
4045 csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}),
4046 out => *STDOUT)
4047
4048=head3 out
4049
4050In output mode, the default CSV options when producing CSV are
4051
4052 eol => "\r\n"
4053
4054The L</fragment> attribute is ignored in output mode.
4055
4056C<out> can be a file name (e.g. C<"file.csv">), which will be opened for
4057writing and closed when finished, a file handle (e.g. C<$fh> or C<FH>), a
4058reference to a glob (e.g. C<\*STDOUT>), or the glob itself (e.g. C<*STDOUT>).
4059
4060 csv (in => sub { $sth->fetch }, out => "dump.csv");
4061 csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv",
4062 headers => $sth->{NAME_lc});
4063
4064When a code-ref is used for C<in>, the output is generated per invocation,
4065so no buffering is involved. This implies that there is no size restriction
4066on the number of records. The C<csv> function ends when the coderef returns
4067a false value.
4068
4069=head3 encoding
4070
4071If passed, it should be an encoding accepted by the C<:encoding()> option
4072to C<open>. There is no default value. This attribute does not work in perl
40735.6.x. C<encoding> can be abbreviated to C<enc> for ease of use in command
4074line invocations.
4075
4076If C<encoding> is set to the literal value C<"auto">, the method L</header>
4077will be invoked on the opened stream to check if there is a BOM and set the
4078encoding accordingly. This is equal to passing a true value in the option
4079L<C<detect_bom>|/detect_bom>.
4080
4081=head3 detect_bom
4082
4083If C<detect_bom> is given, the method L</header> will be invoked on the
4084opened stream to check if there is a BOM and set the encoding accordingly.
4085
4086C<detect_bom> can be abbreviated to C<bom>.
4087
4088This is the same as setting L<C<encoding>|/encoding> to C<"auto">.
4089
4090Note that as L</header> is invoked, its default is to also set the headers.
4091
4092=head3 headers
4093
4094If this attribute is not given, the default behavior is to produce an array
4095of arrays.
4096
4097If C<headers> is supplied, it should be an anonymous list of column names,
4098an anonymous hashref, a coderef, or a literal flag: C<auto>, C<lc>, C<uc>,
4099or C<skip>.
4100
4101=over 2
4102
4103=item skip
4104
4105When C<skip> is used, the header will not be included in the output.
4106
4107 my $aoa = csv (in => $fh, headers => "skip");
4108
4109=item auto
4110
4111If C<auto> is used, the first line of the C<CSV> source will be read as the
4112list of field headers and used to produce an array of hashes.
4113
4114 my $aoh = csv (in => $fh, headers => "auto");
4115
4116=item lc
4117
4118If C<lc> is used, the first line of the C<CSV> source will be read as the
4119list of field headers mapped to lower case and used to produce an array of
4120hashes. This is a variation of C<auto>.
4121
4122 my $aoh = csv (in => $fh, headers => "lc");
4123
4124=item uc
4125
4126If C<uc> is used, the first line of the C<CSV> source will be read as the
4127list of field headers mapped to upper case and used to produce an array of
4128hashes. This is a variation of C<auto>.
4129
4130 my $aoh = csv (in => $fh, headers => "uc");
4131
4132=item CODE
4133
4134If a coderef is used, the first line of the C<CSV> source will be read as
4135the list of mangled field headers in which each field is passed as the only
4136argument to the coderef. This list is used to produce an array of hashes.
4137
4138 my $aoh = csv (in => $fh,
4139 headers => sub { lc ($_[0]) =~ s/kode/code/gr });
4140
4141this example is a variation of using C<lc> where all occurrences of C<kode>
4142are replaced with C<code>.
4143
4144=item ARRAY
4145
4146If C<headers> is an anonymous list, the entries in the list will be used
4147as field names. The first line is considered data instead of headers.
4148
4149 my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]);
4150 csv (in => $aoa, out => $fh, headers => [qw( code description price )]);
4151
4152=item HASH
4153
4154If C<headers> is an hash reference, this implies C<auto>, but header fields
4155for that exist as key in the hashref will be replaced by the value for that
4156key. Given a CSV file like
4157
4158 post-kode,city,name,id number,fubble
4159 1234AA,Duckstad,Donald,13,"X313DF"
4160
4161using
4162
4163 csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ...
4164
4165will return an entry like
4166
4167 { pc => "1234AA",
4168 city => "Duckstad",
4169 name => "Donald",
4170 ID => "13",
4171 fubble => "X313DF",
4172 }
4173
4174=back
4175
4176See also L<C<munge_column_names>|/munge_column_names> and
4177L<C<set_column_names>|/set_column_names>.
4178
4179=head3 munge_column_names
4180
4181If C<munge_column_names> is set, the method L</header> is invoked on the
4182opened stream with all matching arguments to detect and set the headers.
4183
4184C<munge_column_names> can be abbreviated to C<munge>.
4185
4186=head3 key
4187
4188If passed, will default L<C<headers>|/headers> to C<"auto"> and return a
4189hashref instead of an array of hashes.
4190
4191 my $ref = csv (in => "test.csv", key => "code");
4192
4193with test.csv like
4194
4195 code,product,price,color
4196 1,pc,850,gray
4197 2,keyboard,12,white
4198 3,mouse,5,black
4199
4200will return
4201
4202 { 1 => {
4203 code => 1,
4204 color => 'gray',
4205 price => 850,
4206 product => 'pc'
4207 },
4208 2 => {
4209 code => 2,
4210 color => 'white',
4211 price => 12,
4212 product => 'keyboard'
4213 },
4214 3 => {
4215 code => 3,
4216 color => 'black',
4217 price => 5,
4218 product => 'mouse'
4219 }
4220 }
4221
4222=head3 fragment
4223
4224Only output the fragment as defined in the L</fragment> method. This option
4225is ignored when I<generating> C<CSV>. See L</out>.
4226
4227Combining all of them could give something like
4228
4229 use Text::CSV_PP qw( csv );
4230 my $aoh = csv (
4231 in => "test.txt",
4232 encoding => "utf-8",
4233 headers => "auto",
4234 sep_char => "|",
4235 fragment => "row=3;6-9;15-*",
4236 );
4237 say $aoh->[15]{Foo};
4238
4239=head3 sep_set
4240
4241If C<sep_set> is set, the method L</header> is invoked on the opened stream
4242to detect and set L<C<sep_char>|/sep_char> with the given set.
4243
4244C<sep_set> can be abbreviated to C<seps>.
4245
4246Note that as L</header> is invoked, its default is to also set the headers.
4247
4248=head3 set_column_names
4249
4250If C<set_column_names> is passed, the method L</header> is invoked on the
4251opened stream with all arguments meant for L</header>.
4252
4253=head2 Callbacks
4254
4255Callbacks enable actions triggered from the I<inside> of Text::CSV_PP.
4256
4257While most of what this enables can easily be done in an unrolled loop as
4258described in the L</SYNOPSIS> callbacks can be used to meet special demands
4259or enhance the L</csv> function.
4260
4261=over 2
4262
4263=item error
4264
4265 $csv->callbacks (error => sub { $csv->SetDiag (0) });
4266
4267the C<error> callback is invoked when an error occurs, but I<only> when
4268L</auto_diag> is set to a true value. A callback is invoked with the values
4269returned by L</error_diag>:
4270
4271 my ($c, $s);
4272
4273 sub ignore3006
4274 {
4275 my ($err, $msg, $pos, $recno, $fldno) = @_;
4276 if ($err == 3006) {
4277 # ignore this error
4278 ($c, $s) = (undef, undef);
4279 Text::CSV_PP->SetDiag (0);
4280 }
4281 # Any other error
4282 return;
4283 } # ignore3006
4284
4285 $csv->callbacks (error => \&ignore3006);
4286 $csv->bind_columns (\$c, \$s);
4287 while ($csv->getline ($fh)) {
4288 # Error 3006 will not stop the loop
4289 }
4290
4291=item after_parse
4292
4293 $csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" });
4294 while (my $row = $csv->getline ($fh)) {
4295 $row->[-1] eq "NEW";
4296 }
4297
4298This callback is invoked after parsing with L</getline> only if no error
4299occurred. The callback is invoked with two arguments: the current C<CSV>
4300parser object and an array reference to the fields parsed.
4301
4302The return code of the callback is ignored unless it is a reference to the
4303string "skip", in which case the record will be skipped in L</getline_all>.
4304
4305 sub add_from_db
4306 {
4307 my ($csv, $row) = @_;
4308 $sth->execute ($row->[4]);
4309 push @$row, $sth->fetchrow_array;
4310 } # add_from_db
4311
4312 my $aoa = csv (in => "file.csv", callbacks => {
4313 after_parse => \&add_from_db });
4314
4315This hook can be used for validation:
4316
4317=over 2
4318
4319=item FAIL
4320
4321Die if any of the records does not validate a rule:
4322
4323 after_parse => sub {
4324 $_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or
4325 die "5th field does not have a valid Dutch zipcode";
4326 }
4327
4328=item DEFAULT
4329
4330Replace invalid fields with a default value:
4331
4332 after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 }
4333
4334=item SKIP
4335
4336Skip records that have invalid fields (only applies to L</getline_all>):
4337
4338 after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; }
4339
4340=back
4341
4342=item before_print
4343
4344 my $idx = 1;
4345 $csv->callbacks (before_print => sub { $_[1][0] = $idx++ });
4346 $csv->print (*STDOUT, [ 0, $_ ]) for @members;
4347
4348This callback is invoked before printing with L</print> only if no error
4349occurred. The callback is invoked with two arguments: the current C<CSV>
4350parser object and an array reference to the fields passed.
4351
4352The return code of the callback is ignored.
4353
4354 sub max_4_fields
4355 {
4356 my ($csv, $row) = @_;
4357 @$row > 4 and splice @$row, 4;
4358 } # max_4_fields
4359
4360 csv (in => csv (in => "file.csv"), out => *STDOUT,
4361 callbacks => { before print => \&max_4_fields });
4362
4363This callback is not active for L</combine>.
4364
4365=back
4366
4367=head3 Callbacks for csv ()
4368
4369The L</csv> allows for some callbacks that do not integrate in XS internals
4370but only feature the L</csv> function.
4371
4372 csv (in => "file.csv",
4373 callbacks => {
4374 filter => { 6 => sub { $_ > 15 } }, # first
4375 after_parse => sub { say "AFTER PARSE"; }, # first
4376 after_in => sub { say "AFTER IN"; }, # second
4377 on_in => sub { say "ON IN"; }, # third
4378 },
4379 );
4380
4381 csv (in => $aoh,
4382 out => "file.csv",
4383 callbacks => {
4384 on_in => sub { say "ON IN"; }, # first
4385 before_out => sub { say "BEFORE OUT"; }, # second
4386 before_print => sub { say "BEFORE PRINT"; }, # third
4387 },
4388 );
4389
4390=over 2
4391
4392=item filter
4393
4394This callback can be used to filter records. It is called just after a new
4395record has been scanned. The callback accepts a hashref where the keys are
4396the index to the row (the field number, 1-based) and the values are subs to
4397return a true or false value.
4398
4399 csv (in => "file.csv", filter => {
4400 3 => sub { m/a/ }, # third field should contain an "a"
4401 5 => sub { length > 4 }, # length of the 5th field minimal 5
4402 });
4403
4404 csv (in => "file.csv", filter => "not_blank");
4405 csv (in => "file.csv", filter => "not_empty");
4406 csv (in => "file.csv", filter => "filled");
4407
4408If the keys to the filter hash contain any character that is not a digit it
4409will also implicitly set L</headers> to C<"auto"> unless L</headers> was
4410already passed as argument. When headers are active, returning an array of
4411hashes, the filter is not applicable to the header itself.
4412
4413 csv (in => "file.csv", filter => { foo => sub { $_ > 4 }});
4414
4415All sub results should match, as in AND.
4416
4417The context of the callback sets C<$_> localized to the field indicated by
4418the filter. The two arguments are as with all other callbacks, so the other
4419fields in the current row can be seen:
4420
4421 filter => { 3 => sub { $_ > 100 ? $_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }}
4422
4423If the context is set to return a list of hashes (L</headers> is defined),
4424the current record will also be available in the localized C<%_>:
4425
4426 filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000 }}
4427
4428If the filter is used to I<alter> the content by changing C<$_>, make sure
4429that the sub returns true in order not to have that record skipped:
4430
4431 filter => { 2 => sub { $_ = uc }}
4432
4433will upper-case the second field, and then skip it if the resulting content
4434evaluates to false. To always accept, end with truth:
4435
4436 filter => { 2 => sub { $_ = uc; 1 }}
4437
4438B<Predefined filters>
4439
4440Given a file like (line numbers prefixed for doc purpose only):
4441
4442 1:1,2,3
4443 2:
4444 3:,
4445 4:""
4446 5:,,
4447 6:, ,
4448 7:"",
4449 8:" "
4450 9:4,5,6
4451
4452=over 2
4453
4454=item not_blank
4455
4456Filter out the blank lines
4457
4458This filter is a shortcut for
4459
4460 filter => { 0 => sub { @{$_[1]} > 1 or
4461 defined $_[1][0] && $_[1][0] ne "" } }
4462
4463Due to the implementation, it is currently impossible to also filter lines
4464that consists only of a quoted empty field. These lines are also considered
4465blank lines.
4466
4467With the given example, lines 2 and 4 will be skipped.
4468
4469=item not_empty
4470
4471Filter out lines where all the fields are empty.
4472
4473This filter is a shortcut for
4474
4475 filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } }
4476
4477A space is not regarded being empty, so given the example data, lines 2, 3,
44784, 5, and 7 are skipped.
4479
4480=item filled
4481
4482Filter out lines that have no visible data
4483
4484This filter is a shortcut for
4485
4486 filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } }
4487
4488This filter rejects all lines that I<not> have at least one field that does
4489not evaluate to the empty string.
4490
4491With the given example data, this filter would skip lines 2 through 8.
4492
4493=back
4494
4495=item after_in
4496
4497This callback is invoked for each record after all records have been parsed
4498but before returning the reference to the caller. The hook is invoked with
4499two arguments: the current C<CSV> parser object and a reference to the
4500record. The reference can be a reference to a HASH or a reference to an
4501ARRAY as determined by the arguments.
4502
4503This callback can also be passed as an attribute without the C<callbacks>
4504wrapper.
4505
4506=item before_out
4507
4508This callback is invoked for each record before the record is printed. The
4509hook is invoked with two arguments: the current C<CSV> parser object and a
4510reference to the record. The reference can be a reference to a HASH or a
4511reference to an ARRAY as determined by the arguments.
4512
4513This callback can also be passed as an attribute without the C<callbacks>
4514wrapper.
4515
4516This callback makes the row available in C<%_> if the row is a hashref. In
4517this case C<%_> is writable and will change the original row.
4518
4519=item on_in
4520
4521This callback acts exactly as the L</after_in> or the L</before_out> hooks.
4522
4523This callback can also be passed as an attribute without the C<callbacks>
4524wrapper.
4525
4526This callback makes the row available in C<%_> if the row is a hashref. In
4527this case C<%_> is writable and will change the original row. So e.g. with
4528
4529 my $aoh = csv (
4530 in => \"foo\n1\n2\n",
4531 headers => "auto",
4532 on_in => sub { $_{bar} = 2; },
4533 );
4534
4535C<$aoh> will be:
4536
4537 [ { foo => 1,
4538 bar => 2,
4539 }
4540 { foo => 2,
4541 bar => 2,
4542 }
4543 ]
4544
4545=item csv
4546
4547The I<function> L</csv> can also be called as a method or with an existing
4548Text::CSV_PP object. This could help if the function is to be invoked a lot
4549of times and the overhead of creating the object internally over and over
4550again would be prevented by passing an existing instance.
4551
4552 my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
4553
4554 my $aoa = $csv->csv (in => $fh);
4555 my $aoa = csv (in => $fh, csv => $csv);
4556
4557both act the same. Running this 20000 times on a 20 lines CSV file, showed
4558a 53% speedup.
4559
4560=back
4561
4562=head1 DIAGNOSTICS
4563
4564This section is also taken from Text::CSV_XS.
4565
4566If an error occurs, C<< $csv->error_diag >> can be used to get information
4567on the cause of the failure. Note that for speed reasons the internal value
4568is never cleared on success, so using the value returned by L</error_diag>
4569in normal cases - when no error occurred - may cause unexpected results.
4570
4571If the constructor failed, the cause can be found using L</error_diag> as a
4572class method, like C<< Text::CSV_PP->error_diag >>.
4573
4574The C<< $csv->error_diag >> method is automatically invoked upon error when
4575the contractor was called with L<C<auto_diag>|/auto_diag> set to C<1> or
4576C<2>, or when L<autodie> is in effect. When set to C<1>, this will cause a
4577C<warn> with the error message, when set to C<2>, it will C<die>. C<2012 -
4578EOF> is excluded from L<C<auto_diag>|/auto_diag> reports.
4579
4580Errors can be (individually) caught using the L</error> callback.
4581
4582The errors as described below are available. I have tried to make the error
4583itself explanatory enough, but more descriptions will be added. For most of
4584these errors, the first three capitals describe the error category:
4585
4586=over 2
4587
4588=item *
4589INI
4590
4591Initialization error or option conflict.
4592
4593=item *
4594ECR
4595
4596Carriage-Return related parse error.
4597
4598=item *
4599EOF
4600
4601End-Of-File related parse error.
4602
4603=item *
4604EIQ
4605
4606Parse error inside quotation.
4607
4608=item *
4609EIF
4610
4611Parse error inside field.
4612
4613=item *
4614ECB
4615
4616Combine error.
4617
4618=item *
4619EHR
4620
4621HashRef parse related error.
4622
4623=back
4624
4625And below should be the complete list of error codes that can be returned:
4626
4627=over 2
4628
4629=item *
46301001 "INI - sep_char is equal to quote_char or escape_char"
4631X<1001>
4632
4633The L<separation character|/sep_char> cannot be equal to L<the quotation
4634character|/quote_char> or to L<the escape character|/escape_char>, as this
4635would invalidate all parsing rules.
4636
4637=item *
46381002 "INI - allow_whitespace with escape_char or quote_char SP or TAB"
4639X<1002>
4640
4641Using the L<C<allow_whitespace>|/allow_whitespace> attribute when either
4642L<C<quote_char>|/quote_char> or L<C<escape_char>|/escape_char> is equal to
4643C<SPACE> or C<TAB> is too ambiguous to allow.
4644
4645=item *
46461003 "INI - \r or \n in main attr not allowed"
4647X<1003>
4648
4649Using default L<C<eol>|/eol> characters in either L<C<sep_char>|/sep_char>,
4650L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> is not
4651allowed.
4652
4653=item *
46541004 "INI - callbacks should be undef or a hashref"
4655X<1004>
4656
4657The L<C<callbacks>|/Callbacks> attribute only allows one to be C<undef> or
4658a hash reference.
4659
4660=item *
46611005 "INI - EOL too long"
4662X<1005>
4663
4664The value passed for EOL is exceeding its maximum length (16).
4665
4666=item *
46671006 "INI - SEP too long"
4668X<1006>
4669
4670The value passed for SEP is exceeding its maximum length (16).
4671
4672=item *
46731007 "INI - QUOTE too long"
4674X<1007>
4675
4676The value passed for QUOTE is exceeding its maximum length (16).
4677
4678=item *
46791008 "INI - SEP undefined"
4680X<1008>
4681
4682The value passed for SEP should be defined and not empty.
4683
4684=item *
46851010 "INI - the header is empty"
4686X<1010>
4687
4688The header line parsed in the L</header> is empty.
4689
4690=item *
46911011 "INI - the header contains more than one valid separator"
4692X<1011>
4693
4694The header line parsed in the L</header> contains more than one (unique)
4695separator character out of the allowed set of separators.
4696
4697=item *
46981012 "INI - the header contains an empty field"
4699X<1012>
4700
4701The header line parsed in the L</header> is contains an empty field.
4702
4703=item *
47041013 "INI - the header contains nun-unique fields"
4705X<1013>
4706
4707The header line parsed in the L</header> contains at least two identical
4708fields.
4709
4710=item *
47111014 "INI - header called on undefined stream"
4712X<1014>
4713
4714The header line cannot be parsed from an undefined sources.
4715
4716=item *
47171500 "PRM - Invalid/unsupported argument(s)"
4718X<1500>
4719
4720Function or method called with invalid argument(s) or parameter(s).
4721
4722=item *
47232010 "ECR - QUO char inside quotes followed by CR not part of EOL"
4724X<2010>
4725
4726When L<C<eol>|/eol> has been set to anything but the default, like
4727C<"\r\t\n">, and the C<"\r"> is following the B<second> (closing)
4728L<C<quote_char>|/quote_char>, where the characters following the C<"\r"> do
4729not make up the L<C<eol>|/eol> sequence, this is an error.
4730
4731=item *
47322011 "ECR - Characters after end of quoted field"
4733X<2011>
4734
4735Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted
4736field and after the closing double-quote, there should be either a new-line
4737sequence or a separation character.
4738
4739=item *
47402012 "EOF - End of data in parsing input stream"
4741X<2012>
4742
4743Self-explaining. End-of-file while inside parsing a stream. Can happen only
4744when reading from streams with L</getline>, as using L</parse> is done on
4745strings that are not required to have a trailing L<C<eol>|/eol>.
4746
4747=item *
47482013 "INI - Specification error for fragments RFC7111"
4749X<2013>
4750
4751Invalid specification for URI L</fragment> specification.
4752
4753=item *
47542014 "ENF - Inconsistent number of fields"
4755X<2014>
4756
4757Inconsistent number of fields under strict parsing.
4758
4759=item *
47602021 "EIQ - NL char inside quotes, binary off"
4761X<2021>
4762
4763Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option
4764has been selected with the constructor.
4765
4766=item *
47672022 "EIQ - CR char inside quotes, binary off"
4768X<2022>
4769
4770Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option
4771has been selected with the constructor.
4772
4773=item *
47742023 "EIQ - QUO character not allowed"
4775X<2023>
4776
4777Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n>
4778will cause this error.
4779
4780=item *
47812024 "EIQ - EOF cannot be escaped, not even inside quotes"
4782X<2024>
4783
4784The escape character is not allowed as last character in an input stream.
4785
4786=item *
47872025 "EIQ - Loose unescaped escape"
4788X<2025>
4789
4790An escape character should escape only characters that need escaping.
4791
4792Allowing the escape for other characters is possible with the attribute
4793L</allow_loose_escape>.
4794
4795=item *
47962026 "EIQ - Binary character inside quoted field, binary off"
4797X<2026>
4798
4799Binary characters are not allowed by default. Exceptions are fields that
4800contain valid UTF-8, that will automatically be upgraded if the content is
4801valid UTF-8. Set L<C<binary>|/binary> to C<1> to accept binary data.
4802
4803=item *
48042027 "EIQ - Quoted field not terminated"
4805X<2027>
4806
4807When parsing a field that started with a quotation character, the field is
4808expected to be closed with a quotation character. When the parsed line is
4809exhausted before the quote is found, that field is not terminated.
4810
4811=item *
48122030 "EIF - NL char inside unquoted verbatim, binary off"
4813X<2030>
4814
4815=item *
48162031 "EIF - CR char is first char of field, not part of EOL"
4817X<2031>
4818
4819=item *
48202032 "EIF - CR char inside unquoted, not part of EOL"
4821X<2032>
4822
4823=item *
48242034 "EIF - Loose unescaped quote"
4825X<2034>
4826
4827=item *
48282035 "EIF - Escaped EOF in unquoted field"
4829X<2035>
4830
4831=item *
48322036 "EIF - ESC error"
4833X<2036>
4834
4835=item *
48362037 "EIF - Binary character in unquoted field, binary off"
4837X<2037>
4838
4839=item *
48402110 "ECB - Binary character in Combine, binary off"
4841X<2110>
4842
4843=item *
48442200 "EIO - print to IO failed. See errno"
4845X<2200>
4846
4847=item *
48483001 "EHR - Unsupported syntax for column_names ()"
4849X<3001>
4850
4851=item *
48523002 "EHR - getline_hr () called before column_names ()"
4853X<3002>
4854
4855=item *
48563003 "EHR - bind_columns () and column_names () fields count mismatch"
4857X<3003>
4858
4859=item *
48603004 "EHR - bind_columns () only accepts refs to scalars"
4861X<3004>
4862
4863=item *
48643006 "EHR - bind_columns () did not pass enough refs for parsed fields"
4865X<3006>
4866
4867=item *
48683007 "EHR - bind_columns needs refs to writable scalars"
4869X<3007>
4870
4871=item *
48723008 "EHR - unexpected error in bound fields"
4873X<3008>
4874
4875=item *
48763009 "EHR - print_hr () called before column_names ()"
4877X<3009>
4878
4879=item *
48803010 "EHR - print_hr () called with invalid arguments"
4881X<3010>
4882
4883=back
4884
4885=head1 SEE ALSO
4886
4887L<Text::CSV_XS>, L<Text::CSV>
4888
4889Older versions took many regexp from L<http://www.din.or.jp/~ohzaki/perl.htm>
4890
4891=head1 AUTHOR
4892
4893Kenichi Ishigaki, E<lt>ishigaki[at]cpan.orgE<gt>
4894Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
4895
4896Text::CSV_XS was written by E<lt>joe[at]ispsoft.deE<gt>
4897and maintained by E<lt>h.m.brand[at]xs4all.nlE<gt>.
4898
4899Text::CSV was written by E<lt>alan[at]mfgrtl.comE<gt>.
4900
4901=head1 COPYRIGHT AND LICENSE
4902
4903Copyright 2017- by Kenichi Ishigaki, E<lt>ishigaki[at]cpan.orgE<gt>
4904Copyright 2005-2015 by Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
4905
4906Most of the code and doc is directly taken from the pure perl part of
4907Text::CSV_XS.
4908
4909Copyright (C) 2007-2016 H.Merijn Brand. All rights reserved.
4910Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved.
4911Copyright (C) 1997 Alan Citterman. All rights reserved.
4912
4913This library is free software; you can redistribute it and/or modify
4914it under the same terms as Perl itself.
4915
4916=cut