blob: 5dd72267b09893993447f953fe2ca23b0c38d383 [file] [log] [blame]
yu.dongc33b3072024-08-21 23:14:49 -07001package Text::CSV;
2
3
4use strict;
5use Exporter;
6use Carp ();
7use vars qw( $VERSION $DEBUG @ISA @EXPORT_OK );
8@ISA = qw( Exporter );
9@EXPORT_OK = qw( csv );
10
# Module version and debug switch. Both are assigned inside BEGIN, i.e. at
# compile time, before the remaining top-level statements of this file run.
BEGIN {
    $VERSION = '1.95';
    $DEBUG   = 0;
}

# Names of the two backend modules this wrapper knows how to load.
my ( $Module_XS, $Module_PP ) = ( 'Text::CSV_XS', 'Text::CSV_PP' );

# Version passed along when the XS backend is loaded (see _load_xs).
my $XS_Version = '1.02';

# Value reported by is_dynamic().
my $Is_Dynamic = 0;

# Backend functions that _load() aliases directly into the Text::CSV
# namespace once a backend has been loaded.
my @PublicMethods = qw(
    version new error_diag error_input
    known_attributes csv
    PV IV NV
);
28#
29
# Decide which backend ("worker") module to use, unless one has already been
# chosen. The PERL_TEXT_CSV environment variable controls the choice:
#
#   unset                               try XS, fall back on PP
#   '0' or 'Text::CSV_PP'               PP only
#   '1' or 'Text::CSV_XS,Text::CSV_PP'  try XS, fall back on PP
#   '2' or 'Text::CSV_XS'               XS only
#
# Any other value is rejected, and a failure to load croaks with $@.

unless ($Text::CSV::Worker) {
    $Text::CSV::DEBUG and Carp::carp("Check used worker module...");

    if ( !exists $ENV{PERL_TEXT_CSV} ) {
        _load_xs() or _load_pp() or Carp::croak($@);
    }
    else {
        my $choice = $ENV{PERL_TEXT_CSV};

        if ( $choice eq '0' or $choice eq 'Text::CSV_PP' ) {
            _load_pp() or Carp::croak($@);
        }
        elsif ( $choice eq '1' or $choice =~ /Text::CSV_XS\s*,\s*Text::CSV_PP/ ) {
            _load_xs() or _load_pp() or Carp::croak($@);
        }
        elsif ( $choice eq '2' or $choice eq 'Text::CSV_XS' ) {
            _load_xs() or Carp::croak($@);
        }
        else {
            Carp::croak("The value of environmental variable 'PERL_TEXT_CSV' is invalid.");
        }
    }
}
54
# Constructor (normal, non-dynamic mode).
#
# Forwards its arguments to the new() of the backend named in
# $Text::CSV::Worker. On success the backend's object gets the backend name
# stored under the '_MODULE' key and is blessed into the invocant's class
# (or the class of the invocant, when called on an object). When the backend
# constructor returns a false value, nothing is returned.
sub new {
    my $proto = shift;

    # A false invocant, as in Text::CSV_XS/PP::new(0): pass it straight to
    # the backend's new() as a plain function call.
    return eval qq| $Text::CSV::Worker\::new( \$proto ) | unless $proto;

    my $self = $Text::CSV::Worker->new(@_)
        or return;

    $self->{_MODULE} = $Text::CSV::Worker;
    return bless $self, ref($proto) || $proto;
}
78
79
# Returns the Text::CSV_XS version this wrapper asks for when loading XS.
sub require_xs_version {
    return $XS_Version;
}
81
82
# Returns the name of the backend module in use.
#
# Called on a class name it returns $Text::CSV::Worker. Called on an object
# it looks at the object's '_MODULE' entry: if that entry is a reference,
# its class name is returned, otherwise the entry itself.
sub module {
    my $invocant = shift;

    return $Text::CSV::Worker unless ref $invocant;

    my $backend = $invocant->{_MODULE};
    return ref($backend) || $backend;
}

# backend() is an alias for module().
*backend = *module;
90
91
# True when the backend reported by module() is the XS implementation.
sub is_xs {
    my ($self) = @_;
    return $self->module eq $Module_XS;
}
95
96
# True when the backend reported by module() is the pure-Perl implementation.
sub is_pp {
    my ($self) = @_;
    return $self->module eq $Module_PP;
}
100
101
# Returns the file-level $Is_Dynamic flag (initialised to 0 in this file).
sub is_dynamic {
    return $Is_Dynamic;
}
103
# Try to load the XS backend, requesting version $XS_Version.
# Returns whatever _load() returns.
sub _load_xs {
    return _load( $Module_XS, $XS_Version );
}
105
# Try to load the pure-Perl backend (no version requirement).
# Returns whatever _load() returns.
sub _load_pp {
    return _load($Module_PP);
}
107
# Load a backend module and install it as the worker.
#
#   $module   name of the module to load
#   $version  optional version to request in the "use" statement
#
# Returns nothing when the "use" fails (the error is left in $@), and 1 on
# success. On success the module is appended to @Text::CSV::ISA, recorded in
# $Text::CSV::Worker, and every function named in @PublicMethods is aliased
# from the backend into the Text::CSV namespace.
sub _load {
    my ($module, $version) = @_;
    $version = '' unless $version;

    Carp::carp "Load $module." if $Text::CSV::DEBUG;

    # String eval is needed here: the module name is only known at runtime.
    eval qq| use $module $version |;
    if ($@) {
        return;
    }

    $Text::CSV::Worker = $module;
    push @Text::CSV::ISA, $module;

    # Installing the aliases uses symbolic references; $^W is localised
    # (switched off) while that happens.
    local $^W;
    no strict qw(refs);

    foreach my $method (@PublicMethods) {
        *{"Text::CSV::$method"} = \&{"$module\::$method"};
    }

    return 1;
}
129
130
131
1321;
133__END__
134
135=pod
136
137=head1 NAME
138
139Text::CSV - comma-separated values manipulator (using XS or PurePerl)
140
141
142=head1 SYNOPSIS
143
144 use Text::CSV;
145
146 my @rows;
147 my $csv = Text::CSV->new ( { binary => 1 } ) # should set binary attribute.
148 or die "Cannot use CSV: ".Text::CSV->error_diag ();
149
150 open my $fh, "<:encoding(utf8)", "test.csv" or die "test.csv: $!";
151 while ( my $row = $csv->getline( $fh ) ) {
152 $row->[2] =~ m/pattern/ or next; # 3rd field should match
153 push @rows, $row;
154 }
155 $csv->eof or $csv->error_diag();
156 close $fh;
157
158 $csv->eol ("\r\n");
159
160 open $fh, ">:encoding(utf8)", "new.csv" or die "new.csv: $!";
161 $csv->print ($fh, $_) for @rows;
162 close $fh or die "new.csv: $!";
163
164 #
165 # parse and combine style
166 #
167
168 $status = $csv->combine(@columns); # combine columns into a string
169 $line = $csv->string(); # get the combined string
170
171 $status = $csv->parse($line); # parse a CSV string into fields
172 @columns = $csv->fields(); # get the parsed fields
173
174 $status = $csv->status (); # get the most recent status
175 $bad_argument = $csv->error_input (); # get the most recent bad argument
176 $diag = $csv->error_diag (); # if an error occurred, explains WHY
177
178 $status = $csv->print ($io, $colref); # Write an array of fields
179 # immediately to a file $io
180 $colref = $csv->getline ($io); # Read a line from file $io,
181 # parse it and return an array
182 # ref of fields
183 $csv->column_names (@names); # Set column names for getline_hr ()
184 $ref = $csv->getline_hr ($io); # getline (), but returns a hashref
185 $eof = $csv->eof (); # Indicate if last parse or
186 # getline () hit End Of File
187
188 $csv->types(\@t_array); # Set column types
189
190=head1 DESCRIPTION
191
192Text::CSV is a thin wrapper for L<Text::CSV_XS>-compatible modules now.
193All the backend modules provide facilities for the composition and
194decomposition of comma-separated values. Text::CSV uses Text::CSV_XS
195by default, and when Text::CSV_XS is not available, falls back on
196L<Text::CSV_PP>, which is bundled in the same distribution as this module.
197
198=head1 CHOOSING BACKEND
199
This module respects an environment variable called C<PERL_TEXT_CSV>
when it decides which backend module to use. If this environment variable
is not set, it tries to load Text::CSV_XS, and if Text::CSV_XS is not
available, falls back on Text::CSV_PP.
204
If you never want it to fall back on Text::CSV_PP, set the variable
like this (C<export> may be C<setenv>, C<set> or the like, depending
on your environment):
208
209 > export PERL_TEXT_CSV=Text::CSV_XS
210
If you prefer Text::CSV_XS but want to fall back on Text::CSV_PP when it is
not available (this is the default behavior), then:
212
213 > export PERL_TEXT_CSV=Text::CSV_XS,Text::CSV_PP
214
215You may also want to set this variable at the top of your test files, in order
216not to be bothered with incompatibilities between backends (you need to wrap
217this in C<BEGIN>, and set before actually C<use>-ing Text::CSV module, as it
218decides its backend as soon as it's loaded):
219
220 BEGIN { $ENV{PERL_TEXT_CSV}='Text::CSV_PP'; }
221 use Text::CSV;
222
223=head1 NOTES
224
225This section is taken from Text::CSV_XS.
226
227=head2 Embedded newlines
228
229B<Important Note>: The default behavior is to accept only ASCII characters
230in the range from C<0x20> (space) to C<0x7E> (tilde). This means that the
231fields can not contain newlines. If your data contains newlines embedded in
232fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>>
233set C<< binary => 1 >> in the call to L</new>. To cover the widest range of
234parsing options, you will always want to set binary.
235
But you still have the problem that you have to pass a correct line to the
L</parse> method, which is more complicated than the usual way of reading
input suggests:
238
239 my $csv = Text::CSV->new ({ binary => 1, eol => $/ });
240 while (<>) { # WRONG!
241 $csv->parse ($_);
242 my @fields = $csv->fields ();
243 }
244
245this will break, as the C<while> might read broken lines: it does not care
246about the quoting. If you need to support embedded newlines, the way to go
247is to B<not> pass L<C<eol>|/eol> in the parser (it accepts C<\n>, C<\r>,
248B<and> C<\r\n> by default) and then
249
250 my $csv = Text::CSV->new ({ binary => 1 });
251 open my $io, "<", $file or die "$file: $!";
252 while (my $row = $csv->getline ($io)) {
253 my @fields = @$row;
254 }
255
256The old(er) way of using global file handles is still supported
257
258 while (my $row = $csv->getline (*ARGV)) { ... }
259
260=head2 Unicode
261
262Unicode is only tested to work with perl-5.8.2 and up.
263
264The simplest way to ensure the correct encoding is used for in- and output
265is by either setting layers on the filehandles, or setting the L</encoding>
266argument for L</csv>.
267
268 open my $fh, "<:encoding(UTF-8)", "in.csv" or die "in.csv: $!";
269or
270 my $aoa = csv (in => "in.csv", encoding => "UTF-8");
271
272 open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!";
273or
274 csv (in => $aoa, out => "out.csv", encoding => "UTF-8");
275
276On parsing (both for L</getline> and L</parse>), if the source is marked
277being UTF8, then all fields that are marked binary will also be marked UTF8.
278
On combining (L</print> and L</combine>): if any of the combining fields
was marked UTF8, the resulting string will be marked as UTF8. Note however
that if fields I<before> the first field marked UTF8 contained 8-bit
characters that were not upgraded to UTF8, these will be C<bytes> in the
resulting string too, possibly causing unexpected errors. If you pass data
of different encodings, or you don't know whether the encodings differ,
force the data to be upgraded before you pass it on:
286
287 $csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]);
288
289For complete control over encoding, please use L<Text::CSV::Encoded>:
290
291 use Text::CSV::Encoded;
292 my $csv = Text::CSV::Encoded->new ({
293 encoding_in => "iso-8859-1", # the encoding comes into Perl
294 encoding_out => "cp1252", # the encoding comes out of Perl
295 });
296
297 $csv = Text::CSV::Encoded->new ({ encoding => "utf8" });
298 # combine () and print () accept *literally* utf8 encoded data
299 # parse () and getline () return *literally* utf8 encoded data
300
301 $csv = Text::CSV::Encoded->new ({ encoding => undef }); # default
302 # combine () and print () accept UTF8 marked data
303 # parse () and getline () return UTF8 marked data
304
305=head1 METHODS
306
307This whole section is also taken from Text::CSV_XS.
308
309=head2 version ()
310
311(Class method) Returns the current backend module version.
312
313=head2 new (\%attr)
314
315(Class method) Returns a new instance of Text::CSV backend. The attributes
316are described by the (optional) hash ref C<\%attr>.
317
318 my $csv = Text::CSV->new ({ attributes ... });
319
320The following attributes are available:
321
322=head3 eol
323
324 my $csv = Text::CSV->new ({ eol => $/ });
325 $csv->eol (undef);
326 my $eol = $csv->eol;
327
328The end-of-line string to add to rows for L</print> or the record separator
329for L</getline>.
330
331When not passed in a B<parser> instance, the default behavior is to accept
332C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at
333all. Passing C<undef> or the empty string behave the same.
334
335When not passed in a B<generating> instance, records are not terminated at
336all, so it is probably wise to pass something you expect. A safe choice for
337C<eol> on output is either C<$/> or C<\r\n>.
338
339Common values for C<eol> are C<"\012"> (C<\n> or Line Feed), C<"\015\012">
340(C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage
341Return). The L<C<eol>|/eol> attribute cannot exceed 7 (ASCII) characters.
342
If both C<$/> and L<C<eol>|/eol> equal C<"\015">, lines that end on only a
Carriage Return without Line Feed will be L</parse>d correctly.
345
346=head3 sep_char
347
348 my $csv = Text::CSV->new ({ sep_char => ";" });
349 $csv->sep_char (";");
350 my $c = $csv->sep_char;
351
The char used to separate fields, by default a comma (C<,>). Limited to a
353single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
354(tilde). When longer sequences are required, use L<C<sep>|/sep>.
355
356The separation character can not be equal to the quote character or to the
357escape character.
358
359=head3 sep
360
361 my $csv = Text::CSV->new ({ sep => "\N{FULLWIDTH COMMA}" });
362 $csv->sep (";");
363 my $sep = $csv->sep;
364
365The chars used to separate fields, by default undefined. Limited to 8 bytes.
366
367When set, overrules L<C<sep_char>|/sep_char>. If its length is one byte it
368acts as an alias to L<C<sep_char>|/sep_char>.
369
370=head3 quote_char
371
372 my $csv = Text::CSV->new ({ quote_char => "'" });
373 $csv->quote_char (undef);
374 my $c = $csv->quote_char;
375
376The character to quote fields containing blanks or binary data, by default
377the double quote character (C<">). A value of undef suppresses quote chars
378(for simple cases only). Limited to a single-byte character, usually in the
379range from C<0x20> (space) to C<0x7E> (tilde). When longer sequences are
380required, use L<C<quote>|/quote>.
381
382C<quote_char> can not be equal to L<C<sep_char>|/sep_char>.
383
384=head3 quote
385
386 my $csv = Text::CSV->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" });
387 $csv->quote ("'");
388 my $quote = $csv->quote;
389
390The chars used to quote fields, by default undefined. Limited to 8 bytes.
391
392When set, overrules L<C<quote_char>|/quote_char>. If its length is one byte
393it acts as an alias to L<C<quote_char>|/quote_char>.
394
395=head3 escape_char
396
397 my $csv = Text::CSV->new ({ escape_char => "\\" });
398 $csv->escape_char (undef);
399 my $c = $csv->escape_char;
400
401The character to escape certain characters inside quoted fields. This is
402limited to a single-byte character, usually in the range from C<0x20>
403(space) to C<0x7E> (tilde).
404
405The C<escape_char> defaults to being the double-quote mark (C<">). In other
406words the same as the default L<C<quote_char>|/quote_char>. This means that
407doubling the quote mark in a field escapes it:
408
409 "foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
410
411If you change the L<C<quote_char>|/quote_char> without changing the
412C<escape_char>, the C<escape_char> will still be the double-quote (C<">).
413If instead you want to escape the L<C<quote_char>|/quote_char> by doubling
414it you will need to also change the C<escape_char> to be the same as what
415you have changed the L<C<quote_char>|/quote_char> to.
416
417The escape character can not be equal to the separation character.
418
419=head3 binary
420
421 my $csv = Text::CSV->new ({ binary => 1 });
422 $csv->binary (0);
423 my $f = $csv->binary;
424
425If this attribute is C<1>, you may use binary characters in quoted fields,
426including line feeds, carriage returns and C<NULL> bytes. (The latter could
427be escaped as C<"0>.) By default this feature is off.
428
429If a string is marked UTF8, C<binary> will be turned on automatically when
430binary characters other than C<CR> and C<NL> are encountered. Note that a
431simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
432so setting C<< { binary => 1 } >> is still a wise option.
433
434=head3 decode_utf8
435
436 my $csv = Text::CSV->new ({ decode_utf8 => 1 });
437 $csv->decode_utf8 (0);
438 my $f = $csv->decode_utf8;
439
This attribute defaults to TRUE.
441
442While I<parsing>, fields that are valid UTF-8, are automatically set to be
443UTF-8, so that
444
445 $csv->parse ("\xC4\xA8\n");
446
447results in
448
449 PV("\304\250"\0) [UTF8 "\x{128}"]
450
451Sometimes it might not be a desired action. To prevent those upgrades, set
452this attribute to false, and the result will be
453
454 PV("\304\250"\0)
455
456=head3 auto_diag
457
458 my $csv = Text::CSV->new ({ auto_diag => 1 });
459 $csv->auto_diag (2);
460 my $l = $csv->auto_diag;
461
Setting this attribute to a number between C<1> and C<9> causes
L</error_diag> to be automatically called in void context upon errors.
464
465In case of error C<2012 - EOF>, this call will be void.
466
467If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die>
468on errors instead of C<warn>. If set to anything unrecognized, it will be
469silently ignored.
470
Future extensions to this feature will include more reliable auto-detection
of C<autodie> being active in the scope in which the error occurred, which
will increment the value of C<auto_diag> by C<1> the moment the error is
detected.
475
476=head3 diag_verbose
477
478 my $csv = Text::CSV->new ({ diag_verbose => 1 });
479 $csv->diag_verbose (2);
480 my $l = $csv->diag_verbose;
481
482Set the verbosity of the output triggered by C<auto_diag>. Currently only
483adds the current input-record-number (if known) to the diagnostic output
484with an indication of the position of the error.
485
486=head3 blank_is_undef
487
488 my $csv = Text::CSV->new ({ blank_is_undef => 1 });
489 $csv->blank_is_undef (0);
490 my $f = $csv->blank_is_undef;
491
492Under normal circumstances, C<CSV> data makes no distinction between quoted-
493and unquoted empty fields. These both end up in an empty string field once
494read, thus
495
496 1,"",," ",2
497
498is read as
499
500 ("1", "", "", " ", "2")
501
502When I<writing> C<CSV> files with either L<C<always_quote>|/always_quote>
503or L<C<quote_empty>|/quote_empty> set, the unquoted I<empty> field is the
504result of an undefined value. To enable this distinction when I<reading>
505C<CSV> data, the C<blank_is_undef> attribute will cause unquoted empty
506fields to be set to C<undef>, causing the above to be parsed as
507
508 ("1", "", undef, " ", "2")
509
510note that this is specifically important when loading C<CSV> fields into a
511database that allows C<NULL> values, as the perl equivalent for C<NULL> is
512C<undef> in L<DBI> land.
513
514=head3 empty_is_undef
515
516 my $csv = Text::CSV->new ({ empty_is_undef => 1 });
517 $csv->empty_is_undef (0);
518 my $f = $csv->empty_is_undef;
519
520Going one step further than L<C<blank_is_undef>|/blank_is_undef>, this
521attribute converts all empty fields to C<undef>, so
522
523 1,"",," ",2
524
525is read as
526
527 (1, undef, undef, " ", 2)
528
Note that this affects only fields that are originally empty, not fields
that are empty after stripping allowed whitespace. YMMV.
531
532=head3 allow_whitespace
533
534 my $csv = Text::CSV->new ({ allow_whitespace => 1 });
535 $csv->allow_whitespace (0);
536 my $f = $csv->allow_whitespace;
537
538When this option is set to true, the whitespace (C<TAB>'s and C<SPACE>'s)
539surrounding the separation character is removed when parsing. If either
540C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>|/sep_char>,
541L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> it will not
542be considered whitespace.
543
544Now lines like:
545
546 1 , "foo" , bar , 3 , zapp
547
548are parsed as valid C<CSV>, even though it violates the C<CSV> specs.
549
550Note that B<all> whitespace is stripped from both start and end of each
551field. That would make it I<more> than a I<feature> to enable parsing bad
552C<CSV> lines, as
553
554 1, 2.0, 3, ape , monkey
555
556will now be parsed as
557
558 ("1", "2.0", "3", "ape", "monkey")
559
560even if the original line was perfectly acceptable C<CSV>.
561
562=head3 allow_loose_quotes
563
564 my $csv = Text::CSV->new ({ allow_loose_quotes => 1 });
565 $csv->allow_loose_quotes (0);
566 my $f = $csv->allow_loose_quotes;
567
568By default, parsing unquoted fields containing L<C<quote_char>|/quote_char>
569characters like
570
571 1,foo "bar" baz,42
572
573would result in parse error 2034. Though it is still bad practice to allow
574this format, we cannot help the fact that some vendors make their
575applications spit out lines styled this way.
576
577If there is B<really> bad C<CSV> data, like
578
579 1,"foo "bar" baz",42
580
581or
582
583 1,""foo bar baz"",42
584
585there is a way to get this data-line parsed and leave the quotes inside the
586quoted field as-is. This can be achieved by setting C<allow_loose_quotes>
587B<AND> making sure that the L<C<escape_char>|/escape_char> is I<not> equal
588to L<C<quote_char>|/quote_char>.
589
590=head3 allow_loose_escapes
591
592 my $csv = Text::CSV->new ({ allow_loose_escapes => 1 });
593 $csv->allow_loose_escapes (0);
594 my $f = $csv->allow_loose_escapes;
595
596Parsing fields that have L<C<escape_char>|/escape_char> characters that
597escape characters that do not need to be escaped, like:
598
599 my $csv = Text::CSV->new ({ escape_char => "\\" });
600 $csv->parse (qq{1,"my bar\'s",baz,42});
601
602would result in parse error 2025. Though it is bad practice to allow this
603format, this attribute enables you to treat all escape character sequences
604equal.
605
606=head3 allow_unquoted_escape
607
608 my $csv = Text::CSV->new ({ allow_unquoted_escape => 1 });
609 $csv->allow_unquoted_escape (0);
610 my $f = $csv->allow_unquoted_escape;
611
612A backward compatibility issue where L<C<escape_char>|/escape_char> differs
613from L<C<quote_char>|/quote_char> prevents L<C<escape_char>|/escape_char>
614to be in the first position of a field. If L<C<quote_char>|/quote_char> is
615equal to the default C<"> and L<C<escape_char>|/escape_char> is set to C<\>,
616this would be illegal:
617
618 1,\0,2
619
620Setting this attribute to C<1> might help to overcome issues with backward
621compatibility and allow this style.
622
623=head3 always_quote
624
625 my $csv = Text::CSV->new ({ always_quote => 1 });
626 $csv->always_quote (0);
627 my $f = $csv->always_quote;
628
629By default the generated fields are quoted only if they I<need> to be. For
630example, if they contain the separator character. If you set this attribute
631to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not
632quoted, see L</blank_is_undef>). This makes it quite often easier to handle
633exported data in external applications.
634
635=head3 quote_space
636
637 my $csv = Text::CSV->new ({ quote_space => 1 });
638 $csv->quote_space (0);
639 my $f = $csv->quote_space;
640
By default, a space in a field would trigger quotation. As no rule exists
for this to be forced in C<CSV>, nor any for the opposite, the default is
true for safety. You can exclude the space from this trigger by setting this
attribute to 0.
645
646=head3 quote_empty
647
648 my $csv = Text::CSV->new ({ quote_empty => 1 });
649 $csv->quote_empty (0);
650 my $f = $csv->quote_empty;
651
652By default the generated fields are quoted only if they I<need> to be. An
653empty (defined) field does not need quotation. If you set this attribute to
654C<1> then I<empty> defined fields will be quoted. (C<undef> fields are not
655quoted, see L</blank_is_undef>). See also L<C<always_quote>|/always_quote>.
656
657=head3 quote_binary
658
659 my $csv = Text::CSV->new ({ quote_binary => 1 });
660 $csv->quote_binary (0);
661 my $f = $csv->quote_binary;
662
663By default, all "unsafe" bytes inside a string cause the combined field to
664be quoted. By setting this attribute to C<0>, you can disable that trigger
665for bytes >= C<0x7F>.
666
667=head3 escape_null or quote_null (deprecated)
668
669 my $csv = Text::CSV->new ({ escape_null => 1 });
670 $csv->escape_null (0);
671 my $f = $csv->escape_null;
672
673By default, a C<NULL> byte in a field would be escaped. This option enables
674you to treat the C<NULL> byte as a simple binary character in binary mode
675(the C<< { binary => 1 } >> is set). The default is true. You can prevent
676C<NULL> escapes by setting this attribute to C<0>.
677
678The default when using the C<csv> function is C<false>.
679
680=head3 keep_meta_info
681
682 my $csv = Text::CSV->new ({ keep_meta_info => 1 });
683 $csv->keep_meta_info (0);
684 my $f = $csv->keep_meta_info;
685
686By default, the parsing of input records is as simple and fast as possible.
687However, some parsing information - like quotation of the original field -
688is lost in that process. Setting this flag to true enables retrieving that
689information after parsing with the methods L</meta_info>, L</is_quoted>,
690and L</is_binary> described below. Default is false for performance.
691
If you set this attribute to a value greater than 9, then you can control
output quotation style like it was used in the input of the last parsed
record (unless quotation was added because of other reasons).
695
696 my $csv = Text::CSV->new ({
697 binary => 1,
698 keep_meta_info => 1,
699 quote_space => 0,
700 });
701
 $csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"});
 my @row = $csv->fields;
703
704 $csv->print (*STDOUT, \@row);
705 # 1,,, , ,f,g,"h""h",help,help
706 $csv->keep_meta_info (11);
707 $csv->print (*STDOUT, \@row);
708 # 1,,"", ," ",f,"g","h""h",help,"help"
709
710=head3 verbatim
711
712 my $csv = Text::CSV->new ({ verbatim => 1 });
713 $csv->verbatim (0);
714 my $f = $csv->verbatim;
715
716This is a quite controversial attribute to set, but makes some hard things
717possible.
718
719The rationale behind this attribute is to tell the parser that the normally
720special characters newline (C<NL>) and Carriage Return (C<CR>) will not be
721special when this flag is set, and be dealt with as being ordinary binary
722characters. This will ease working with data with embedded newlines.
723
724When C<verbatim> is used with L</getline>, L</getline> auto-C<chomp>'s
725every line.
726
727Imagine a file format like
728
729 M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n
730
731where, the line ending is a very specific C<"#\r\n">, and the sep_char is a
732C<^> (caret). None of the fields is quoted, but embedded binary data is
733likely to be present. With the specific line ending, this should not be too
734hard to detect.
735
By default, Text::CSV's parse function is instructed to only know about
737C<"\n"> and C<"\r"> to be legal line endings, and so has to deal with the
738embedded newline as a real C<end-of-line>, so it can scan the next line if
739binary is true, and the newline is inside a quoted field. With this option,
740we tell L</parse> to parse the line as if C<"\n"> is just nothing more than
741a binary character.
742
743For L</parse> this means that the parser has no more idea about line ending
744and L</getline> C<chomp>s line endings on reading.
745
746=head3 types
747
748A set of column types; the attribute is immediately passed to the L</types>
749method.
750
751=head3 callbacks
752
753See the L</Callbacks> section below.
754
755=head3 accessors
756
757To sum it up,
758
759 $csv = Text::CSV->new ();
760
761is equivalent to
762
763 $csv = Text::CSV->new ({
764 eol => undef, # \r, \n, or \r\n
765 sep_char => ',',
766 sep => undef,
767 quote_char => '"',
768 quote => undef,
769 escape_char => '"',
770 binary => 0,
771 decode_utf8 => 1,
772 auto_diag => 0,
773 diag_verbose => 0,
774 blank_is_undef => 0,
775 empty_is_undef => 0,
776 allow_whitespace => 0,
777 allow_loose_quotes => 0,
778 allow_loose_escapes => 0,
779 allow_unquoted_escape => 0,
780 always_quote => 0,
781 quote_empty => 0,
782 quote_space => 1,
783 escape_null => 1,
784 quote_binary => 1,
785 keep_meta_info => 0,
786 verbatim => 0,
787 types => undef,
788 callbacks => undef,
789 });
790
791For all of the above mentioned flags, an accessor method is available where
792you can inquire the current value, or change the value
793
794 my $quote = $csv->quote_char;
795 $csv->binary (1);
796
797It is not wise to change these settings halfway through writing C<CSV> data
798to a stream. If however you want to create a new stream using the available
799C<CSV> object, there is no harm in changing them.
800
801If the L</new> constructor call fails, it returns C<undef>, and makes the
802fail reason available through the L</error_diag> method.
803
804 $csv = Text::CSV->new ({ ecs_char => 1 }) or
805 die "".Text::CSV->error_diag ();
806
807L</error_diag> will return a string like
808
809 "INI - Unknown attribute 'ecs_char'"
810
811=head2 known_attributes
812
813 @attr = Text::CSV->known_attributes;
814 @attr = Text::CSV::known_attributes;
815 @attr = $csv->known_attributes;
816
817This method will return an ordered list of all the supported attributes as
818described above. This can be useful for knowing what attributes are valid
819in classes that use or extend Text::CSV.
820
821=head2 print
822
823 $status = $csv->print ($io, $colref);
824
825Similar to L</combine> + L</string> + L</print>, but much more efficient.
826It expects an array ref as input (not an array!) and the resulting string
827is not really created, but immediately written to the C<$io> object,
828typically an IO handle or any other object that offers a L</print> method.
829
830For performance reasons C<print> does not create a result string, so all
831L</string>, L</status>, L</fields>, and L</error_input> methods will return
832undefined information after executing this method.
833
834If C<$colref> is C<undef> (explicit, not through a variable argument) and
835L</bind_columns> was used to specify fields to be printed, it is possible
836to make performance improvements, as otherwise data would have to be copied
837as arguments to the method call:
838
839 $csv->bind_columns (\($foo, $bar));
840 $status = $csv->print ($fh, undef);
841
842=head2 say
843
844 $status = $csv->say ($io, $colref);
845
846Like L<C<print>|/print>, but L<C<eol>|/eol> defaults to C<$\>.
847
848=head2 print_hr
849
850 $csv->print_hr ($io, $ref);
851
852Provides an easy way to print a C<$ref> (as fetched with L</getline_hr>)
853provided the column names are set with L</column_names>.
854
855It is just a wrapper method with basic parameter checks over
856
857 $csv->print ($io, [ map { $ref->{$_} } $csv->column_names ]);
858
859=head2 combine
860
861 $status = $csv->combine (@fields);
862
863This method constructs a C<CSV> record from C<@fields>, returning success
864or failure. Failure can result from lack of arguments or an argument that
865contains an invalid character. Upon success, L</string> can be called to
866retrieve the resultant C<CSV> string. Upon failure, the value returned by
867L</string> is undefined and L</error_input> could be called to retrieve the
868invalid argument.
869
870=head2 string
871
872 $line = $csv->string ();
873
874This method returns the input to L</parse> or the resultant C<CSV> string
875of L</combine>, whichever was called more recently.
876
877=head2 getline
878
879 $colref = $csv->getline ($io);
880
881This is the counterpart to L</print>, as L</parse> is the counterpart to
882L</combine>: it parses a row from the C<$io> handle using the L</getline>
883method associated with C<$io> and parses this row into an array ref. This
884array ref is returned by the function or C<undef> for failure. When C<$io>
885does not support C<getline>, you are likely to hit errors.
886
887When fields are bound with L</bind_columns> the return value is a reference
888to an empty list.
889
890The L</string>, L</fields>, and L</status> methods are meaningless again.
891
892=head2 getline_all
893
894 $arrayref = $csv->getline_all ($io);
895 $arrayref = $csv->getline_all ($io, $offset);
896 $arrayref = $csv->getline_all ($io, $offset, $length);
897
898This will return a reference to a list of L<getline ($io)|/getline> results.
899In this call, C<keep_meta_info> is disabled. If C<$offset> is negative, as
900with C<splice>, only the last C<abs ($offset)> records of C<$io> are taken
901into consideration.
902
903Given a CSV file with 10 lines:
904
905 lines call
906 ----- ---------------------------------------------------------
907 0..9 $csv->getline_all ($io) # all
908 0..9 $csv->getline_all ($io, 0) # all
909 8..9 $csv->getline_all ($io, 8) # start at 8
910 - $csv->getline_all ($io, 0, 0) # start at 0 first 0 rows
911 0..4 $csv->getline_all ($io, 0, 5) # start at 0 first 5 rows
912 4..5 $csv->getline_all ($io, 4, 2) # start at 4 first 2 rows
913 8..9 $csv->getline_all ($io, -2) # last 2 rows
914 6..7 $csv->getline_all ($io, -4, 2) # first 2 of last 4 rows
915
916=head2 getline_hr
917
918The L</getline_hr> and L</column_names> methods work together to allow you
919to have rows returned as hashrefs. You must call L</column_names> first to
920declare your column names.
921
922 $csv->column_names (qw( code name price description ));
923 $hr = $csv->getline_hr ($io);
924 print "Price for $hr->{name} is $hr->{price} EUR\n";
925
926L</getline_hr> will croak if called before L</column_names>.
927
Note that L</getline_hr> creates a hashref for every row and will be much
slower than the combined use of L</bind_columns> and L</getline>, which
still offers the same ease of use of a hashref inside the loop:
931
932 my @cols = @{$csv->getline ($io)};
933 $csv->column_names (@cols);
934 while (my $row = $csv->getline_hr ($io)) {
935 print $row->{price};
936 }
937
938Could easily be rewritten to the much faster:
939
940 my @cols = @{$csv->getline ($io)};
941 my $row = {};
942 $csv->bind_columns (\@{$row}{@cols});
943 while ($csv->getline ($io)) {
944 print $row->{price};
945 }
946
947Your mileage may vary for the size of the data and the number of rows.
948
949=head2 getline_hr_all
950
951 $arrayref = $csv->getline_hr_all ($io);
952 $arrayref = $csv->getline_hr_all ($io, $offset);
953 $arrayref = $csv->getline_hr_all ($io, $offset, $length);
954
955This will return a reference to a list of L<getline_hr ($io)|/getline_hr>
956results. In this call, L<C<keep_meta_info>|/keep_meta_info> is disabled.
957
958=head2 parse
959
960 $status = $csv->parse ($line);
961
962This method decomposes a C<CSV> string into fields, returning success or
963failure. Failure can result from a lack of argument or from an improperly
964formatted C<CSV> string. Upon success, L</fields> can be called to
965retrieve the decomposed fields. Upon failure calling L</fields> will return
966undefined data and L</error_input> can be called to retrieve the invalid
967argument.
968
969You may use the L</types> method for setting column types. See L</types>'
970description below.
971
972The C<$line> argument is supposed to be a simple scalar. Everything else is
973supposed to croak and set error 1500.
974
975=head2 fragment
976
977This function tries to implement RFC7111 (URI Fragment Identifiers for the
978text/csv Media Type) - http://tools.ietf.org/html/rfc7111
979
980 my $AoA = $csv->fragment ($io, $spec);
981
982In specifications, C<*> is used to specify the I<last> item, a dash (C<->)
983to indicate a range. All indices are C<1>-based: the first row or column
984has index C<1>. Selections can be combined with the semi-colon (C<;>).
985
986When using this method in combination with L</column_names>, the returned
987reference will point to a list of hashes instead of a list of lists. A
988disjointed cell-based combined selection might return rows with different
989number of columns making the use of hashes unpredictable.
990
991 $csv->column_names ("Name", "Age");
992 my $AoH = $csv->fragment ($io, "col=3;8");
993
994If the L</after_parse> callback is active, it is also called on every line
995parsed and skipped before the fragment.
996
997=over 2
998
999=item row
1000
1001 row=4
1002 row=5-7
1003 row=6-*
1004 row=1-2;4;6-*
1005
1006=item col
1007
1008 col=2
1009 col=1-3
1010 col=4-*
1011 col=1-2;4;7-*
1012
1013=item cell
1014
1015In cell-based selection, the comma (C<,>) is used to pair row and column
1016
1017 cell=4,1
1018
1019The range operator (C<->) using C<cell>s can be used to define top-left and
1020bottom-right C<cell> location
1021
1022 cell=3,1-4,6
1023
1024The C<*> is only allowed in the second part of a pair
1025
1026 cell=3,2-*,2 # row 3 till end, only column 2
1027 cell=3,2-3,* # column 2 till end, only row 3
1028 cell=3,2-*,* # strip row 1 and 2, and column 1
1029
1030Cells and cell ranges may be combined with C<;>, possibly resulting in rows
1031with different number of columns
1032
1033 cell=1,1-2,2;3,3-4,4;1,4;4,1
1034
1035Disjointed selections will only return selected cells. The cells that are
1036not specified will not be included in the returned set, not even as
1037C<undef>. As an example given a C<CSV> like
1038
1039 11,12,13,...19
1040 21,22,...28,29
1041 : :
1042 91,...97,98,99
1043
1044with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return:
1045
1046 11,12,14
1047 21,22
1048 33,34
1049 41,43,44
1050
1051Overlapping cell-specs will return those cells only once, so
1052C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return:
1053
1054 11,12,13
1055 21,22,23,24
1056 31,32,33,34
1057 42,43,44
1058
1059=back
1060
1061L<RFC7111|http://tools.ietf.org/html/rfc7111> does B<not> allow different
1062types of specs to be combined (either C<row> I<or> C<col> I<or> C<cell>).
1063Passing an invalid fragment specification will croak and set error 2013.
1064
1065=head2 column_names
1066
1067Set the "keys" that will be used in the L</getline_hr> calls. If no keys
1068(column names) are passed, it will return the current setting as a list.
1069
1070L</column_names> accepts a list of scalars (the column names) or a single
1071array_ref, so you can pass the return value from L</getline> too:
1072
1073 $csv->column_names ($csv->getline ($io));
1074
1075L</column_names> does B<no> checking on duplicates at all, which might lead
1076to unexpected results. Undefined entries will be replaced with the string
1077C<"\cAUNDEF\cA">, so
1078
1079 $csv->column_names (undef, "", "name", "name");
1080 $hr = $csv->getline_hr ($io);
1081
1082Will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to
1083the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd
1084field.
1085
1086L</column_names> croaks on invalid arguments.
1087
1088=head2 header
1089
1090This method does NOT work in perl-5.6.x
1091
1092Parse the CSV header and set L<C<sep>|/sep>, column_names and encoding.
1093
1094 my @hdr = $csv->header ($fh);
1095 $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
1096 $csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" });
1097
1098The first argument should be a file handle.
1099
1100Assuming that the file opened for parsing has a header, and the header does
1101not contain problematic characters like embedded newlines, read the first
1102line from the open handle then auto-detect whether the header separates the
1103column names with a character from the allowed separator list.
1104
1105If any of the allowed separators matches, and none of the I<other> allowed
1106separators match, set L<C<sep>|/sep> to that separator for the current
1107CSV_PP instance and use it to parse the first line, map those to lowercase,
1108and use that to set the instance L</column_names>:
1109
1110 my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
1111 open my $fh, "<", "file.csv";
1112 binmode $fh; # for Windows
1113 $csv->header ($fh);
1114 while (my $row = $csv->getline_hr ($fh)) {
1115 ...
1116 }
1117
1118If the header is empty, contains more than one unique separator out of the
1119allowed set, contains empty fields, or contains identical fields (after
1120folding), it will croak with error 1010, 1011, 1012, or 1013 respectively.
1121
1122If the header contains embedded newlines or is not valid CSV in any other
1123way, this method will croak and leave the parse error untouched.
1124
1125A successful call to C<header> will always set the L<C<sep>|/sep> of the
1126C<$csv> object. This behavior can not be disabled.
1127
1128=head3 return value
1129
1130On error this method will croak.
1131
1132In list context, the headers will be returned whether they are used to set
1133L</column_names> or not.
1134
1135In scalar context, the instance itself is returned. B<Note>: the values as
1136found in the header will effectively be B<lost> if C<set_column_names> is
1137false.
1138
1139=head3 Options
1140
1141=over 2
1142
1143=item sep_set
1144
1145 $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
1146
1147The list of legal separators defaults to C<[ ";", "," ]> and can be changed
1148by this option. As this is probably the most often used option, it can be
1149passed on its own as an unnamed argument:
1150
1151 $csv->header ($fh, [ ";", ",", "|", "\t", "::", "\x{2063}" ]);
1152
1153Multi-byte sequences are allowed, both multi-character and Unicode. See
1154L<C<sep>|/sep>.
1155
1156=item detect_bom
1157
1158 $csv->header ($fh, { detect_bom => 1 });
1159
1160The default behavior is to detect if the header line starts with a BOM. If
1161the header has a BOM, use that to set the encoding of C<$fh>. This default
1162behavior can be disabled by passing a false value to C<detect_bom>.
1163
1164Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, and
1165UTF-32LE. BOM's also support UTF-1, UTF-EBCDIC, SCSU, BOCU-1, and GB-18030
1166but L<Encode> does not (yet). UTF-7 is not supported.
1167
1168The encoding is set using C<binmode> on C<$fh>.
1169
1170If the handle was opened in a (correct) encoding, this method will B<not>
1171alter the encoding, as it checks the leading B<bytes> of the first line.
1172
1173=item munge_column_names
1174
1175This option offers the means to modify the column names into something that
1176is most useful to the application. The default is to map all column names
1177to lower case.
1178
1179 $csv->header ($fh, { munge_column_names => "lc" });
1180
1181The following values are available:
1182
1183 lc - lower case
1184 uc - upper case
1185 none - do not change
1186 \&cb - supply a callback
1187
1188 $csv->header ($fh, { munge_column_names => sub { fc } });
1189 $csv->header ($fh, { munge_column_names => sub { "column_".$col++ } });
1190 $csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } });
1191
1192As this callback is called in a C<map>, you can use C<$_> directly.
1193
1194=item set_column_names
1195
1196 $csv->header ($fh, { set_column_names => 1 });
1197
1198The default is to set the instance's column names using L</column_names> if
1199the method is successful, so subsequent calls to L</getline_hr> can return
1200a hash. Setting the header can be disabled by using a false value for
1201this option.
1202
1203=back
1204
1205=head3 Validation
1206
1207When receiving CSV files from external sources, this method can be used to
1208protect against changes in the layout by restricting to known headers (and
1209typos in the header fields).
1210
1211 my %known = (
1212 "record key" => "c_rec",
1213 "rec id" => "c_rec",
1214 "id_rec" => "c_rec",
1215 "kode" => "code",
1216 "code" => "code",
1217 "vaule" => "value",
1218 "value" => "value",
1219 );
1220 my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
1221 open my $fh, "<", $source or die "$source: $!";
1222 $csv->header ($fh, { munge_column_names => sub {
1223 s/\s+$//;
1224 s/^\s+//;
1225 $known{lc $_} or die "Unknown column '$_' in $source";
1226 }});
1227 while (my $row = $csv->getline_hr ($fh)) {
1228 say join "\t", $row->{c_rec}, $row->{code}, $row->{value};
1229 }
1230
1231=head2 bind_columns
1232
1233Takes a list of scalar references to be used for output with L</print> or
1234to store in the fields fetched by L</getline>. When you do not pass enough
1235references to store the fetched fields in, L</getline> will fail with error
1236C<3006>. If you pass more than there are fields to return, the content of
1237the remaining references is left untouched.
1238
1239 $csv->bind_columns (\$code, \$name, \$price, \$description);
1240 while ($csv->getline ($io)) {
1241 print "The price of a $name is \x{20ac} $price\n";
1242 }
1243
1244To reset or clear all column binding, call L</bind_columns> with the single
1245argument C<undef>. This will also clear column names.
1246
1247 $csv->bind_columns (undef);
1248
1249If no arguments are passed at all, L</bind_columns> will return the list of
1250current bindings or C<undef> if no binds are active.
1251
1252Note that in parsing with C<bind_columns>, the fields are set on the fly.
1253That implies that if the third field of a row causes an error, the first
1254two fields already have been assigned the values of the current row, while
1255the rest will still hold the values of the previous row.
1256
1257=head2 eof
1258
1259 $eof = $csv->eof ();
1260
1261If L</parse> or L</getline> was used with an IO stream, this method will
1262return true (1) if the last call hit end of file, otherwise it will return
1263false (''). This is useful to see the difference between a failure and end
1264of file.
1265
1266Note that if the parsing of the last line caused an error, C<eof> is still
1267true. That means that if you are I<not> using L</auto_diag>, an idiom like
1268
1269 while (my $row = $csv->getline ($fh)) {
1270 # ...
1271 }
1272 $csv->eof or $csv->error_diag;
1273
1274will I<not> report the error. You would have to change that to
1275
1276 while (my $row = $csv->getline ($fh)) {
1277 # ...
1278 }
1279 +$csv->error_diag and $csv->error_diag;
1280
1281=head2 types
1282
1283 $csv->types (\@tref);
1284
1285This method is used to force that (all) columns are of a given type. For
1286example, if you have an integer column, two columns with doubles and a
1287string column, then you might do a
1288
1289 $csv->types ([Text::CSV::IV (),
1290 Text::CSV::NV (),
1291 Text::CSV::NV (),
1292 Text::CSV::PV ()]);
1293
1294Column types are used only for I<decoding> columns while parsing, in other
1295words by the L</parse> and L</getline> methods.
1296
1297You can unset column types by doing a
1298
1299 $csv->types (undef);
1300
1301or fetch the current type settings with
1302
1303 $types = $csv->types ();
1304
1305=over 4
1306
1307=item IV
1308
1309Set field type to integer.
1310
1311=item NV
1312
1313Set field type to numeric/float.
1314
1315=item PV
1316
1317Set field type to string.
1318
1319=back
1320
1321=head2 fields
1322
1323 @columns = $csv->fields ();
1324
1325This method returns the input to L</combine> or the resultant decomposed
1326fields of a successful L</parse>, whichever was called more recently.
1327
1328Note that the return value is undefined after using L</getline>, which does
1329not fill the data structures returned by L</parse>.
1330
1331=head2 meta_info
1332
1333 @flags = $csv->meta_info ();
1334
1335This method returns the "flags" of the input to L</combine> or the flags of
1336the resultant decomposed fields of L</parse>, whichever was called more
1337recently.
1338
1339For each field, a meta_info field will hold flags that inform something
1340about the field returned by the L</fields> method or passed to the
1341L</combine> method. The flags are bit-wise-C<or>'d like:
1342
1343=over 2
1344
1345=item C< >0x0001
1346
1347The field was quoted.
1348
1349=item C< >0x0002
1350
1351The field was binary.
1352
1353=back
1354
1355See the C<is_***> methods below.
1356
1357=head2 is_quoted
1358
1359 my $quoted = $csv->is_quoted ($column_idx);
1360
1361Where C<$column_idx> is the (zero-based) index of the column in the last
1362result of L</parse>.
1363
1364This returns a true value if the data in the indicated column was enclosed
1365in L<C<quote_char>|/quote_char> quotes. This might be important for fields
1366where content C<,20070108,> is to be treated as a numeric value, and where
1367C<,"20070108",> is explicitly marked as character string data.
1368
1369This method is only valid when L</keep_meta_info> is set to a true value.
1370
1371=head2 is_binary
1372
1373 my $binary = $csv->is_binary ($column_idx);
1374
1375Where C<$column_idx> is the (zero-based) index of the column in the last
1376result of L</parse>.
1377
1378This returns a true value if the data in the indicated column contained any
1379byte in the range C<[\x00-\x08,\x10-\x1F,\x7F-\xFF]>.
1380
1381This method is only valid when L</keep_meta_info> is set to a true value.
1382
1383=head2 is_missing
1384
1385 my $missing = $csv->is_missing ($column_idx);
1386
1387Where C<$column_idx> is the (zero-based) index of the column in the last
1388result of L</getline_hr>.
1389
1390 $csv->keep_meta_info (1);
1391 while (my $hr = $csv->getline_hr ($fh)) {
1392 $csv->is_missing (0) and next; # This was an empty line
1393 }
1394
1395When using L</getline_hr>, it is impossible to tell if the parsed fields
1396are C<undef> because they were not filled in the C<CSV> stream or because
1397they were not read at all, as B<all> the fields defined by L</column_names>
1398are set in the hash-ref. If you still need to know if all fields in each
1399row are provided, you should enable L<C<keep_meta_info>|/keep_meta_info> so
1400you can check the flags.
1401
1402If L<C<keep_meta_info>|/keep_meta_info> is C<false>, C<is_missing> will
1403always return C<undef>, regardless of C<$column_idx> being valid or not. If
1404this attribute is C<true> it will return either C<0> (the field is present)
1405or C<1> (the field is missing).
1406
1407A special case is the empty line. If the line is completely empty - after
1408dealing with the flags - this is still a valid CSV line: it is a record of
1409just one single empty field. However, if C<keep_meta_info> is set, invoking
1410C<is_missing> with index C<0> will now return true.
1411
1412=head2 status
1413
1414 $status = $csv->status ();
1415
1416This method returns the status of the last invoked L</combine> or L</parse>
1417call. Status is success (true: C<1>) or failure (false: C<undef> or C<0>).
1418
1419=head2 error_input
1420
1421 $bad_argument = $csv->error_input ();
1422
1423This method returns the erroneous argument (if it exists) of L</combine> or
1424L</parse>, whichever was called more recently. If the last invocation was
1425successful, C<error_input> will return C<undef>.
1426
1427=head2 error_diag
1428
1429 Text::CSV->error_diag ();
1430 $csv->error_diag ();
1431 $error_code = 0 + $csv->error_diag ();
1432 $error_str = "" . $csv->error_diag ();
1433 ($cde, $str, $pos, $rec, $fld) = $csv->error_diag ();
1434
1435If (and only if) an error occurred, this function returns the diagnostics
1436of that error.
1437
1438If called in void context, this will print the internal error code and the
1439associated error message to STDERR.
1440
1441If called in list context, this will return the error code and the error
1442message in that order. If the last error was from parsing, the rest of the
1443values returned are a best guess at the location within the line that was
1444being parsed. Their values are 1-based. The position currently is index of
1445the byte at which the parsing failed in the current record. It might change
1446to be the index of the current character in a later release. The record is
1447the index of the record parsed by the csv instance. The field number is the
1448index of the field the parser thinks it is currently trying to parse. See
1449F<examples/csv-check> for how this can be used.
1450
1451If called in scalar context, it will return the diagnostics in a single
1452scalar, a-la C<$!>. It will contain the error code in numeric context, and
1453the diagnostics message in string context.
1454
1455When called as a class method or a direct function call, the diagnostics
1456are that of the last L</new> call.
1457
1458=head2 record_number
1459
1460 $recno = $csv->record_number ();
1461
1462Returns the number of records parsed by this csv instance. This value
1463should be more accurate than C<$.> when embedded newlines come in play.
1464Records written by this instance are not counted.
1465
1466=head2 SetDiag
1467
1468 $csv->SetDiag (0);
1469
1470Use to reset the diagnostics if you are dealing with errors.
1471
1472=head1 ADDITIONAL METHODS
1473
1474=over
1475
1476=item backend
1477
1478Returns the backend module name called by Text::CSV.
1479C<module> is an alias.
1480
1481=item is_xs
1482
1483Returns true value if Text::CSV uses an XS backend.
1484
1485=item is_pp
1486
1487Returns true value if Text::CSV uses a pure-Perl backend.
1488
1489=back
1490
1491=head1 FUNCTIONS
1492
1493This whole section is also taken from Text::CSV_XS.
1494
1495=head2 csv
1496
1497This function is not exported by default and should be explicitly requested:
1498
1499 use Text::CSV qw( csv );
1500
1501This is a high-level function that aims at simple (user) interfaces. This
1502can be used to read/parse a C<CSV> file or stream (the default behavior) or
1503to produce a file or write to a stream (define the C<out> attribute). It
1504returns an array- or hash-reference on parsing (or C<undef> on fail) or the
1505numeric value of L</error_diag> on writing. When this function fails you
1506can get to the error using the class call to L</error_diag>
1507
1508 my $aoa = csv (in => "test.csv") or
1509 die Text::CSV->error_diag;
1510
1511This function takes the arguments as key-value pairs. This can be passed as
1512a list or as an anonymous hash:
1513
1514 my $aoa = csv ( in => "test.csv", sep_char => ";");
1515 my $aoh = csv ({ in => $fh, headers => "auto" });
1516
1517The arguments passed consist of two parts: the arguments to L</csv> itself
1518and the optional attributes to the C<CSV> object used inside the function
1519as enumerated and explained in L</new>.
1520
1521If not overridden, the default options used for CSV are
1522
1523 auto_diag => 1
1524 escape_null => 0
1525
1526The option that is always set and cannot be altered is
1527
1528 binary => 1
1529
1530As this function will likely be used in one-liners, it allows C<quote> to
1531be abbreviated as C<quo>, and C<escape_char> to be abbreviated as C<esc>
1532or C<escape>.
1533
1534Alternative invocations:
1535
1536 my $aoa = Text::CSV::csv (in => "file.csv");
1537
1538 my $csv = Text::CSV->new ();
1539 my $aoa = $csv->csv (in => "file.csv");
1540
1541In the latter case, the object attributes are used from the existing object
1542and the attribute arguments in the function call are ignored:
1543
1544 my $csv = Text::CSV->new ({ sep_char => ";" });
1545 my $aoh = $csv->csv (in => "file.csv", bom => 1);
1546
1547will parse using C<;> as C<sep_char>, not C<,>.
1548
1549=head3 in
1550
1551Used to specify the source. C<in> can be a file name (e.g. C<"file.csv">),
1552which will be opened for reading and closed when finished, a file handle
1553(e.g. C<$fh> or C<FH>), a reference to a glob (e.g. C<\*ARGV>), the glob
1554itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>).
1555
1556When used with L</out>, C<in> should be a reference to a CSV structure (AoA
1557or AoH) or a CODE-ref that returns an array-reference or a hash-reference.
1558The code-ref will be invoked with no arguments.
1559
1560 my $aoa = csv (in => "file.csv");
1561
1562 open my $fh, "<", "file.csv";
1563 my $aoa = csv (in => $fh);
1564
1565 my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]];
1566 my $err = csv (in => $csv, out => "file.csv");
1567
1568If called in void context without the L</out> attribute, the resulting ref
1569will be used as input to a subsequent call to csv:
1570
1571 csv (in => "file.csv", filter => { 2 => sub { length > 2 }})
1572
1573will be a shortcut to
1574
1575 csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}))
1576
1577where, in the absence of the C<out> attribute, this is a shortcut to
1578
1579 csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}),
1580 out => *STDOUT)
1581
1582=head3 out
1583
1584In output mode, the default CSV options when producing CSV are
1585
1586 eol => "\r\n"
1587
1588The L</fragment> attribute is ignored in output mode.
1589
1590C<out> can be a file name (e.g. C<"file.csv">), which will be opened for
1591writing and closed when finished, a file handle (e.g. C<$fh> or C<FH>), a
1592reference to a glob (e.g. C<\*STDOUT>), or the glob itself (e.g. C<*STDOUT>).
1593
1594 csv (in => sub { $sth->fetch }, out => "dump.csv");
1595 csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv",
1596 headers => $sth->{NAME_lc});
1597
1598When a code-ref is used for C<in>, the output is generated per invocation,
1599so no buffering is involved. This implies that there is no size restriction
1600on the number of records. The C<csv> function ends when the coderef returns
1601a false value.
1602
1603=head3 encoding
1604
1605If passed, it should be an encoding accepted by the C<:encoding()> option
1606to C<open>. There is no default value. This attribute does not work in perl
16075.6.x. C<encoding> can be abbreviated to C<enc> for ease of use in command
1608line invocations.
1609
1610If C<encoding> is set to the literal value C<"auto">, the method L</header>
1611will be invoked on the opened stream to check if there is a BOM and set the
1612encoding accordingly. This is equal to passing a true value in the option
1613L<C<detect_bom>|/detect_bom>.
1614
1615=head3 detect_bom
1616
1617If C<detect_bom> is given, the method L</header> will be invoked on the
1618opened stream to check if there is a BOM and set the encoding accordingly.
1619
1620C<detect_bom> can be abbreviated to C<bom>.
1621
1622This is the same as setting L<C<encoding>|/encoding> to C<"auto">.
1623
1624Note that as L</header> is invoked, its default is to also set the headers.
1625
1626=head3 headers
1627
1628If this attribute is not given, the default behavior is to produce an array
1629of arrays.
1630
1631If C<headers> is supplied, it should be an anonymous list of column names,
1632an anonymous hashref, a coderef, or a literal flag: C<auto>, C<lc>, C<uc>,
1633or C<skip>.
1634
1635=over 2
1636
1637=item skip
1638
1639When C<skip> is used, the header will not be included in the output.
1640
1641 my $aoa = csv (in => $fh, headers => "skip");
1642
1643=item auto
1644
1645If C<auto> is used, the first line of the C<CSV> source will be read as the
1646list of field headers and used to produce an array of hashes.
1647
1648 my $aoh = csv (in => $fh, headers => "auto");
1649
1650=item lc
1651
1652If C<lc> is used, the first line of the C<CSV> source will be read as the
1653list of field headers mapped to lower case and used to produce an array of
1654hashes. This is a variation of C<auto>.
1655
1656 my $aoh = csv (in => $fh, headers => "lc");
1657
1658=item uc
1659
1660If C<uc> is used, the first line of the C<CSV> source will be read as the
1661list of field headers mapped to upper case and used to produce an array of
1662hashes. This is a variation of C<auto>.
1663
1664 my $aoh = csv (in => $fh, headers => "uc");
1665
1666=item CODE
1667
1668If a coderef is used, the first line of the C<CSV> source will be read as
1669the list of mangled field headers in which each field is passed as the only
1670argument to the coderef. This list is used to produce an array of hashes.
1671
1672 my $aoh = csv (in => $fh,
1673 headers => sub { lc ($_[0]) =~ s/kode/code/gr });
1674
1675This example is a variation of using C<lc> where all occurrences of C<kode>
1676are replaced with C<code>.
1677
1678=item ARRAY
1679
1680If C<headers> is an anonymous list, the entries in the list will be used
1681as field names. The first line is considered data instead of headers.
1682
1683 my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]);
1684 csv (in => $aoa, out => $fh, headers => [qw( code description price )]);
1685
1686=item HASH
1687
1688If C<headers> is a hash reference, this implies C<auto>, but header fields
1689that exist as key in the hashref will be replaced by the value for that
1690key. Given a CSV file like
1691
1692 post-kode,city,name,id number,fubble
1693 1234AA,Duckstad,Donald,13,"X313DF"
1694
1695using
1696
1697 csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ...
1698
1699will return an entry like
1700
1701 { pc => "1234AA",
1702 city => "Duckstad",
1703 name => "Donald",
1704 ID => "13",
1705 fubble => "X313DF",
1706 }
1707
1708=back
1709
1710See also L<C<munge_column_names>|/munge_column_names> and
1711L<C<set_column_names>|/set_column_names>.
1712
1713=head3 munge_column_names
1714
1715If C<munge_column_names> is set, the method L</header> is invoked on the
1716opened stream with all matching arguments to detect and set the headers.
1717
1718C<munge_column_names> can be abbreviated to C<munge>.
1719
1720=head3 key
1721
1722If passed, will default L<C<headers>|/headers> to C<"auto"> and return a
1723hashref instead of an array of hashes.
1724
1725 my $ref = csv (in => "test.csv", key => "code");
1726
1727with test.csv like
1728
1729 code,product,price,color
1730 1,pc,850,gray
1731 2,keyboard,12,white
1732 3,mouse,5,black
1733
1734will return
1735
1736 { 1 => {
1737 code => 1,
1738 color => 'gray',
1739 price => 850,
1740 product => 'pc'
1741 },
1742 2 => {
1743 code => 2,
1744 color => 'white',
1745 price => 12,
1746 product => 'keyboard'
1747 },
1748 3 => {
1749 code => 3,
1750 color => 'black',
1751 price => 5,
1752 product => 'mouse'
1753 }
1754 }
1755
1756=head3 fragment
1757
1758Only output the fragment as defined in the L</fragment> method. This option
1759is ignored when I<generating> C<CSV>. See L</out>.
1760
1761Combining all of them could give something like
1762
1763 use Text::CSV qw( csv );
1764 my $aoh = csv (
1765 in => "test.txt",
1766 encoding => "utf-8",
1767 headers => "auto",
1768 sep_char => "|",
1769 fragment => "row=3;6-9;15-*",
1770 );
1771 say $aoh->[15]{Foo};
1772
1773=head3 sep_set
1774
1775If C<sep_set> is set, the method L</header> is invoked on the opened stream
1776to detect and set L<C<sep_char>|/sep_char> with the given set.
1777
1778C<sep_set> can be abbreviated to C<seps>.
1779
1780Note that as L</header> is invoked, its default is to also set the headers.
1781
1782=head3 set_column_names
1783
1784If C<set_column_names> is passed, the method L</header> is invoked on the
1785opened stream with all arguments meant for L</header>.
1786
1787=head2 Callbacks
1788
1789Callbacks enable actions triggered from the I<inside> of Text::CSV.
1790
1791While most of what this enables can easily be done in an unrolled loop as
1792described in the L</SYNOPSIS> callbacks can be used to meet special demands
1793or enhance the L</csv> function.
1794
1795=over 2
1796
1797=item error
1798
1799 $csv->callbacks (error => sub { $csv->SetDiag (0) });
1800
1801The C<error> callback is invoked when an error occurs, but I<only> when
1802L</auto_diag> is set to a true value. A callback is invoked with the values
1803returned by L</error_diag>:
1804
1805 my ($c, $s);
1806
1807 sub ignore3006
1808 {
1809 my ($err, $msg, $pos, $recno, $fldno) = @_;
1810 if ($err == 3006) {
1811 # ignore this error
1812 ($c, $s) = (undef, undef);
1813 Text::CSV->SetDiag (0);
1814 }
1815 # Any other error
1816 return;
1817 } # ignore3006
1818
1819 $csv->callbacks (error => \&ignore3006);
1820 $csv->bind_columns (\$c, \$s);
1821 while ($csv->getline ($fh)) {
1822 # Error 3006 will not stop the loop
1823 }
1824
1825=item after_parse
1826
1827 $csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" });
1828 while (my $row = $csv->getline ($fh)) {
1829 $row->[-1] eq "NEW";
1830 }
1831
1832This callback is invoked after parsing with L</getline> only if no error
1833occurred. The callback is invoked with two arguments: the current C<CSV>
1834parser object and an array reference to the fields parsed.
1835
1836The return code of the callback is ignored unless it is a reference to the
1837string "skip", in which case the record will be skipped in L</getline_all>.
1838
1839 sub add_from_db
1840 {
1841 my ($csv, $row) = @_;
1842 $sth->execute ($row->[4]);
1843 push @$row, $sth->fetchrow_array;
1844 } # add_from_db
1845
1846 my $aoa = csv (in => "file.csv", callbacks => {
1847 after_parse => \&add_from_db });
1848
1849This hook can be used for validation:
1850
1851=over 2
1852
1853=item FAIL
1854
1855Die if any of the records does not validate a rule:
1856
1857 after_parse => sub {
1858 $_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or
1859 die "5th field does not have a valid Dutch zipcode";
1860 }
1861
1862=item DEFAULT
1863
1864Replace invalid fields with a default value:
1865
1866 after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 }
1867
1868=item SKIP
1869
1870Skip records that have invalid fields (only applies to L</getline_all>):
1871
1872 after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; }
1873
1874=back
1875
1876=item before_print
1877
1878 my $idx = 1;
1879 $csv->callbacks (before_print => sub { $_[1][0] = $idx++ });
1880 $csv->print (*STDOUT, [ 0, $_ ]) for @members;
1881
1882This callback is invoked before printing with L</print> only if no error
1883occurred. The callback is invoked with two arguments: the current C<CSV>
1884parser object and an array reference to the fields passed.
1885
1886The return code of the callback is ignored.
1887
1888 sub max_4_fields
1889 {
1890 my ($csv, $row) = @_;
1891 @$row > 4 and splice @$row, 4;
1892 } # max_4_fields
1893
1894 csv (in => csv (in => "file.csv"), out => *STDOUT,
1895 callbacks => { before_print => \&max_4_fields });
1896
1897This callback is not active for L</combine>.
1898
1899=back
1900
1901=head3 Callbacks for csv ()
1902
1903The L</csv> function allows for some callbacks that do not integrate in XS
1904internals but are only available in the L</csv> function.
1905
1906 csv (in => "file.csv",
1907 callbacks => {
1908 filter => { 6 => sub { $_ > 15 } }, # first
1909 after_parse => sub { say "AFTER PARSE"; }, # first
1910 after_in => sub { say "AFTER IN"; }, # second
1911 on_in => sub { say "ON IN"; }, # third
1912 },
1913 );
1914
1915 csv (in => $aoh,
1916 out => "file.csv",
1917 callbacks => {
1918 on_in => sub { say "ON IN"; }, # first
1919 before_out => sub { say "BEFORE OUT"; }, # second
1920 before_print => sub { say "BEFORE PRINT"; }, # third
1921 },
1922 );
1923
1924=over 2
1925
1926=item filter
1927
1928This callback can be used to filter records. It is called just after a new
1929record has been scanned. The callback accepts a hashref where the keys are
1930the index to the row (the field number, 1-based) and the values are subs to
1931return a true or false value.
1932
1933 csv (in => "file.csv", filter => {
1934 3 => sub { m/a/ }, # third field should contain an "a"
1935 5 => sub { length > 4 }, # length of the 5th field minimal 5
1936 });
1937
1938 csv (in => "file.csv", filter => "not_blank");
1939 csv (in => "file.csv", filter => "not_empty");
1940 csv (in => "file.csv", filter => "filled");
1941
1942If the keys to the filter hash contain any character that is not a digit it
1943will also implicitly set L</headers> to C<"auto"> unless L</headers> was
1944already passed as argument. When headers are active, returning an array of
1945hashes, the filter is not applicable to the header itself.
1946
1947 csv (in => "file.csv", filter => { foo => sub { $_ > 4 }});
1948
1949All sub results should match, as in AND.
1950
1951The context of the callback sets C<$_> localized to the field indicated by
1952the filter. The two arguments are as with all other callbacks, so the other
1953fields in the current row can be seen:
1954
1955 filter => { 3 => sub { $_ > 100 ? $_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }}
1956
1957If the context is set to return a list of hashes (L</headers> is defined),
1958the current record will also be available in the localized C<%_>:
1959
1960 filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000 }}
1961
1962If the filter is used to I<alter> the content by changing C<$_>, make sure
1963that the sub returns true in order not to have that record skipped:
1964
1965 filter => { 2 => sub { $_ = uc }}
1966
1967will upper-case the second field, and then skip it if the resulting content
1968evaluates to false. To always accept, end with truth:
1969
1970 filter => { 2 => sub { $_ = uc; 1 }}
1971
1972B<Predefined filters>
1973
1974Given a file like (line numbers prefixed for doc purpose only):
1975
1976 1:1,2,3
1977 2:
1978 3:,
1979 4:""
1980 5:,,
1981 6:, ,
1982 7:"",
1983 8:" "
1984 9:4,5,6
1985
1986=over 2
1987
1988=item not_blank
1989
1990Filter out the blank lines
1991
1992This filter is a shortcut for
1993
1994 filter => { 0 => sub { @{$_[1]} > 1 or
1995 defined $_[1][0] && $_[1][0] ne "" } }
1996
1997Due to the implementation, it is currently impossible to also filter lines
1998that consist only of a quoted empty field. These lines are also considered
1999blank lines.
2000
2001With the given example, lines 2 and 4 will be skipped.
2002
2003=item not_empty
2004
2005Filter out lines where all the fields are empty.
2006
2007This filter is a shortcut for
2008
2009 filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } }
2010
2011A space is not regarded as being empty, so given the example data, lines 2, 3,
20124, 5, and 7 are skipped.
2013
2014=item filled
2015
2016Filter out lines that have no visible data
2017
2018This filter is a shortcut for
2019
2020 filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } }
2021
2022This filter rejects all lines that do I<not> have at least one field that
2023contains a non-whitespace character.
2024
2025With the given example data, this filter would skip lines 2 through 8.
2026
2027=back
2028
2029=item after_in
2030
2031This callback is invoked for each record after all records have been parsed
2032but before returning the reference to the caller. The hook is invoked with
2033two arguments: the current C<CSV> parser object and a reference to the
2034record. The reference can be a reference to a HASH or a reference to an
2035ARRAY as determined by the arguments.
2036
2037This callback can also be passed as an attribute without the C<callbacks>
2038wrapper.
2039
2040=item before_out
2041
2042This callback is invoked for each record before the record is printed. The
2043hook is invoked with two arguments: the current C<CSV> parser object and a
2044reference to the record. The reference can be a reference to a HASH or a
2045reference to an ARRAY as determined by the arguments.
2046
2047This callback can also be passed as an attribute without the C<callbacks>
2048wrapper.
2049
2050This callback makes the row available in C<%_> if the row is a hashref. In
2051this case C<%_> is writable and will change the original row.
2052
2053=item on_in
2054
2055This callback acts exactly as the L</after_in> or the L</before_out> hooks.
2056
2057This callback can also be passed as an attribute without the C<callbacks>
2058wrapper.
2059
2060This callback makes the row available in C<%_> if the row is a hashref. In
2061this case C<%_> is writable and will change the original row. So e.g. with
2062
2063 my $aoh = csv (
2064 in => \"foo\n1\n2\n",
2065 headers => "auto",
2066 on_in => sub { $_{bar} = 2; },
2067 );
2068
2069C<$aoh> will be:
2070
2071 [ { foo => 1,
2072 bar => 2,
2073 },
2074 { foo => 2,
2075 bar => 2,
2076 }
2077 ]
2078
2079=item csv
2080
2081The I<function> L</csv> can also be called as a method or with an existing
2082Text::CSV object. This could help if the function is to be invoked a lot
2083of times and the overhead of creating the object internally over and over
2084again would be prevented by passing an existing instance.
2085
2086 my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
2087
2088 my $aoa = $csv->csv (in => $fh);
2089 my $aoa = csv (in => $fh, csv => $csv);
2090
2091both act the same. Running this 20000 times on a 20-line CSV file showed
2092a 53% speedup.
2093
2094=back
2095
2096=head1 DIAGNOSTICS
2097
2098This section is also taken from Text::CSV_XS.
2099
2100If an error occurs, C<< $csv->error_diag >> can be used to get information
2101on the cause of the failure. Note that for speed reasons the internal value
2102is never cleared on success, so using the value returned by L</error_diag>
2103in normal cases - when no error occurred - may cause unexpected results.
2104
2105If the constructor failed, the cause can be found using L</error_diag> as a
2106class method, like C<< Text::CSV_PP->error_diag >>.
2107
2108The C<< $csv->error_diag >> method is automatically invoked upon error when
2109the constructor was called with L<C<auto_diag>|/auto_diag> set to C<1> or
2110C<2>, or when L<autodie> is in effect. When set to C<1>, this will cause a
2111C<warn> with the error message, when set to C<2>, it will C<die>. C<2012 -
2112EOF> is excluded from L<C<auto_diag>|/auto_diag> reports.
2113
2114Errors can be (individually) caught using the L</error> callback.
2115
2116The errors as described below are available. I have tried to make the error
2117itself explanatory enough, but more descriptions will be added. For most of
2118these errors, the first three capitals describe the error category:
2119
2120=over 2
2121
2122=item *
2123INI
2124
2125Initialization error or option conflict.
2126
2127=item *
2128ECR
2129
2130Carriage-Return related parse error.
2131
2132=item *
2133EOF
2134
2135End-Of-File related parse error.
2136
2137=item *
2138EIQ
2139
2140Parse error inside quotation.
2141
2142=item *
2143EIF
2144
2145Parse error inside field.
2146
2147=item *
2148ECB
2149
2150Combine error.
2151
2152=item *
2153EHR
2154
2155HashRef parse related error.
2156
2157=back
2158
2159And below should be the complete list of error codes that can be returned:
2160
2161=over 2
2162
2163=item *
21641001 "INI - sep_char is equal to quote_char or escape_char"
2165X<1001>
2166
2167The L<separation character|/sep_char> cannot be equal to L<the quotation
2168character|/quote_char> or to L<the escape character|/escape_char>, as this
2169would invalidate all parsing rules.
2170
2171=item *
21721002 "INI - allow_whitespace with escape_char or quote_char SP or TAB"
2173X<1002>
2174
2175Using the L<C<allow_whitespace>|/allow_whitespace> attribute when either
2176L<C<quote_char>|/quote_char> or L<C<escape_char>|/escape_char> is equal to
2177C<SPACE> or C<TAB> is too ambiguous to allow.
2178
2179=item *
21801003 "INI - \r or \n in main attr not allowed"
2181X<1003>
2182
2183Using default L<C<eol>|/eol> characters in either L<C<sep_char>|/sep_char>,
2184L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> is not
2185allowed.
2186
2187=item *
21881004 "INI - callbacks should be undef or a hashref"
2189X<1004>
2190
2191The L<C<callbacks>|/Callbacks> attribute only allows one to be C<undef> or
2192a hash reference.
2193
2194=item *
21951005 "INI - EOL too long"
2196X<1005>
2197
2198The value passed for EOL is exceeding its maximum length (16).
2199
2200=item *
22011006 "INI - SEP too long"
2202X<1006>
2203
2204The value passed for SEP is exceeding its maximum length (16).
2205
2206=item *
22071007 "INI - QUOTE too long"
2208X<1007>
2209
2210The value passed for QUOTE is exceeding its maximum length (16).
2211
2212=item *
22131008 "INI - SEP undefined"
2214X<1008>
2215
2216The value passed for SEP should be defined and not empty.
2217
2218=item *
22191010 "INI - the header is empty"
2220X<1010>
2221
2222The header line parsed in the L</header> is empty.
2223
2224=item *
22251011 "INI - the header contains more than one valid separator"
2226X<1011>
2227
2228The header line parsed in the L</header> contains more than one (unique)
2229separator character out of the allowed set of separators.
2230
2231=item *
22321012 "INI - the header contains an empty field"
2233X<1012>
2234
2235The header line parsed in the L</header> contains an empty field.
2236
2237=item *
22381013 "INI - the header contains nun-unique fields"
2239X<1013>
2240
2241The header line parsed in the L</header> contains at least two identical
2242fields.
2243
2244=item *
22451014 "INI - header called on undefined stream"
2246X<1014>
2247
2248The header line cannot be parsed from an undefined source.
2249
2250=item *
22511500 "PRM - Invalid/unsupported argument(s)"
2252X<1500>
2253
2254Function or method called with invalid argument(s) or parameter(s).
2255
2256=item *
22572010 "ECR - QUO char inside quotes followed by CR not part of EOL"
2258X<2010>
2259
2260When L<C<eol>|/eol> has been set to anything but the default, like
2261C<"\r\t\n">, and the C<"\r"> is following the B<second> (closing)
2262L<C<quote_char>|/quote_char>, where the characters following the C<"\r"> do
2263not make up the L<C<eol>|/eol> sequence, this is an error.
2264
2265=item *
22662011 "ECR - Characters after end of quoted field"
2267X<2011>
2268
2269Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted
2270field and after the closing double-quote, there should be either a new-line
2271sequence or a separation character.
2272
2273=item *
22742012 "EOF - End of data in parsing input stream"
2275X<2012>
2276
2277Self-explaining. End-of-file while inside parsing a stream. Can happen only
2278when reading from streams with L</getline>, as using L</parse> is done on
2279strings that are not required to have a trailing L<C<eol>|/eol>.
2280
2281=item *
22822013 "INI - Specification error for fragments RFC7111"
2283X<2013>
2284
2285Invalid specification for URI L</fragment>.
2286
2287=item *
22882021 "EIQ - NL char inside quotes, binary off"
2289X<2021>
2290
2291Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option
2292has been selected with the constructor.
2293
2294=item *
22952022 "EIQ - CR char inside quotes, binary off"
2296X<2022>
2297
2298Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option
2299has been selected with the constructor.
2300
2301=item *
23022023 "EIQ - QUO character not allowed"
2303X<2023>
2304
2305Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n>
2306will cause this error.
2307
2308=item *
23092024 "EIQ - EOF cannot be escaped, not even inside quotes"
2310X<2024>
2311
2312The escape character is not allowed as last character in an input stream.
2313
2314=item *
23152025 "EIQ - Loose unescaped escape"
2316X<2025>
2317
2318An escape character should escape only characters that need escaping.
2319
2320Allowing the escape for other characters is possible with the attribute
2321L</allow_loose_escape>.
2322
2323=item *
23242026 "EIQ - Binary character inside quoted field, binary off"
2325X<2026>
2326
2327Binary characters are not allowed by default. Exceptions are fields that
2328contain valid UTF-8, that will automatically be upgraded if the content is
2329valid UTF-8. Set L<C<binary>|/binary> to C<1> to accept binary data.
2330
2331=item *
23322027 "EIQ - Quoted field not terminated"
2333X<2027>
2334
2335When parsing a field that started with a quotation character, the field is
2336expected to be closed with a quotation character. When the parsed line is
2337exhausted before the quote is found, that field is not terminated.
2338
2339=item *
23402030 "EIF - NL char inside unquoted verbatim, binary off"
2341X<2030>
2342
2343=item *
23442031 "EIF - CR char is first char of field, not part of EOL"
2345X<2031>
2346
2347=item *
23482032 "EIF - CR char inside unquoted, not part of EOL"
2349X<2032>
2350
2351=item *
23522034 "EIF - Loose unescaped quote"
2353X<2034>
2354
2355=item *
23562035 "EIF - Escaped EOF in unquoted field"
2357X<2035>
2358
2359=item *
23602036 "EIF - ESC error"
2361X<2036>
2362
2363=item *
23642037 "EIF - Binary character in unquoted field, binary off"
2365X<2037>
2366
2367=item *
23682110 "ECB - Binary character in Combine, binary off"
2369X<2110>
2370
2371=item *
23722200 "EIO - print to IO failed. See errno"
2373X<2200>
2374
2375=item *
23763001 "EHR - Unsupported syntax for column_names ()"
2377X<3001>
2378
2379=item *
23803002 "EHR - getline_hr () called before column_names ()"
2381X<3002>
2382
2383=item *
23843003 "EHR - bind_columns () and column_names () fields count mismatch"
2385X<3003>
2386
2387=item *
23883004 "EHR - bind_columns () only accepts refs to scalars"
2389X<3004>
2390
2391=item *
23923006 "EHR - bind_columns () did not pass enough refs for parsed fields"
2393X<3006>
2394
2395=item *
23963007 "EHR - bind_columns needs refs to writable scalars"
2397X<3007>
2398
2399=item *
24003008 "EHR - unexpected error in bound fields"
2401X<3008>
2402
2403=item *
24043009 "EHR - print_hr () called before column_names ()"
2405X<3009>
2406
2407=item *
24083010 "EHR - print_hr () called with invalid arguments"
2409X<3010>
2410
2411=back
2412
2413=head1 SEE ALSO
2414
2415L<Text::CSV_PP>, L<Text::CSV_XS> and L<Text::CSV::Encoded>.
2416
2417
2418=head1 AUTHORS and MAINTAINERS
2419
2420Alan Citterman F<E<lt>alan[at]mfgrtl.comE<gt>> wrote the original Perl
2421module. Please don't send mail concerning Text::CSV to Alan, as
2422he's not a present maintainer.
2423
2424Jochen Wiedmann F<E<lt>joe[at]ispsoft.deE<gt>> rewrote the encoding and
2425decoding in C by implementing a simple finite-state machine and added
2426the variable quote, escape and separator characters, the binary mode
2427and the print and getline methods. See ChangeLog releases 0.10 through
24280.23.
2429
2430H.Merijn Brand F<E<lt>h.m.brand[at]xs4all.nlE<gt>> cleaned up the code,
2431added the field flags methods, wrote the major part of the test suite,
2432completed the documentation, fixed some RT bugs. See ChangeLog releases
24330.25 and on.
2434
2435Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt> wrote Text::CSV_PP
2436which is the pure-Perl version of Text::CSV_XS.
2437
2438New Text::CSV (since 0.99) is maintained by Makamaka, and Kenichi Ishigaki
2439since 1.91.
2440
2441
2442=head1 COPYRIGHT AND LICENSE
2443
2444Text::CSV
2445
2446Copyright (C) 1997 Alan Citterman. All rights reserved.
2447Copyright (C) 2007-2015 Makamaka Hannyaharamitu.
2448Copyright (C) 2017- Kenichi Ishigaki
2449A large portion of the doc is taken from Text::CSV_XS. See below.
2450
2451Text::CSV_PP:
2452
2453Copyright (C) 2005-2015 Makamaka Hannyaharamitu.
2454Copyright (C) 2017- Kenichi Ishigaki
2455A large portion of the code/doc is also taken from Text::CSV_XS. See below.
2456
2457Text::CSV_XS:
2458
2459Copyright (C) 2007-2016 H.Merijn Brand for PROCURA B.V.
2460Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved.
2461Portions Copyright (C) 1997 Alan Citterman. All rights reserved.
2462
2463
2464This library is free software; you can redistribute it and/or modify
2465it under the same terms as Perl itself.
2466
2467=cut