########################################################### # A Perl package for showing/modifying JPEG (meta)data. # # Copyright (C) 2004,2005,2006 Stefano Bettelli # # See the COPYING and LICENSE files for license terms. # ########################################################### package Image::MetaData::JPEG::Segment; use Image::MetaData::JPEG::Tables qw(:JPEGgrammar :Endianness :RecordTypes); use Image::MetaData::JPEG::Backtrace; use Image::MetaData::JPEG::Record; no integer; use strict; use warnings; ########################################################### # These simple methods should be used instead of standard # # "warn" and "die" in this package; they print a much # # more elaborated error message (including a stack trace).# # Warnings can be turned off altogether simply by setting # # Image::MetaData::JPEG::show_warnings to false. # ########################################################### sub warn { my ($this, $message) = @_; warn Image::MetaData::JPEG::Backtrace::backtrace ($message, "Warning" . $this->info(), $this) if $Image::MetaData::JPEG::show_warnings; } sub die { my ($this, $message) = @_; die Image::MetaData::JPEG::Backtrace::backtrace ($message, "Fatal error" . $this->info(), $this);} sub info { my ($this) = @_; my $name = $this->{name} || ''; return " [segment type $name]"; } ########################################################### # JPEG segment header constructor. Its arguments are: the # # segment type (a multicharacter string, not the marker), # # a reference to a raw data buffer and a parse flag. The # # raw buffer is saved internally through its reference # # (no copy is done). If its parse flag does not match # # "NOPARSE", and its type is parseable, the Segment has # # its key-value pairs extracted to JPEG::Record's in the # # 'records' list. # #=========================================================# # The first four bytes in the Segment mean: # # # # 2 bytes segment marker (0xff..) # # 2 bytes length (including this value) # # # # The marker is a two byte value, whose first byte is # # always 0xff. The value of the second byte defines the # # segment type. It is assumed that the buffer which is # # passed to this constructor DOES NOT contain these four # # bytes; in fact, the segment type can be deduced by its # # symbolic name (first argument), and the buffer size can # # be calculated with the length() function. This simpli- # # fies a lot of repetitive code, but it must be kept in # # mind when the file is written back to the filesystem. # #=========================================================# # $this->{endianness} (a private field) contains the # # current endianness, i.e. the endianness to be used for # # reading the next values while parsing the data area. # # Its significant is therefore only transient, and it is # # set to undef at the end of the constructor. # #=========================================================# # $this->{error} is normally set to "undef". If, however, # # an error occurred during the parsing stage in the cons- # # tructor, this variable is set to an error message. The # # intended use is the following: a Segment with errors # # can be inspected (partially, of course, because parsing # # did not terminate correctly) but not modified (that is, # # the update method, which overwrites the area pointed to # # by $this->{dataref}, must be inhibited): it can only be # # rewritten to disk as it is. # ########################################################### sub new { my ($pkg, $name, $dataref, $flag) = @_; # if $dataref is undef, point it to a *modifiable* empty string my $this = bless { name => $name, dataref => defined $dataref ? $dataref : \ (my $ns = ''), records => [], error => undef, endianness => undef, }, $pkg; # die on various error conditions $this->die('Invalid segment name') unless defined $name && ! ref $name; $this->die('Invalid data reference') if defined $dataref && ! ref $dataref; # parse the segment (pass the $flag) $this->parse($flag); # return a reference to the constructed object return $this; } ########################################################### # This method parses or reparses the current segment. It # # only dispatches the flow to specific subroutines based # # on the segment name. The error flag is reset to undef # # before parse_*, so that, at the end, it reflects only # # errors occurred during this parse session. If the $flag # # argument is set to "NOPARSE", this method simulates an # # error and refuses to proceed further. The parsed data # # array "@records" is flushed when entering this routine. # #=========================================================# # Segment parsing is enclosed in an eval block, so that # # errors are not fatal (they work as trapped exceptions, # # and the die-string is converted into a message). # #=========================================================# # See also the notes in the constructor about the private # # var. $this->{endianness} and the use of $this->{error}. # ########################################################### sub parse { my ($this, $flag) = @_; # locally set endianness to big endian local $this->{endianness} = $BIG_ENDIAN; # reset the error flag and clear the data set $this->{error} = undef; $this->{records} = []; # call the specific parse routines inside an eval block, # so that errors are not fatal... eval { # if $flag matches "NOPARSE", we don't need to parse goto STOP_PARSING if ($flag && $flag =~ /NOPARSE/); # this is a stupid Perl-style switch for ($this->{name}) { # parse all informative tags $_ eq 'COM' ? $this->parse_com() : # User comments $_ eq 'APP0' ? $this->parse_app0() : # JFIF $_ eq 'APP1' ? $this->parse_app1() : # Exif or XMP $_ eq 'APP2' ? $this->parse_app2() : # FPXR or ICC_Prof $_ eq 'APP3' ? $this->parse_app3() : # Additonal metadata $_ eq 'APP4' ? $this->parse_unknown() : # HPSC $_ eq 'APP12' ? $this->parse_app12() : # PreExif ascii meta $_ eq 'APP13' ? $this->parse_app13() : # IPTC and Photoshop $_ eq 'APP14' ? $this->parse_app14() : # Adobe tags # parse all JPEG image tags (SOI, EOI and RST* are trivial) /^(SOI|EOI|RST)$/ ? do { /nothing/ } : $_ eq 'DQT' ? $this->parse_dqt() : $_ eq 'DHT' ? $this->parse_dht() : $_ eq 'DAC' ? $this->parse_dac() : /^SOF|DHP/ ? $this->parse_sof() : $_ eq 'SOS' ? $this->parse_sos() : $_ eq 'DNL' ? $this->parse_dnl() : $_ eq 'DRI' ? $this->parse_dri() : $_ eq 'EXP' ? $this->parse_exp() : # this is the fallback case $this->parse_unknown(); }; STOP_PARSING: }; # parsing was ok if no error was catched by the eval. # Update the "error" member here to reflect this fact. $this->{error} = $@ if $@; } ########################################################### # This method re-executes the parsing of a segment after # # changing the segment nature (well, its name). This is # # very handy if you have a JPEG file with a correct appli-# # cation segment exception made for its name. I used it # # the first time for a file having an ICC_profile segment # # (usually in APP2) stored as APP13. Note that the name # # of the segment is permanently changed, so, if the file # # is rewritten to disk, it will be "correct". # ########################################################### sub reparse_as { my ($this, $new_name) = @_; # change the nature of this segment by overwriting its name $this->{name} = $new_name; # re-execute the parsing $this->parse(); } ########################################################### # This method is the entry point for dumping the data # # structures stored in the records into the private data # # area. This method needs to be called before rewriting a # # file to the disk, if any record was changed/added/elimi-# # nated. The routine dispatches to more specific methods. # # ------------------------------------------------------- # # A segment with errors cannot be updated (a security # # measure: do not update what you do not understand). # # Entropy-coded segments or past-the-end garbage do not # # need being updated: the method returns immediately. # # ------------------------------------------------------- # # update() saves a reference to the old segment data area # # and restores it if the specialised update routine fails.# # This only generate a warning! Are there cleverer ways # # to handle this case? It is however better to have a # # corrupt object in memory, than a corrupt object written # # over the original. Currently, this is restricted to the # # possibility that an updated segment becomes too large. # ########################################################### sub update { my ($this) = @_; # get the name of the segment my $name = $this->{name}; # return immediately if this is an entropy-coded segment or # past-the-end garbage. There is no need to "update" them return if $name =~ /ECS|Post-EOI/; # if the segment was not correctly parsed, warn and return $this->die('This segment is faulty') if $this->{error}; # this might come also from 'NOPARSE' $this->die('This segment has no records') unless @{$this->{records}}; # save a copy of the old data area. my $old_content = $this->{dataref}; # blank the data area (do not assign directly to a reference to the # null string, since it is not modifiable in some implementations!) $this->{dataref} = \ (my $ns = ''); # an error variable for specific update routines my $error = undef; # call more specific routines for segments we know how # to update. Generate an error if the type is not managed. # (SOI, EOI and RST* are trivial and should not get here) for ($name) { $error = $this->dump_com(), next if $_ eq 'COM'; $error = $this->dump_app1(), next if $_ eq 'APP1'; $error = $this->dump_app13(), next if $_ eq 'APP13'; $error = "Update routine for '$_' not yet implemented"; } # get the size of the new data area my $length = $this->size(); # if new size is too large, set the error flag $error = "Segment '${name}' too large (len=${length}, " . "max=${JPEG_SEG_MAX_LEN})" if $length > $JPEG_SEG_MAX_LEN; # if the update failed, revert to the old content if ($error) { $this->warn("Update failed [$error]: reverting to old content ..."); $this->{dataref} = $old_content; } } ########################################################### # This method outputs the current segment data area into # # a file handle. The segment "preamble" is prepended, ex- # # ception made for raw data (scans). The preamble always # # includes the 0xff byte followed by the segment marker. # # A Segment which can accept real data also requires a # # two-byte data count. The return value is the error # # status of the print calls. # # ------------------------------------------------------- # # If the segment size is too large, a warning is printed # # and 0 is returned (this can make the file invalid); # # this is however just for debugging, I hope .... # #=========================================================# # Note that the data area of a segment can be void and, # # nonetheless, the segment might require a segment length # # word (e.g., a "" comment). In practise, the only seg- # # ments not needing the length word are SOI, EOI and RST*.# ########################################################### sub output_segment_data { my ($this, $out) = @_; # collect the name of the segment and the length of the data area my $name = $this->{name}; my $length = $this->size(); # Check segment length and throw an exception in case it is too # large. Do not run the check for raw data or past-the-end data. $this->die(sprintf('Segment %s too large (len=%d, max=%d)', $this->{name}, $length, $JPEG_SEG_MAX_LEN)) if $length > $JPEG_SEG_MAX_LEN && $name !~ /ECS|Post-EOI/; # prepare the segment header (not needed for a raw data segment) my $preamble = ( $name =~ /ECS|Post-EOI/ ? "" : pack("CC", $JPEG_PUNCTUATION, $JPEG_MARKER{$name}) ); # prepare the length word (not all segment types need it) $preamble .= pack("n", 2 + $length) unless $name =~ /SOI|EOI|RST|ECS|Post-EOI/; # output the preamble and the data buffer (return the status) return print {$out} $preamble, $this->data(0, $length); } ########################################################### # This method shows the content of the segment. It prints # # a header, then inspects the directory recursively. # ########################################################### sub get_description { my ($this) = @_; # prepare the marker and the error message my $amarker = $JPEG_MARKER{$this->{name}}; my $error = $this->{error}; chomp $error if defined $error; # prepare a header for this segment (was Segment_Banner) my $description = sprintf("%7dB ", $this->size()) . ($amarker ? sprintf "<0x%02x %5s>", $amarker, $this->{name} : sprintf "<%10s>", $this->{name} ) . ($error ? " {Faulty segment:\n $error}" : "") . "\n"; # a list for successive keys for numeric tag descriptions my $names = [ $this->{name} ]; # show all the records we have in our structures (recursively) $description .= $this->show_directory($this->{records}, $names); } ########################################################### # This method shows the content of a record directory in # # a segment; the first argument is a record list refe- # # rence; the second argument is a list to a list of names # # used to resolve numeric tags. A string is returned. # ########################################################### sub show_directory { my ($this, $records, $names) = @_; # protection againts invalid references return "" unless ref $records eq 'ARRAY'; # prepare the string to be returned at the end my $description = ""; # an initially empty list for remembering sub-dirs my @subdirs = (); # show all records in this directory foreach (@$records) { # show the record content $description .= $_->get_description($names); # if this is a subdir, remember its reference push @subdirs, $_ if $_->get_category() eq 'p'; } # for every subdir we found, recurse foreach (@subdirs) { # get the directory name and reference my ($dir_name, $directory) = ($_->{key}, $_->get_value()); # update the $names list push @$names, $dir_name; # print a sub-header for this directory $description .= Directory_Banner($names, $directory); # show the sub directory $description .= $this->show_directory($directory, $names); # pop the last dir name from @$names pop @$names; } # return the string we cooked up return $description; } ########################################################### # This helper function returns a string to be used as a # # generic header for a segment directory. # ########################################################### sub Directory_Banner { my ($names, $dirref) = @_; # protections against invalid references $names = [] unless ref $names eq 'ARRAY'; $dirref = [], push @$names, "[invalid]" unless ref $dirref eq 'ARRAY'; # prepare parts of the description my $buffer = join " --> ", @$names; my $decoration = "*" x 10; my $indentation = " \t" x scalar @$names; # complete the description and return it my $description = sprintf "%s%s %s %s (%2d records)", $indentation, $decoration, $buffer, $decoration, scalar @$dirref; return $description . "\n"; } ########################################################### # This helper method is used to test a size condition, # # i.e. that there is enough data (or exactly some amount # # of data) in the data buffer. If the test fails, it dies # ########################################################### sub test_size { my ($this, $required, $message) = @_; # positive $require: test not greater return if $required >= 0 && $this->size() >= $required; # negative $require: test equality (on -$required) return if $required < 0 && $this->size() == (- $required); # if test fails, call die and hope it is intercepted my $precise = ""; $message = defined $message ? "($message)" : ""; $required *= -1, $precise = "exactly " if $required < 0; $this->die(sprintf 'Size mismatch in segment %s %s:' . ' required %s%dB, found %dB.', $this->{name}, $message, $precise, $required, $this->size()); } ########################################################### # This is a helper method returning the size in bytes of # # the data area, i.e. that pointed to by $this->{dataref} # ########################################################### sub size { return length ${$_[0]{dataref}}; } ########################################################### # This helper method returns a substring of the data area # # (the arguments are offset and length). # ########################################################### sub data { substr(${$_[0]{dataref}}, $_[1], $_[2]); } ########################################################### # This helper method writes into the segment data area. # # The first argument is a scalar or a scalar reference, # # which (or whose content) is appended to the current # # buffer. The method returns the appended string length. # ########################################################### sub set_data { my ($this, $addenda) = @_; # get a reference to new data (remember that the # first argument can be a scalar or a scalar reference) my $addref = (ref $addenda) ? $addenda : \$addenda; # append the new data through the ref ${$this->{dataref}} .= $$addref; # return the amount of appended data return length $$addref; } ########################################################### # This private method processes the arguments for search # # routines, like search_record and provide_subdirectory. # # 1) a start directory is chosen by looking at the last # # argument: if it is an ARRAY ref it is popped out # # and used, otherwise the top-level directory (i.e., # # $this->{records}) is selected; # # 2) a $keystring is created by joining all remaining # # arguments on '@', then this string is exploded into # # a @keylist on the same character; # # 3) the start directory and the @keylist is returned. # ########################################################### sub process_search_args { my $this = shift; # empty list ==> push a single undefined value @_ = (undef) unless @_; # initialise the search directory: use the last argument if # it is an array reference, the top-level directory otherwise my $directory = ref $_[$#_] eq 'ARRAY' ? pop : $this->{records}; # delete all undefined or "false" arguments @_ = grep { defined $_ } @_; # join all remaining arguments my $keystring = join('@', @_); # split the resulting string on '@' my @keylist = split('@', $keystring); # delete all false arguments @keylist = grep { $_ } @keylist; # return processed arguments return ($directory, @keylist); } ########################################################### # This method searches for a record with a given key in a # # given record directory, returning a reference to the # # record if the search was fruitful, undef otherwise. # # The search is specified as follows: # # 1) a start directory is chosen by looking at the last # # argument: if it is an ARRAY ref it is popped out # # and used, otherwise the top-level directory (i.e., # # $this->{records}) is selected; # # 2) a $keystring is created by joining all remaining # # arguments on '@', then this string is exploded into # # a @keylist on the same character; # # 3) these keys are used for an iterative search start- # # ing from the initially chosen directory: all but # # the last key must correspond to $REFERENCE records. # # ------------------------------------------------------- # # If $key is exactly "FIRST_RECORD" / "LAST_RECORD", the # # first/last record in the current directory is selected. # ########################################################### sub search_record { my $this = shift; # transform the arguments my ($directory, @keylist) = $this->process_search_args(@_); # reset the searched $record to a fake record pointing to the root my $record = $this->create_record('Fake', $REFERENCE, \ $this->{records}); # search iteratively with all elements in @keylist for my $key (@keylist) { # exit the loop as soon as a key is undefined ($record = undef), last unless $key; # update the current $record $record = # reserved key "FIRST_RECORD" returns first record $key eq "FIRST_RECORD" ? $$directory[0] : # reserved key "LAST_RECORD" returns last record $key eq "LAST_RECORD" ? $$directory[$#$directory] : # standard search (get first matching record or undef) ((grep { $_->{key} eq $key } @$directory), undef)[0]; # stop if $record is undefined or is not a $REFERENCE last unless $record && $record->get_category() eq 'p'; # update $directory for next search $directory = $record->get_value(); } # return the search result return $record; } ########################################################### # A simple wrapper around search_record(): it returns the # # record value if the search is ok, undef otherwise. # ########################################################### sub search_record_value { my $this = shift; # call search_record passing all arguments through my $record = $this->search_record(@_); # return the record value if record is defined return $record ? $record->get_value() : undef; } ########################################################### # This method looks for a path of subdirectories from a # # given record list. The treatment of arguments is simi- # # lar to that of search_record: all arguments are joined # # to form a path specification, which is followed, and # # the last directory (record list) is returned. An optio- # # nal last argument may specify an initial directory for # # the search (this defaults to $this->{records}). If any # # subdir entry is not there, it is created on the fly. # ########################################################### sub provide_subdirectory { my $this = shift; # transform the arguments my ($dirref, @keylist) = $this->process_search_args(@_); # search iteratively with all elements in @keylist for my $key (@keylist) { # keys cannot be undefined $this->die('Undefined key') unless $key; # search the subdirectory record my $record = $this->search_record($key, $dirref) || $this->store_record($dirref, $key, $REFERENCE, \ []); # die if $record is not a $REFERENCE $this->die('Not a reference') unless $record->get_category() eq 'p'; # update $dirref for next search $dirref = $record->get_value(); } # return the search result return $dirref; } ########################################################### # This method creates a (possibly multi-valued) JPEG seg- # # ment record from a data buffer or from the segment data # # area, and it is the lowest level record-related method, # # the only one actually calling the JPEG::Record ctor. # # It needs the record identifier, the value type, [a sca- # # lar reference to read data from] or [the offset of the # # memory to read in the data area], and an optional count.# # A reference to the record is returned at the end . # #=========================================================# # If a scalar reference is passed, no check is performed # # on the size of the referenced scalar, because it is as- # # sumed that this is dealt with in the caller routine (be # # sure that $count is correct in this case!), and all the # # arguments are simply passed to the Record constructor. # # The correct endianness is read from the value of the # # current endianness, which is a private object member. # ########################################################### sub create_record { my ($this, $identifier, $type, $dataref, $count) = @_; # if the third argument is an offset, we need to convert it unless (ref $dataref) { # the data reference is indeed an offset my $offset = $dataref; # buffer length is calculated by the Record class my $length = Image::MetaData::JPEG::Record->get_size($type, $count); # for variable-length types, $count is the real length $length = $count if $length == 0; # replace the third argument with a scalar reference $dataref = \ $this->data($offset, $length); # update the offset through its alias (dangerous) # but don't complain if we have a read-only offset eval { $_[3] += $length; }; } # call the record constructor and return its value (a reference) return new Image::MetaData::JPEG::Record ($identifier, $type, $dataref, $count, $this->{endianness}); } ########################################################### # This method is a wrapper for create_record returning # # the parsed value and NOT storing the record internally # # (for this reason we can set $identifier = 0). So, the # # arguments are: type, data reference, count. The data # # reference can be replaced by an offset, used to access # # the internal segment data buffer. If the offset is an # # lvalue, it is updated to point after the memory just # # read. The count can be undefined (it defaults to 1). # ########################################################### sub read_record { # @_ = (this, type, dataref/offset, count) my $this = shift; # invoke create_record: the first argument (the identifier) # is dummy, for the others we can use @_. Return the value return $this->create_record(0, @_)->get_value(); } ########################################################### # This method creates a generic JPEG segment record just # # like read_record, stores it in the "records" list, and # # returns a reference to the newly created record. If the # # offset is an lvalue, it is updated to point after the # # memory just read. See read_record for further details. # #=========================================================# # A list reference can be prepended to the argument list; # # in this case it is used instead of $this->{records}. # ########################################################### sub store_record { # @_ = (this, [record list,] identifier, type, dataref/offset, count) my $this = shift; # get a reference to the record list; but if next argument # is a reference, use it instead (and take it out of @_) my $records = $this->{records}; $records = shift if ref $_[0]; # create a new record and insert it into the record # list; we can use @_ for all the arguments. push @$records, $this->create_record(@_); # return a reference to the last record return $$records[$#$records]; } ########################################################### # Load other parts for this package. In order to avoid # # that this file becomes too large, only general interest # # methods are written here. # ########################################################### require 'Image/MetaData/JPEG/Segment_parsers.pl'; require 'Image/MetaData/JPEG/Segment_dumpers.pl'; # successful package load 1;