This queue is for tickets about the Module-Install CPAN distribution.

Report information
The Basics
Id:
28793
Status:
resolved
Priority:
Low/Low

People
Owner:
Nobody in particular
Requestors:
avar [...] cpan.org
Cc:
AdminCc:

BugTracker
Severity:
Normal
Broken in:
(no value)
Fixed in:
(no value)



Subject: [PARTIAL PATCH] author_from() does not support POD escapes
author_from() only supports the lt/gt E<> escapes. The attached patch adds support for this but chr() will map e.g. 240 to a single bit with the value 240, i.e. invalid UTF-8. I'm not sure what's the correct way to have chr() generate UTF-8 in 5.4 and above (can't use Encode.pm).
Subject: m-i-metadata.patch
diff -ru Module-Install-0.67/lib/Module/Install/Metadata.pm Module-Install/lib/Module/Install/Metadata.pm --- Module-Install-0.67/lib/Module/Install/Metadata.pm 2007-05-09 06:52:46.000000000 +0000 +++ Module-Install/lib/Module/Install/Metadata.pm 2007-08-11 01:04:23.000000000 +0000 @@ -266,6 +266,281 @@ } } +# Copied directly from Pod::Escapes-1.04 for use in author_from, this +# is probably the most sane way to support E<escapes> all the way down +# to 5.004 +our %Name2character_number = ( + # General XML/XHTML: + 'lt' => 60, + 'gt' => 62, + 'quot' => 34, + 'amp' => 38, + 'apos' => 39, + + # POD-specific: + 'sol' => 47, + 'verbar' => 124, + + 'lchevron' => 171, # legacy for laquo + 'rchevron' => 187, # legacy for raquo + + # Remember, grave looks like \ (as in virtu\) + # acute looks like / (as in re/sume/) + # circumflex looks like ^ (as in papier ma^che/) + # umlaut/dieresis looks like " (as in nai"ve, Chloe") + + # From the XHTML 1 .ent files: + 'nbsp' , 160, + 'iexcl' , 161, + 'cent' , 162, + 'pound' , 163, + 'curren' , 164, + 'yen' , 165, + 'brvbar' , 166, + 'sect' , 167, + 'uml' , 168, + 'copy' , 169, + 'ordf' , 170, + 'laquo' , 171, + 'not' , 172, + 'shy' , 173, + 'reg' , 174, + 'macr' , 175, + 'deg' , 176, + 'plusmn' , 177, + 'sup2' , 178, + 'sup3' , 179, + 'acute' , 180, + 'micro' , 181, + 'para' , 182, + 'middot' , 183, + 'cedil' , 184, + 'sup1' , 185, + 'ordm' , 186, + 'raquo' , 187, + 'frac14' , 188, + 'frac12' , 189, + 'frac34' , 190, + 'iquest' , 191, + 'Agrave' , 192, + 'Aacute' , 193, + 'Acirc' , 194, + 'Atilde' , 195, + 'Auml' , 196, + 'Aring' , 197, + 'AElig' , 198, + 'Ccedil' , 199, + 'Egrave' , 200, + 'Eacute' , 201, + 'Ecirc' , 202, + 'Euml' , 203, + 'Igrave' , 204, + 'Iacute' , 205, + 'Icirc' , 206, + 'Iuml' , 207, + 'ETH' , 208, + 'Ntilde' , 209, + 'Ograve' , 210, + 'Oacute' , 211, + 'Ocirc' , 212, + 'Otilde' , 213, + 'Ouml' , 214, + 'times' , 215, + 'Oslash' , 216, + 'Ugrave' , 217, + 'Uacute' , 218, + 'Ucirc' , 219, + 'Uuml' , 220, + 'Yacute' , 221, + 'THORN' , 222, + 'szlig' , 223, + 'agrave' , 224, + 'aacute' , 225, + 'acirc' , 226, + 'atilde' , 227, + 'auml' , 228, + 'aring' , 229, + 'aelig' , 230, + 'ccedil' , 231, + 'egrave' , 232, + 'eacute' , 233, + 'ecirc' , 234, + 'euml' , 235, + 'igrave' , 236, + 'iacute' , 237, + 'icirc' , 238, + 'iuml' , 239, + 'eth' , 240, + 'ntilde' , 241, + 'ograve' , 242, + 'oacute' , 243, + 'ocirc' , 244, + 'otilde' , 245, + 'ouml' , 246, + 'divide' , 247, + 'oslash' , 248, + 'ugrave' , 249, + 'uacute' , 250, + 'ucirc' , 251, + 'uuml' , 252, + 'yacute' , 253, + 'thorn' , 254, + 'yuml' , 255, + + 'fnof' , 402, + 'Alpha' , 913, + 'Beta' , 914, + 'Gamma' , 915, + 'Delta' , 916, + 'Epsilon' , 917, + 'Zeta' , 918, + 'Eta' , 919, + 'Theta' , 920, + 'Iota' , 921, + 'Kappa' , 922, + 'Lambda' , 923, + 'Mu' , 924, + 'Nu' , 925, + 'Xi' , 926, + 'Omicron' , 927, + 'Pi' , 928, + 'Rho' , 929, + 'Sigma' , 931, + 'Tau' , 932, + 'Upsilon' , 933, + 'Phi' , 934, + 'Chi' , 935, + 'Psi' , 936, + 'Omega' , 937, + 'alpha' , 945, + 'beta' , 946, + 'gamma' , 947, + 'delta' , 948, + 'epsilon' , 949, + 'zeta' , 950, + 'eta' , 951, + 'theta' , 952, + 'iota' , 953, + 'kappa' , 954, + 'lambda' , 955, + 'mu' , 956, + 'nu' , 957, + 'xi' , 958, + 'omicron' , 959, + 'pi' , 960, + 'rho' , 961, + 'sigmaf' , 962, + 'sigma' , 963, + 'tau' , 964, + 'upsilon' , 965, + 'phi' , 966, + 'chi' , 967, + 'psi' , 968, + 'omega' , 969, + 'thetasym' , 977, + 'upsih' , 978, + 'piv' , 982, + 'bull' , 8226, + 'hellip' , 8230, + 'prime' , 8242, + 'Prime' , 8243, + 'oline' , 8254, + 'frasl' , 8260, + 'weierp' , 8472, + 'image' , 8465, + 'real' , 8476, + 'trade' , 8482, + 'alefsym' , 8501, + 'larr' , 8592, + 'uarr' , 8593, + 'rarr' , 8594, + 'darr' , 8595, + 'harr' , 8596, + 'crarr' , 8629, + 'lArr' , 8656, + 'uArr' , 8657, + 'rArr' , 8658, + 'dArr' , 8659, + 'hArr' , 8660, + 'forall' , 8704, + 'part' , 8706, + 'exist' , 8707, + 'empty' , 8709, + 'nabla' , 8711, + 'isin' , 8712, + 'notin' , 8713, + 'ni' , 8715, + 'prod' , 8719, + 'sum' , 8721, + 'minus' , 8722, + 'lowast' , 8727, + 'radic' , 8730, + 'prop' , 8733, + 'infin' , 8734, + 'ang' , 8736, + 'and' , 8743, + 'or' , 8744, + 'cap' , 8745, + 'cup' , 8746, + 'int' , 8747, + 'there4' , 8756, + 'sim' , 8764, + 'cong' , 8773, + 'asymp' , 8776, + 'ne' , 8800, + 'equiv' , 8801, + 'le' , 8804, + 'ge' , 8805, + 'sub' , 8834, + 'sup' , 8835, + 'nsub' , 8836, + 'sube' , 8838, + 'supe' , 8839, + 'oplus' , 8853, + 'otimes' , 8855, + 'perp' , 8869, + 'sdot' , 8901, + 'lceil' , 8968, + 'rceil' , 8969, + 'lfloor' , 8970, + 'rfloor' , 8971, + 'lang' , 9001, + 'rang' , 9002, + 'loz' , 9674, + 'spades' , 9824, + 'clubs' , 9827, + 'hearts' , 9829, + 'diams' , 9830, + 'OElig' , 338, + 'oelig' , 339, + 'Scaron' , 352, + 'scaron' , 353, + 'Yuml' , 376, + 'circ' , 710, + 'tilde' , 732, + 'ensp' , 8194, + 'emsp' , 8195, + 'thinsp' , 8201, + 'zwnj' , 8204, + 'zwj' , 8205, + 'lrm' , 8206, + 'rlm' , 8207, + 'ndash' , 8211, + 'mdash' , 8212, + 'lsquo' , 8216, + 'rsquo' , 8217, + 'sbquo' , 8218, + 'ldquo' , 8220, + 'rdquo' , 8221, + 'bdquo' , 8222, + 'dagger' , 8224, + 'Dagger' , 8225, + 'permil' , 8240, + 'lsaquo' , 8249, + 'rsaquo' , 8250, + 'euro' , 8364, +); + sub author_from { my ( $self, $file ) = @_; my $content = $self->_slurp($file); @@ -278,8 +553,9 @@ ([^\n]*) /ixms) { my $author = $1 || $2; - $author =~ s{E<lt>}{<}g; - $author =~ s{E<gt>}{>}g; + + $author =~ s/E<([a-z0-9]+)>/chr($Name2character_number{$1}) || "E<$1>"/gei; + $self->author($author); } else {
Hi. Added other unescaping code in the trunk; will (hopefully) be fixed in the next release. Thanks. On 2007-8-10 Fri 21:26:18, AVAR wrote:
Show quoted text
> author_from() only supports the lt/gt E<> escapes. The attached patch > adds support for this but chr() will map e.g. 240 to a single bit with > the value 240, i.e. invalid UTF-8. > > I'm not sure what's the correct way to have chr() generate UTF-8 in
5.4
Show quoted text
> and above (can't use Encode.pm).
Hi. 0.95 is out. If you still have the problem, reopen this. Thanks.


This service runs on Request Tracker, is sponsored by The Perl Foundation, and maintained by Best Practical Solutions.

Please report any issues with rt.cpan.org to rt-cpan-admin@bestpractical.com.