b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame^] | 1 | /* |
| 2 | * "$Id: mxml-entity.c 408 2010-09-19 05:26:46Z mike $" |
| 3 | * |
| 4 | * Character entity support code for Mini-XML, a small XML-like |
| 5 | * file parsing library. |
| 6 | * |
| 7 | * Copyright 2003-2010 by Michael R Sweet. |
| 8 | * |
| 9 | * These coded instructions, statements, and computer programs are the |
| 10 | * property of Michael R Sweet and are protected by Federal copyright |
| 11 | * law. Distribution and use rights are outlined in the file "COPYING" |
| 12 | * which should have been included with this file. If this file is |
| 13 | * missing or damaged, see the license at: |
| 14 | * |
| 15 | * http://www.minixml.org/ |
| 16 | * |
| 17 | * Contents: |
| 18 | * |
| 19 | * mxmlEntityAddCallback() - Add a callback to convert entities to |
| 20 | * Unicode. |
| 21 | * mxmlEntityGetName() - Get the name that corresponds to the |
| 22 | * character value. |
| 23 | * mxmlEntityGetValue() - Get the character corresponding to a named |
| 24 | * entity. |
| 25 | * mxmlEntityRemoveCallback() - Remove a callback. |
| 26 | * _mxml_entity_cb() - Lookup standard (X)HTML entities. |
| 27 | */ |
| 28 | |
| 29 | /* |
| 30 | * Include necessary headers... |
| 31 | */ |
| 32 | |
| 33 | #include "mxml-private.h" |
| 34 | |
| 35 | |
| 36 | /* |
| 37 | * 'mxmlEntityAddCallback()' - Add a callback to convert entities to Unicode. |
| 38 | */ |
| 39 | |
| 40 | int /* O - 0 on success, -1 on failure */ |
| 41 | mxmlEntityAddCallback( |
| 42 | mxml_entity_cb_t cb) /* I - Callback function to add */ |
| 43 | { |
| 44 | _mxml_global_t *global = _mxml_global(); |
| 45 | /* Global data */ |
| 46 | |
| 47 | |
| 48 | if (global->num_entity_cbs < (int)(sizeof(global->entity_cbs) / sizeof(global->entity_cbs[0]))) |
| 49 | { |
| 50 | global->entity_cbs[global->num_entity_cbs] = cb; |
| 51 | global->num_entity_cbs ++; |
| 52 | |
| 53 | return (0); |
| 54 | } |
| 55 | else |
| 56 | { |
| 57 | mxml_error("Unable to add entity callback!"); |
| 58 | |
| 59 | return (-1); |
| 60 | } |
| 61 | } |
| 62 | |
| 63 | |
/*
 * 'mxmlEntityGetName()' - Get the entity name for a character value.
 *
 * Only '&', '<', '>', and '"' have names ("amp", "lt", "gt", and "quot").
 * NULL is returned for every other value, meaning that no named entity is
 * needed to represent it.
 */

const char *                            /* O - Entity name or NULL */
mxmlEntityGetName(int val)              /* I - Character value */
{
  if (val == '&')
    return ("amp");

  if (val == '<')
    return ("lt");

  if (val == '>')
    return ("gt");

  if (val == '\"')
    return ("quot");

 /*
  * Anything else does not need a named entity...
  */

  return (NULL);
}
| 91 | |
| 92 | |
| 93 | /* |
| 94 | * 'mxmlEntityGetValue()' - Get the character corresponding to a named entity. |
| 95 | * |
| 96 | * The entity name can also be a numeric constant. -1 is returned if the |
| 97 | * name is not known. |
| 98 | */ |
| 99 | |
| 100 | int /* O - Character value or -1 on error */ |
| 101 | mxmlEntityGetValue(const char *name) /* I - Entity name */ |
| 102 | { |
| 103 | int i; /* Looping var */ |
| 104 | int ch; /* Character value */ |
| 105 | _mxml_global_t *global = _mxml_global(); |
| 106 | /* Global data */ |
| 107 | |
| 108 | |
| 109 | for (i = 0; i < global->num_entity_cbs; i ++) |
| 110 | if ((ch = (global->entity_cbs[i])(name)) >= 0) |
| 111 | return (ch); |
| 112 | |
| 113 | return (-1); |
| 114 | } |
| 115 | |
| 116 | |
| 117 | /* |
| 118 | * 'mxmlEntityRemoveCallback()' - Remove a callback. |
| 119 | */ |
| 120 | |
| 121 | void |
| 122 | mxmlEntityRemoveCallback( |
| 123 | mxml_entity_cb_t cb) /* I - Callback function to remove */ |
| 124 | { |
| 125 | int i; /* Looping var */ |
| 126 | _mxml_global_t *global = _mxml_global(); |
| 127 | /* Global data */ |
| 128 | |
| 129 | |
| 130 | for (i = 0; i < global->num_entity_cbs; i ++) |
| 131 | if (cb == global->entity_cbs[i]) |
| 132 | { |
| 133 | /* |
| 134 | * Remove the callback... |
| 135 | */ |
| 136 | |
| 137 | global->num_entity_cbs --; |
| 138 | |
| 139 | if (i < global->num_entity_cbs) |
| 140 | memmove(global->entity_cbs + i, global->entity_cbs + i + 1, |
| 141 | (global->num_entity_cbs - i) * sizeof(global->entity_cbs[0])); |
| 142 | |
| 143 | return; |
| 144 | } |
| 145 | } |
| 146 | |
| 147 | |
/*
 * '_mxml_entity_cb()' - Lookup standard (X)HTML entities.
 *
 * Names are matched case-sensitively against a built-in table using a
 * binary search.  -1 is returned for names that are not in the table and
 * for a NULL name.
 */

int                                     /* O - Unicode value or -1 */
_mxml_entity_cb(const char *name)       /* I - Entity name */
{
  int   diff,                           /* Difference between names */
        current,                        /* Current entity in search */
        first,                          /* First entity in search */
        last;                           /* Last entity in search */
  static const struct
  {
    const char  *name;                  /* Entity name */
    int         val;                    /* Character value */
  }     entities[] =                    /* Must stay sorted in strcmp() order */
  {
    { "AElig",          198 },
    { "Aacute",         193 },
    { "Acirc",          194 },
    { "Agrave",         192 },
    { "Alpha",          913 },
    { "Aring",          197 },
    { "Atilde",         195 },
    { "Auml",           196 },
    { "Beta",           914 },
    { "Ccedil",         199 },
    { "Chi",            935 },
    { "Dagger",         8225 },
    { "Delta",          916 },
    { "Dstrok",         208 },
    { "ETH",            208 },
    { "Eacute",         201 },
    { "Ecirc",          202 },
    { "Egrave",         200 },
    { "Epsilon",        917 },
    { "Eta",            919 },
    { "Euml",           203 },
    { "Gamma",          915 },
    { "Iacute",         205 },
    { "Icirc",          206 },
    { "Igrave",         204 },
    { "Iota",           921 },
    { "Iuml",           207 },
    { "Kappa",          922 },
    { "Lambda",         923 },
    { "Mu",             924 },
    { "Ntilde",         209 },
    { "Nu",             925 },
    { "OElig",          338 },
    { "Oacute",         211 },
    { "Ocirc",          212 },
    { "Ograve",         210 },
    { "Omega",          937 },
    { "Omicron",        927 },
    { "Oslash",         216 },
    { "Otilde",         213 },
    { "Ouml",           214 },
    { "Phi",            934 },
    { "Pi",             928 },
    { "Prime",          8243 },
    { "Psi",            936 },
    { "Rho",            929 },
    { "Scaron",         352 },
    { "Sigma",          931 },
    { "THORN",          222 },
    { "Tau",            932 },
    { "Theta",          920 },
    { "Uacute",         218 },
    { "Ucirc",          219 },
    { "Ugrave",         217 },
    { "Upsilon",        933 },
    { "Uuml",           220 },
    { "Xi",             926 },
    { "Yacute",         221 },
    { "Yuml",           376 },
    { "Zeta",           918 },
    { "aacute",         225 },
    { "acirc",          226 },
    { "acute",          180 },
    { "aelig",          230 },
    { "agrave",         224 },
    { "alefsym",        8501 },
    { "alpha",          945 },
    { "amp",            '&' },
    { "and",            8743 },
    { "ang",            8736 },
    { "apos",           '\'' },
    { "aring",          229 },
    { "asymp",          8776 },
    { "atilde",         227 },
    { "auml",           228 },
    { "bdquo",          8222 },
    { "beta",           946 },
    { "brkbar",         166 },
    { "brvbar",         166 },
    { "bull",           8226 },
    { "cap",            8745 },
    { "ccedil",         231 },
    { "cedil",          184 },
    { "cent",           162 },
    { "chi",            967 },
    { "circ",           710 },
    { "clubs",          9827 },
    { "cong",           8773 },
    { "copy",           169 },
    { "crarr",          8629 },
    { "cup",            8746 },
    { "curren",         164 },
    { "dArr",           8659 },
    { "dagger",         8224 },
    { "darr",           8595 },
    { "deg",            176 },
    { "delta",          948 },
    { "diams",          9830 },
    { "die",            168 },
    { "divide",         247 },
    { "eacute",         233 },
    { "ecirc",          234 },
    { "egrave",         232 },
    { "empty",          8709 },
    { "emsp",           8195 },
    { "ensp",           8194 },
    { "epsilon",        949 },
    { "equiv",          8801 },
    { "eta",            951 },
    { "eth",            240 },
    { "euml",           235 },
    { "euro",           8364 },
    { "exist",          8707 },
    { "fnof",           402 },
    { "forall",         8704 },
    { "frac12",         189 },
    { "frac14",         188 },
    { "frac34",         190 },
    { "frasl",          8260 },
    { "gamma",          947 },
    { "ge",             8805 },
    { "gt",             '>' },
    { "hArr",           8660 },
    { "harr",           8596 },
    { "hearts",         9829 },
    { "hellip",         8230 },
    { "hibar",          175 },
    { "iacute",         237 },
    { "icirc",          238 },
    { "iexcl",          161 },
    { "igrave",         236 },
    { "image",          8465 },
    { "infin",          8734 },
    { "int",            8747 },
    { "iota",           953 },
    { "iquest",         191 },
    { "isin",           8712 },
    { "iuml",           239 },
    { "kappa",          954 },
    { "lArr",           8656 },
    { "lambda",         955 },
    { "lang",           9001 },
    { "laquo",          171 },
    { "larr",           8592 },
    { "lceil",          8968 },
    { "ldquo",          8220 },
    { "le",             8804 },
    { "lfloor",         8970 },
    { "lowast",         8727 },
    { "loz",            9674 },
    { "lrm",            8206 },
    { "lsaquo",         8249 },
    { "lsquo",          8216 },
    { "lt",             '<' },
    { "macr",           175 },
    { "mdash",          8212 },
    { "micro",          181 },
    { "middot",         183 },
    { "minus",          8722 },
    { "mu",             956 },
    { "nabla",          8711 },
    { "nbsp",           160 },
    { "ndash",          8211 },
    { "ne",             8800 },
    { "ni",             8715 },
    { "not",            172 },
    { "notin",          8713 },
    { "nsub",           8836 },
    { "ntilde",         241 },
    { "nu",             957 },
    { "oacute",         243 },
    { "ocirc",          244 },
    { "oelig",          339 },
    { "ograve",         242 },
    { "oline",          8254 },
    { "omega",          969 },
    { "omicron",        959 },
    { "oplus",          8853 },
    { "or",             8744 },
    { "ordf",           170 },
    { "ordm",           186 },
    { "oslash",         248 },
    { "otilde",         245 },
    { "otimes",         8855 },
    { "ouml",           246 },
    { "para",           182 },
    { "part",           8706 },
    { "permil",         8240 },
    { "perp",           8869 },
    { "phi",            966 },
    { "pi",             960 },
    { "piv",            982 },
    { "plusmn",         177 },
    { "pound",          163 },
    { "prime",          8242 },
    { "prod",           8719 },
    { "prop",           8733 },
    { "psi",            968 },
    { "quot",           '\"' },
    { "rArr",           8658 },
    { "radic",          8730 },
    { "rang",           9002 },
    { "raquo",          187 },
    { "rarr",           8594 },
    { "rceil",          8969 },
    { "rdquo",          8221 },
    { "real",           8476 },
    { "reg",            174 },
    { "rfloor",         8971 },
    { "rho",            961 },
    { "rlm",            8207 },
    { "rsaquo",         8250 },
    { "rsquo",          8217 },
    { "sbquo",          8218 },
    { "scaron",         353 },
    { "sdot",           8901 },
    { "sect",           167 },
    { "shy",            173 },
    { "sigma",          963 },
    { "sigmaf",         962 },
    { "sim",            8764 },
    { "spades",         9824 },
    { "sub",            8834 },
    { "sube",           8838 },
    { "sum",            8721 },
    { "sup",            8835 },
    { "sup1",           185 },
    { "sup2",           178 },
    { "sup3",           179 },
    { "supe",           8839 },
    { "szlig",          223 },
    { "tau",            964 },
    { "there4",         8756 },
    { "theta",          952 },
    { "thetasym",       977 },
    { "thinsp",         8201 },
    { "thorn",          254 },
    { "tilde",          732 },
    { "times",          215 },
    { "trade",          8482 },
    { "uArr",           8657 },
    { "uacute",         250 },
    { "uarr",           8593 },
    { "ucirc",          251 },
    { "ugrave",         249 },
    { "uml",            168 },
    { "upsih",          978 },
    { "upsilon",        965 },
    { "uuml",           252 },
    { "weierp",         8472 },
    { "xi",             958 },
    { "yacute",         253 },
    { "yen",            165 },
    { "yuml",           255 },
    { "zeta",           950 },
    { "zwj",            8205 },
    { "zwnj",           8204 }
  };


 /*
  * A NULL name cannot match anything and must not reach strcmp()...
  */

  if (!name)
    return (-1);

 /*
  * Do a binary search for the named entity over the closed range
  * [first, last]; the range becomes empty once every candidate has been
  * ruled out...
  */

  first = 0;
  last  = (int)(sizeof(entities) / sizeof(entities[0])) - 1;

  while (first <= last)
  {
    current = first + (last - first) / 2;

    if ((diff = strcmp(name, entities[current].name)) == 0)
      return (entities[current].val);
    else if (diff < 0)
      last = current - 1;
    else
      first = current + 1;
  }

 /*
  * If we get here, the name is not in the table...
  */

  return (-1);
}
| 456 | |
| 457 | |
| 458 | /* |
| 459 | * End of "$Id: mxml-entity.c 408 2010-09-19 05:26:46Z mike $". |
| 460 | */ |