definição e significado de MediaWiki:TextCleaner.js | sensagent.com


   Publicidade ▼


 » 
alemão búlgaro chinês croata dinamarquês eslovaco esloveno espanhol estoniano farsi finlandês francês grego hebraico hindi holandês húngaro indonésio inglês islandês italiano japonês korean letão língua árabe lituano malgaxe norueguês polonês português romeno russo sérvio sueco tailandês tcheco turco vietnamês
alemão búlgaro chinês croata dinamarquês eslovaco esloveno espanhol estoniano farsi finlandês francês grego hebraico hindi holandês húngaro indonésio inglês islandês italiano japonês korean letão língua árabe lituano malgaxe norueguês polonês português romeno russo sérvio sueco tailandês tcheco turco vietnamês

Definição e significado de MediaWiki:TextCleaner.js

Definição

definição - Wikipedia

   Publicidade ▼

Wikipedia

MediaWiki:TextCleaner.js

                   

注意:在保存以后,您必须绕过浏览器缓存才能看到所作出的改变。 火狐(Firefox)/Safari:按住“Shift”键再点击“刷新”,或按下“Ctrl-F5”或“Ctrl-R”(Mac上为“⌘-R”)。谷歌浏览器(Google Chrome):按下“Ctrl-Shift-R”(Mac上为“⌘-Shift-R”)。 Internet Explorer:按住“Ctrl”键再点击“刷新”,或按下“Ctrl-F5”。 Konqueror:点击“刷新”或按下“F5”。 Opera:在“工具→首选项”中清除缓存。请参阅Help:绕过缓存以获取更多帮助。

// <source lang="javascript">
/*
  Wikitext sanitation for MediaWiki
 
  Author: [[User:Lupo]], January 2008
  License: Quadruple licensed GFDL, GPL, LGPL and Creative Commons Attribution 3.0 (CC-BY-3.0)
 
  Choose whichever license of these you like best :-)
*/
 
var TextCleaner =
{

  // Lazily built list of all names and aliases of the image/file namespace (id 6).
  // Filled on the first call of sanitizeWikiText that gets past the quick check.
  imgNamespaceNames : null,

  // This function attempts to construct well-formed wikitext from input that may contain
  // possibly broken wikitext.
  //
  // Note: even just a half-baked sanitation of wikitext is hyper-complex due to the presence
  // of templates, and due to the fact that image thumbnail captions may themselves contain
  // links. This implementation catches the most common errors (such as forgetting to close a
  // template or a link), and even some more elaborate ones. With enough malice, this sanitation
  // can still be broken by user input such that the result is not well-formed wikitext as the
  // parser at the servers would like to have it.
  //
  // Parameters:
  //   input       - the wikitext to clean up. null/undefined is returned unchanged.
  //   only_thumbs - if true, all [[Image: links are changed to [[:Image:, unless the original
  //                 image link was a thumbnail or had a width smaller than 300px specified.
  //
  // Returns the sanitized wikitext. Input that contains no brackets, braces, nowiki tags or
  // HTML comment starts is returned unchanged.
  //
  // WARNING: do *not* attempt to use this to process large texts (e.g., a whole article). It is
  // probably rather inefficient due to the many substrings that are generated. This function is
  // primarily intended to be used to clean up user input in forms, which are typically rather
  // short.
  //
  // NOTE(review): the seemingly redundant backslashes in several string and regexp literals
  // below (in front of angle brackets, exclamation marks, braces and brackets) look deliberate,
  // presumably so that the wiki software hosting this page does not interpret those character
  // sequences itself. Confirm before "simplifying" them.
  sanitizeWikiText : function (input, only_thumbs)
  {
    if (input == null) return input;
    if (input.search (/[\][}{]|<nowiki(\s[^>]*)?>|<\!--/) < 0) return input;
    // No critical characters

    if (!TextCleaner.imgNamespaceNames) {
      TextCleaner.imgNamespaceNames = [];
      // 'typeof' guard: a bare reference to an undeclared global would throw.
      if (typeof wgNamespaceIds != 'undefined' && wgNamespaceIds) {
        for (var name in wgNamespaceIds) {
          if (wgNamespaceIds[name] == 6) // Image namespace
            TextCleaner.imgNamespaceNames[TextCleaner.imgNamespaceNames.length] = name;
        }
      }
      // Make sure that we have the two canonical names
      TextCleaner.imgNamespaceNames[TextCleaner.imgNamespaceNames.length] = 'Image';
      TextCleaner.imgNamespaceNames[TextCleaner.imgNamespaceNames.length] = 'File';
      // If your Wiki does not have wgNamespaceIds, add aliases or localized namespace names here!
    }

    // consumed[n] is the number of characters the most recent sanitize() call running at
    // caption_level n+1 used up from its input. It is how a nested call tells its caller
    // where to continue.
    // For image captions. Image caption may contain links, and may even contain images.
    // The current MediaWiki parser actually allows this only once. For deeper recursions,
    // it fails. But here, it's actually easier to implement no limit.
    var consumed       = [0, 0];

    // Matches any of the one-character placeholders (see the replacements at the end of this
    // function), single brackets, a pipe, an opening nowiki tag, or the start of an HTML comment.
    var base_regexp    =
      new RegExp
            (   "[\\x01\\x02\\x03\\x04[\\]\\|\\x05\\x06\\x07\\x08]"
              + "|\<nowiki(\\s[^>]*)?\>|\<\!--"
            , "i"); // Ignore case
    // Used while inside a nowiki section: only its closing tag or an HTML comment matter there.
    var nowiki_regexp  = new RegExp ("\<\\/nowiki(\\s[^>]*)?\>|\<\!--", "i");

    var allow_only_thumbs = only_thumbs;

    // Returns the namespace prefix including the colon (in the spelling used in s) if s has an
    // image namespace name followed by ':' at position i, and null otherwise.
    function get_initial (i, s)
    {
      for (var j = 0; j < TextCleaner.imgNamespaceNames.length; j++) {
        if (s.length >= i + TextCleaner.imgNamespaceNames[j].length + 1) {
          var t = s.substr (i, TextCleaner.imgNamespaceNames[j].length + 1);
          if (t.toLowerCase() == (TextCleaner.imgNamespaceNames[j].toLowerCase () + ':'))
            return t;
        }
      }
      return null;
    }

    // Recursive worker operating on text in which multi-character tokens have already been
    // replaced by placeholders.
    //
    //   s              - text to process
    //   with_links     - if false, stop at the first link start that is not nested in a
    //                    template, table, or gallery
    //   caption_level  - recursion depth; 0 for the outermost call
    //   allow_thumbs   - whether image links may remain inline images
    //   break_at_pipe  - stop at the first pipe (or table end) outside any open construct
    //   with_tables    - if false, stop at a table start
    //   with_galleries - if false, stop at a gallery start
    //
    // Returns the sanitized prefix of s. If caption_level > 0, the number of characters of s
    // that were consumed is stored in consumed[caption_level-1].
    function sanitize
      (s, with_links, caption_level, allow_thumbs, break_at_pipe, with_tables, with_galleries)
    {
      if (!s || s.length == 0) {
        if (caption_level > 0) {
          if (consumed.length < caption_level)
            consumed.push (0);
          else
            consumed[caption_level-1] = 0;
        }
        return s;
      }

      var result         = "";
      var initial_length = s.length;
      var get_out        = false;  // Set when we must stop and hand control back to the caller
      var in_nowiki      = false;
      var endings        = null;
      // Stack recording template and table nesting: holds the expected closing placeholders
      var next;
      var ch;
      var i;
      var j;
      var k;
      var l;

      function push_end (val)
      {
        if (endings == null) {
          endings = new Array (1);
          endings[0] = val;
        } else {
          endings[endings.length] = val;
        }
      }

      // Pops the innermost expected closing placeholder; the stack is null when empty.
      function pop_end ()
      {
        if (endings == null) return null; // Shouldn't happen
        var result;
        if (endings.length == 1) {
          result = endings[0];
          endings = null;
        } else {
          result = endings[endings.length -1];
          endings.length = endings.length - 1;
        }
        return result;
      }

      var regexp = base_regexp;
      while (s.length > 0 && !get_out) {
        next = s.search (regexp);

        if (next < 0) {
          result = result + s;
          break;
        }
        ch = s.charAt (next);
        i  = -1;
        j  = -1;
        k  = -1;
        switch (ch) {
          case '<':
            // Nowiki or HTML comment. Must be closed.
            if (s.charAt (next+1) == '!') {
              // HTML comment. Cannot be nested.
              i = s.indexOf ('--\>', next+3);
              if (i < 0) {
                result = result + s + '--\>';
                s = "";
              } else {
                result = result + s.substring (0, i + 3);
                s = s.substring (i+3);
              }
            } else if (s.charAt (next+1).toLowerCase () == 'n') {
              // Opening nowiki tag. The regexps ignore case, so this test must, too.
              // Nowiki may contain HTML comments!
              in_nowiki = true;
              regexp = nowiki_regexp;
              // Copy just the tag name (7 characters); the rest of the tag is ordinary text.
              result = result + s.substring (0, next + 7);
              s = s.substring (next + 7);
            } else {
              // End of nowiki. Searched for and found only if in_nowiki == true
              in_nowiki = false;
              regexp = base_regexp;
              i = s.indexOf ('>', next+1); // End of tag
              result = result + s.substring (0, i+1);
              s = s.substring (i+1);
            }
            break;
          case '\x05':
            // Table start
            if (!with_tables) {
              result  = result + s.substring (0, next);
              get_out = true;
              break;
            }
            // Fall through
          case '\x07':
            if (ch == '\x07' && !with_galleries) {
              result = result + s.substring (0, next);
              get_out = true;
              break;
            }
            // Fall through
          case '\x01':
            // Start of template, table, or gallery. The closing placeholder is always the
            // character following the opening one.
            result = result + s.substring (0, next+1);
            push_end (String.fromCharCode (ch.charCodeAt (0)+1));
            s = s.substring (next+1);
            break;
          case '\x06':
            // Table end
            if (break_at_pipe && endings == null) {
              result = result + s.substring (0, next);
              get_out = true;
              break;
            }
            // Fall through
          case '\x02':
            // End of a template or table
            result = result + s.substring (0, next);
            if (endings == null || endings[endings.length - 1] != ch) {
              // Spurious template or table end: emit it as character references
              if (ch == '\x02')
                result = result + '&#x7D;&#x7D;';
              else
                result = result + '&#x7C;&#x7D;';
            } else {
              result = result + pop_end ();
            }
            s = s.substring (next+1);
            break;
          case '\x08':
            // End of gallery
            result = result + s.substring (0, next+1);
            if (endings != null && endings[endings.length - 1] == ch) pop_end ();
            s = s.substring (next+1);
            break;
          case '\x03':
          case '[':
            {
              if (!with_links && endings == null) {
                // Keep the text in front of the link start, like all other bail-out paths do;
                // otherwise it would be silently lost.
                result  = result + s.substring (0, next);
                get_out = true;
                break;
              }
              // Image links must be treated specially, since they may contain nested links
              // in the caption!
              var initial = null;  // If set, it's 'image:' or 'file:' and we have an image link
              i = next;
              while (i < s.length && s.charAt (i) == ch) i++;
              if (ch == '\x03' && i < s.length && s.charAt (i) == '[') i++;
              initial = get_initial (i, s);

              // Scan ahead. We'll break at the next top-level | or ] or ]] or [ or [[ or {| or |}
              var lk_text = sanitize (s.substring (i),
                                      false,           // No links at top-level allowed
                                      caption_level + 1,
                                      false,           // No thumbs
                                      true,            // Break at pipe
                                      false,           // No tables
                                      false);          // No galleries
              var lk_text_length = consumed[caption_level];
              j = i + lk_text_length;
              if (j >= s.length) {
                // Used up the whole text: [[Foo or [bar
                if (initial != null && allow_only_thumbs)
                  // Should in any case have started with [[, not [
                  result = result + s.substring (0, i-1) + '\x03:' + initial
                         + lk_text.substring (initial.length) + '\x04';
                else
                  result = result + s.substring (0, i) + lk_text
                         + ((s.charAt (i-1) == '[') ? ']' : '\x04');
                s = "";
                break;
              }
              if (s.charAt (j) == '|') k = j; else k = -1;
              if (k < 0) {
                // No pipe found: we should be on the closing ]] or ] or [[Foo]] or [bar]
                if (initial != null && allow_only_thumbs)
                  // Should in any case have started with [[, not [
                  result = result + s.substring (0, i-1) + '\x03:' + initial
                         + lk_text.substring (initial.length) + '\x04';
                else
                  result = result + s.substring (0, i) + lk_text
                         + ((s.charAt (i-1) == '[') ? ']' : '\x04');
                if (s.charAt (j) == ']' || s.charAt (j) == '\x04') {
                  // Indeed closing the link
                  s = s.substring (j+1);
                } else {
                  s = s.substring (j);
                }
                break;
              } else {
                var caption = null;
                var used    = 0;
                // Pipe found.
                if (initial == null) {
                  // Not an image link. Must be something like [[Foo|Bar]].
                  caption = sanitize
                              ( s.substring (k+1)
                               , false             // No links, please
                               , caption_level + 1
                               , false             // No thumbs either
                               , false             // Don't care about pipes
                               , true              // Allow tables (yes, parser allows that!)
                               , true);            // Allow galleries (?)
                  // Now we're at [[, [, ]], or ]
                  used = consumed[caption_level];
                  result = result + s.substring (0, i) + lk_text + '|' + caption
                         + ((s.charAt (i-1) == '[') ? ']' : '\x04');
                } else {
                  var q = s.substring (k);
                  // We assume that there are no templates, nowikis, and other nasty things
                  // in the parameters. Search forward until the next [, {, ], }
                  l = q.search(/[\x01\x02\x03[\x04\]\{\}\x05\x06\x07\x08]/);
                  if (l < 0) l = q.length;
                  if (l+1 < q.length) q = q.substring (0, l+1);
                  var is_thumb = q.search (/\|\s*thumb(nail)?\s*[\|\x04]/) >= 0;
                  var img_width = /\|\s*(\d+)px\s*[\|\x04]/.exec (q);
                  if (img_width && img_width.length > 1) {
                    img_width = parseInt (img_width[1], 10);
                    if (isNaN (img_width)) img_width = null;
                  } else
                    img_width = null;
                  // A thumbnail without explicit width is assumed to be 180px wide.
                  if (img_width === null && is_thumb) img_width = 180;
                  // An unknown width must not count as small (null would coerce to 0).
                  var is_small = img_width !== null && img_width < 300;

                  // Caption starts at the last pipe before l. If that is a parameter,
                  // it doesn't hurt.
                  var m = k + q.lastIndexOf ('|', l);
                  caption = sanitize
                              (  s.substring (m+1)
                               , is_thumb                  // Allow links only if it's a thumb
                               , caption_level + 1
                               , allow_thumbs && is_thumb
                               , false                     // Don't break at pipe
                               , is_thumb                  // Tables only if it's a thumb
                               , is_thumb);                // Allow galleries for thumbs (?)
                  used = consumed[caption_level];
                  // caption used 'used' chars from m+1, s.charAt (m+1+used) == '\x04'
                  is_thumb = allow_thumbs && is_small;
                  if (is_thumb || !allow_only_thumbs)
                    result = result + s.substring (0, i-1) + '\x03' + lk_text ;
                  else
                    // Turn the inline image into a plain link to the image page
                    result = result + s.substring (0, i-1) + '\x03:' + initial
                           + lk_text.substring (initial.length);
                  result = result + s.substring (k, m+1) + caption + '\x04';
                  k = m;
                }
                // Continue behind the caption, skipping the link end if there is one.
                next = k+1+used;
                if (next < s.length) {
                  if (s.charAt (next) != '\x04')
                    s = s.substring (next);
                  else
                    s = s.substring (next+1);
                } else
                  s = "";
              }
              break;
            }
          case '\x04':
          case ']':
            // Extra bracket.
            result = result + s.substring (0, next);
            if (caption_level == 0 && !break_at_pipe) {
              result = result + (ch == ']' ? '&#x5D;' : '&#x5D;&#x5D;');
              s = s.substring (next+1);
            } else
              get_out = true;
            break;
          case '|':
            result = result + s.substring (0, next);
            if (break_at_pipe && endings == null) {
              // Pipe character at top level
              get_out = true;
            } else {
              if (caption_level == 0 && !break_at_pipe && endings == null)
                result = result + '&#x7C;'; // Top-level pipe character
              else
                result = result + '|';
              s = s.substring (next+1);
            }
            break;
        } // end switch
      } // end while
      if (in_nowiki) result = result + "\<\/nowiki>"; // Make sure this nowiki is closed.
      // Close open templates and tables (a table end must be on its own line)
      while (endings != null) {
        ch = pop_end ();
        result = result + (ch == '\x06' ? '\n' : "") + ch;
      }
      if (caption_level > 0) {
        // When we bailed out, 'next' is the position in the current s where we stopped;
        // everything from there on was not consumed.
        var used_up = initial_length - (get_out ? (s.length - next) : 0);
        if (consumed.length < caption_level)
          consumed[consumed.length] = used_up;
        else
          consumed[caption_level-1] = used_up;
      }
      return result;
    }

    // Replace multi-character tokens by one-character placeholders, simplifying the
    // subsequent processing.
    var s = input.replace (/\{\{/g, '\x01')
                 .replace (/\n\s*\|\}\}\}/g, '\n\x06\x02') // Table end + template end
                 .replace (/\}\}/g, '\x02')
                 .replace (/\[\[/g, '\x03')
                 .replace (/\]\]/g, '\x04')
                 .replace (/\n\s*\{\|/g, '\n\x05')       // Table start and end must be on own line
                 .replace (/^\s*\{\|/, '\x05')           // Table start at the very beginning
                 .replace (/\n\s*\|\}/g, '\n\x06')       // (we strip leading whitespace)
                 .replace (/\<\s*gallery\s*\>/g, '\x07')
                 .replace (/\<\/\s*gallery\s*\>/g, '\x08');

    s = sanitize (s, true, 0, true, false, true, true);
    // with links, allow thumbs, don't break at pipe, allow tables, allow galleries

    // Translate the placeholders back into the tokens they stand for.
    return s.replace (/\x01/g, '\{\{')
            .replace (/\x02/g, '\}\}')
            .replace (/\x03/g, '\[\[')
            .replace (/\x04/g, '\]\]')
            .replace (/\x05/g, '\{\|')
            .replace (/\x06/g, '\|\}')
            .replace (/\x07/g, '<gallery>')
            .replace (/\x08/g, '</gallery>');
  }
};
 
// </source>
               

 

todas as traduções do MediaWiki:TextCleaner.js


Conteúdo de sensagent

  • definição
  • sinónimos
  • antónimos
  • enciclopédia

 

5684 visitantes em linha

calculado em 0,031s