[prev in list] [next in list] [prev in thread] [next in thread] 

List:       nedit-discuss
Subject:    sh2html 2.3
From:       Thorsten Haude <yoo () vranx ! de>
Date:       2003-05-22 20:14:56
[Download RAW message or body]

[Attachment #2 (multipart/mixed)]


Hi,

I put up a new version of my macro to get colored HTML from a syntax
highlighted NEdit file:
http://www.vranx.de/nedit/sh2html.nm
http://www.vranx.de/nedit/sh2html.nm.html

I need some help now and would ask you to have a look at it and tell
me what you think:
- I wrote and re-wrote the interface again and again, so I just can't
judge it anymore. Please have a look at the documentation and tell me
what you think about it, what could be improved and in what way. I
planned to write a wrapper around it with some dialogs but I am too
betriebsblind right now for a nice interface.


- The current implementation adds each fragment to the target buffer
immediately. This is abysmally slow: Using sh2html() on sh2html.nm
takes about 30 seconds on my notebook. Using it on the resulting
sh2html.nm.html takes more than 20 minutes.

There is another implementation (look for the '^#'), doing the whole
thing in memory and inserting only the result. That speeds it up
considerably (4 seconds for sh2html.nm), but uses insane amounts of
memory (more than the ~700MB I can offer for sh2html.nm.html). I'm
pretty sure that this memory is not freed after the macro ends.

The question here: Does someone see a way to speed things up without
supporting the memory chip industry? (For the developers: Is there
something wrong with the macro interpreter's memory handling?)


- Still missing is a line mode, which would allow easy tab handling. I
see some follow-ups there and don't have the time at the moment.
Someone?


Thorsten
-- 
When machines and computers, profit motives and property rights are
considered more important than people; the giant triplets of racism,
militarism, and economic exploitation are incapable of being conquered.
    - Martin Luther King

["sh2html.nm" (text/plain)]

#   sh2html()
#
#   Version 2.3
#   Copyright 2002 Thorsten Haude
#   HTML validification by Joor Loohuis
#
#   This is free software; you may modify and redistribute it under
#   the terms of the GNU General Public License, Version 2.
#   (http://www.gnu.org/licenses/gpl.html)
#
#   The single parameter is an array containing the following keys:
#       source      source file name; relative paths ('^[^/]') are prepended
#                   by $PWD (mandatory)
#       html        html file name; relative paths ('^[^/]') are prepended
#                   by source's dirname (defaults to source".html")
#       css         css file name or "INLINE" for inline css; relative paths
#                   ('^[^/]') are prepended by source's dirname (defaults to
#                   source".css")
#       numbered    set for applying line numbers (defaults to unset)
#       mode        language mode to use (defaults to whatever your NEdit
#                   comes up with for the source file)
#
define sh2html
{
    #   Converts a syntax highlighted source file into an HTML document
    #   (plus CSS, either inline or in a separate file).
    #
    #   $1 is an array of options; the keys read here are "source"
    #   (mandatory), "html", "css", "numbered" and "mode".
    #   Returns "" (after a beep) when called with the wrong number of
    #   arguments or without a "source" key.
    #
    #   Relies on chomp() and basename() (part of perl.nm) and on the helpers
    #   createCSS() and padNumber().
    #
    #   Lines commented out at column 0 belong to the alternative
    #   implementation which builds the whole document in the string
    #   htmlString and inserts it once at the end.
    #
    #   NOTE(review): relative "html" and "css" names are used as given; they
    #   are not resolved against the source's directory in this function.

    #   Setting up options
    if ($n_args != 1)
    {
        #    Wrong number of arguments
        beep()
        return ""
    }
    params = $1

    if ("source" in params)
    {
        sourceFile = params["source"]
        if (search_string(sourceFile, "^/", 0, "regex") == -1)
        {
            #   Relative path: resolve it against the directory of the
            #   currently focused window. $file_path is the built-in holding
            #   that directory ("$filepath" is not a predefined variable) and
            #   it already ends with a slash.
            sourceFile = $file_path sourceFile
        }
    } else
    {
        #   required key missing
        beep()
        return ""
    }

    if ("html" in params)
    {
        htmlFile = params["html"]
    } else
    {
        htmlFile = sourceFile ".html"
    }

    if ("css" in params)
    {
        cssFile = params["css"]
    } else
    {
        cssFile = sourceFile ".css"
    }

    #   "numbered" doubles as the next line number to print (starts at 1)
    if ("numbered" in params)
    {
        numbered = 1
    } else
    {
        numbered = 0
    }

    #   Begin and end of the fragment to work with
    cursorPos = 0
    fragmentEnd = 1

    #   Array to keep the names of all styles already in CSS
    stylesInCSS = $empty_array

    #   Setup some bits which will become part of the document
    tab = "    "
    date = chomp(shell_command("date +%Y-%m-%d", ""))
    myVersion = "2.3"

    #   Set up HTML templates. Every literal is closed before the line
    #   continuation so that no source indentation leaks into the output.
    htmlHeader = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n" \
            "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" \
            "<html>\n" \
            "<head>\n" \
            "    <title><filename></title>\n" \
            "    <meta name=\"keywords\" content=\"NEdit\" />\n" \
            "    <meta name=\"date\" content=\"" date "\" />\n" \
            "    <meta name=\"generator\" content=\"NEdit/sh2html " myVersion "\" />\n" \
            "    <meta http-equiv=\"content-type\" " \
            "content=\"text/html; charset=iso-8859-15\" />\n\n" \
            "    <css>\n" \
            "    <link rel=\"shortcut icon\" " \
            "href=\"http://www.nedit.org/download/logos/16-t.png\" />\n" \
            "</head>\n\n" \
            "<body>\n" \
            "<pre><code>\n"
    htmlHeader = replace_in_string(htmlHeader, "<filename>", basename(sourceFile))
    htmlFooter = "\n</code></pre>\n</body>\n</html>\n"
    codeTemplate = "<span class=\"<style>\"><content></span>"

    #   Set up CSS templates
    if (cssFile == "INLINE")
    {
        #   The style element declares its language with "type"
        cssHeader = "<style type=\"text/css\">\n" \
                "        code {font-family: Courier, 'Courier New', monospace}\n"
    } else
    {
        cssHeader = "/* Syntax Highlighting style sheet for file " \
                basename(sourceFile) " */\n" \
                "/* Created by NEdit and sh2html() */\n" \
                "code\n" \
                "{\n" \
                "    font-family: Courier, 'Courier New', monospace\n" \
                "}\n\n"
    }

    #   Open source file
    open(sourceFile)
    focus_window(sourceFile)
    if ("mode" in params)
    {
        set_language_mode(params["mode"])
    }

    #   for numbered output, learn the line number's width
    #   and insert first line number
    if (numbered != 0)
    {
        #   Find out the source's last line
        set_cursor_pos($text_length)
        lastLine = $line
        set_cursor_pos(0)

        lineDigits = length(lastLine)

        htmlHeader = htmlHeader " " padNumber(numbered++, lineDigits) " "
    }

    #   Create html file
    new()
    focus_window("last")
    set_language_mode("SGML HTML")
    save_as(htmlFile)

    #   let's get rollin'
#    htmlString = htmlHeader
    focus_window(htmlFile)
    insert_string(htmlHeader)
    cssString = cssHeader
    focus_window(sourceFile)

    #   The character at $text_length - 1 is never visited by this loop;
    #   htmlFooter starts with a newline of its own.
    while (cursorPos < $text_length - 1)
    {
        #   New fragment
        fragmentStart = cursorPos
        pattern = get_pattern(cursorPos)

        #   Get rid of those spaces in the style's names
        style = get_style(cursorPos)
        styleName = replace_in_string(style["style"], "\\s", "_", "regex", \
                "copy")

        if (get_character(fragmentStart) == "\n")
        {
            #   Newlines are a special case, we don't want them to be in a
            #   <span> so that the resulting document looks nicer and we can
            #   work on lines.
            fragmentEnd = fragmentStart + 1
            htmlFragment = "\n"

            if (numbered != 0)
            {
                htmlFragment = htmlFragment " " padNumber(numbered++, lineDigits) " "
            }
        } else
        {
            #   Get the next fragment: it ends where the pattern ends or at
            #   the end of the line, whichever comes first
            patternEnd = fragmentStart + pattern["extension"]
            lineEnd = max(search("$", fragmentStart, "regex"), 0)
            fragmentEnd = min(patternEnd, lineEnd)

            #   Always make progress; a fragment which does not reach past
            #   its own start would keep this loop running forever
            if (fragmentEnd <= fragmentStart)
            {
                fragmentEnd = fragmentStart + 1
            }
            sourceFragment = get_range(fragmentStart, fragmentEnd)

            #   Replace HTML special characters in the source ("&" first, so
            #   that the entities produced below are not escaped again)
            sourceFragment = replace_in_string(sourceFragment, "&", "&amp;", \
                    "copy")
            sourceFragment = replace_in_string(sourceFragment, "<", "&lt;", \
                    "copy")
            sourceFragment = replace_in_string(sourceFragment, ">", "&gt;", \
                    "copy")

            #   Put the important things in
            htmlFragment = replace_in_string(codeTemplate, "<style>", \
                    styleName, "copy")
            htmlFragment = replace_in_string(htmlFragment, "<content>", \
                    sourceFragment, "copy")

            #   Convert tabs to spaces
            htmlFragment = replace_in_string(htmlFragment, "\t", tab, "regex", \
                    "copy")
        }

        #   Put into the result what we came up with
#        htmlString = htmlString htmlFragment
        focus_window(htmlFile)
        insert_string(htmlFragment)

        #   Let's make sure that each style is only added once to the CSS
        if (!(styleName in stylesInCSS))
        {
            stylesInCSS[styleName] = ""
            if (cssFile == "INLINE")
            {
                cssString = cssString createCSS(style, "INLINE")
            } else
            {
                cssString = cssString createCSS(style)
            }
        }

        #   Back to the source for the next fragment
        focus_window(sourceFile)

        #   Next part, please
        cursorPos = fragmentEnd
    }

    #   Where do we dump the CSS information?
    if (cssFile == "INLINE")
    {
        #   insert CSS in HTML file
#        htmlString = replace_in_string(htmlString, "<css>", cssString "    </style>")
        focus_window(htmlFile)
        replace("<css>", cssString "    </style>", "literal", "wrap")
    } else
    {
        #   Put CSS in its own file and put link in HTML file
        new()
        focus_window("last")
        set_language_mode("CSS")
        save_as(cssFile)
        insert_string(cssString)
        cssLink = "<link rel=\"stylesheet\" type=\"text/css\" href=\"" cssFile "\" />"
#        htmlString = replace_in_string(htmlString, "<css>", cssLink)
        focus_window(htmlFile)
        #   Search type and wrapping are spelled out so that the placeholder
        #   is found regardless of the user's search preferences
        replace("<css>", cssLink, "literal", "wrap")
    }

    #   The footer belongs at the very end of the document, whichever branch
    #   ran above, so the cursor is moved there before inserting
    focus_window(htmlFile)
    set_cursor_pos($text_length)
#    insert_string(htmlString)
    insert_string(htmlFooter)
}

#   createCSS()
#
#   Helper function for sh2html()
#
#   Parameters are:
#       $1  Style array as returned by get_style()
#       $2  (optional) Flag "INLINE" to produce smaller footprint result
#
define createCSS
{
    #   Builds the CSS rule for one NEdit highlight style.
    #
    #   $1  style array; the keys "style", "rgb", "bold" and "italic" are read
    #   $2  (optional) the flag "INLINE" to get a one-line rule
    #
    #   Returns the rule as a string, or "" (after a beep) when the arguments
    #   do not match either of the two call forms.
    if ($n_args == 1)
    {
        neditStyle = $1
        inline = 0
    } else if ($n_args == 2 && $2 == "INLINE")
    {
        neditStyle = $1
        inline = 1
    } else
    {
        #    Wrong number of arguments or unknown flag
        beep()
        return ""
    }

    #   Get rid of those spaces in the style's names
    styleName = replace_in_string(neditStyle["style"], "\\s", "_", "regex", "copy")

    #   Use a slim template for inline CSS
    if (inline == 1)
    {
        cssTemplate = "        .<name> {color: <color>; font-weight: <bold>; " \
                "font-style: <italic>;}\n"
    } else
    {
        cssTemplate = ".<name>\n" \
                "{\n" \
                "    color: <color>;\n" \
                "    font-weight: <bold>;\n" \
                "    font-style: <italic>;\n" \
                "}\n\n"
    }

    #   Put in name ...
    cssStyle = replace_in_string(cssTemplate, "<name>", styleName, "copy")

    #   ... color ...
    cssStyle = replace_in_string(cssStyle, "<color>", neditStyle["rgb"], "copy")

    #   ... boldness ...
    if (neditStyle["bold"])
    {
        fontWeight = "bold"
    } else
    {
        fontWeight = "normal"
    }
    cssStyle = replace_in_string(cssStyle, "<bold>", fontWeight, "copy")

    #   ... and italicity.
    if (neditStyle["italic"])
    {
        fontStyle = "italic"
    } else
    {
        fontStyle = "normal"
    }
    cssStyle = replace_in_string(cssStyle, "<italic>", fontStyle, "copy")

    return cssStyle
}

#   padNumber()
#
#   Helper function for sh2html()
#
#   Parameters are:
#       $1  a number
#       $2  the length the number is padded to
#
define padNumber
{
    #   Right-aligns $1 in a field of $2 characters by putting spaces in
    #   front of it. A value which is already at least $2 characters long
    #   comes back without padding. Beeps and returns "" unless called with
    #   exactly two arguments.
    if ($n_args != 2)
    {
        beep()
        return ""
    }

    value = $1
    missing = $2 - length(value)

    #   Start from the value in string form and grow it to the left
    padded = "" value
    while (missing > 0)
    {
        padded = " " padded
        missing--
    }

    return padded
}

#
#   chomp() and basename() are part of perl.nm
#
#define chomp
#{
#    if ($n_args != 1)
#    {
#        beep()
#        return
#    }
#
#    tmp = replace_in_string($1, "(.*)(?:\n$)", "\\1", "regex")
#    if (tmp == "")
#    {
#        return $1
#    } else
#    {
#        return tmp
#    }
#}
#
#define basename
#{
#    if ($n_args != 1)
#    {
#        beep()
#        return
#    }
#    
#    fullname = $1
#    len = length(fullname)
#
#    begin = search_string($1, "/", len, "backward")
#    result = substring(fullname, begin + 1, len)
#
#    return result
#}
#
#define dirname
#{
#    if ($n_args != 1)
#    {
#        beep()
#        return
#    }
#    
#    fullname = $1
#    len = length(fullname)
#
#    end = search_string($1, "/", len, "backward")
#    result = substring(fullname, 0, end)
#
#    return result
#}
#


[Attachment #6 (application/pgp-signature)]

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic