Lunarpedia talk:Autostub2

From Lunarpedia
Jump to: navigation, search

Database

James Rogers is working on improving the data set. -- Strangelv 10:51, 2 February 2007 (PST)


Source Code

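Revision as of Test 2, which successfully generated an XML file for import. Please note that the <PRE> tag does not stop the wiki from converting ampersand markup (the escaped forms of characters such as <, >, & and ") into the literal symbols, so the code as displayed here differs from the code that was run: if it is run as is, it will produce output that the importer chokes on. -- Strangelv 10:51, 2 February 2007 (PST)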


#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#
#                             #
# Element Autostub Generator  #
#                             #
# Public Domain               #
#                             #
#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#


def DePluralize(wordish):
    """Return the singular form of a plural English word or phrase.

    wordish -- plural text such as "Halogens" or "Noble gases"; leading and
               trailing whitespace is ignored.

    A trailing "es" is removed only when the remaining stem ends in a
    sibilant ("s", "x", "z", "ch" or "sh"), as in "gases" -> "gas".  Any
    other trailing "s" is removed on its own, so "Lanthanides" becomes
    "Lanthanide" rather than "Lanthanid".

    Raises ValueError when the text does not end in "s".
    """
    wordish = wordish.strip()
    if wordish.endswith("es"):
        stem = wordish[:-2]
        # Only sibilant stems form their plural with "es"; for everything
        # else the "e" belongs to the singular word and must be kept.
        if stem.endswith(("s", "x", "z", "ch", "sh")):
            return stem
    if wordish.endswith("s"):
        return wordish[:-1]
    raise ValueError("No plural???")
    

def XMLproof(textish):
    """Return textish with XML-significant characters replaced.

    textish -- the string to make safe.

    "<", ">", "&" and '"' are replaced by their XML character entities,
    and ":" is replaced by the percent-escape "%3A".  Every other
    character is copied through unchanged.
    """
    # The replacement values must be spelled as entities; writing the bare
    # characters here would make the function a no-op.
    substitutions = {
        "<": "&lt;",
        ">": "&gt;",
        "&": "&amp;",
        '"': "&quot;",
        ":": "%3A",
    }
    return "".join([substitutions.get(q, q) for q in textish])

def ShellBRK(shell):
    """Insert "<BR/>" between groups of chunks that share a first character.

    shell -- space-separated chunks, e.g. "1s2 2s2 2p6 3s2".

    A string without any space is returned unchanged.  Otherwise the chunks
    are re-emitted in order, each followed by one space, and "<BR/>" is
    inserted in front of every chunk whose first character differs from
    that of the chunk before it.  Empty chunks (produced by consecutive
    spaces) are skipped.
    """
    # parse into chunks
    shells = shell.split(" ")

    # escape if only one chunk
    if len(shells) == 1:
        return shell

    # reassemble with breaks
    outp = ""
    # First character of the previous chunk.  None (rather than a character
    # value) marks "no chunk seen yet", so a chunk that itself starts with
    # "0" cannot be mistaken for the start of the string.
    last = None
    for q in shells:
        if not q:
            continue
        if last is not None and q[0] != last:
            outp += "<BR/>"
        outp += q + " "
        last = q[0]
    return outp
                

def TmpBRK(stringish):
    """Replace every space in stringish with "<BR/>".

    stringish -- space-separated text, e.g. "1811 K".

    Returns the chunks joined by "<BR/>"; a string without spaces is
    returned unchanged.
    """
    # Joining the chunks directly needs no "first chunk" sentinel, so a
    # value whose first chunk is "0" (e.g. "0 K") keeps that chunk.
    return "<BR/>".join(stringish.split(" "))


def linebrk(listish):
    """Return a new list holding each string of listish with "\\n" appended."""
    return [entry + "\n" for entry in listish]

def dostub(thisentry):
    """Create a stub article for one element from its database row.

    thisentry -- one database row: a sequence of strings indexed as listed
                 in the column table below (indexes 0 through 33 are read).

    Returns the article as a list of strings, each terminated by a newline
    (see linebrk).  The article consists of a "Test Element" template call
    with one parameter per line, a short generated description, and a
    fixed trailer.
    """
    #
    # Set up variables to generate
    # the element article stub
    #
    # database: 00 Atomic number
    #           01 Symbol
    #           02 Name
    #           03 Availability
    #           04 Importance
    #           05 Group
    #           06 Period
    #           07 Series
    #           08 Normal Phase
    #           09 Mass
    #           10 Density
    #           11 Melt Point
    #           12 Boil Point
    #           13 Above Prev
    #           14 Above
    #           15 Above Next
    #           16 Prev
    #           17 Next
    #           18 Below Prev
    #           19 Below
    #           20 Below Next
    #           21 Atomic Radius (pm)
    #           22 Bohr Atomic Radius (pm)
    #           23 Covalent Radius (pm)
    #           24 Van der Waals Radius (pm)
    #           25 Ionic Radius (pm)
    #           26 1st Ion Potential (energy required to remove one
    #              electron, forming a cation, measured in eV)
    #           27 Electron Shell Configuration
    #           28 Electrons per Shell
    #           29 Electronegativity
    #           30 Electron Affinity
    #           31 Oxidation States
    #           32 Magnetism
    #           33 Crystal Structure
    #
    number  = thisentry[0]     # atomic number   (26)
    symbol  = thisentry[1]     # element symbol  (Fe)
    name    = thisentry[2]     # name of element (Iron)
    avail   = thisentry[3]     # availability    (ubiquitous)
    imp     = thisentry[4]     # importance      (important)
    group   = thisentry[5]     # in this group   (8)
    period  = thisentry[6]     # period
    series  = thisentry[7]     # series
    phase   = thisentry[8]     # normal phase
    mass    = thisentry[9]     # atomic mass     (55.845)
    density = thisentry[10]    # density
    melt    = thisentry[11]    # melt point
    boil    = thisentry[12]    # boil point
    gprev   = thisentry[14]    # "Above" column; handed to get_element
    gnext   = thisentry[19]    # "Below" column; handed to get_element
    arad    = thisentry[21]    # atomic radius
    brad    = thisentry[22]    # Bohr atomic radius
    crad    = thisentry[23]    # covalent radius
    vrad    = thisentry[24]    # Van der Waals radius
    irad    = thisentry[25]    # ionic radius
    fionp   = thisentry[26]    # first ion potential
    elesh   = thisentry[27]    # electron shell config
    epersh  = thisentry[28]    # electrons per shell
    eneg    = thisentry[29]    # electronegativity
    eaff    = thisentry[30]    # electron affinity
    oxid    = thisentry[31]    # oxidation states
    mag     = thisentry[32]    # magnetism
    cry     = thisentry[33]    # crystalline structure

    # A mass written in square brackets is passed through verbatim; any
    # other mass is normalised through float().
    bracketed_mass = mass[0] == "["
    if bracketed_mass:
        mass_text = mass
    else:
        mass_text = str(float(mass))

    #
    # start generating the article here:
    #
    # One template parameter per line.  The neighbour links (prior/next,
    # above/below and their own neighbours) are looked up by get_element.
    to_out = [
        "{{Test Element      ",
        "name=" + name,
        "symbol=" + symbol,
        "available=" + avail,
        "need=" + imp,
        "number=" + number,
        "mass=" + mass_text,
        "group=" + group,
        "period=" + period,
        "phase=" + phase,
        "series=" + series,
        "density=" + density,
        "melts=" + TmpBRK(melt),
        "boils=" + TmpBRK(boil),
        "prior=" + get_element(number, -1),
        "next=" + get_element(number, 1),
        "above=" + get_element(gprev, 0),
        "aprior=" + get_element(gprev, -1),
        "anext=" + get_element(gprev, 1),
        "below=" + get_element(gnext, 0),
        "bprior=" + get_element(gnext, -1),
        "bnext=" + get_element(gnext, 1),
        "radius=" + arad,
        "bohr=" + brad,
        "covalent=" + crad,
        "vdwr=" + vrad,
        "irad=" + irad,
        "ipot=" + fionp,
        "econfig=" + XMLproof(elesh),
        "eshell=" + epersh,
        "enega=" + eneg,
        "eaffin=" + eaff,
        "oxstat=" + oxid,
        "magn=" + mag,
        "cryst=" + cry,
    ]

    # Tidily install pipes for the table
    to_out = tidypipes(to_out)

    to_out += ["}}"]
    to_out += ["{{Script Test}}"]

    # Generated description.
    # NOTE(review): group "19" gets the "<series> metal" wording with no
    # group number -- presumably a placeholder group in the data set;
    # confirm against the database.
    singular_series = DePluralize(series)
    if group != "19":
        to_out += ["'''" + name + "''' is a " + singular_series + " in group " + group + "."]
    else:
        to_out += ["'''" + name + "''' is a " + singular_series + " metal."]
    if cry:
        to_out += ["It has a " + cry + " crystalline structure."]
    if bracketed_mass:
        # The bracketed value comes from the atomic-mass column, so the
        # sentence must not describe it as an atomic number.
        to_out += ["It does not have any isotopes considered to be natural.  Its longest-lived known isotope has a mass number of " + mass[1:-1] + "."]

    # Fixed trailer.
    to_out += ["<BR/><BR/>\n\n"]
    to_out += [""]
    to_out += [""]
    to_out += [""]
    # The template call is assembled from pieces rather than written as one
    # literal -- presumably so that a wiki displaying this source does not
    # expand it.
    to_out += ["{" + "{Test Autostub}" + "}"]
    # to_out += docategories(typecat) #["[[Category:"+typecat+"]]"]
    to_out += ['<DIV ID="catlinks"><P CLASS="catlinks">[[Special:Categories&article=Lunarpedia%3AAutostub2_test7a|Categories]]: [[:Category:Stubs|Stubs]] | [[:Category:Elements|Elements]]']
    to_out += ["</P></DIV><!-- fake category box -- see commented out text above for actual script output -->"]
    to_out += ["<!-- Generated by a late alpha version of Autostub2 -->"]

    return linebrk(to_out)




# def main():
#     # load database
#     didxml = templatetop()
#     for q in database: didxml += dostub(q) + templatemid(); print ".",
#     didxml += templatefin()
#     # save didxml


def tidypipes(table):
    """Right-pad every string to a common width and append " |" to each.

    table -- input list of strings.

    The common width is the length of the longest input string, so the
    pipes line up in one column.  Returns a new list; table itself is not
    modified.
    """
    # The extra [0] keeps max() happy when table is empty.
    width = max([len(row) for row in table] + [0])
    return [row.ljust(width) + " |" for row in table]

def LtBlau(textish):
    """Wrap textish in a FONT tag with color #7F7FFF."""
    # Single-quoted literal: the markup itself contains double quotes and a
    # "#", which would otherwise end the string and start a comment.
    return '<FONT color="#7F7FFF">' + textish + '</FONT>'

def replacer(stringie, old, new):
    """Return stringie with each character equal to old replaced by new.

    The comparison is made one character at a time, so an old value that
    is not exactly one character long never matches and stringie comes
    back unchanged.
    """
    return "".join([new if ch == old else ch for ch in stringie])

def refbreak(ref):
    """Return ref with "<BR/>" inserted after every semicolon."""
    separator = ";"
    return ref.replace(separator, separator + "<BR/>")


def get_element(number,shift):
    """Return wiki markup naming the element at atomic number number+shift.

    number -- atomic number, as an int or a string holding one; anything
              that int() rejects (e.g. "N/A" or "") means "no element".
    shift  -- integer offset added to number.

    Returns a greyed-out "N/A" marker when number is not numeric or when
    number+shift falls outside 1..118.  Otherwise returns a wiki link whose
    label is column 1 of row number+shift of the module-level DB, coloured
    by LtBlau.
    """
    # Single-quoted literal: the markup contains double quotes and a "#".
    not_available = '<SMALL><FONT color="#7F7F7F">N/A</FONT></SMALL>'
    try:
        number = int(number)
    except (TypeError, ValueError):
        return not_available
    target = number + int(shift)
    if target < 1 or target > 118:
        return not_available
    # DB is only read here, so no "global" declaration is needed.
    return "[[Mediawiki:Sandbox|" + LtBlau(str(DB[target][1])) + "]]"

##def ():
##    """
##    """
##    return "() not implemented"


def StartXML():
    """Return the opening lines of the MediaWiki XML import file.

    The result is a list: the opening <mediawiki> tag wrapped in its own
    one-element list, followed by the three <siteinfo> lines as plain
    strings.  Stringify flattens this when the file is written.
    """
    root_open = '<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">\n'
    return [
        [root_open],
        '    <siteinfo>\n',
        '        <sitename>Lunarpedia</sitename>\n',
        '    </siteinfo>\n',
    ]

def EndXML():
    """Return the closing </mediawiki> line, wrapped in a nested list."""
    closing_tag = '</mediawiki>\n'
    return [[closing_tag]]

def ArtXML(title, contrib, date, text):
    '''XML markup for one article (a <page> element) in the import file.

    title --   title of article
    contrib -- name of script (ie Autostub2)
    date --    revision timestamp, inserted as given
    text --    the article: a string, or a (possibly nested) list of strings

    Returns a list of strings.  The title, contributor, timestamp and
    article text are XML-escaped ("&", "<" and ">" become entities) so
    that markup inside the article is carried as text content of the
    <text> element instead of being read as XML.
    '''
    from xml.sax.saxutils import escape

    def _escape_nested(piece):
        # Escape strings at every nesting depth, keeping the list shape.
        if isinstance(piece, list):
            return [_escape_nested(inner) for inner in piece]
        return escape(piece)

    if isinstance(text, list):
        body = _escape_nested(text)
    else:
        body = [escape(text)]

    out = [
        '    <page>\n',
        '        <title>' + escape(title) + '</title>\n',
        '        <revision>\n',
        '            <timestamp>' + escape(date) + '</timestamp>\n',
        '            <contributor>\n',
        '                <username>' + escape(contrib) + '</username>\n',
        '            </contributor>\n',
        # No newline after the opening tag: with xml:space="preserve" it
        # would become part of the article text.
        '            <text xml:space="preserve">',
    ]
    out += body
    out += ['</text>\n']
    out += ['        </revision>\n']
    out += ['    </page>\n']
    return out

# TSV sorter
# Public Domain

def TSVinput(filename):
    """Parse a tab separated database file.

    filename -- path of the file to read.

    Returns a list with one entry per line of the file, in file order;
    each entry is the list of tab-separated fields of that line.  The line
    terminator is removed first, so the last field of a row does not carry
    a trailing newline.
    """
    tsv = open(filename)
    try:
        biglist = tsv.readlines()
    finally:
        # Close the file even when reading fails.
        tsv.close()
    return [q.rstrip("\r\n").split("\t") for q in biglist]


#Item = ["28","Ni","Nickel","58.6934000000","N/A","46","","","10"]
#Item = ["26","Fe","Iron","55.8450000000","N/A","44","important","ubiquietous","8"]
#Atomic number, symbol, name, atomic mass, previous in group, next in group, importance, availability, group number


#What=dostub(Item)

# Load the element database; rows are indexed directly (DB[1], DB[72], ...).
DB = TSVinput("/home/Luna/Elements_H206.csv")

# Assemble the import file: XML header, one test page for each of the
# database rows 1, 72 and 118, then the XML footer.
gotit = StartXML()
for _page_suffix, _row_index in (("7a", 1), ("7b", 72), ("7c", 118)):
    gotit += ArtXML("Lunarpedia:Autostub2 test " + _page_suffix,
                    "Autostub2",
                    "2007-02-09T12:00:00Z",
                    dostub(DB[_row_index]))
gotit += EndXML()


def Stringify(listish):
    """Flatten a nested list of strings into a single string.

    listish -- an iterable whose items are strings or lists; lists are
               flattened recursively, in order.

    Returns the concatenation of all strings found.
    Raises TypeError when an item is neither a string nor a list; the
    message names the offending item's type and value.
    """
    pieces = []
    for q in listish:
        if isinstance(q, str):
            pieces.append(q)
        elif isinstance(q, list):
            pieces.append(Stringify(q))
        else:
            raise TypeError("non-string non-list!!! got %s: %r"
                            % (type(q).__name__, q))
    # One join at the end instead of repeated string concatenation.
    return "".join(pieces)

     

# Flatten the assembled pieces into one string and write the import file.
outdone = Stringify(gotit)

do_xml = open('/home/Luna/autostub2test7.xml', 'w')
try:
    do_xml.write(outdone)
finally:
    # Close the file even when the write fails.
    do_xml.close()

# shell = "1s2 2s2 2p6 3s2 3p6 3d10 4s2 4p6 4d10 4f14 5s2 5p6 5d10 5f14 6s2 6p6 6d10 7s2 7p1"
# print ShellBRK(shell)