ExoDictionary:Autostub3/Source

From ExoDictionary
Jump to: navigation, search

Version of Autostub that ran H through Z

#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#
#                                                 #
# Autostub 3                                      #
#                                                 #
# Definition Autostub Generator                   #
#                                                 #
# PUBLIC DOMAIN                                   #
#                                                 #
#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#

TESTNO = "Fix1" #? lost count here ; should now be superseded

# NOTE(review): NewCats is never referenced in the visible part of the file --
# confirm whether it is still used further down.
NewCats = []

import re # an end to a long run of using only built-in functions
##import random # only for debugging attempt

#Parser
def Parser(List):
    """Convert one raw HTML glossary slice into wiki markup.

    List is the raw text of one slice; it is walked one character at a time
    by a small state machine:

    Text   -- copy characters to the output until a '<' starts a tag.
    Tag    -- collect the tag text up to '>' and act on it (table below).
    A      -- collect link text up to the next '<', then output
              [[Capitalised Link|link text]] and skip the following tag.
    Strong -- collect the article name up to the next '<', capitalise it
              with CapIt() and skip the following tag.
    End    -- discard everything up to and including the next '>'.

    Tag names are normalised with ReadTag(), so case and attributes do not
    matter:

    A            enter A mode
    STRONG, B    until the article has a name: enter Strong mode;
                 afterwards: output ''' (bold)
    I, /I        output '' (italic)
    /B           output ''' (bold)
    CITE         output <ref> and add R to the flags
    /CITE        output </ref>
    /A           output nothing
    P, DD        output <BR/>
    MATH         output the original tag text, lower-cased
    /MATH        output </math>
    IMG          output a '''Missing Image:...''' marker, add I to the flags
    other        passed through verbatim in angle brackets

    Nested tags are not supported: whatever tag follows link or name text
    is assumed to be its end tag.

    Returns [Output, Name, Flag]: the converted text, the article name
    (empty if no name tag was seen) and a string of flag letters.
    """
    # Tags that only emit a fixed piece of markup and return to Text mode.
    FixedMarkup = {
        "I":     "''",
        "/I":    "''",
        "/B":    "'''",
        "/CITE": "</ref>",
        "/A":    "",
        "P":     "<BR/>",
        "/MATH": "</math>",
        "DD":    "<BR/>",
    }

    Pieces = []       # output fragments, joined once at the end
    TagID  = ""       # buffer for the current tag
    LinkID = ""       # buffer for the link text
    Name   = ""       # article name
    mode   = "Text"   # current state
    Flag   = ""       # flag letters collected so far
    Named  = 0        # has the article name been captured yet?

    for q in List:
        if mode == "Text":
            if q == "<":
                mode = "Tag"
                TagID = ""
            else:
                Pieces.append(q)

        elif mode == "Tag":
            if q != ">":
                TagID += q
                continue

            # Normalise once instead of once per comparison.
            Tag = ReadTag(TagID)
            mode = "Text"   # default; only A and the name tag override it

            if Tag == "A":
                mode = "A"
            elif Tag == "STRONG" or Tag == "B":
                # The first bold run names the article; later ones are
                # ordinary bold text.
                if not Named:
                    mode = "Strong"
                else:
                    Pieces.append("'''")
            elif Tag in FixedMarkup:
                Pieces.append(FixedMarkup[Tag])
            elif Tag == "CITE":
                Pieces.append("<ref>")
                Flag += "R"
            elif Tag == "MATH":
                Pieces.append("<" + TagID.lower() + ">")
            elif Tag == "IMG":
                Pieces.append("'''Missing Image:" + TagID + "'''")
                Flag += "I"
            else:
                Pieces.append("<" + TagID + ">")

        elif mode == "A":
            if q != "<":
                LinkID += q
                continue
            if LinkID != "":
                Pieces.append("[[" + CapIt(LinkID) + "|" + LinkID + "]]")
                LinkID = ""
            # Always leave A mode here.  Previously an anchor with no text
            # stayed in A mode, so the closing tag and the text after it
            # were swallowed into the next link.
            mode = "End"

        elif mode == "Strong":
            if q == "<":
                Name = CapIt(Name)
                mode = "End"
                Named = 1
            else:
                Name += q

        elif mode == "End":
            if q == ">":
                mode = "Text"

        else:
            # Cannot happen unless a new mode is added without a handler.
            raise TypeError("Parser internal error: WTF is "+mode+" mode??")

    return ["".join(Pieces), Name, Flag]

def Stringify(listish):
    """Flatten a (possibly nested) list of strings into a single string.

    listish is iterated in order; str items are concatenated and list items
    are flattened recursively.  A plain string is also accepted, since
    iterating it yields one-character strings.

    Raises TypeError if an item is neither a str nor a list; the offending
    type and value are printed first to help locate the bad input.
    """
    parts = []
    for q in listish:
        if isinstance(q, str):
            parts.append(q)
        elif isinstance(q, list):
            parts.append(Stringify(q))
        else:
            # single-argument print() behaves the same on Python 2 and 3
            print(type(q))
            print(q)
            raise TypeError("non-string non-list!!!")
    return "".join(parts)

def KingMe(stringy):
    '''capitalizer that doesn't uncapitalize

    Upper-cases the first character and leaves the rest untouched.  Slicing
    (rather than indexing) means an empty string is returned unchanged
    instead of raising IndexError -- CapAll() passes one in whenever a word
    consists only of commas and periods.
    '''
    return stringy[:1].upper()+stringy[1:]

def KingMe2(stringy):
    '''capitalizer that doesn't uncapitalize, but doesn't capitalise some things

    Words in the small-word list below (compared case-insensitively) are
    returned exactly as given; any other word gets its first character
    upper-cased and the rest left alone.  An empty string is returned
    unchanged instead of raising IndexError.
    '''
    SmallWords = ("to", "from", "and", "of", "for", "with", "in", "on",
                  "by", "at", "auf", "about", "until", "into")
    if stringy.lower() in SmallWords:
        return stringy
    return stringy[:1].upper()+stringy[1:]

def ReadTag(rawtext):
    # Tag name in SCREAMING CAPITALS: everything before the first space,
    # upper-cased, with backslashes, periods and spaces trimmed off both ends.
    head = rawtext.partition(' ')[0]
    shouted = head.upper()
    return shouted.strip('\\. ')

def CapIt(Name):
    # Title-style capitalisation: split on whitespace, trim commas and
    # periods off each word, capitalise it with KingMe2() (which leaves the
    # small words alone) and glue the words back together with single spaces.
    Words = [KingMe2(Word.strip(',.')) for Word in Name.split()]
    return " ".join(Words)

def CapAll(Name):
    # Like CapIt(), but every word is capitalised: each whitespace-separated
    # word has commas and periods trimmed, goes through the original
    # KingMe(), and the results are rejoined with single spaces.
    Pieces = []
    for Word in Name.split():
        Pieces.append(KingMe(Word.strip(',.')))
    return " ".join(Pieces)

def FindRedir(gunk):
    """Pick the redirect target out of a piece of wiki text.

    Every "[[" starts a link whose target runs up to the next "|" or "]".
    Returns:
      None            if no complete link is found;
      the target      if exactly one link is found (may be "");
      the longest     if several are found (the first one wins a tie),
                      or None when all of them are empty.
    """
    # "[[", then the target (anything but | or ]), then the terminator.
    link = re.findall(r"\[\[([^|\]]*)[|\]]", gunk)

    if not link:
        return None
    if len(link) == 1:
        return link[0]

    theout = max(link, key=len)   # max() keeps the first of equal lengths
    if theout == "":
        return None
    return theout

# Leading-Space-Remove-O-Mat, embedded version
def deforespace(filish):
    # Strip the leading whitespace from every item of filish (the lines of
    # the input files) and return the result glued into one string by
    # Stringify().  This is the embedded form of what used to be a
    # stand-alone, file-to-file script.
    return Stringify([q.lstrip() for q in filish])

#Taken from Hosta... Welcome guest, ACCWOM2
# three functions to help take klutz behind keyb out of picture better
def Listify(stringy):
    # Explode stringy into a (mutable) list: each item is itself iterated
    # and its elements appended, so a string becomes a list of characters.
    return [piece for q in stringy for piece in q]

#print Listify("Peter Piper Picked a Peck of Prickly Peppercorns")

def Dehyphenation(stringy):
    """Replace every hyphen in the string stringy with a space.

    Returns a tuple (text, foundat):
      text    -- stringy with each "-" turned into " ", then stripped of
                 leading and trailing whitespace;
      foundat -- positions of the hyphens, counted in the ORIGINAL
                 (unstripped) stringy, in ascending order.

    Uses str.replace/enumerate directly instead of a hand-rolled character
    loop, so it no longer needs the Listify/Stringify round trip.
    """
    foundat = [qq for qq, q in enumerate(stringy) if q == "-"]
    return (stringy.replace("-", " ").strip(), foundat)

def Rehyphenation(stringy,findat):
    """Put hyphens back at the given positions.

    stringy -- the text to patch
    findat  -- positions (as produced by Dehyphenation) to overwrite

    Returns a copy of stringy in which the character at every position
    listed in findat is replaced by "-".  Positions past the end of
    stringy are ignored; duplicates are harmless.
    """
    spots = set(findat)   # one lookup per character instead of a scan
    return "".join(["-" if qq in spots else q
                    for qq, q in enumerate(stringy)])

# okay, four
def deparenth(stringy):
    """Trim a name down to what precedes its parenthetical or '=' alias.

    Everything from the first "(" onward is dropped.  If an "=" remains,
    a single leading "=" is removed and then everything from the next "="
    onward is dropped as well.  The result is whitespace-stripped.
    """
    head = stringy.partition("(")[0].strip()

    # no equality sign: nothing more to do
    if "=" not in head:
        return head

    # "= foo" style entries: lose the leading sign first
    if head.startswith("="):
        head = head[1:].strip()

    return head.partition("=")[0].strip()
    
    


#Get raw html
#RawML = raw_input()

##RawML = ""

##tsv=open('/home/Luna/Raw_SP-7_A.txt')
##RawML += tsv.read()
##tsv.close()

##tsv=open('/home/Luna/Raw_SP-7_B.txt_')
##RawML += tsv.read()
##tsv.close()

##tsv=open('/home/Luna/Raw_SP-7_C.txt_')
##RawML += tsv.read()
##tsv.close()
##
##tsv=open('/home/Luna/SP7-d.html_')
##RawML += tsv.read()
##tsv.close()
##
##tsv=open('/home/Luna/SP7-e.html_')
##RawML += tsv.read()
##tsv.close()
##
##tsv=open('/home/Luna/SP7-f.html_')
##RawML += tsv.read()
##tsv.close()
##
##tsv=open('/home/Luna/SP7-g.html_')
##RawML += tsv.read()
##tsv.close()


# Read the raw HTML, one file per letter (H through Z), as a list of lines.
RawML = []

for letter in "hijklmnopqrstuvwxyz":
    tsv = open('/home/Luna/SP7_' + letter + '.html')
    try:
        RawML += tsv.readlines()
    finally:
        # close the handle even if the read fails
        tsv.close()


# Strip leading whitespace from every line and glue the lot into one string.
RawML = deforespace(RawML)

##raise Exception, "stopping here"

# Parse Page into slices using <DT> tags
# (regular expression so that <dt>, <Dt> and <dT> are matched as well)
RawList = re.split("<[Dd][Tt]>",RawML)

# print "Rawlist:", RawList

#Create ProtoArticles and Redirects and stuff them full of proto-articles
ProtoArticles = []
Redirects = []
bailout = 0
punchout = 0
# Turn every slice into either a redirect or a proto-article.
#
# The hand-written per-name patch tables used for earlier letters (all of
# them already commented out in this version) have been removed; the name
# clean-up below is done mechanically instead.
for q in RawList:
    # Nothing to parse in an empty slice.
    if q == "":
        continue

    artie = Parser(q)        # [wiki text, article name, flags]
    rawname = artie[1]       # name as parsed, before the clean-up below

    # Cross-reference entries: an "=" within the first 5 characters or a
    # "see" (any case) within the first 20 makes this a redirect rather
    # than an article.
    if (artie[0].count('=',0,5) or artie[0].upper().count('SEE',0,20)):
        artie2 = FindRedir(artie[0])
        # Need both a link target and a name to redirect from; entries
        # lacking either are dropped.
        if artie2 and rawname != "":
            Redirects += [(deparenth(rawname),artie2)]
        continue

    # Ordinary article: derive the canonical name.
    base,hyphs = Dehyphenation(rawname)
    base = deparenth(base)
    newname = CapIt(base)

    # Diagnostics: report names that carry (or still carry) a parenthesis
    # or an equal sign.  Single-argument print() is valid on Python 2 and 3.
    for mark in ("(", "="):
        if rawname.count(mark): print("artie[1] == " + rawname)
        if newname.count(mark): print("newname == " + newname)

    artie[1] = newname

    # Spelling variants of the name, without hyphens ...
    newentry = [newname,
                CapIt(base),         # capitalised, small words left alone
                CapAll(base),        # every word capitalised
                base,                # original capitalisation
                base.capitalize()]   # only the first letter capitalised
    # ... plus the same four with the hyphens put back.
    newentry += [Rehyphenation(qp,hyphs) for qp in newentry[1:]]

    # Pare out the redundants, keeping first-seen order.
    renewentry = []
    for qp in newentry:
        if qp not in renewentry:
            renewentry += [qp]

    # Every remaining variant other than the canonical name (always the
    # first entry) redirects to it.
    for qp in renewentry[1:]:
        Redirects += [[qp,"#REDIRECT: [["+artie[1]+"]]"]]

    # Slices that never yielded a name are not articles.
    if rawname != "":
        ProtoArticles += [artie]

# Post-everything corrections to redirect names (should have thought of this
# earlier).
#
# Maps a badly generated redirect name to the name it should have had.
# NOTE: the earlier version of this pass was an if/elif chain whose branches
# read `q[0] == "..."` -- a comparison, not an assignment -- so none of the
# corrections was ever applied.  That chain also listed
# "Impact strength or impact energy" twice; only the first branch
# ("Impact Energy") could ever be reached, so that is the one kept here.
RedirectFixes = {
    "Her herc": "Herc",
    "Hor horo": "Horo",
    "Hya hyda": "Hyda",
    "Hyi hydi": "Hydi",
    "Impact strength or impact energy": "Impact Energy",
    "Ind indi": "Indi",
    "J D-splay": "J-Display",
    "J d-splay": "J-display",
    "Lep leps": "Leps",
    "Lib libr": "Libr",
    "Lmi l min": "Lmi",
    "Lup lupi": "Lupi",
    "Lyn lyne": "Lyne",
    "Lyr lyra": "Lyr",
    "Men mens": "Mens",
    "Mic micr": "Micr",
    "Mon mono": "Mon",
    "Nor norm": "Norm",
    "Oct octn": "Octn",
    "Oph ophi": "Ophi",
    "Ori orio": "Orio",
    "Pav pavo": "Pavo",
    "Peg pegs": "Pegs",
    "Per pers": "Pers",
    "Phe phoe": "Phoe",
    "Pic pict": "Pict",
    "Positive G Or Positive G": "Positive G",
    "Psa psc a": "Psa",
    "Psc pisc": "Pisc",
    "Pup pupp": "Pupp",
    "Scl scul": "Scul",
    "Sco scor": "Scor",
    "Sct scut": "Scut",
    "Ser serp": "Serp",
    "Sex sext": "Sext",
    "Sge sgte": "Sgte",
    "Sgr sgtr": "Sgtr",
    "Tau taur": "Taur",
    "Tra tr au": "Tra",
    "Tri tria": "Tria",
    "Tuc tucn": "Tucn",
    "Uma u maj": "Uma",
    "Umi u min": "Umi",
    "Vel velr": "Velr",
    "Vir virg": "Virg",
    "Vol voln": "Voln",
    "Vul vulp": "Vulp",
    "Jato jato or jato": "Jato",
    "Rato rato or rato": "Rato",
}

# Titles of the articles being generated; a corrected redirect must not land
# on one of these, or the redirect page would share the article's title.
ArticleTitles = set(entry[1] for entry in ProtoArticles)

Rechecked = []

for q in Redirects:
    bailout = 0
    fixed = RedirectFixes.get(q[0])
    if fixed is not None:
        # Build a fresh entry instead of assigning to q[0], so this also
        # works if an entry happens to be a tuple.
        q = [fixed] + list(q[1:])
        # Drop the redirect when the corrected name is an article title or
        # is the very page the redirect points at (a self-redirect).
        if fixed in ArticleTitles:
            bailout = 1
        elif q[1] in (fixed, "#REDIRECT: [[" + fixed + "]]"):
            bailout = 1

    if not bailout: Rechecked += [q]

Redirects = Rechecked

# Single-argument print calls give the same output under Python 2 and 3.
print("%d articles" % len(ProtoArticles))
print("%d redirects" % len(Redirects))
### So tell me, why are full articles mixed in with the redirect list? --fixed
##to_out = []
##for q in Redirects:     to_out += q[0]+"\n"
##execute = Stringify(to_out)
##do_w=open("G_H501_2321B.txt", 'w')
##do_w.write(execute)
##do_w.close()

##raise Exception, "Reached designated stopping point"

##qqq=0
##for q in ProtoArticles:
##    if q[1] == "Aurora": print "aurora is",qqq
##    if q[1] == "Absolute Magnitude": print "absolute magnitude is",qqq
##    qqq += 1


#                                                     #
# from PreAutostub3:                                  #
#                                                     #
# ProtoArticles (article text, article name, flags)   #
# Redirects (redirect name, redirect to this article) #
#                                                     #

def XMLproof(textish):
    """Return textish with XML special characters replaced by entities.

    textish -- a string (or any iterable of one-character strings)

    '<', '>', '&' and '"' become '&lt;', '&gt;', '&amp;' and '&quot;';
    every other item is copied through unchanged.  The colon is
    deliberately NOT encoded here (callers that need it write "%3A"
    themselves).
    """
    # The replacements had degraded to the bare characters themselves (and
    # the double-quote branch to an unterminated string literal), so
    # nothing was actually being escaped.
    entities = {
        "<": "&lt;",
        ">": "&gt;",
        "&": "&amp;",
        '"': "&quot;",
    }
    # Replacement is per character, so an '&' produced by one substitution
    # is never escaped a second time.
    return "".join([entities.get(q, q) for q in textish])


def StartXML():
    """Return the opening pieces of a MediaWiki export file.

    The result is a list whose first item is itself a one-element list
    holding the <mediawiki> root tag, followed by the three <siteinfo>
    lines as plain strings.  That nesting is kept exactly as it always
    was; file_o_mat hands the assembled pieces to Stringify (defined
    elsewhere), which presumably flattens them.
    """
    root_tag = '<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">\n'
    site_lines = [
        '    <siteinfo>\n',
        '        <sitename>Lunarpedia</sitename>\n',
        '    </siteinfo>\n',
    ]
    return [[root_tag]] + site_lines

def EndXML():
    """Return the closing piece of a MediaWiki export file.

    The closing </mediawiki> tag is wrapped in a one-element list inside
    the returned list, mirroring the shape StartXML uses for the root tag.
    """
    return [['</mediawiki>\n']]

def ArtXML(title, contrib, date, text):
    '''XML markup for one page (article, redirect or category) in the file

    title --   title of the page
    contrib -- name of script (ie Autostub2)
    date --    revision timestamp, e.g. "2007-05-01T00:00:00Z"
    text --    the page text as a sequence of pieces; it is spliced in
               as-is, so the caller must already have XML-escaped it
               (see XMLproof)

    Returns a list: a one-element list holding the <page> tag, then the
    remaining markup pieces as strings.
    '''
    # Local import keeps this function self-contained.
    from xml.sax.saxutils import escape

    # title / contrib / date land inside XML elements, so '&', '<' and '>'
    # in them must be escaped or the export file is not well-formed.
    out  = [['    <page>\n']]
    out +=  ['        <title>'+escape(title)+'</title>\n']
    out +=  ['        <revision>\n']
    # (no trailing newline on the timestamp line -- unchanged)
    out +=  ['            <timestamp>'+escape(date)+'</timestamp>']
    out +=  ['            <contributor>\n']
    out +=  ['                <username>'+escape(contrib)+'</username>\n']
    out +=  ['            </contributor>\n']
    # xml:space="preserve": no newline between the tag and the text itself
    out +=  ['            <text xml:space="preserve">']
    out +=  text
    out +=  ['</text>\n']
    out +=  ['        </revision>\n']
    out +=  ['    </page>\n']
    return out

def linebrk(listish):
    """Return a new list with a newline appended to every item of listish."""
    return [entry + "\n" for entry in listish]


# Provisional categories, in the order their tags are appended to an article.
_DODEF_CATEGORIES = (
    "Acoustics", "Aerospace", "Aircraft", "Astronomy", "Chemistry",
    "Components", "Computing", "Electronics", "Engineering", "Hardware",
    "Instruments", "Mathematics", "Measurements", "Medical", "Navigation",
    "Nuclear", "Optics", "Photography", "Physics", "Radio", "Rocketry",
    "Statistics", "Time",
)

# Keystring -> categories it counts toward.  A keystring found anywhere in
# the lower-cased definition text adds one hit to each listed category
# (once per keystring, however often it occurs).
_DODEF_KEYSTRINGS = (
    ("acoustic",       ("Acoustics",)),
    ("aero",           ("Aerospace", "Aircraft")),
    ("aerospace",      ("Aerospace",)),
    ("altitud",        ("Aerospace", "Aircraft", "Instruments", "Measurements")),
    ("angle",          ("Mathematics", "Measurements", "Navigation")),
    ("antenna",        ("Components", "Instruments", "Radio")),
    ("aphelion",       ("Astronomy",)),
    ("apogee",         ("Astronomy",)),
    ("astronaut",      ("Aerospace", "Rocketry")),
    ("astronomy",      ("Astronomy",)),
    ("atmospher",      ("Aerospace", "Aircraft")),
    ("atom",           ("Nuclear", "Physics")),
    ("attitude",       ("Mathematics", "Navigation")),
    ("axes",           ("Mathematics", "Navigation")),
    ("axis",           ("Mathematics", "Navigation")),
    ("azimuth",        ("Mathematics", "Navigation")),
    ("baro",           ("Instruments",)),
    ("binary",         ("Computing",)),
    ("bodily",         ("Medical",)),
    ("body",           ("Medical",)),
    ("candle",         ("Astronomy", "Optics")),
    ("celestial",      ("Astronomy",)),
    ("cells",          ("Medical",)),
    ("chemical",       ("Chemistry",)),
    ("chromat",        ("Optics", "Photography")),
    ("circuit",        ("Electronics",)),
    ("coeff",          ("Mathematics", "Statistics")),
    ("compression",    ("Acoustics", "Mathematics")),
    ("computer",       ("Computing",)),
    ("construct",      ("Engineering",)),
    ("convuls",        ("Medical",)),
    ("cooling",        ("Components",)),
    ("cosmic",         ("Astronomy",)),
    ("deci",           ("Measurements",)),
    ("dental",         ("Medical",)),
    ("dimen",          ("Mathematics", "Physics")),
    ("displ",          ("Components", "Electronics", "Instruments")),
    ("duct",           ("Aircraft", "Components", "Engineering", "Hardware")),
    ("ear",            ("Medical",)),
    ("east",           ("Measurements", "Navigation")),
    ("eclipse",        ("Astronomy",)),
    ("electr",         ("Electronics",)),
    ("elevation",      ("Measurements", "Navigation")),
    ("ellip",          ("Mathematics", "Navigation")),
    ("emit",           ("Physics", "Radio")),
    ("emuls",          ("Photography",)),
    ("energy",         ("Chemistry", "Nuclear", "Physics")),
    ("engine",         ("Aircraft", "Engineering", "Hardware", "Rocketry")),
    ("equat",          ("Mathematics",)),
    ("flight",         ("Aerospace", "Aircraft", "Rocketry")),
    ("floating",       ("Computing", "Mathematics")),
    ("fluid",          ("Chemistry", "Engineering", "Photography")),
    ("frequenc",       ("Acoustics", "Optics", "Radio")),
    ("galactic",       ("Astronomy",)),
    ("galax",          ("Astronomy",)),
    ("gas",            ("Acoustics", "Astronomy", "Chemistry", "Physics")),
    ("gate",           ("Computing", "Electronics")),
    ("great circ",     ("Mathematics", "Navigation")),
    ("guidance",       ("Instruments", "Navigation", "Rocketry")),
    ("gyro",           ("Components", "Instruments", "Navigation")),
    ("heat",           ("Nuclear", "Physics")),
    ("helio",          ("Astronomy",)),
    ("human",          ("Medical",)),
    ("indicator",      ("Instruments",)),
    ("inertia",        ("Navigation", "Physics", "Rocketry")),
    ("instrument",     ("Instruments",)),
    ("lagrang",        ("Astronomy",)),
    ("launching",      ("Aerospace", "Rocketry")),
    ("leg",            ("Medical",)),
    ("limb",           ("Medical",)),
    ("liquid",         ("Photography", "Physics")),
    ("logic",          ("Computing", "Electronics", "Mathematics")),
    ("magnet",         ("Electronics", "Instruments", "Physics")),
    ("malfunction",    ("Engineering",)),
    ("maneuv",         ("Aerospace", "Aircraft", "Navigation", "Rocketry")),
    ("measure",        ("Mathematics", "Measurements")),
    ("medic",          ("Medical",)),
    ("memory",         ("Computing",)),
    ("meter",          ("Measurements",)),
    ("mile",           ("Measurements",)),
    ("modulation",     ("Acoustics", "Electronics", "Optics", "Radio")),
    ("molar",          ("Chemistry",)),
    ("mole",           ("Chemistry", "Physics")),
    ("moon",           ("Astronomy",)),
    ("motion picture", ("Photography",)),
    # was counted toward Mathematics -- a slip; every other anatomical
    # keystring (leg, limb, ear, teeth, body ...) counts toward Medical
    ("muscle",         ("Medical",)),
    ("navigat",        ("Navigation",)),
    ("neutron",        ("Physics",)),
    ("newton",         ("Measurements", "Physics")),
    ("north",          ("Measurements", "Navigation")),
    ("notation",       ("Mathematics", "Physics")),
    ("nozz",           ("Components", "Engineering")),
    ("nuclear",        ("Nuclear",)),
    ("number",         ("Mathematics", "Statistics")),
    ("occultation",    ("Astronomy",)),
    ("ometer",         ("Instruments",)),
    ("orbit",          ("Aerospace", "Astronomy", "Rocketry")),
    ("oscill",         ("Acoustics", "Instruments", "Optics", "Radio")),
    ("osphere",        ("Aerospace",)),
    ("particle",       ("Physics",)),
    # was spelled "perhelion", which correctly spelled text never contains
    ("perihelion",     ("Astronomy",)),
    ("perigee",        ("Astronomy",)),
    ("photograph",     ("Photography",)),
    ("physicist",      ("Physics",)),
    ("physiolog",      ("Medical",)),
    ("pitch",          ("Aerospace", "Aircraft", "Instruments", "Navigation")),
    ("planet",         ("Astronomy",)),
    ("pressure",       ("Aerospace", "Aircraft", "Engineering")),
    ("prob",           ("Mathematics", "Statistics")),
    ("processing",     ("Computing",)),
    ("proton",         ("Physics",)),
    ("radar",          ("Electronics", "Instruments")),
    ("radio ",         ("Radio",)),   # trailing space is deliberate
    ("radioactive",    ("Nuclear",)),
    ("ray",            ("Astronomy", "Mathematics")),
    ("reactions",      ("Chemistry", "Nuclear")),
    ("reactor",        ("Nuclear",)),
    ("refract",        ("Optics",)),
    ("reliability",    ("Engineering",)),
    ("roll",           ("Instruments", "Navigation")),
    ("rotation",       ("Astronomy", "Instruments", "Navigation")),
    ("scope",          ("Instruments",)),
    ("sea level",      ("Aerospace", "Aircraft", "Instruments")),
    ("signal",         ("Instruments", "Radio")),
    ("solar",          ("Astronomy",)),
    ("solid",          ("Physics",)),
    ("sonic",          ("Acoustics", "Aircraft")),
    ("sound",          ("Acoustics",)),
    ("south",          ("Navigation",)),
    ("spacecraft",     ("Aerospace", "Rocketry")),
    ("spectrum",       ("Optics", "Radio")),
    ("stability",      ("Aerospace", "Aircraft", "Instruments", "Navigation")),
    ("star",           ("Astronomy",)),
    ("statistic",      ("Statistics",)),
    ("structural",     ("Engineering",)),
    ("takeoff",        ("Aircraft", "Rocketry")),
    ("teeth",          ("Medical",)),
    ("terrestrial",    ("Navigation", "Optics")),
    ("testing",        ("Engineering",)),
    ("thermodynamic",  ("Physics",)),
    ("time",           ("Time",)),
    ("tooth",          ("Medical",)),
    ("tracking",       ("Optics", "Radio")),
    ("trajectory",     ("Aerospace", "Aircraft", "Mathematics", "Navigation",
                        "Physics", "Rocketry")),
    ("transmis",       ("Radio",)),
    ("transmit",       ("Radio",)),
    ("transponder",    ("Components", "Radio")),
    ("tron",           ("Electronics", "Nuclear", "Physics")),
    ("unit of",        ("Measurements",)),
    ("vacuum",         ("Aerospace", "Physics", "Rocketry")),
    ("value",          ("Mathematics", "Measurements")),
    ("vector",         ("Mathematics", "Navigation", "Rocketry")),
    ("vehicle",        ("Aerospace", "Rocketry")),
    ("velocity",       ("Navigation", "Physics")),
    ("visco",          ("Physics",)),
    ("wave",           ("Acoustics", "Optics", "Radio")),
    ("wavelength",     ("Optics", "Radio")),
    ("west",           ("Navigation",)),
    ("yaw",            ("Instruments", "Navigation")),
    ("year",           ("Time",)),
)


def dodef(thisentry):
    """Create stub article from sequence

    thisentry is indexed as:
      0 -- body of definition article
      1 -- name of article (must be non-empty: its first character picks
           the alphabetical category)
      2 -- flags
           I: missing image
           R: needs references section

    Returns the article as a list of lines, each ending in a newline.

    Side effect: the upper-cased first two characters of the name are
    appended to the module-level list NewCats if not already there.
    """
    body = thisentry[0]
    name = thisentry[1]
    flags = thisentry[2]

    #
    # start generating the article here:
    #
    to_out = [
        "{{Autostub}}",
        "{{Initial Proof Needed}}",
        "{{Initial Categorization Proof Needed}}",
        "'''" + name + "'''",
        body,
        "==References==",
        "''This article is based on NASA's [[NASA SP-7|Dictionary of Technical Terms for Aerospace Use]]''",
    ]
    if "R" in flags:
        to_out.append("<references/>")

    # The colon in "Category:" is written as %3A throughout.
    to_out.append("[[Category%3ADefinitions]]")
    to_out.append("[[Category%3ANASA SP-7]]")
    if "I" in flags:
        to_out.append("[[Category%3ADefinitions with Missing Images]]")

    # assign alphabetical categorizations
    to_out.append("[[Category%3A" + name[0].upper() + " (all)]]")
    twodigit = name[0:2].upper()
    if len(twodigit) == 2:
        to_out.append("[[Category%3A" + twodigit + "]]")
        # remember the two-letter index so its category page gets created
        if twodigit not in NewCats:
            NewCats.append(twodigit)

    # assigning provisional categorizations based on keystrings
    haystack = body.lower()
    hits = dict.fromkeys(_DODEF_CATEGORIES, 0)
    for keystring, categories in _DODEF_KEYSTRINGS:
        if keystring in haystack:
            for category in categories:
                hits[category] += 1

    # inserting assigned provisional categorizations and noting times it
    # was set off; the tag is padded so the hit-count comments line up
    categorized = False
    for category in _DODEF_CATEGORIES:
        if hits[category]:
            categorized = True
            tag = "[[Category%3A" + category + "]]"
            to_out.append(tag.ljust(28) + "<!-- " + str(hits[category]) + " -->")

    if not categorized:
        to_out.append("[[Category%3AMiscellaneous]] <!-- Autostub3 unable to provisionally categorize -->")

    # every line gets its newline here (same effect as linebrk)
    return [line + "\n" for line in to_out]


def doredir(thisentry):
    """Create redirect text from a two item sequence

    thisentry is indexed as:
      0 -- name of redirect (not used here)
      1 -- redirect to this article

    If item 1 already begins with "#REDIRECT" it is returned untouched
    (as the string it is); otherwise a one-element list holding freshly
    built redirect markup is returned.
    """
    target = thisentry[1]
    # already complete redirect markup -- pass it straight through
    if target.startswith("#REDIRECT"):
        return target
    return ["#REDIRECT: [[" + target + "]]"]

def TLcat_o_mat(cats):
    """Create category pages for two letter indices

    cats -- sequence of index strings (e.g. "HY")

    Returns one two-item list per index:
      0 -- name of category page ("Category:" + index)
      1 -- page text, ending with the parent categorization taken from
           the first character of the index
    """
    pages = []
    for prefix in cats:
        page_title = "Category:" + prefix
        page_text = ("This category includes definitions starting with the letters "
                     + prefix + ". \n\n\n [[Category:" + prefix[0] + "]]")
        pages.append([page_title, page_text])
    return pages

def doare(tup):
    """Build the XML page for one redirect.

    tup -- (redirect name, redirect target) as held in Redirects

    The redirect text from doredir is passed piece by piece through
    XMLproof, and the result is wrapped with ArtXML under the redirect's
    own name.
    """
    escaped = []
    for piece in doredir(tup):
        escaped.extend(XMLproof(piece))
    return ArtXML(tup[0], "Autostub3", "2007-05-02T00:00:00Z", escaped)

def doanart(tup):
    """Build the XML page for one definition article.

    tup -- (article text, article name, flags) as held in ProtoArticles

    Each line produced by dodef is passed through XMLproof, and the
    result is wrapped with ArtXML under the article's name.
    """
    escaped = []
    for line in dodef(tup):
        escaped.extend(XMLproof(line))
    return ArtXML(tup[1], "Autostub3", "2007-05-01T00:00:00Z", escaped)

def doacat(tup):
    """Build the XML page for one category.

    tup -- (category page name, page text) as produced by TLcat_o_mat

    The page text is passed item by item through XMLproof, and the result
    is wrapped with ArtXML under the category page's name.
    """
    escaped = []
    for piece in tup[1]:
        escaped.extend(XMLproof(piece))
    return ArtXML(tup[0], "Autostub3", "2007-05-02T00:00:00Z", escaped)

def file_o_mat(glubby, namebase, maxout=64):
    """Write the generated pages out as a numbered series of XML files.

    glubby   -- the pages, one item per page (each as returned by ArtXML)
    namebase -- base name; chunk k is written to namebase + "_k.xml",
                counting from 1
    maxout   -- how many pages go into each xml file (must be positive)

    Each file is StartXML() + one chunk of pages + EndXML(), fused into a
    single string by Stringify.  For every file written, the second
    element of its first and of its last page (the <title> line ArtXML
    builds) is printed as a progress marker.

    Nothing is written when glubby is empty; previously that case died
    with an IndexError while printing the progress marker.
    """
    pages = list(glubby)
    nowserving = 1   # which chunk is being saved

    for start in range(0, len(pages), maxout):
        miniglub = pages[start:start + maxout]
        # single-argument print: same output under Python 2 and 3
        print(str(miniglub[0][1])+" -- "+str(miniglub[-1][1]))
        execute = Stringify(StartXML() + miniglub + EndXML())
        do_xml = open(namebase+"_"+str(nowserving)+".xml", 'w')
        try:
            do_xml.write(execute)
        finally:
            # close the file even if the write fails
            do_xml.close()
        nowserving += 1




# The actual run: gather every generated page into glub, then write the lot.
#
# Articles go first.  That order matters: doanart (via dodef) is what fills
# NewCats, so the category pages can only be built once all articles are done.
glub = [doanart(article) for article in ProtoArticles]

glub.extend([doare(redirect) for redirect in Redirects])

DoCats = TLcat_o_mat(NewCats)

glub.extend([doacat(category) for category in DoCats])

file_o_mat(glub, "/home/Luna/autostub3_H503")

#uncomment to test
##import random
##for q in ProtoArticles:
##    if (random.random() > 0.00): glub += [doanart(q)]
##      
##for q in Redirects:
##    if (random.random() > 0.92): glub += [doare(q)]
##
##DoCats = TLcat_o_mat(NewCats)
##
##for q in DoCats:
##    if (random.random() > 0.5): glub += [doacat(q)]    
##    
##print glub    
##    
##file_o_mat(glub,"/home/Luna/autostub3_A2_test2")    

#testbench activities
##print NewCats
##print DoCats
##print Stringify(glub)

# Buffer for an optional plain listing of article / redirect names; the lines
# below that would fill it and write it to a text file are commented out.
to_out  = []
##to_out += "==Articles==\n"
##for q in ProtoArticles: to_out += "[["+q[1]+"]]<BR/>\n"
##for q in ProtoArticles: to_out += q[1]+"\n"
##to_out += "==Some of the Redirects==\n"
##for q in Redirects:     to_out += "[["+q[0]+"]]<BR/>\n"
##for q in Redirects:     to_out += q[0]+"\n"
##execute = Stringify(to_out)
##do_w=open("G_H501_0832.txt", 'w')
##do_w.write(execute)
##do_w.close()