ExoDictionary:Autostub3/Source
From ExoDictionary
Version of Autostub that was run on the dictionary entries for letters H through Z
#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#
#                                                 #
#   Autostub 3                                    #
#                                                 #
#   Definition Autostub Generator                 #
#                                                 #
#   PUBLIC DOMAIN                                 #
#                                                 #
#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#

TESTNO = "Fix1"  #? lost count here ; should now be superseded
NewCats = []

import re  # an end to a long run of using only built-in functions
##import random # only for debugging attempt


# Tags that just emit a fixed piece of wiki text and drop back to Text mode.
# tag -> (text appended to the output, flag letter(s) appended to the flags)
_SIMPLE_TAGS = {
    "I":     ("''", ""),
    "/I":    ("''", ""),
    "/B":    ("'''", ""),
    "CITE":  ("<ref>", "R"),
    "/CITE": ("</ref>", ""),
    "/A":    ("", ""),
    "P":     ("<BR/>", ""),
    "/MATH": ("</math>", ""),
    "DD":    ("<BR/>", ""),
}

# Words KingMe2() leaves exactly as written instead of capitalizing.
_SMALL_WORDS = frozenset([
    "to", "from", "and", "of", "for", "with", "in", "on", "by", "at",
    "auf", "about", "until", "into",
])

# "[[" followed by the link target, terminated by "|" or "]".
_WIKILINK_RE = re.compile(r"\[\[([^|\]]*)[|\]]")


#Parser
def Parser(List):
    """Convert one slice of dictionary HTML into wiki text.

    List is iterated item by item (in practice it is a string, so character
    by character) through a small state machine:

      Text   -- copy input to the output until a '<' switches to Tag mode.
      Tag    -- collect the tag text until '>', then act on its name as
                returned by ReadTag():
                  A                -- enter A mode
                  STRONG or B      -- the first one enters Strong mode (it
                                      holds the article name); later ones
                                      output "'''"
                  I, /I            -- output "''"
                  /B               -- output "'''"
                  CITE, /CITE      -- output <ref>, </ref>; CITE adds flag R
                  P, DD            -- output <BR/>
                  MATH             -- output the whole tag text lowercased,
                                      in angle brackets
                  /MATH            -- output </math>
                  IMG              -- output a missing-image marker holding
                                      the tag text; adds flag I
                  /A               -- output nothing
                  anything else    -- output verbatim in angle brackets
                Tag names are normalized by ReadTag() to cope with case
                inconsistencies in the source HTML.
      A      -- collect link text until '<', then output a wikilink
                [[Capitalized Target|text]] (nothing if the text is empty).
      Strong -- collect the article name until '<', then capitalize it.
      End    -- ignore everything up to the next '>'.  Nested tags are not
                supported; any new tag is assumed to be the end tag.

    Returns [Output, Name, Flag]: the wiki text, the capitalized article
    name ("" if no STRONG/B tag was seen) and the accumulated flag letters.

    Raises TypeError if the state machine ends up in an unknown mode.
    """
    #Setup variables and buffers
    TagID = ""      # buffer for the current tag
    LinkID = ""     # buffer for the link name
    Name = ""       # this is hopefully the correct name for the article
    Output = []     # output text, collected in pieces and joined at the end
    mode = "Text"   # what mode we're in
    Flag = ""       # Is this article flagged for something?
    Named = 0       # Is this already named?

    #Loop
    for q in List:
        if mode == "Text":
            if q == "<":
                mode = "Tag"
                TagID = ""
            else:
                Output.append(q)

        elif mode == "Tag":
            if q != ">":
                TagID += q
                continue
            tag = ReadTag(TagID)  # normalize once instead of once per test
            mode = "Text"         # every tag except A and the first STRONG/B
            if tag == "A":
                mode = "A"
            elif tag == "STRONG" or tag == "B":
                # B was merged with STRONG to cope with the 'H' definitions.
                if not Named:
                    mode = "Strong"
                else:
                    # The name is already taken: treat it as ordinary bold.
                    Output.append("'''")
            elif tag in _SIMPLE_TAGS:
                text, flag = _SIMPLE_TAGS[tag]
                Output.append(text)
                Flag += flag
            elif tag == "MATH":
                Output.append("<" + TagID.lower() + ">")
            elif tag == "IMG":
                Output.append("'''Missing Image:" + TagID + "'''")
                Flag += "I"
            else:
                Output.append("<" + TagID + ">")

        elif mode == "A":
            if q == "<":
                if LinkID != "":
                    Output.append("[[" + CapIt(LinkID) + "|" + LinkID + "]]")
                mode = "End"
                LinkID = ""
            else:
                LinkID += q

        elif mode == "Strong":
            if q == "<":
                Name = CapIt(Name)
                mode = "End"
                Named = 1
            else:
                Name += q

        # Weak mode -- when the boson is hit by a neutrino it changes the
        # flavor of the nearest... Oh.  Sorry, wrong model.

        elif mode == "End":
            if q == ">":
                mode = "Text"

        # Um, what mode are we in again?
        else:
            raise TypeError("Parser internal error: WTF is "+mode+" mode??")

    return ["".join(Output), Name, Flag]


def Stringify(listish):
    """Flatten an arbitrarily nested list of strings into one string.

    Raises TypeError if anything other than a string or a list is found.
    """
    pieces = []
    for item in listish:
        if isinstance(item, str):
            pieces.append(item)
        elif isinstance(item, list):
            pieces.append(Stringify(item))
        else:
            # The offending value used to be printed before raising; it is
            # now carried in the exception message instead.
            raise TypeError("non-string non-list!!! got %r of %s"
                            % (item, type(item)))
    return "".join(pieces)


def KingMe(stringy):
    '''capitalizer that doesn't uncapitalize'''
    # Slicing (not indexing) keeps the empty string from raising IndexError.
    return stringy[:1].upper() + stringy[1:]


def KingMe2(stringy):
    '''capitalizer that doesn't uncapitalize, but doesn't capitalise some
    things (short prepositions and conjunctions are returned unchanged)'''
    if stringy.lower() in _SMALL_WORDS:
        return stringy
    return KingMe(stringy)


def ReadTag(rawtext):
    """Return the tag name: the text before the first space, in SCREAMING
    CAPITALS, with backslashes, periods and spaces stripped from both ends."""
    return rawtext.split(' ', 1)[0].upper().strip('\\. ')


def _cap_words(Name, capper):
    """Split Name on whitespace, strip ',' and '.' from both ends of each
    word, apply capper to each and re-join with single spaces.  Words that
    are nothing but punctuation are dropped (they used to crash)."""
    words = [word.strip(',.') for word in Name.split()]
    return " ".join(capper(word) for word in words if word)


def CapIt(Name):
    """Consistently return a Capitalized Form of whatever name is plugged
    into it; small words (see KingMe2) are left as written."""
    return _cap_words(Name, KingMe2)


def CapAll(Name):
    """Like CapIt(), but capitalizes every word -- calls the original
    KingMe()."""
    return _cap_words(Name, KingMe)


def FindRedir(gunk):
    """Return the redirect target found in wiki text.

    Collects the target of every "[[target|" or "[[target]" in gunk.
    Returns None if there is none, the target itself if there is exactly
    one, and otherwise the first of the longest targets (None if they are
    all empty).
    """
    links = _WIKILINK_RE.findall(gunk)
    if not links:
        return None
    if len(links) == 1:
        return links[0]
    return max(links, key=len) or None


# Leading-Space-Remove-O-Mat, embedded version
def deforespace(filish):
    """Strip leading whitespace from every line in filish and return the
    result as a single string.  (The stand-alone version read and wrote
    files; this one works on lines already in memory.)"""
    return "".join(line.lstrip() for line in filish)

#Taken from Hosta...
# Welcome guest, ACCWOM2
# three functions to help take klutz behind keyb out of picture better

def Listify(stringy):
    """Return the characters of stringy as a list (a mutable copy).

    If stringy is a sequence of strings instead, the result is the flat
    list of all their characters.
    """
    return [char for piece in stringy for char in piece]

#print Listify("Peter Piper Picked a Peck of Prickly Peppercorns")


def Dehyphenation(stringy):
    """Replace every hyphen in stringy with a space.

    Returns (text, positions): the de-hyphenated text with surrounding
    whitespace stripped, and the indexes at which hyphens were found.  The
    indexes are counted before the stripping.
    """
    listy = Listify(stringy)            # convert to mutable type
    foundat = [where for where, char in enumerate(listy) if char == "-"]
    newish = [" " if char == "-" else char for char in listy]
    # return to presumed string form and show where the hyphens used to be
    return ("".join(newish).strip(), foundat)


def Rehyphenation(stringy, findat):
    """Return stringy with the character at every index listed in findat
    replaced by a hyphen (the counterpart of Dehyphenation)."""
    hyphen_at = set(findat)             # one lookup per character
    return "".join(["-" if where in hyphen_at else char
                    for where, char in enumerate(Listify(stringy))])


# okay, four
def deparenth(stringy):
    """removes parentheticals and now with disequality!

    Everything from the first "(" on is dropped.  If an "=" is left, one
    leading "=" is dropped and then everything from the next "=" on.  The
    result is stripped of surrounding whitespace.
    """
    # strip out everything after the first sign of parentheses
    stringy = stringy.split("(")[0].strip()
    # if that's it, send it along
    if "=" not in stringy:
        return stringy
    # if it starts with an equal sign, lose it
    if stringy.startswith("="):
        stringy = stringy[1:].strip()
    # strip out everything after the first sign of equality
    # We brighten up your picture and we send you on your way
    # So don't forget to turn to us, MTV, today!
    return stringy.split("=")[0].strip()


#Get raw html
# Earlier runs took the text from raw_input(), and later from the files for
# letters A through G (Raw_SP-7_A.txt, Raw_SP-7_B.txt_, Raw_SP-7_C.txt_,
# SP7-d.html_ ... SP7-g.html_ under /home/Luna), read as whole strings.
# This version reads the H through Z pages line by line.
RawML = []
for _letter in "hijklmnopqrstuvwxyz":
    with open('/home/Luna/SP7_' + _letter + '.html') as tsv:
        RawML += tsv.readlines()
RawML = deforespace(RawML)
##raise
## ...Exception, "stopping here"   (tail of a commented-out debugging stop)

# Parse Page into slices using <DT> tags
#RawList = RawML.split('<DT>')
# regular expression module gets used here
RawList = re.split("<[Dd][Tt]>",RawML)

#Create ProtoArticles and Redirects and stuff them full of proto-articles
ProtoArticles = []
Redirects = []
# These two flags are no longer needed by the loop below; they are kept
# defined (and zero, as the loop used to leave them) in case code further
# down the file reads them -- TODO confirm and remove.
bailout = 0
punchout = 0

for q in RawList:
    if q == "":
        continue
    artie = Parser(q)       # [article text, article name, flags]

    #constellation definitions now on topic
    if (artie[0].count('=',0,5) or artie[0].upper().count('SEE',0,20)):
        # An '=' within the first 5 characters or a 'see' within the first
        # 20 marks a redirect-style entry.  It is recorded only if it has
        # both a name and a link target.
        artie2 = FindRedir(artie[0])
        # A long commented-out table of per-name patches used to sit here
        # ("X (abbr ...)" -> "X", plus names to skip, for entries beginning
        # with A and C through G).  deparenth() below appears to cover the
        # same ground for the parenthesized cases.
        #table for collections and bailouts
        if artie2 and artie[1] != "":
            Redirects += [(deparenth(artie[1]),artie2)]
    else:
        #name patch list
        # Unnamed entries still go through the motions below but are not
        # added to the article list.
        unnamed = (artie[1] == "")

        # Autostub3 Correction Code Write-O-Mat II -- now no longer an
        # independent piece of code.  Everything is fine.  You will be
        # happy about it or Dr X will be displeased and unsuccessfully try
        # to send his roommate's girlfriend after you.
        base,hyphs = Dehyphenation(artie[1])
        base = deparenth(base)
        newname = CapIt(base)

        # Report names that contain (or still contain) '(' or '='.
        for mark in ("(", "="):
            if artie[1].count(mark):
                print("artie[1] == " + artie[1])
            if newname.count(mark):
                print("newname == " + newname)
        artie[1] = newname

        perm1 = CapIt(base)         # solid caps, no hyphens
        perm2 = CapAll(base)        # not quite solid caps, no hyphens
        perm3 = base                # original caps, no hyphens
        perm4 = base.capitalize()   # only first is capped, no hyphens
        #adding above plus with hyphens
        newentry = [newname, perm1, perm2, perm3, perm4,
                    Rehyphenation(perm1,hyphs),
                    Rehyphenation(perm2,hyphs),
                    Rehyphenation(perm3,hyphs),
                    Rehyphenation(perm4,hyphs)]

        #attempting to pare out the redundants (order is kept)
        renewentry = []
        for qp in newentry:
            if qp not in renewentry:
                renewentry.append(qp)

        #and now to create the redirects...
        if len(renewentry) > 1:
            for qp in renewentry:
                if qp != artie[1]:
                    Redirects += [[qp,"#REDIRECT: [["+artie[1]+"]]"]]

        #add to final list
        # (a stray "Acceleration Of Gravity (" followed this comment in the
        # original)
        if not unnamed:
            ProtoArticles += [artie]

# Redirect-Creation-O-Mat -- Obsolete!  Go sit in the corner with
# Devourer of Worlds, Andy's submarine that flies, and the Landmark
# Destruct-O-Mat.  (It made redirects from artie[1].capitalize() and
# CapAll(artie[1]); the permutation code above replaces it.)


#post everything corrections (should have thought of this earlier)
# redirect name as generated -> name it should have.  The original table
# listed "Impact strength or impact energy" twice; the first entry, which
# would have won, is the one kept.
_REDIRECT_NAME_FIXES = {
    "Her herc": "Herc",
    "Hor horo": "Horo",
    "Hya hyda": "Hyda",
    "Hyi hydi": "Hydi",
    "Impact strength or impact energy": "Impact Energy",
    "Ind indi": "Indi",
    "J D-splay": "J-Display",
    "J d-splay": "J-display",
    "Lep leps": "Leps",
    "Lib libr": "Libr",
    "Lmi l min": "Lmi",
    "Lup lupi": "Lupi",
    "Lyn lyne": "Lyne",
    "Lyr lyra": "Lyr",
    "Men mens": "Mens",
    "Mic micr": "Micr",
    "Mon mono": "Mon",
    "Nor norm": "Norm",
    "Oct octn": "Octn",
    "Oph ophi": "Ophi",
    "Ori orio": "Orio",
    "Pav pavo": "Pavo",
    "Peg pegs": "Pegs",
    "Per pers": "Pers",
    "Phe phoe": "Phoe",
    "Pic pict": "Pict",
    "Positive G Or Positive G": "Positive G",
    "Psa psc a": "Psa",
    "Psc pisc": "Pisc",
    "Pup pupp": "Pupp",
    "Scl scul": "Scul",
    "Sco scor": "Scor",
    "Sct scut": "Scut",
    "Ser serp": "Serp",
    "Sex sext": "Sext",
    "Sge sgte": "Sgte",
    "Sgr sgtr": "Sgtr",
    "Tau taur": "Taur",
    "Tra tr au": "Tra",
    "Tri tria": "Tria",
    "Tuc tucn": "Tucn",
    "Uma u maj": "Uma",
    "Umi u min": "Umi",
    "Vel velr": "Velr",
    "Vir virg": "Virg",
    "Vol voln": "Voln",
    "Vul vulp": "Vulp",
    "Jato jato or jato": "Jato",
    "Rato rato or rato": "Rato",
}

# The original chain read `q[0] == "Herc"` (a comparison, not an
# assignment), so none of these corrections ever took effect.  Entries are
# rebuilt rather than modified in place because some are tuples; each keeps
# its own container type.
Rechecked = []
for q in Redirects:
    fixed = _REDIRECT_NAME_FIXES.get(q[0])
    if fixed is not None:
        q = type(q)([fixed] + list(q[1:]))
    Rechecked += [q]
Redirects = Rechecked

print(str(len(ProtoArticles)) + " articles")
print(str(len(Redirects)) + " redirects")

### So tell me, why are full articles mixed in with the redirect list? --fixed
# (Commented-out debugging used to follow here: dumping the redirect names
# to G_H501_2321B.txt, a designated stopping point, and a lookup of the
# positions of "Aurora" and "Absolute Magnitude" in ProtoArticles.)

# # #   from PreAutostub3:                                       # # #
#                                                                    #
#     ProtoArticles (article text, article name, flags)              #
#     Redirects (redirect name, redirect to this article)            #
# # #                                                            # # #

# Characters that have to be written as entities inside XML text.
_XML_ENTITIES = {"<": "&lt;", ">": "&gt;", "&": "&amp;", '"': "&quot;"}

def XMLproof(textish):
    """Return textish with <, >, & and the double quote replaced by their
    XML entities; every other character is passed through unchanged."""
    # Character by character, so an '&' introduced by one replacement is
    # never escaped a second time.
    return "".join(_XML_ENTITIES.get(q, q) for q in textish)


def StartXML():
    """Return the opening of a MediaWiki XML export: the <mediawiki> tag
    and the <siteinfo> section.

    The result is a list whose first item is itself a list (the shape
    Stringify() flattens).
    """
    out = [['<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">\n']]
    out += [' <siteinfo>\n']
    out += [' <sitename>Lunarpedia</sitename>\n']
    out += [' </siteinfo>\n']
    return out


def EndXML():
    """Return the closing </mediawiki> tag, as a nested list."""
    out = [['</mediawiki>\n']]
    return out


def ArtXML(title, contrib, date, text):
    '''XML markup for article in file
    title   -- title of article
    contrib -- name of script (ie Autostub2)
    date    -- placed in the <timestamp> element
    text    -- the article; its items are spliced into the output one by
               one (so a plain string is added character by character)
    Returns a list whose first item is itself a list.
    '''
    out = [[' <page>\n']]
    out += [' <title>'+title+'</title>\n']
    out += [' <revision>\n']
    out += [' <timestamp>'+date+'</timestamp>']
    out += [' <contributor>\n']
    out += [' <username>'+contrib+'</username>\n']
    out += [' </contributor>\n']
    out += [' <text xml:space="preserve">']
    out += text
    out += ['</text>\n']
    out += [' </revision>\n']
    out += [' </page>\n']
    return out


def linebrk(listish):
    """Return a new list with a newline appended to every item."""
    return [q+"\n" for q in listish]


# NOTE(review): dodef() continues past this point in the source; only
# the part that shares this source line is reproduced here, unchanged.
def dodef(thisentry):
    """Create stub article from sequence """
    global TESTNO
    global NewCats
    #
    # 0 -- body of definition article
    # 1 -- name of article
    # 2 -- flags
    #       I: missing image
    #       R: needs references section
    #
    #%#%#%#%#%#%#%#%#%#%#%#%#%#%%#%#%#%#%#
    #
    C_Acoustics = 0     # Acoustics categorization hit count
    C_Aerospace = 0     # Aerospace categorization hit count
    C_Aircraft = 0      # Aircraft categorization hit count
    C_Astronomy = 0     # Astronomy categorization hit count
    C_Chemistry = 0     # Chemistry categorization hit count
    C_Components = 0    # Components categorization hit count
    C_Computing = 0     # Computing categorization hit count
    C_Electronics = 0   # Electronics categorization hit count
    C_Engineering = 0   # Engineering categorization hit count
    C_Hardware = 0      # Hardware categorization hit count
    C_Instruments = 0   # Instruments categorization hit count
    C_Mathematics = 0   # Mathematics categorization hit count
    C_Measurements = 0  # Measurements categorization hit count
    C_Medical = 0       # Medical categorization hit count
    C_Navigation = 0    # Navigation categorization hit count
    C_Nuclear = 0       # Nuclear categorization hit count
    C_Optics = 0        # Optics categorization hit count
    C_Photography = 0   #
Photography categorization hit count C_Physics = 0 # Physics categorization hit count C_Radio = 0 # Radio categorization hit count C_Rocketry = 0 # Rocketry categorization hit count C_Statistics = 0 # Statistics categorization hit count C_Time = 0 # Time categorization hit count # #%#%#%#%#%#%#%#%#%#%#%#%#%#%%#%#%#%#%# # # start generating the article here: # to_out = [] to_out = ["{{Autostub}}"] #to_out += ["{{Autostub}}"] to_out += ["{{Initial Proof Needed}}"] to_out += ["{{Initial Categorization Proof Needed}}"] ##print thisentry[0] ##print "*****" ##print thisentry[1] ##print "*****" ##print thisentry[2] to_out += ["'''"+thisentry[1]+"'''"] to_out += [thisentry[0]] #print "*****" #print to_out to_out += ["==References=="] to_out += ["''This article is based on NASA's [[NASA SP-7|Dictionary of Technical Terms for Aerospace Use]]''"] if thisentry[2].count("R"): to_out += ["<references/>"] to_out += ["[[Category%3ADefinitions]]"] to_out += ["[[Category%3ANASA SP-7]]"] if thisentry[2].count("I"): to_out += ["[[Category%3ADefinitions with Missing Images]]"] #print "Bob was there too." 
# assign alphabetical categorizations ##print thisentry to_out += ["[[Category%3A"+thisentry[1][0].upper()+" (all)]]"] twodigit = thisentry[1][0:2].upper() if len(twodigit) == 2: to_out += ["[[Category%3A"+twodigit+"]]"] #print twodigit if not NewCats.count(twodigit): NewCats += [twodigit] # assigning provisional categorizations based on keystrings if thisentry[0].lower().count("acoustic"): C_Acoustics += 1 if thisentry[0].lower().count("aero"): C_Aerospace += 1 C_Aircraft += 1 if thisentry[0].lower().count("aerospace"): C_Aerospace += 1 if thisentry[0].lower().count("altitud"): C_Aerospace += 1 C_Aircraft += 1 C_Instruments += 1 C_Measurements += 1 if thisentry[0].lower().count("angle"): C_Mathematics += 1 C_Measurements += 1 C_Navigation += 1 if thisentry[0].lower().count("antenna"): C_Components += 1 C_Instruments += 1 C_Radio += 1 if thisentry[0].lower().count("aphelion"): C_Astronomy += 1 if thisentry[0].lower().count("apogee"): C_Astronomy += 1 if thisentry[0].lower().count("astronaut"): C_Aerospace += 1 C_Rocketry += 1 if thisentry[0].lower().count("astronomy"): C_Astronomy += 1 if thisentry[0].lower().count("atmospher"): C_Aerospace += 1 C_Aircraft += 1 if thisentry[0].lower().count("atom"): C_Nuclear += 1 C_Physics += 1 if thisentry[0].lower().count("attitude"): C_Mathematics += 1 C_Navigation += 1 if thisentry[0].lower().count("axes"): C_Mathematics += 1 C_Navigation += 1 if thisentry[0].lower().count("axis"): C_Mathematics += 1 C_Navigation += 1 if thisentry[0].lower().count("azimuth"): C_Mathematics += 1 C_Navigation += 1 if thisentry[0].lower().count("baro"): C_Instruments += 1 if thisentry[0].lower().count("binary"): C_Computing += 1 if thisentry[0].lower().count("bodily"): C_Medical += 1 if thisentry[0].lower().count("body"): C_Medical += 1 if thisentry[0].lower().count("candle"): C_Astronomy += 1 C_Optics += 1 if thisentry[0].lower().count("celestial"): C_Astronomy += 1 if thisentry[0].lower().count("cells"): C_Medical += 1 if 
thisentry[0].lower().count("chemical"): C_Chemistry += 1 if thisentry[0].lower().count("chromat"): C_Optics += 1 C_Photography += 1 if thisentry[0].lower().count("circuit"): C_Electronics += 1 if thisentry[0].lower().count("coeff"): C_Mathematics += 1 C_Statistics += 1 if thisentry[0].lower().count("compression"): C_Acoustics += 1 C_Mathematics += 1 if thisentry[0].lower().count("computer"): C_Computing += 1 if thisentry[0].lower().count("construct"): C_Engineering += 1 if thisentry[0].lower().count("convuls"): C_Medical += 1 if thisentry[0].lower().count("cooling"): C_Components += 1 if thisentry[0].lower().count("cosmic"): C_Astronomy += 1 if thisentry[0].lower().count("deci"): C_Measurements += 1 if thisentry[0].lower().count("dental"): C_Medical += 1 if thisentry[0].lower().count("dimen"): C_Mathematics += 1 C_Physics += 1 if thisentry[0].lower().count("displ"): C_Components += 1 C_Electronics += 1 C_Instruments += 1 if thisentry[0].lower().count("duct"): C_Aircraft += 1 C_Components += 1 C_Engineering += 1 C_Hardware += 1 if thisentry[0].lower().count("ear"): C_Medical += 1 if thisentry[0].lower().count("east"): C_Measurements += 1 C_Navigation += 1 if thisentry[0].lower().count("eclipse"): C_Astronomy += 1 if thisentry[0].lower().count("electr"): C_Electronics += 1 if thisentry[0].lower().count("elevation"): C_Measurements += 1 C_Navigation += 1 if thisentry[0].lower().count("ellip"): C_Mathematics += 1 C_Navigation += 1 if thisentry[0].lower().count("emit"): C_Physics += 1 C_Radio += 1 if thisentry[0].lower().count("emuls"): C_Photography += 1 if thisentry[0].lower().count("energy"): C_Chemistry += 1 C_Nuclear += 1 C_Physics += 1 if thisentry[0].lower().count("engine"): C_Aircraft += 1 C_Engineering += 1 C_Hardware += 1 C_Rocketry += 1 if thisentry[0].lower().count("equat"): C_Mathematics += 1 if thisentry[0].lower().count("flight"): C_Aerospace += 1 C_Aircraft += 1 C_Rocketry += 1 if thisentry[0].lower().count("floating"): C_Computing += 1 C_Mathematics += 
1 if thisentry[0].lower().count("fluid"): C_Chemistry += 1 C_Engineering += 1 C_Photography += 1 if thisentry[0].lower().count("frequenc"): C_Acoustics += 1 C_Optics += 1 C_Radio += 1 if thisentry[0].lower().count("galactic"): C_Astronomy += 1 if thisentry[0].lower().count("galax"): C_Astronomy += 1 if thisentry[0].lower().count("gas"): C_Acoustics += 1 C_Astronomy += 1 C_Chemistry += 1 C_Physics += 1 if thisentry[0].lower().count("gate"): C_Computing += 1 C_Electronics += 1 if thisentry[0].lower().count("great circ"): C_Mathematics += 1 C_Navigation += 1 if thisentry[0].lower().count("guidance"): C_Instruments += 1 C_Navigation += 1 C_Rocketry += 1 if thisentry[0].lower().count("gyro"): C_Components += 1 C_Instruments += 1 C_Navigation += 1 if thisentry[0].lower().count("heat"): C_Nuclear += 1 C_Physics += 1 if thisentry[0].lower().count("helio"): C_Astronomy += 1 if thisentry[0].lower().count("human"): C_Medical += 1 if thisentry[0].lower().count("indicator"): C_Instruments += 1 if thisentry[0].lower().count("inertia"): C_Navigation += 1 C_Physics += 1 C_Rocketry += 1 if thisentry[0].lower().count("instrument"): C_Instruments += 1 if thisentry[0].lower().count("lagrang"): C_Astronomy += 1 if thisentry[0].lower().count("launching"): C_Aerospace += 1 C_Rocketry += 1 if thisentry[0].lower().count("leg"): C_Medical += 1 if thisentry[0].lower().count("limb"): C_Medical += 1 if thisentry[0].lower().count("liquid"): C_Photography += 1 C_Physics += 1 if thisentry[0].lower().count("logic"): C_Computing += 1 C_Electronics += 1 C_Mathematics += 1 if thisentry[0].lower().count("magnet"): C_Electronics += 1 C_Instruments += 1 C_Physics += 1 if thisentry[0].lower().count("malfunction"): C_Engineering += 1 if thisentry[0].lower().count("maneuv"): C_Aerospace += 1 C_Aircraft += 1 C_Navigation += 1 C_Rocketry += 1 if thisentry[0].lower().count("measure"): C_Mathematics += 1 C_Measurements += 1 if thisentry[0].lower().count("medic"): C_Medical += 1 if 
thisentry[0].lower().count("memory"): C_Computing += 1 if thisentry[0].lower().count("meter"): C_Measurements += 1 if thisentry[0].lower().count("mile"): C_Measurements += 1 if thisentry[0].lower().count("modulation"): C_Acoustics += 1 C_Electronics += 1 C_Optics += 1 C_Radio += 1 if thisentry[0].lower().count("molar"): C_Chemistry += 1 if thisentry[0].lower().count("mole"): C_Chemistry += 1 C_Physics += 1 if thisentry[0].lower().count("moon"): C_Astronomy += 1 if thisentry[0].lower().count("motion picture"): C_Photography += 1 if thisentry[0].lower().count("muscle"): C_Mathematics += 1 if thisentry[0].lower().count("navigat"): C_Navigation += 1 if thisentry[0].lower().count("neutron"): C_Physics += 1 if thisentry[0].lower().count("newton"): C_Measurements += 1 C_Physics += 1 if thisentry[0].lower().count("north"): C_Measurements += 1 C_Navigation += 1 if thisentry[0].lower().count("notation"): C_Mathematics += 1 C_Physics += 1 if thisentry[0].lower().count("nozz"): C_Components += 1 C_Engineering += 1 if thisentry[0].lower().count("nuclear"): C_Nuclear += 1 if thisentry[0].lower().count("number"): C_Mathematics += 1 C_Statistics += 1 if thisentry[0].lower().count("occultation"): C_Astronomy += 1 if thisentry[0].lower().count("ometer"): C_Instruments += 1 if thisentry[0].lower().count("orbit"): C_Aerospace += 1 C_Astronomy += 1 C_Rocketry += 1 if thisentry[0].lower().count("oscill"): C_Acoustics += 1 C_Instruments += 1 C_Optics += 1 C_Radio += 1 if thisentry[0].lower().count("osphere"): C_Aerospace += 1 if thisentry[0].lower().count("particle"): C_Physics += 1 if thisentry[0].lower().count("perhelion"): C_Astronomy += 1 if thisentry[0].lower().count("perigee"): C_Astronomy += 1 if thisentry[0].lower().count("photograph"): C_Photography += 1 if thisentry[0].lower().count("physicist"): C_Physics += 1 if thisentry[0].lower().count("physiolog"): C_Medical += 1 if thisentry[0].lower().count("pitch"): C_Aerospace += 1 C_Aircraft += 1 C_Instruments += 1 C_Navigation += 1 
if thisentry[0].lower().count("planet"): C_Astronomy += 1 if thisentry[0].lower().count("pressure"): C_Aerospace += 1 C_Aircraft += 1 C_Engineering += 1 if thisentry[0].lower().count("prob"): C_Mathematics += 1 C_Statistics += 1 if thisentry[0].lower().count("processing"): C_Computing += 1 if thisentry[0].lower().count("proton"): C_Physics += 1 if thisentry[0].lower().count("radar"): C_Electronics += 1 C_Instruments += 1 if thisentry[0].lower().count("radio "): C_Radio += 1 if thisentry[0].lower().count("radioactive"): C_Nuclear += 1 if thisentry[0].lower().count("ray"): C_Astronomy += 1 C_Mathematics += 1 if thisentry[0].lower().count("reactions"): C_Chemistry += 1 C_Nuclear += 1 if thisentry[0].lower().count("reactor"): C_Nuclear += 1 if thisentry[0].lower().count("refract"): C_Optics += 1 if thisentry[0].lower().count("reliability"): C_Engineering += 1 if thisentry[0].lower().count("roll"): C_Instruments += 1 C_Navigation += 1 if thisentry[0].lower().count("rotation"): C_Astronomy += 1 C_Instruments += 1 C_Navigation += 1 if thisentry[0].lower().count("scope"): C_Instruments += 1 if thisentry[0].lower().count("sea level"): C_Aerospace += 1 C_Aircraft += 1 C_Instruments += 1 if thisentry[0].lower().count("signal"): C_Instruments += 1 C_Radio += 1 if thisentry[0].lower().count("solar"): C_Astronomy += 1 if thisentry[0].lower().count("solid"): C_Physics += 1 if thisentry[0].lower().count("sonic"): C_Acoustics += 1 C_Aircraft += 1 if thisentry[0].lower().count("sound"): C_Acoustics += 1 if thisentry[0].lower().count("south"): C_Navigation += 1 if thisentry[0].lower().count("spacecraft"): C_Aerospace += 1 C_Rocketry += 1 if thisentry[0].lower().count("spectrum"): C_Optics += 1 C_Radio += 1 if thisentry[0].lower().count("stability"): C_Aerospace += 1 C_Aircraft += 1 C_Instruments += 1 C_Navigation += 1 if thisentry[0].lower().count("star"): C_Astronomy += 1 if thisentry[0].lower().count("statistic"): C_Statistics += 1 if thisentry[0].lower().count("structural"): 
C_Engineering += 1 if thisentry[0].lower().count("takeoff"): C_Aircraft += 1 C_Rocketry += 1 if thisentry[0].lower().count("teeth"): C_Medical += 1 if thisentry[0].lower().count("terrestrial"): C_Navigation += 1 C_Optics += 1 if thisentry[0].lower().count("testing"): C_Engineering += 1 if thisentry[0].lower().count("thermodynamic"): C_Physics += 1 if thisentry[0].lower().count("time"): C_Time += 1 if thisentry[0].lower().count("tooth"): C_Medical += 1 if thisentry[0].lower().count("tracking"): C_Optics += 1 C_Radio += 1 if thisentry[0].lower().count("trajectory"): C_Aerospace += 1 C_Aircraft += 1 C_Mathematics += 1 C_Navigation += 1 C_Physics += 1 C_Rocketry += 1 if thisentry[0].lower().count("transmis"): C_Radio += 1 if thisentry[0].lower().count("transmit"): C_Radio += 1 if thisentry[0].lower().count("transponder"): C_Components += 1 C_Radio += 1 if thisentry[0].lower().count("tron"): C_Electronics += 1 C_Nuclear += 1 C_Physics += 1 if thisentry[0].lower().count("unit of"): C_Measurements += 1 if thisentry[0].lower().count("vacuum"): C_Aerospace += 1 C_Physics += 1 C_Rocketry += 1 if thisentry[0].lower().count("value"): C_Mathematics += 1 C_Measurements += 1 if thisentry[0].lower().count("vector"): C_Mathematics += 1 C_Navigation += 1 C_Rocketry += 1 if thisentry[0].lower().count("vehicle"): C_Aerospace += 1 C_Rocketry += 1 if thisentry[0].lower().count("velocity"): C_Navigation += 1 C_Physics += 1 if thisentry[0].lower().count("visco"): C_Physics += 1 if thisentry[0].lower().count("wave"): C_Acoustics += 1 C_Optics += 1 C_Radio += 1 if thisentry[0].lower().count("wavelength"): C_Optics += 1 C_Radio += 1 if thisentry[0].lower().count("west"): C_Navigation += 1 if thisentry[0].lower().count("yaw"): C_Instruments += 1 C_Navigation += 1 if thisentry[0].lower().count("year"): C_Time += 1 # inserting assigned provisional categorizations and noting times it was set off if C_Acoustics: to_out += ["[[Category%3AAcoustics]] <!-- "+str(C_Acoustics)+" -->"] if C_Aerospace: 
to_out += ["[[Category%3AAerospace]] <!-- "+str(C_Aerospace)+" -->"] if C_Aircraft: to_out += ["[[Category%3AAircraft]] <!-- "+str(C_Aircraft)+" -->"] if C_Astronomy: to_out += ["[[Category%3AAstronomy]] <!-- "+str(C_Astronomy)+" -->"] if C_Chemistry: to_out += ["[[Category%3AChemistry]] <!-- "+str(C_Chemistry)+" -->"] if C_Components: to_out += ["[[Category%3AComponents]] <!-- "+str(C_Components)+" -->"] if C_Computing: to_out += ["[[Category%3AComputing]] <!-- "+str(C_Computing)+" -->"] if C_Electronics: to_out += ["[[Category%3AElectronics]] <!-- "+str(C_Electronics)+" -->"] if C_Engineering: to_out += ["[[Category%3AEngineering]] <!-- "+str(C_Engineering)+" -->"] if C_Hardware: to_out += ["[[Category%3AHardware]] <!-- "+str(C_Hardware)+" -->"] if C_Instruments: to_out += ["[[Category%3AInstruments]] <!-- "+str(C_Instruments)+" -->"] if C_Mathematics: to_out += ["[[Category%3AMathematics]] <!-- "+str(C_Mathematics)+" -->"] if C_Measurements: to_out += ["[[Category%3AMeasurements]] <!-- "+str(C_Measurements)+" -->"] if C_Medical: to_out += ["[[Category%3AMedical]] <!-- "+str(C_Medical)+" -->"] if C_Navigation: to_out += ["[[Category%3ANavigation]] <!-- "+str(C_Navigation)+" -->"] if C_Nuclear: to_out += ["[[Category%3ANuclear]] <!-- "+str(C_Nuclear)+" -->"] if C_Optics: to_out += ["[[Category%3AOptics]] <!-- "+str(C_Optics)+" -->"] if C_Photography: to_out += ["[[Category%3APhotography]] <!-- "+str(C_Photography)+" -->"] if C_Physics: to_out += ["[[Category%3APhysics]] <!-- "+str(C_Physics)+" -->"] if C_Radio: to_out += ["[[Category%3ARadio]] <!-- "+str(C_Radio)+" -->"] if C_Rocketry: to_out += ["[[Category%3ARocketry]] <!-- "+str(C_Rocketry)+" -->"] if C_Statistics: to_out += ["[[Category%3AStatistics]] <!-- "+str(C_Statistics)+" -->"] if C_Time: to_out += ["[[Category%3ATime]] <!-- "+str(C_Time)+" -->"] if (C_Acoustics + C_Aerospace + C_Aircraft + C_Astronomy + C_Chemistry + C_Components + C_Computing + C_Electronics + C_Engineering + C_Hardware + C_Instruments 
+ C_Mathematics + C_Measurements + C_Medical + C_Navigation + C_Nuclear + C_Optics + C_Photography + C_Physics + C_Radio + C_Rocketry + C_Statistics + C_Time) < 1: to_out += ["[[Category%3AMiscellaneous]] <!-- Autostub3 unable to provisionally categorize -->"] ##to_out += [""] ##to_out += ["<!-- Generated by a gamma candidate version of Autostub3 (Test "+TESTNO+") -->"] to_out = linebrk(to_out) return to_out def doredir(thisentry): """Create redirect from two item sequence """ # # 0 -- name of redirect # 1 -- redirect to this article # #%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%# # # #%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%# # # start generating the redirect here: # if thisentry[1][0:9] == "#REDIRECT": return thisentry[1] else: return ["#REDIRECT: [["+thisentry[1]+"]]"] def TLcat_o_mat(cats): """Create category pages for two letter indices""" # # 0 -- name of category # 1 -- text and subcategorization # #%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%# # # #%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%#%# # # start generating the redirect here: # all_out = [] #print cats for q in cats: #print "Bob was there, too." all_out += [["Category:"+q,"This category includes definitions starting with the letters "+ q+". 
\n\n\n [[Category:"+q[0]+"]]"]] #print all_out return all_out def doare(tup): proofme = doredir(tup) proofed = [] for q in proofme: proofed += XMLproof(q) return ArtXML(tup[0], "Autostub3", "2007-05-02T00:00:00Z", proofed) def doanart(tup): proofme = dodef(tup) proofed = [] for q in proofme: proofed += XMLproof(q) return ArtXML(tup[1], "Autostub3", "2007-05-01T00:00:00Z", proofed) def doacat(tup): #proofme = dodef(tup) proofed = [] for q in tup[1]: proofed += XMLproof(q) return ArtXML(tup[0], "Autostub3", "2007-05-02T00:00:00Z", proofed) def file_o_mat(glubby, namebase): #namebase = "/home/Luna/autostub3_A" # base name glubglub = [] # pieces parts in one big part maxout = 64 # about how many to each xml file tomax = 0 # how many so far glublet = [] # the miniglub presently being assembled execute = [] # to be fused. And parts is parts. nowserving = 1 # which chunk is being saved. If you wait for it to get cold it's not my problem. for q in glubby: if tomax < maxout: glublet += [q] tomax += 1 else: glubglub += [glublet] glublet = [q] tomax = 1 glubglub += [glublet] for miniglub in glubglub: print str(miniglub[0][1])+" -- "+str(miniglub[-1][1]) execute = StartXML() execute += miniglub execute += EndXML() execute = Stringify(execute) do_xml=open(namebase+"_"+str(nowserving)+".xml", 'w') #print "creating "+namebase+"_"+str(nowserving)+".xml" do_xml.write(execute) do_xml.close() nowserving += 1 #print execute #raise Exception, "stopping here" glub = [] # stick them all in here # uncomment for actual run for q in ProtoArticles: glub += [doanart(q)] for q in Redirects: glub += [doare(q)] DoCats = TLcat_o_mat(NewCats) for q in DoCats: glub += [doacat(q)] ##print "ready to run file-o-mat()" file_o_mat(glub,"/home/Luna/autostub3_H503") #uncomment to test ##import random ##for q in ProtoArticles: ## if (random.random() > 0.00): glub += [doanart(q)] ## ##for q in Redirects: ## if (random.random() > 0.92): glub += [doare(q)] ## ##DoCats = TLcat_o_mat(NewCats) ## ##for q in 
DoCats: ## if (random.random() > 0.5): glub += [doacat(q)] ## ##print glub ## ##file_o_mat(glub,"/home/Luna/autostub3_A2_test2") #testbench activities ##print NewCats ##print DoCats ##print Stringify(glub) to_out = [] ##to_out += "==Articles==\n" ##for q in ProtoArticles: to_out += "[["+q[1]+"]]<BR/>\n" ##for q in ProtoArticles: to_out += q[1]+"\n" ##to_out += "==Some of the Redirects==\n" ##for q in Redirects: to_out += "[["+q[0]+"]]<BR/>\n" ##for q in Redirects: to_out += q[0]+"\n" ##execute = Stringify(to_out) ##do_w=open("G_H501_0832.txt", 'w') ##do_w.write(execute) ##do_w.close()