(1)Bash shell
#!/bin/sh
# Create Date: 2013-01-16
#
# Schedule one "send list" task:
#   1. Compute a send-pool slot name (now + 2 minutes, minute rounded up to
#      an odd value) in the form YYYYmmdd.HHMM.
#   2. Append every non-empty line of ${SYSDIR}/input/list_url to
#      ${SYSDIR}/send_pool/<slot>.
#   3. Start bin/patch.py in the background with stdout/stderr logged.
#
# Only POSIX parameter expansion is used, so the script really runs under
# /bin/sh (the previous ${var:offset} substring syntax is bash-only).
# `date --date` is still a GNU date extension.

# SYSDIR is never assigned in this script; it must come from the environment.
# Abort early instead of silently writing to /send_pool.
: "${SYSDIR:?SYSDIR must be set to the system root directory}"

export POOLDIR="${SYSDIR}/send_pool" LIST_URL="${SYSDIR}/input/list_url"

# Generate the slot time (odd minute).
# NOTE(review): the log message below says "4min later" but the offset used
# here is 2 minutes -- confirm which one is intended.
STIME=$(date --date='+2 minute' +%Y%m%d.%H%M)

# Last character of STIME == last digit of the minute.
checktail=${STIME#"${STIME%?}"}
echo "@Debug: checktail=[${checktail}]"
if [ $((checktail % 2)) -eq 0 ]
then
    # An even digit is at most 8, so +1 never carries into the tens digit.
    another="${STIME%?}$((checktail + 1))"
    echo "@Debug: Round to odd [$another]"
    STIME=$another
fi

# Read from list_url, schedule sending.
# The slot file is created even when list_url is empty.
if [ ! -f "${POOLDIR}/${STIME}" ]
then
    touch "${POOLDIR}/${STIME}"
fi
echo "@Info: Sendlist Task Pushed(4min later/odd): [${STIME}]"

# -r keeps backslashes intact; the `|| [ -n "$line" ]` part also handles a
# final line that has no trailing newline. printf + quoting writes the line
# without word splitting or glob expansion (URLs often contain '?' or '*').
while read -r line || [ -n "$line" ]
do
    if [ -n "$line" ]
    then
        printf '%s\n' "$line"
    fi
done <"${LIST_URL}" >>"${POOLDIR}/${STIME}"

python "${SYSDIR}/bin/patch.py" >"${SYSDIR}/log/patch.py.all_log" 2>"${SYSDIR}/log/patch.py.all_err" &
(2)Python
--more--
#!/usr/bin/python
######################################################################
#
# File name:    patch.py
# Author:       niyishuai
# Last Update:  2013-01-31
#
#---------------------------------------------------------------------
#
# This program:
#   1. Reads [info] files generated by (recv_pool_schedual.sh).
#   2. Separates (list info) and (item info).
#     2.1 Gets new items from (list info)
#         and pushes (item-link) to [send_pool].
#     2.2 Updates item information using (item info).
#   3. Uses a (dict:Data) to hold all item data.
#   4. Outputs all item data periodically.
#
#---------------------------------------------------------------------
# Input Stream:     {sys}/info/\d+
# Output Stream:    {sys}/table/TIME    {sys}/output/TIME
# Input for Init:   {sys}/input/list_url
# Output Optional:  {sys}/send_pool/TIME
#
# Compatibility: only single-argument print(...) calls and
# sys.stderr.write() are used, so the file runs unchanged on
# Python 2.6+ and on Python 3.
######################################################################
import os
import time
import sys
import traceback

# Characters stripped from both ends of every token / value.
_STRIP_CHARS = ' \n\r\t'


#*********************************************************************
# Private helpers
#*********************************************************************
def _now(fmt):
    """Return the current local time formatted with *fmt*."""
    return time.strftime(fmt, time.localtime(time.time()))


def _log_err(msg):
    """Write one line to stderr."""
    sys.stderr.write(msg + "\n")


def _tokens(line):
    """Split *line* on single spaces and strip every token."""
    return [tok.strip(_STRIP_CHARS) for tok in line.split(" ")]


def _parse_attr(line):
    """Parse a 'key = value ...' line.

    Returns (key, value) where value is everything after the first
    " = ", or None when the line is not in that format.
    """
    toks = _tokens(line)
    if len(toks) < 3 or toks[1] != "=":
        return None
    pos = line.find(" = ")
    if pos < 0:
        # e.g. "key =\t value": looks like an assignment after stripping
        # but has no literal " = " separator to cut at.
        return None
    return toks[0], line[pos + 3:].strip(_STRIP_CHARS)


def _check_suffix(key, suffix, where):
    """Raise ValueError unless *key* ends with the 2-char *suffix*.

    A real exception is used instead of `assert`, which disappears
    under `python -O` and would let truncated keys into the table.
    """
    if key[-2:] != suffix:
        raise ValueError("Unexpected key [%s], expected suffix [%s] @[%s]"
                         % (key, suffix, where))


#*********************************************************************
# Class Definition
#*********************************************************************
class Item(object):
    """One crawled item: source list url, own url, timestamps, attributes."""

    def __init__(self):
        self.from_url = "[Null]"            # list url the item came from
        self.url = "[Null]"                 # item url (also its key in Data)
        self.create_time = _now('%Y-%m-%d.%H:%M')
        self.update_time = "Never Updated"  # set by solve_item()
        self.table = {}                     # attribute name -> tagged value


######################################################################
# Init Variables
######################################################################
def init():
    """Create every module-level global used by the other functions."""
    global Data, List_dict, List_list
    Data = {}                   # All items [url -> Item]
    List_dict = {}              # list url -> id (line number in list_url)
    List_list = [""] * 1000     # id -> list url (grown on demand)

    global Sysd, Fields, Field_cnt
    Sysd = "/home/work/nys/system"
    Fields = [
        "$0", "$1", "$2", "$3", "$4",
        "title", "content", "post-time", "type", "type2",       ## $5~$9
        "like-cnt", "hate-cnt", "fav-cnt", "view-cnt",
        "comment-cnt", "share-cnt",                             ## $10~$15
        "small-img", "big-img", "tag", "img-cnt",               ## $16~19
        "",
    ]
    Field_cnt = 19

    global DEBUG_MODE
    DEBUG_MODE = True


######################################################################
# Info:   Init [List_dict] for further use.
# Input:  {sys}/input/list_url
# ---------------TODO: Priority------------------
######################################################################
def init_list():
    """Load list urls (first space-separated token of each line).

    The id of a url is its 1-based line number. Blank lines keep their
    line number but are not registered.
    """
    with open(Sysd + "/input/list_url", "r") as f_list:
        for cnt, line in enumerate(f_list, 1):
            buf = line.split(" ")[0].strip(_STRIP_CHARS)
            if not buf:
                continue
            if cnt >= len(List_list):
                # More urls than the pre-sized capacity: grow, don't crash.
                List_list.extend([""] * (cnt + 1 - len(List_list)))
            List_list[cnt] = buf
            List_dict[buf] = cnt

    print("@@ Debug: list urls: [url, id] @init_list()")
    for k in List_dict:
        print("\t%s %s" % (k, List_dict[k]))


######################################################################
# Info:   Output all data to files
# Output: {sys}/table/YYYYmmdd.HHMM
# Output: {sys}/output/YYYYmmdd.HHMM
######################################################################
def output():
    """Dump every item in Data to a readable file and a tab-separated table.

    Tabs inside attribute values are replaced by spaces (in place) so
    they cannot break the table columns.
    """
    # One timestamp for both files, so the names cannot differ when the
    # minute changes between two strftime() calls.
    stamp = _now('%Y%m%d.%H%M')
    star_rule = "*" * 70
    dash_rule = "-" * 70

    with open(Sysd + "/output/" + stamp, "w") as f_output:
        with open(Sysd + "/table/" + stamp, "w") as f_table:
            for url in Data:
                item = Data[url]
                f_output.write(star_rule + "\n" + dash_rule + "\n")
                f_output.write(url + "\n")
                f_output.write("From ListUrl: " + item.from_url + "\n")
                f_output.write("Create time: " + item.create_time + "\n")
                f_output.write("Update time: " + item.update_time + "\n")
                f_output.write(dash_rule + "\n")
                f_table.write(url + "\t" + item.from_url + "\t" +
                              item.create_time + "\t" +
                              item.update_time + "\t")

                attrs = item.table
                for name in list(attrs):
                    text = attrs[name]
                    if '\t' in text:
                        _log_err("@@ Error: Tab Found!!{%s} @[output]"
                                 % (text,))
                        attrs[name] = text.replace('\t', ' ')
                    f_output.write("\t" + name + " := {" + attrs[name] + "}\n")

                # Table columns are Fields[5..Field_cnt]; missing -> "None".
                for ind in range(5, Field_cnt + 1):
                    f_table.write(attrs.get(Fields[ind], "None") + "\t")
                f_table.write("\n")


######################################################################
# Info:   Deal with (list urls) in [info], get new items
#         (and push item-link into [send_pool])
# Params: url(str) L(list)
# Output: {sys}/send_pool/YYmmdd.HHMM
######################################################################
def solve_list(url, L):
    """Register new items found in a list block.

    url -- the list url the block belongs to
    L   -- the block's lines ("key = value"); an "item-link" line starts
           a new item, following "<attr>_o" lines fill its table.

    New item links are written to the global f_send. Items already in
    Data are skipped together with their attribute lines.
    Raises ValueError for an attribute key without the "_o" suffix.
    """
    # scheme://host part of the list url; relative links are joined to it.
    slash = url.find("/", 8)
    domain = url[:slash] if slash >= 0 else url

    item = None
    skipping = False    # True while inside a known / invalid item
    for line in L:
        parsed = _parse_attr(line)
        if parsed is None:
            continue
        key, value = parsed

        # Caution! Use url-domain/item-link
        if key == "item-link":
            if not value:
                _log_err("@@ Error: Empty item-link! @[solve_list()]")
                skipping = True
                continue
            if value[:4] != "http":
                if value[0] == "/":
                    value = domain + value
                else:
                    value = domain + "/" + value
            if value in Data:
                skipping = True
                continue
            # Add Item
            print("@@-----Debug: New Item! Item-Link = [%s]" % (value,))
            item = Data[value] = Item()
            item.url = value
            item.from_url = url
            f_send.write(value + " 9000000 n\n")    # Never Update Twice!
            skipping = False
            continue

        if skipping:
            continue
        if item is None:
            _log_err("@@ Error: Item is None! @[solve_list()]")
            continue

        # Update item info
        print("@@ Debug: key = [%s], value = [%s]" % (key, value))
        _check_suffix(key, "_o", "solve_list()")
        item.table[key[:-2]] = "[o]" + value


######################################################################
# Info:   Deal with (item urls) in [info], update item infos.
# Params: url(str) L(list)
######################################################################
def solve_item(url, L):
    """Update the attributes of a known item from an item block.

    url -- item url; must already be a key of Data, otherwise the block
           is reported and ignored
    L   -- the block's lines, format "<attr>_i = xxx xxx xxx..."

    Raises ValueError for an attribute key without the "_i" suffix.
    """
    if url not in Data:
        print("@@ Error: Item Update Failed! Please Check item-link![%s]"
              % (url,))
        return
    item = Data[url]
    item.update_time = _now('%Y-%m-%d.%H:%M')
    for line in L:
        parsed = _parse_attr(line)
        if parsed is None:
            continue
        key, value = parsed
        print("@@ Debug: key = [%s], value = [%s]" % (key, value))
        _check_suffix(key, "_i", "solve_item()")
        item.table[key[:-2]] = "[i]" + value


######################################################################
# Info:   Deal with an [info] file
#         --> solve_list()
#         --> solve_item()
# Input:  {sys}/info/\d+
######################################################################
def solve(it):
    """Process {sys}/info/<it> block by block, then delete the file.

    A block starts at a "url = <url>" line and ends before the next one.
    Blocks spanning 3 lines or fewer (header included) are reported and
    skipped. Blocks whose url is in List_dict go to solve_list(), all
    others to solve_item().
    """
    _log_err("@@ Debug:----------------Deal with info[%d]" % (it,))
    print("@@Debug: current info_file:[ %d ]" % (it,))
    path = "%s/info/%d" % (Sysd, it)
    with open(path, "r") as f_info:
        lines = f_info.readlines()
    # Sentinel header so the last real block is flushed too.
    lines.append("url = [LAST_URL]\n")

    prev = None     # index of the header line of the open block
    for i, raw in enumerate(lines):
        head = _tokens(raw)
        if not (len(head) == 3 and head[1] == "=" and head[0] == "url"):
            continue
        # First Url : --Skip--
        if prev is None:
            prev = i
            continue
        # Bad Block : --Skip--
        if (i - prev) <= 3:
            print("@@ Error: patch.py solve(): lines not enough! [%d : %d]"
                  % (prev, i))
            # Show the offending block itself (the old code printed a
            # slice of the unrelated global L here).
            print("%s \n|---------------------Trace OK."
                  "-------------------------|" % (lines[prev:i],))
            prev = i
            continue
        # Normal Block : lines[prev:i]
        prev_url = lines[prev].split(" ")[2].strip(_STRIP_CHARS)
        if prev_url in List_dict:
            print("@@ Debug:|| [list] lines[%d : %d]" % (prev, i))
            solve_list(prev_url, lines[prev + 1:i])     # Get item mode!
        else:
            print("@@ Debug:|| [item] lines[%d : %d]" % (prev, i))
            solve_item(prev_url, lines[prev + 1:i])     # Update item mode!
        prev = i

    print("@@ Warning: Info File [%s/info/%d] is to be **Removed**!"
          % (Sysd, it))
    try:
        os.remove(path)
    except OSError as err:
        # Best effort, like the former `rm`: report and carry on.
        _log_err("@@ Error: cannot remove [%s]: %s" % (path, err))


######################################################################
# Info:   Read [info] files every 6 minutes
######################################################################
def _send_pool_path():
    """Return the send_pool file for 3 minutes from now, minute rounded
    down to an even value."""
    tar_time = list(time.localtime(time.time() + 180))
    tar_time[4] = (tar_time[4] >> 1) << 1
    # strftime needs a tuple/struct_time (Python 3 rejects a list).
    return Sysd + "/send_pool/" + time.strftime('%Y%m%d.%H%M',
                                                tuple(tar_time))


def _run_cycle(prev_index):
    """Process the pending info files once; return the new prev_index.

    The newest file may still be written by the producer, so it is only
    processed when it was already the newest in the previous cycle
    (prev_index); otherwise it is remembered for the next cycle.
    """
    global f_send
    # Ignore anything that is not a plain numeric info file.
    pending = [int(name) for name in os.listdir(Sysd + "/info")
               if name.isdigit()]
    print("@@ Debug: There are [%d] files under [info/]:" % (len(pending),))
    if not pending:
        return prev_index
    print(pending)
    pending.sort()

    send_file = _send_pool_path()
    print("@@ Debug: Write to send_pool_file:(even) [%s]" % (send_file,))
    f_send = open(send_file, "w")
    try:
        last = pending.pop()
        for number in pending:
            solve(number)
        # last file is buffered
        if prev_index == last:
            solve(last)
            prev_index = None
        else:
            prev_index = last
    finally:
        f_send.close()
    return prev_index


def main():
    """Initialise the globals, then poll {sys}/info forever."""
    init()          # Init global variables
    init_list()     # Generate list_url infos
    prev_index = None
    while True:
        try:
            prev_index = _run_cycle(prev_index)
            print("@@ Info: Output is to be Generated!")
            output()
            print("@@ Info: Python Going to Sleep!")
            sys.stdout.flush()
            time.sleep(60 * 6 + 1)
        except Exception:
            # `Exception` only: Ctrl-C / SystemExit must still stop us.
            traceback.print_exc(file=sys.stdout)
            sys.stdout.flush()
            _log_err("@@ Error Occured. Traceback:....................")
            if DEBUG_MODE:
                _log_err("@@ [Debug] Mode, Exit!")
                sys.exit(1)
            _log_err("@@ [Online] Mode, Continue!")
            time.sleep(60 * 6 + 1)


if __name__ == "__main__":
    main()