# Copyright (c) 2004-2005 DoCoMo Euro-Labs GmbH (Munich, Germany). # Copyright (c) 2001-2005 LOGILAB S.A. (Paris, FRANCE). # # http://www.docomolab-euro.com/ -- mailto:tarlano@docomolab-euro.com # http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA """Basic narval actions, such as nop, file reading/writing, executing arbitrary system command... :version: $Revision:$ :author: Logilab :copyright: 2000-2005 LOGILAB S.A. (Paris, FRANCE) 2004-2005 DoCoMo Euro-Labs GmbH (Munich, Germany) :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr http://www.docomolab-euro.com/ -- mailto:tarlano@docomolab-euro.com """ __revision__ = "$Id: Basic.py,v 1.70 2002/09/20 14:12:35 syt Exp $" __docformat__ = 'restructuredtext en' import re from os import system from copy import deepcopy from narval.public import AL_NS from narval.interfaces.base import IFile, IOpen, IData, ICommand from narval.elements import create_error from narval.elements.base import CommandElement, DataElement, FileElement from narval.elements.core import QuitElement, StartPlanElement MOD_XML = "" % AL_NS # FIXME: handle http proxy correctly ? 
def act_nop(inputs):
    """No OPeration: ignore the inputs and produce nothing.

    :param inputs: mapping of input names to elements (unused)
    :return: an empty dictionary
    """
    return {}

MOD_XML += """ the very usefull NO oPeration :) """


def act_die(inputs):
    """Shutdown the interpreter by producing a quit element.

    :param inputs: mapping of input names to elements (unused)
    :return: dictionary holding a new QuitElement under the 'quit' key
    """
    return {'quit': QuitElement()}

MOD_XML += """ shutdown the interpreter by producing a quit element isinstance(elmt, QuitElement) the quit element """


def act_mirror(inputs):
    """Mirror operation: copy the inputs on the outputs.

    :param inputs: mapping of input names to elements
    :return: a deep copy of `inputs`, so the produced elements are
      independent from the consumed ones
    """
    return deepcopy(inputs)

MOD_XML += """ the mirror operation (copy input on output) """


def act_start_plan(inputs):
    """produce a start plan element, with a CommandElement as context if necessary """
    # NOTE: the docstring above is interpolated verbatim into MOD_XML right
    # after this function, so its text is kept unchanged; the details are
    # documented in comments instead.
    #
    # inputs['cmd'] is adapted to ICommand and its args are read as:
    #   args[0]  -> recipe of the plan to start
    #   args[1]  -> delay, expressed in minutes
    #   args[2]  -> optional name of a command used as context of the plan
    #   args[3:] -> arguments of that context command
    # Returns {'startplan': <StartPlanElement>}.
    cmd = ICommand(inputs['cmd'])
    args = cmd.args
    start_plan = StartPlanElement(recipe=args[0])
    minutes = args[1]
    if not isinstance(minutes, (int, float)):
        # a textual delay would be *repeated* 60 times by the multiplication
        # below instead of being scaled; act_reschedule_plan converts its
        # delay argument with int() in the same way
        minutes = int(minutes)
    # *60 since delay on command is in minutes, but in seconds on the
    # start-plan element
    start_plan.delay = minutes * 60
    if len(args) > 2:
        subcmd = CommandElement(name=args[2])
        subcmd.args = args[3:]
        subcmd.from_msg = cmd.from_msg
        start_plan.context = [subcmd]
    return {'startplan': start_plan}

MOD_XML += """ %s ICommand(elmt) the command describing the plan to execute isinstance(elmt, StartPlanElement) the start-plan element """ % act_start_plan.__doc__


def act_reschedule_plan(inputs):
    """reschedule a plan in a given delay """
    # NOTE: the docstring above is interpolated verbatim into MOD_XML right
    # after this function, so its text is kept unchanged.
    #
    # inputs['cmd']       -> command whose args[1] is the delay in minutes;
    #                        a leading '+' or '-' makes it relative to the
    #                        delay of the cancelled start-plan
    # inputs['startplan'] -> the start-plan element to reschedule
    # Returns {'startplan': <new StartPlanElement>}.
    cmd = ICommand(inputs['cmd'])
    orig_start_plan = inputs['startplan']
    delay = cmd.args[1]
    # *60 since delay on command is in minutes, but in seconds on the
    # start-plan element
    int_delay = int(delay) * 60
    start_plan = StartPlanElement(recipe=orig_start_plan.recipe)
    # cancel() is given the replacement element and returns the element
    # whose delay and context are carried over below
    cancelled_start_plan = orig_start_plan.cancel(start_plan)
    if delay[0] in ('+', '-'):
        # signed delay: shift the previous schedule
        start_plan.delay = cancelled_start_plan.delay + int_delay
    else:
        # unsigned delay: replace the previous schedule
        start_plan.delay = int_delay
    start_plan.context = cancelled_start_plan.context
    return {'startplan': start_plan}

MOD_XML += """ %s ICommand(elmt) the command describing the plan to reschedule (ie id + delay) IStartPlan(elmt).eid == ICommand(cmd).args[0] the start-plan to reschedule isinstance(elmt, StartPlanElement) a new rescheduled start-plan element """ % act_reschedule_plan.__doc__


def act_command(inputs):
    """Execute an arbitrary system command"""
    # NOTE: the docstring above is interpolated verbatim into MOD_XML right
    # after this function, so its text is kept unchanged.
    #
    # inputs['input'] is adapted to IData; its data is the command line.
    # Returns {} on success, or {'error': <error element>} when the command
    # exits with a non-zero status.
    # NOTE(review): os.system hands the string to a shell as-is, so the
    # command element must come from a trusted source.
    command = IData(inputs['input']).data
    if system(command):
        msg = 'error while execute command %r' % command
        error = create_error(msg=msg, type='os')
        return {'error': error}
    return {}

MOD_XML += """ %s IData(elmt) the command to execute """ % act_command.__doc__


def act_read_raw_file(inputs):
    """Read a file and wrap its content in a file element.

    :param inputs: mapping whose 'to-read' element is adaptable to IOpen
    :return: dictionary holding, under the 'output' key, a FileElement
      carrying the address of the opened element, the data which has been
      read, and the mode 'w'
    """
    openable = IOpen(inputs['to-read'])
    stream = openable.open()
    try:
        data = stream.read()
    finally:
        # release the handle even when read() fails
        stream.close()
    elmt = FileElement()
    elmt.address = openable.original.address
    elmt.data = data
    elmt.mode = 'w'
    return {'output': elmt}

MOD_XML += """ read a raw file url locating the file to read IOpen(elmt) xml element containing the raw file in its data child IFile(elmt) elmt.address """


def act_write_raw_file(inputs):
    """Write the data of a file element to the file system.

    Only the 'file' protocol is handled.

    :param inputs: mapping whose 'input' element is adaptable to IFile
    :return: an empty dictionary on success, or a dictionary holding an
      error element under the 'error' key when the protocol of the element
      is not 'file'
    """
    rawf = IFile(inputs['input'])
    proto = rawf.protocol()
    if proto != 'file':
        msg = 'can not write %s, handle only the file protocol, not %s' % (
            rawf.address, proto)
        return {'error': create_error('not writeable', msg)}
    data = rawf.data
    if rawf.encoding and isinstance(data, unicode):
        # unicode data is encoded with the encoding declared on the element
        data = data.encode(rawf.encoding)
    stream = open(rawf.path(), rawf.mode or 'w')
    try:
        stream.write(data)
    finally:
        # release the handle even when write() fails
        stream.close()
    return {}

MOD_XML += """ write a raw file url locating the file to write IFile(elmt) elmt.address elmt.data """


def act_grep_file(inputs):
    """Search for a pattern in a file, as grep would do.

    :param inputs: mapping whose 'to-read' element is adaptable to IOpen
      and whose 'regexp' element is adaptable to IData (its data being the
      regular expression)
    :return: dictionary holding, under the 'result' key, a DataElement
      whose data is the stripped matching lines joined with newlines, or
      the text 'No matching lines' when nothing matched
    """
    stream = IOpen(inputs['to-read']).open()
    try:
        rgx = re.compile(IData(inputs['regexp']).data)
        # FIXME: put this in an adapter or so ?
        # pick the best line iteration method the opened object provides
        if hasattr(stream, 'xreadlines'):
            lines = stream.xreadlines
        elif hasattr(stream, 'readlines'):
            lines = stream.readlines
        else:
            lines = stream.read().splitlines
        match = [line.strip() for line in lines() if rgx.search(line)]
    finally:
        # the stream used to be left open; close it as act_read_raw_file
        # does for objects obtained the same way
        stream.close()
    result = DataElement()
    if match:
        result.data = '\n'.join(match)
    else:
        result.data = 'No matching lines'
    return {'result': result}

MOD_XML += """ search for a pattern in a file as grep openable object locating the file to search IOpen(elmt) the regular expression to search IData(elmt) a data element containing matching lines IData(elmt) """


# FIXME: actions below are not backported !

## # Read an XML file #############################################################
## def read_xml_file_f(inputs):
##     outputs = {}
##     url = inputs['to-read'].firstChild.data
##     ## PrettyPrint(args['to-read'])
##     ## print 'URL: ',url
##     proxy = inputs['proxy']
##     if proxy:
##         from Http import export_http_proxy
##         export_http_proxy(proxy)
##     try:
##         # read data
##         # FIXME: add Proxies and URLOpener
##         try:
##             docnode = read_domtree_from_uri(url).documentElement
##             outputs['output'] = docnode
##         except Exception,e:
##             import traceback
##             traceback.print_exc()
##             f = urlopen(normalize_url(url)[0])
##             data = f.read()
##             f.close()
##             # try correcting stuff
##             if data[:5] == '')
##                     data = '%s encoding="utf-8" %s'% (data[:p], data[p:])
##                 else:
##                     raise e
##             else :
##                 data = '\n%s'%(
##                     data, get_encoding())
##             docnode = read_domtree_from_string(data).documentElement
##             outputs['output'] = docnode
##     except IOError, e:
##         import traceback
##         traceback.print_exc()
##         outputs['error'] = [error(doc, 'No such url %s'%url)]
##     return outputs
## """
## read a valid xml file
##
## url locating the file to read
## url/text()
##
##
## proxy element, for page requests
## proxy[@type="http"]
##
##
##
## xml element which represents the readen file (it's root element depends on the
## file you have just read!)
## ## ## ## """ ## # Write an XML file ############################################################ ## def write_xml_file_f(inputs) : ## # get the url where the file will be written ## url = inputs['url'].firstChild.data ## url, url_tuple = normalize_url(url) ## # get the data to be written in the file ## data = inputs["to-write"] ## # get the encoding of the file (optional, default is UTF-8) ## encoding = inputs['encoding'] ## if encoding is None: ## encoding = get_encoding() ## else: ## encoding = encoding.firstChild.data ## o = {} ## # # get the doctype of the file (optional) ## # try: ## # systemId = select_one(args,'xml-file/doctype/systemid/text()').data ## # except SelectException: ## # systemId = "" ## # try: ## # publicId = select_one(args,'xml-file/doctype/publicid/text()').data ## # except SelectException: ## # publicId = "" ## # if systemId=="": ## # # publicId can't be defined alone (cf. XML Spec) ## # publicId = "" ## # # Builds a doctype for the file ## # docType = implementation.createDocumentType(data.localName, ## # publicId, systemId) ## # # Builds a document for the file ## # document = implementation.createDocument('', '', docType) ## # # Adds the data to be saved as child of this document ## # document.appendChild(document.importNode(data,1)) ## # Opens the file and saves the document ## if url_tuple[0] == 'file' : ## write_domtree_to_file(data, url_tuple[2], encoding) ## else : ## o['error'] = [error(doc, 'Can\'t write file at url %s'%url)] ## return o ## MOD_XML += """ ## write an xml file ## ## url[text()] ## url locating the file to write ## ## ## encoding/text() ## encoding to use in the file ## ## ## xml element which represents the file to write ## ## """ ## # Write an HTML file ########################################################### ## def write_html_file_f(inputs) : ## from xml.dom.ext.XHtml2HtmlPrinter import XHtml2HtmlPrintVisitor ## from xml.dom.ext.Printer import PrintWalker ## filenode = inputs['input'] ## # get the url where 
the file will be written ## url = select_one(filenode,'url/text()').data ## url, url_tuple = normalize_url(url) ## # get the data to be written in the file ## dataNodes = select_many(filenode,'data/*') ## # get the encoding of the file (optional, default is UTF-8) ## try: ## encoding = select_one(filenode,'encoding/text()').data ## except SelectException: ## encoding = get_encoding() ## o = {} ## # Builds a doctype for the file (neither systemId nor publicId is ## # necessary, the printer will insert default ones) ## ##docType = implementation.createDocumentType('xhtml','', '') ## # Opens the file and saves the document ## if url_tuple[0] == 'file' : ## f = open(url_tuple[2], 'w') ## visitor = XHtml2HtmlPrintVisitor(f, encoding, ' ') ## for dataNode in dataNodes : ## # Builds a document, ## doc = Document() ## doc.appendChild(doc.createElementNS(NO_NS, 'xhtml')) ## # then merge the data. ## doc.documentElement.appendChild(doc.importNode(dataNode,1)) ## PrintWalker(visitor, dataNode).run() ## f.write('\n') ## f.close() ## else : ## o['error'] = [error(doc, 'Can\'t write file at url %s'%url)] ## return o ## """ ## write an html file ## html-file/url/text() ## html-file node contain url locating the file to write ## and the data to write in the file ## html-file/data/* ## ## ## """ ## # Search a match to regular expressions in an element ########################## ## REGEXER_ERR_MSG = """Could not find string to match, check that you do ## with ## your text string and mind ## the baz/foo/bar and foo/bar difference.""" ## def regexer_f(inputs) : ## # init ## regexp_set = inputs['input'] ## regexp_target = inputs["regexp-target"] ## o = {} ## regexp_result_node = False ## # Let's get the string to test... 
## try: ## path = regexp_set.getAttributeNS(NO_NS, 'match') ## st = select_one(regexp_target,path).data ## except SelectException: ## # error = doc.createElementNS(AL_NS,'al:error') ## # error.appendChild(doc.createTextNode(REGEXER_ERR_MSG)) ## # o['error'] = [error] ## o['error'] = error(doc, REGEXER_ERR_MSG) ## return o ## # Apply each rule until we find a match. ## for rule_node in select_many(regexp_set,'rule') : ## regexp = rule_node.firstChild.data ## if rule_node.getAttributeNS(NO_NS, 'ignore-case') == 'yes' : ## m = re.compile(regexp, re.I).match(st) ## else : ## m = re.compile(regexp).match(st) ## if m : ## regexp_result_node = doc.createElementNS(NO_NS, 'regexp-result') ## regexp_result_node.setAttributeNS('','rule-id', ## rule_node.getAttributeNS(NO_NS, 'id')) ## o['output'] = regexp_result_node ## for name,value in m.groupdict().items() : ## group_node = doc.createElementNS(NO_NS, 'group') ## group_node.setAttributeNS(NO_NS, 'name',name) ## value_node = doc.createTextNode(value) ## group_node.appendChild(value_node) ## regexp_result_node.appendChild(group_node) ## break ## # not match found: error ## if not regexp_result_node : ## error_node = error(doc, 'No rule matched.') ## map(error_node.appendChild, inputs.values()) ## o['error'] = [error_node] ## return o ## """ ## regular expression matcher ## regexp-set[@match and rule/text()] ## regular expression ## ## ## string to process ## ## regexp-result ## result structure ## ## ## """ ## # Send a message ############################################################### ## def send_msg_f(inputs) : ## outputs = {} ## msg = inputs['message'] ## #print 'Sending message',select_one(msg,'@eid/text()').data, \ ## # select_one(msg,'header/to/text()').data ## msg.removeAttributeNS(AL_NS,'eid') ## #msg.removeAttributeNS(AL_NS,'from_plan') ## #msg.removeAttributeNS(AL_NS,'from_step') ## # FIXME : many listen-on nodes are authorized, so which one to take? 
## listen = inputs['narval'][0] ## headers = select_one(msg,'header') ## from_host = listen.getAttributeNS(NO_NS,'host') ## from_port = listen.getAttributeNS(NO_NS,'port') ## host, port = select_one(headers,'to/text()').data.split(':') ## # timestamp = select_one(msg,'@timestamp/text()').data ## timestamp = msg.getAttributeNS(NO_NS,'timestamp') ## msg.removeAttributeNS(NO_NS,'timestamp') ## from_node = msg.ownerDocument.createElementNS(AL_NS, 'al:from') ## time_node = msg.ownerDocument.createElementNS(AL_NS, 'al:timestamp') ## headers.appendChild(from_node) ## headers.appendChild(time_node) ## from_node.appendChild(doc.createTextNode(from_host+':'+from_port)) ## time_node.appendChild(doc.createTextNode(timestamp)) ## from narval.communication import RPCFactory ## try: ## print "HOST=", host, port ## uri = RPCFactory.encode_uri('xmlrpc', host, port) ## other = RPCFactory.create_proxy(uri) ## other.postMessage(domtree_to_string(msg)) ## except: ## err = error(doc, 'unreachable host') ## err.setAttributeNS(NO_NS,'address',str(host+':'+port)) ## outputs['error'] = err ## raise ## return outputs ## """ ## send a message to another Narval, need use of xmlrpc ## ## message/@type='outgoing' ## message to send ## ## ## al:listen-on ## structure which describes remote Narval ## ## ## """ ## def download_dataunit_f(inputs): ## # FIXME: optional myhtmllib (->word coef) ## myhtmllib = load_extension_module('html', 'myhtmllib') ## output = {'output': []} ## list_error=[] ## im_filter = inputs['filter'] or 1 ## encod = get_encoding() ## for s_u in inputs['input']: ## try: ## data_n = select_one(s_u, 'data') ## except Exception,e: ## data_n = None ## data = '' ## else: ## try: ## txt_n = select_one(data_n, 'text()') ## # FIXME: isn't it yet unicode ?? 
## data = unicode(txt_n.data, encod) ## data_n.removeChild(txt_n) ## except Exception,e: ## import traceback ## traceback.print_exc() ## data = '' ## url = select_one(s_u, 'url/text()').data ## # image filter ## if im_filter: ## ext_list = re.split('[.*]', url[-5:]) ## end = len(ext_list)-1 ## if ext_list[end] == 'jpg' or \ ## ext_list[end] == 'png' or \ ## ext_list[end] == 'gif': ## print 'url is a picture' ## continue ## try: ## txt_data = myhtmllib.parse_html_url(url) ## from cStringIO import StringIO ## data = StringIO('%s%s' % (txt_data.getvalue(), data)) ## except : ## # 'Error, Pass this url.' ## print 'ERROR in url', url ## import traceback ## traceback.print_exc() ## list_error.append(s_u) ## continue ## if data_n is None: ## data_n = doc.createElementNS(NO_NS, 'data') ## s_u.appendChild(data_n) ## data_txt = doc.createTextNode(unicode(data.getvalue(), get_encoding())) ## data_n.appendChild(data_txt) ## #PrettyPrint(s_u) ## output['output'].append(s_u) ## # FIXME error handling in different ns ? ## return output ## """ ## take a data-unit element with an url child, return the data-unit ## element with its data child concatened with the content downloaded from the url ## ## ## data-unit[url/text()] ## ## data-unit element with data to download from its url child ## ## ## ## image-filter ## ## filters images if this element is found ## ## ## ## data-unit ## completed data-unit ## ## ## """ MOD_XML += ""