Reading Moses search graph using Python

I prefer a search graph to a word graph, so I wrote this:

import functools
import re
import sys

# Line patterns for the search graph file. Every numeric field is captured as
# a string; format_hypo() converts them to int/float afterwards. The group
# names are exactly the keys format_hypo() looks up.

# Line carrying only sentence id, hyp, stack, forward and fscore
# (no back-pointer, scores, coverage or output phrase).
PAT_INIT = re.compile(
    r'^(?P<sent_id>\d+) hyp=(?P<hyp>\d+) stack=(?P<stack>\d+)'
    r' forward=(?P<forward>-?\d+\.?\d*) fscore=(?P<fscore>-?\d+\.?\d*)$'
)

# Line with a back-pointer, scores, covered source span and output phrase.
PAT_REG = re.compile(
    r'^(?P<sent_id>\d+) hyp=(?P<hyp>\d+) stack=(?P<stack>\d+)'
    r' back=(?P<back>\d+) score=(?P<score>-?\d+\.?\d*)'
    r' transition=(?P<transition>-?\d+\.?\d*)'
    r' forward=(?P<forward>-?\d+\.?\d*) fscore=(?P<fscore>-?\d+\.?\d*)'
    r' covered=(?P<covered_start>\d+)-(?P<covered_end>\d+) out=(?P<out>.+)$'
)

# Same as PAT_REG plus a "recombined=" field between transition and forward.
PAT_RECOM = re.compile(
    r'^(?P<sent_id>\d+) hyp=(?P<hyp>\d+) stack=(?P<stack>\d+)'
    r' back=(?P<back>\d+) score=(?P<score>-?\d+\.?\d*)'
    r' transition=(?P<transition>-?\d+\.?\d*)'
    r' recombined=(?P<recombined>\d+)'
    r' forward=(?P<forward>-?\d+\.?\d*) fscore=(?P<fscore>-?\d+\.?\d*)'
    r' covered=(?P<covered_start>\d+)-(?P<covered_end>\d+) out=(?P<out>.+)$'
)

def format_hypo(parse_hypo):
    """Decorate a line parser so that its string fields come back typed.

    ``parse_hypo`` must take a line and return a dict of strings. The keys
    ``sent_id``, ``hyp``, ``stack``, ``forward`` and ``fscore`` are required;
    ``back``, ``score``, ``transition``, ``recombined``, ``covered_start`` /
    ``covered_end`` and ``out`` are converted only when present.

    The returned wrapper yields a new dict in which ids are ``int``, scores
    are ``float``, the covered span is merged into one ``covered`` tuple
    ``(start, end)`` and ``out`` is passed through unchanged.
    """
    # (key, converter) for fields that only some line formats carry.
    optional_fields = (
        ('back', int),
        ('score', float),
        ('transition', float),
        ('recombined', int),
    )

    @functools.wraps(parse_hypo)
    def new_parse_hypo(line):
        ret = parse_hypo(line)
        new_ret = {
            'sent_id': int(ret['sent_id']),
            'hyp': int(ret['hyp']),
            'stack': int(ret['stack']),
            'forward': float(ret['forward']),
            'fscore': float(ret['fscore']),
        }
        # `in` instead of dict.has_key(): has_key() no longer exists in
        # Python 3, while `in` behaves the same on both major versions.
        for key, convert in optional_fields:
            if key in ret:
                new_ret[key] = convert(ret[key])
        if 'covered_start' in ret:
            new_ret['covered'] = (int(ret['covered_start']),
                                  int(ret['covered_end']))
        if 'out' in ret:
            new_ret['out'] = ret['out']
        return new_ret

    return new_parse_hypo

@format_hypo
def parse_hypo(line):
    """Parse one line of the search graph file into a hypothesis dict.

    The stripped line is tried against PAT_INIT, PAT_REG and PAT_RECOM in
    that order and the named groups of the first match are returned; the
    format_hypo decorator then converts those strings to typed values.

    Raises:
        RuntimeError: if the line matches none of the three patterns.
    """
    line = line.strip()
    for pattern in (PAT_INIT, PAT_REG, PAT_RECOM):
        m = pattern.match(line)
        if m:
            return m.groupdict()

    # Include the offending text so a malformed graph file can be diagnosed.
    raise RuntimeError("unrecognized search graph line: %r" % line)

def read_search_graph(source_path, target_path, graph_path):
    """Yield ``(source, target, hypos)`` for every line of the source file.

    The source and target files are read in lockstep, one line per sentence
    (lines are yielded as read, including their trailing newline). Lines of
    the graph file are parsed with parse_hypo() and grouped by comparing
    their ``sent_id`` with the 0-based index of the current source line:
    reading stops at the first hypothesis whose ``sent_id`` is greater than
    that index, and that hypothesis is kept for the following sentence.
    ``hypos`` is a fresh list for each sentence and is empty once the graph
    file is exhausted.

    Raises:
        RuntimeError: if the target file runs out of lines before the
            source file does, or (from parse_hypo) on a malformed graph line.
    """
    # `with` guarantees the files are closed even when the consumer abandons
    # the generator early or an exception escapes the loop.
    with open(source_path) as source_file, \
            open(target_path) as target_file, \
            open(graph_path) as graph_file:
        # Hypothesis already read from the graph but not yet assigned to a
        # sentence (read-ahead of one line).
        pending = None

        for i, source in enumerate(source_file):
            target = target_file.readline()
            if target == "":  # readline() returns "" only at end of file
                raise RuntimeError(
                    "target file %r has no line for source sentence %d"
                    % (target_path, i)
                )

            hypos = []
            while True:
                if pending is None:
                    line = graph_file.readline()
                    if line == "":
                        break  # graph exhausted
                    pending = parse_hypo(line)
                elif pending['sent_id'] > i:
                    break  # belongs to a later sentence; keep it pending
                else:
                    hypos.append(pending)
                    pending = None
            yield source, target, hypos


def usage():
    """Write the command-line synopsis to stderr and exit with status 1."""
    # sys.stderr.write() behaves the same on Python 2 and 3, unlike the
    # `print >>stream` statement. The three placeholders name the arguments
    # main() unpacks, in order.
    sys.stderr.write(
        "Usage: python %s <source> <target> <graph>\n" % __file__
    )
    sys.exit(1)

def main():
    """Command-line entry point.

    Expects exactly three arguments (source path, target path, graph path);
    otherwise usage() is called, which exits. Each
    ``(source, target, hypos)`` tuple produced by read_search_graph() is
    printed on its own line.
    """
    params = sys.argv[1:]
    if len(params) != 3:
        usage()
    source, target, graph = params
    for sent in read_search_graph(source, target, graph):
        # Parenthesised single-argument form: a syntax error-free spelling
        # that prints the same tuple under both Python 2 and Python 3.
        print(sent)


if __name__ == '__main__':
    main()

ใส่ความเห็น

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / เปลี่ยนแปลง )

Twitter picture

You are commenting using your Twitter account. Log Out / เปลี่ยนแปลง )

Facebook photo

You are commenting using your Facebook account. Log Out / เปลี่ยนแปลง )

Google+ photo

You are commenting using your Google+ account. Log Out / เปลี่ยนแปลง )

Connecting to %s