Artificial truth

The more you see, the less you believe.

[archives] [latest] | [homepage] | [atom/rss/twitter]

PEiD to Yara, now with Python3!
Sun 09 February 2014 — download

I was curious about the format used by PEiD, and how difficult it would be to make an equivalent for GNU/Linux. It turns out that someone had already written a script for this. Unfortunately, it does not work well with Python3, and I was not satisfied with some parts of the code. Since it's GPL, I made an updated version.


  • Python3 support
  • Able to merge several userdb.txt without duplicates
  • Fewer regexps, less complexity


You can also download it here, along with my database.


Created by Matthew Richard on 2010-03-12.
Ported to py3 by Julien (jvoisin) Voisin on feb2014
Copyright (c) 2010-2014. All rights reserved.

import os
import re
import argparse
import collections

def main(argv=None):
    """Convert one or more PEiD signature databases into a yara rule file.

    Every PEiD entry has the form::

        [signature_name]
        signature = hex signature
        ep_only = (true|false)

    Entries sharing the same (sanitized) name are merged into a single yara
    rule, and identical (signature, ep_only) pairs are stored only once, so
    several userdb.txt files can be combined without duplicates.

    Args:
        argv: command-line arguments, without the program name.  ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: raised by argparse when the output filename is missing
            or when one of the input files does not exist.
    """
    parser = argparse.ArgumentParser(description='PEiD to yara rules converter')
    parser.add_argument('-n', '--no-ep', dest='no_ep', action='store_true',
        default=False, help='no entrypoint restriction')
    parser.add_argument('files', metavar='files', type=str, nargs='+',
        help='scanned filenames')
    parser.add_argument('-o', '--output-file', action='store', dest='outfile',
        help='output filename')

    opts = parser.parse_args(argv)

    if opts.outfile is None:
        parser.error('You must specify an output filename!\n')
    # nargs='+' already makes argparse reject an empty file list, so only
    # the existence of each input file is left to check here.
    for fin in opts.files:
        if not os.path.isfile(fin):
            parser.error('%s does not exist' % fin)

    # yara rule template from which rules will be created; the three slots
    # are the rule name, the string definitions (one per line, each already
    # newline-terminated) and the condition line.
    yara_rule = (
        '\nrule %s\n'
        '{\n'
        'strings:\n'
        '%s'
        'condition:\n'
        '%s\n'
        '}\n'
    )

    # rule name -> set of (hex signature, ep_only) pairs; the set drops
    # duplicates found across the input files.
    rules = collections.defaultdict(set)

    #  read the PEiD signature files, closing each one as soon as it is read
    contents = []
    for path in opts.files:
        with open(path, 'r') as fin:
            contents.append(fin.read())
    data = ' '.join(contents)

    #  every signature takes the form of
    #  [signature_name]
    #  signature = hex signature
    #  ep_only = (true|false)
    signature = re.compile(r'''
        \[\d*(.+?)\]\r?\n                   # rule name (Can not start with a number)
        signature\ =\ (?:\?\?\ )*(.+?)\r?\n # signature (Can not start with '?? '*)
        ep_only\ =\ (true|false)            # ep_only
        ''', re.MULTILINE | re.DOTALL | re.VERBOSE)

    # rule name has the same constraints as a C variable name; re.ASCII
    # keeps non-ASCII letters from slipping through \W.
    rules_cpt = 0
    name_filter = re.compile(r'\W', re.ASCII)
    for match in signature.finditer(data):
        raw_name, hex_sig, ep_only = match.groups()
        name = name_filter.sub('_', raw_name)
        # an all-digit PEiD name still leaves one leading digit behind
        if name[0].isdigit():
            name = '_' + name
        rules[name].add((hex_sig, ep_only))
        rules_cpt += 1
    print('[+] Found %d signatures in PEiD input file' % rules_cpt)

    chunks = []
    for rule, sigs in rules.items():
        detects = []
        conds = []
        # sorted() keeps the generated file identical from one run to the
        # next (set iteration order depends on string hash randomisation)
        for counter, (detect, use_ep) in enumerate(sorted(sigs)):
            # create each new rule using a unique numeric value
            # to allow for multiple criteria and no collisions
            detects.append('\t$a%d = { %s }\n' % (counter, detect))

            # if the rule specifies it should be at EP we add
            # the yara specifier 'at entrypoint'
            cond = '$a%d' % counter
            if use_ep == 'true' and not opts.no_ep:
                cond += ' at entrypoint'
            conds.append(cond)

        # add the rule to the output
        chunks.append(
            yara_rule % (rule, ''.join(detects), '\t' + ' or '.join(conds)))

    with open(opts.outfile, 'w') as fout:
        fout.write(''.join(chunks))

    print('[+] Wrote %d rules to %s' % (len(rules), opts.outfile))

# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()