#! /bin/sed -f
#
# nex2epf.sed -- Sed script to convert NEXUS to extended PHYLIP format. Tested
# with GNU sed 4.1.4.
#
# (C) 2007 by
# Markus Goeker (markus.goeker@uni-tuebingen.de)
#
# This program is distributed under the terms of the Gnu Public License V2.
# For further information, see http://www.gnu.org/licenses/gpl.html
#
# If you happen to use this script in a publication, please cite the web page
# at http://www.goeker.org/scripts/
#
# Usage: sed -f nex2epf.sed INPUT.nex > OUTPUT.phy
#
# Output: one line holding the numbers found on the DIMENSIONS line, followed
# by every line between the MATRIX line and the terminating ";" line. Labels
# enclosed in single quotes are unquoted and every character in them other
# than letters, digits, "_" and "." is replaced by "_".
#
# Exit status: 1 (with "Unbalanced single quotes." on standard error) if a
# matrix line contains a single quote but no well-formed quoted label.
#
# Requires GNU sed: the script uses the GNU extensions "Q", "\+", "\|", "\t",
# "\r" and the special file name /dev/stderr.
#
# Known problems: The script is not at all NEXUS-compatible, but should work
# with, e.g., non-interleaved PAUP* output except when the sequence labels are
# enclosed in "'" and contain a "]" after a "[" character. In that case, the
# brackets and anything between them are removed from the label.
#

# Strip a trailing carriage return so that files with CRLF line endings are
# handled; otherwise the MATRIX and ";" delimiter patterns below never match.
s/\r$//

# Remove comments that open and close on the same line.
s/\[[^]]*\]//g
# Remove the tail of a comment that was opened on an earlier line ...
s/^[^]']*\]//
# ... and the head of a comment that is closed on a later line.
s/\[[^]']*$//
# Replace each run of tabs by a single space.
s/\t\+/ /g

# Remove empty lines
/^ *$/ d

# Print dimensions: keep only digits and spaces, then squeeze the spaces.
/^ *[Dd][Ii][Mm][Ee][Nn][Ss][Ii][Oo][Nn][Ss] \+/ {
  s/[^0-9 ]//g
  s/ \+/ /g
  p
  d
}

# Print data: everything from the MATRIX line up to the line holding only ";".
/^ *[Mm][Aa][Tt][Rr][Ii][Xx] *$/, /^ *; *$/ {
  # Skip the two delimiter lines themselves.
  /^ *\([Mm][Aa][Tt][Rr][Ii][Xx]\|;\) *$/ ! {
    # Lines with a quoted label need the label unquoted and sanitised.
    /'/ {
      # The quote checks below use explicit address guards instead of "T",
      # because "T" looks at a flag that any earlier successful "s" (tab
      # replacement, comment removal, s/''//g) leaves set, which would hide
      # a failed match and let a malformed label through.

      # Keep a copy of the line, then reduce the pattern space to the text
      # after the last single quote (the sequence data).
      h
      /.\+'/ ! b error
      s/.\+'//

      # Swap: hold space = sequence data, pattern space = original line.
      x
      # Drop doubled (escaped) single quotes inside the label.
      s/''//g
      # Reduce the line to the text between the first and the last quote;
      # at least one character must follow the closing quote.
      /^ *'.\+'[^']\+$/ ! b error
      s/^ *'\(.\+\)'[^']\+$/\1/
      # Replace every character not allowed in the output label.
      s/[^A-Za-z0-9_.]/_/g
      # Re-attach the sequence data directly after the label.
      G
      s/\n//
    }
    p
  }
}

# Nothing else is printed.
d

# Error exit: report on standard error and quit with status 1 without
# printing the pattern space.
:error
s/.*/Unbalanced single quotes./
w /dev/stderr
Q 1