Module autostreamtree.params
Expand source code
import sys
import getopt
# Object to parse command-line arguments
class parseArgs():
    """Parse and validate the autostreamtree command line.

    Options are read from ``sys.argv[1:]`` with :mod:`getopt`. Every
    setting is stored as an instance attribute (defaults are assigned in
    ``__init__`` before the options are applied). Invalid or missing
    input prints a message plus the usage menu and exits through
    :meth:`display_help`.
    """

    # Option specification handed to getopt.
    _SHORT_OPTS = 'hs:i:r:p:d:a:lw:o:gP:L:Scn:G:v:C:O:'
    _LONG_OPTS = [
        "shp=", "help", "input=", "run=", "pop=", "popmap=", "dist=",
        "agg_method=", "het", "genmat=", "snp", "snps", "msat",
        "msats", "log", "and_log", "iterative", "weight=", "out=",
        "method=", "plots", "plot", "perm=", "phased", "median",
        "diploid", "geopop", "geopops", "global_het", "haploid",
        "loc_agg=", "pop_agg=", "sdist_agg=", "clusterpop",
        "epsilon=", "min_samples=", "sclusterpop", "network=",
        "overwrite", "reachid_col=", "length_col=", "coercemat",
        "locmatdir=", "vcf=", "concat=", "edge_list=", "gdf_out=",
        "seed=",
    ]

    # Options whose argument is stored verbatim: option -> attribute.
    _VALUE_OPTS = {
        "s": "shapefile", "shp": "shapefile",
        # 'coords' is a legacy spelling kept for compatibility; getopt
        # itself only accepts -i/--input.
        "i": "geodb", "input": "geodb", "coords": "geodb",
        "v": "vcf", "vcf": "vcf",
        "p": "pop", "pop": "pop", "popmap": "pop",
        "locmatdir": "locmatdir",
        "reachid_col": "reachid_col",
        "length_col": "length_col",
        "G": "genmat", "genmat": "genmat",
        "edge_list": "edge_list",
        "o": "out", "out": "out",
        "n": "network", "network": "network",
    }

    # Argument-less switches: option -> (attribute, value to assign).
    _FLAG_OPTS = {
        "g": ("geopop", True), "geopop": ("geopop", True),
        "geopops": ("geopop", True),
        "het": ("het", True),
        "l": ("log", True), "log": ("log", True),
        "and_log": ("and_log", True),
        "c": ("clusterpop", True), "clusterpop": ("clusterpop", True),
        "stream_fit": ("stream_fit", True),
        "overwrite": ("overwrite", True),
        "plot": ("plots", True), "plots": ("plots", True),
        "median": ("median", True),
        "diploid": ("ploidy", 2),
        "haploid": ("ploidy", 1),
        "global_het": ("global_het", True),
        "coercemat": ("coercemat", True),
    }

    # Numeric options: option -> (attribute, converter).
    _NUMBER_OPTS = {
        "seed": ("seed", int),
        "perm": ("permutations", int),
        "min_samples": ("min_samples", int),
        "epsilon": ("epsilon", float),
    }

    # Options getopt accepts but for which no handler exists. They used
    # to abort with an AssertionError; they are now reported and skipped.
    # NOTE(review): 'iterative', 'sdist_agg' and 'sclusterpop' have
    # default attributes but were never wired up - confirm the intent.
    _IGNORED_OPTS = frozenset((
        "a", "agg_method", "S", "snp", "snps", "msat", "msats",
        "iterative", "sdist_agg", "sclusterpop",
    ))

    _CONCAT_MODES = ("none", "all", "loc")
    _RUN_MODES = ("ALL", "GENDIST", "IBD", "STREAMDIST", "STREAMTREE",
                  "DISTANCES", "RUNLOCI")
    _DIST_METRICS = ("PDIST", "FST", "LINFST", "JOST", "NEI83", "CHORD",
                     "GST", "HARMD")
    _WEIGHTS = ("CSE67", "FM67", "BEYER74")
    _DRIVERS = ("GPKG", "SHP", "GDB")
    _AGG_METHODS = ("HARM", "ADJHARM", "ARITH", "GEOM", "MEDIAN", "MAX",
                    "MIN")

    # Shorthand spellings for --weight. The formulas follow the help
    # text: FM67 is w = 1/D^2, BEYER74 is w = 1/D, CSE67 is w = 1.
    _WEIGHT_ALIASES = {
        "FM": "FM67", "1/D^2": "FM67",
        "BEYER": "BEYER74", "1/D": "BEYER74",
        "1": "CSE67", "CSE": "CSE67",
    }

    def __init__(self):
        """Read ``sys.argv``, populate the settings and validate them.

        Raises:
            SystemExit: If the help menu is requested or any option is
                invalid or missing (raised through ``display_help``).
        """
        try:
            options, _ = getopt.getopt(
                sys.argv[1:], self._SHORT_OPTS, self._LONG_OPTS)
        except getopt.GetoptError as err:
            print(err)
            self.display_help(
                "\nExiting because getopt returned non-zero exit status.",
                exit_code=1)

        # Default values for params
        # Input params
        self.shapefile = None
        self.locmatdir = None
        self.geodb = None
        self.vcf = None
        self.concat = "none"
        self.run = "ALL"
        self.network = None
        self.pop = False
        self.overwrite = False
        self.geopop = False
        self.clusterpop = False
        self.sclusterpop = False
        self.output_driver = "GPKG"
        self.dist = "LINFST"
        self.het = False
        self.genmat = None
        self.log = False
        self.and_log = False
        self.iterative = True
        self.weight = "CSE67"
        self.permutations = 1000
        self.coercemat = False
        self.method = "PEARSON"
        self.edge_list = None
        self.plots = False
        self.out = "out"
        self.median = False
        self.ploidy = 2
        self.global_het = False
        self.loc_agg = "ARITH"
        self.pop_agg = "ARITH"
        self.sdist_agg = "ARITH"
        self.seed = None
        self.reachid_col = "HYRIV_ID"
        self.length_col = "LENGTH_KM"
        # Previously only created when the matching flag was passed.
        self.phased = False
        self.stream_fit = False

        # dbscan Options
        self.min_samples = 1
        self.epsilon = 20

        # A help request wins over everything else on the command line.
        if any(flag in ("-h", "--help") for flag, _ in options):
            self.display_help("Exiting because help menu was called.")

        # Later occurrences of an option override earlier ones.
        for flag, value in options:
            self._apply_option(flag.lstrip("-"), value.strip())

        self._validate()

    def _apply_option(self, opt, arg):
        """Store one parsed option (dashes already stripped from *opt*)."""
        if opt in ("h", "help"):
            return
        if opt in self._VALUE_OPTS:
            setattr(self, self._VALUE_OPTS[opt], arg)
        elif opt in self._FLAG_OPTS:
            attr, value = self._FLAG_OPTS[opt]
            setattr(self, attr, value)
        elif opt in self._NUMBER_OPTS:
            attr, caster = self._NUMBER_OPTS[opt]
            setattr(self, attr, self._to_number(caster, arg, opt))
        elif opt in ("C", "concat"):
            self._set_choice(
                "concat", arg.lower(), self._CONCAT_MODES, "<--concat>")
        elif opt in ("r", "run"):
            self._set_choice(
                "run", arg.upper(), self._RUN_MODES, "<-r/--run>")
        elif opt in ("d", "dist"):
            self._set_choice(
                "dist", arg.upper(), self._DIST_METRICS, "<-d/--dist>")
        elif opt in ("w", "weight"):
            weight = arg.upper()
            weight = self._WEIGHT_ALIASES.get(weight, weight)
            self._set_choice(
                "weight", weight, self._WEIGHTS, "<-w/--weight>")
        elif opt in ("O", "gdf_out"):
            self._set_choice(
                "output_driver", arg.upper(), self._DRIVERS, "<--gdf_out>")
        elif opt in ("P", "pop_agg"):
            self._set_choice(
                "pop_agg", arg.upper(), self._AGG_METHODS, "<--pop_agg>")
        elif opt in ("L", "loc_agg"):
            self._set_choice(
                "loc_agg", arg.upper(), self._AGG_METHODS, "<--loc_agg>")
        elif opt == "method":
            print("Sorry: Option --method is not yet implemented.")
            sys.exit(0)
        elif opt == "phased":
            self.phased = True
            print("WARNING: Option <--phased> not yet implemented")
        elif opt in self._IGNORED_OPTS:
            print(f"WARNING: Option <{opt}> is not yet implemented "
                  "and will be ignored")
        else:
            # Explicit exit instead of `assert`, which vanishes under -O.
            self.display_help(f"Unhandled option {opt}", exit_code=1)

    def _set_choice(self, attr, value, valid, flag):
        """Assign *value* to *attr*, or exit if it is not in *valid*."""
        if value not in valid:
            self.display_help(
                f"Invalid option {value} for option {flag}", exit_code=1)
        setattr(self, attr, value)

    def _to_number(self, caster, text, opt):
        """Convert *text* with *caster*; exit with a usage error if bad."""
        try:
            return caster(text)
        except ValueError:
            self.display_help(
                f"Invalid value {text} for option <--{opt}>", exit_code=1)

    def _validate(self):
        """Check required inputs and option combinations after parsing."""
        if not self.geodb:
            self.display_help(
                "No input provided <-i/--input>", exit_code=1)
        if not self.shapefile and self.run != "GENDIST":
            self.display_help(
                "No shapefile provided <-s/--shp>", exit_code=1)
        if not 1 <= self.ploidy <= 2:
            self.display_help(
                f"Ploidy of {self.ploidy} not currently "
                "allowable. Please choose 1 (haploid) or 2 (diploid)",
                exit_code=1)
        # Every metric other than PDIST needs a population assignment.
        # NOTE(review): --clusterpop is not counted here - confirm
        # whether DBSCAN-derived populations should satisfy this check.
        if self.dist != "PDIST" and not (self.pop or self.geopop):
            self.display_help(
                f"ERROR: Distance metric {self.dist} not possible "
                "without --pop or --geopop data.",
                exit_code=1)

    def display_help(self, message=None, exit_code=None):
        """Print an optional message and the usage menu, then exit.

        Args:
            message: Text printed ahead of the menu; skipped when
                ``None``.
            exit_code: Value passed to ``sys.exit``. The default
                ``None`` keeps the historical exit status of 0; the
                error paths in this class pass 1.

        Raises:
            SystemExit: Always.
        """
        if message is not None:
            print("\n", message)
        print("\nautostreamtree\n")
        print("Author: Tyler K Chafin, Biomathematics and Statistics Scotland")
        print(
            "Description: Methods for analysing genetic distances in "
            "networks."
        )
        print(
            "\nMandatory arguments:\n"
            " -s, --shp : Path to shapefile containing cleaned, "
            "contiguous stream reaches\n"
            " (can also support geodatabase or GPKG "
            "files)\n"
            " -i, --input : Input .tsv file containing sample "
            "coordinates\n"
            " -v, --vcf : Input VCF file containing genotypes\n\n"
            "General options:\n"
            " -o, --out : Output prefix [default=\"out\"]\n"
            " -O, --gdf_out : Output driver for annotated geodataframe "
            "(options \"SHP\", \"GPKG\", \"GDB\")\n"
            " -C, --concat : Concatenate all SNPs (\"all\"), by locus "
            "(\"loc\"), or not at all (\"none\")\n"
            " -n, --network : Provide an already optimized network "
            "output from a previous run\n"
            " --overwrite : Overwrite an input network (Only relevant "
            "with --network)\n"
            " -h, --help : Displays help menu\n"
            " -r, --run : Run which steps? Options:\n"
            " ALL : Run all steps\n"
            " GENDIST : Only calculate genetic distance matrix\n"
            " STREAMDIST : Only compute pairwise stream distances\n"
            " DISTANCES : Only compute GENDIST + STREAMDIST\n"
            " IBD : GENDIST + STREAMDIST + Mantel test\n"
            " STREAMTREE : GENDIST + STREAMDIST + fit StreamTree "
            "model\n"
            " RUNLOCI : Run STREAMTREE fitting on each locus\n"
            " -p, --pop : Pool individuals based on an input "
            "population map tsv file\n"
            " NOTE: The location will be taken as the centroid among "
            "individual samples\n"
            " -g, --geopop : Pool individuals having identical "
            "coordinates\n"
            " -c, --clusterpop: Use DBSCAN algorithm to automatically "
            "cluster populations\n"
            " --reachid_col : Attribute name representing primary key in "
            "shapefile [default=\"HYRIV_ID\"]\n"
            " --length_col : Attribute name giving length in kilometers "
            "[default=\"LENGTH_KM\"]\n\n"
            " --seed : Seed for RNG\n\n"
            "Genetic distance options:\n"
            " -d, --dist : Use which metric of distance? Options:\n"
            " Individual-based:\n"
            " PDIST : Uncorrected p-distances [# Differences / "
            "Length]\n"
            " Frequency models (when using --pop):\n"
            " FST : Weir and Cockerham's Fst formulation "
            "(=THETAst)\n"
            " LINFST : [default] Rousset's (1997) Fst "
            "[=Fst/(1-Fst)]\n"
            " JOST : Global estimate Jost's (2008) D\n"
            " HARMD : Harmonic mean of Jost's D per-locus\n"
            " CHORD : Cavalli-Sforza and Edwards (1967) chord "
            "distance\n"
            " --NOTE: Individual-based metrics can also be computed "
            "for\n"
            " populations. You can set how these are "
            "aggregated w/ --pop_agg\n"
            " --NOTE: Multiple loci for PDIST\n"
            " will be reported using the method defined in "
            "--loc_agg\n"
            " -G, --genmat : Skip calculation and use the provided "
            "labeled .tsv matrix\n"
            " --coercemat : [Boolean] Coerce negative values in input "
            "matrix to zero\n"
            " --het : [Boolean] Count partial differences [e.g. "
            "ind1=T, ind2=W]\n"
            " --global_het : Estimate Ht using global frequencies "
            "(default is averaged over pops)\n\n"
            "DBSCAN options (only when --clusterpop):\n"
            " --min_samples : Minimum samples per cluster [default=1]\n"
            " --epsilon : Maximum distance (in km) within a cluster "
            "[default=20]\n\n"
            "Aggregation options:\n"
            " -P, --pop_agg : Define aggregator function for certain "
            "genetic distances in pop samples\n"
            " -L, --loc_agg : Define aggregator function for aggregating "
            "locus-wise distances\n"
            " All of these can take the following options:\n"
            " ARITH : [default] Use arithmetic mean\n"
            " MEDIAN : Use median distance\n"
            " HARM : Use harmonic mean\n"
            " ADJHARM : Adjusted harmonic mean (see docs)\n"
            " GEOM : Use geometric mean\n"
            " MIN : Use minimum distance\n"
            " MAX : Use maximum distance\n\n"
            "IBD options:\n"
            " --perm : Number of permutations for mantel test "
            "[def=1000]\n"
            " --and_log : Also perform IBD steps with log geographic "
            "distances\n\n"
            "StreamTree options (see Kalinowski et al. 2008) :\n"
            " -w, --weight : Desired weighting for least-squares "
            "fitting:\n"
            " Options:\n"
            " FM67 : Fitch and Margoliash (1967) [w = 1/D^2]\n"
            " BEYER74 : Beyer et al. (1974) weights [w = 1/D]\n"
            " CSE67 : [default] Cavalli-Sforza & Edwards (1967) "
            "[w = 1]\n\n"
        )
        print()
        sys.exit(exit_code)
Classes
class parseArgs
-
Expand source code
class parseArgs():
    """Parse and validate the autostreamtree command line.

    Options are read from ``sys.argv[1:]`` with :mod:`getopt`. Every
    setting is stored as an instance attribute (defaults are assigned in
    ``__init__`` before the options are applied). Invalid or missing
    input prints a message plus the usage menu and exits through
    :meth:`display_help`.
    """

    # Option specification handed to getopt.
    _SHORT_OPTS = 'hs:i:r:p:d:a:lw:o:gP:L:Scn:G:v:C:O:'
    _LONG_OPTS = [
        "shp=", "help", "input=", "run=", "pop=", "popmap=", "dist=",
        "agg_method=", "het", "genmat=", "snp", "snps", "msat",
        "msats", "log", "and_log", "iterative", "weight=", "out=",
        "method=", "plots", "plot", "perm=", "phased", "median",
        "diploid", "geopop", "geopops", "global_het", "haploid",
        "loc_agg=", "pop_agg=", "sdist_agg=", "clusterpop",
        "epsilon=", "min_samples=", "sclusterpop", "network=",
        "overwrite", "reachid_col=", "length_col=", "coercemat",
        "locmatdir=", "vcf=", "concat=", "edge_list=", "gdf_out=",
        "seed=",
    ]

    # Options whose argument is stored verbatim: option -> attribute.
    _VALUE_OPTS = {
        "s": "shapefile", "shp": "shapefile",
        # 'coords' is a legacy spelling kept for compatibility; getopt
        # itself only accepts -i/--input.
        "i": "geodb", "input": "geodb", "coords": "geodb",
        "v": "vcf", "vcf": "vcf",
        "p": "pop", "pop": "pop", "popmap": "pop",
        "locmatdir": "locmatdir",
        "reachid_col": "reachid_col",
        "length_col": "length_col",
        "G": "genmat", "genmat": "genmat",
        "edge_list": "edge_list",
        "o": "out", "out": "out",
        "n": "network", "network": "network",
    }

    # Argument-less switches: option -> (attribute, value to assign).
    _FLAG_OPTS = {
        "g": ("geopop", True), "geopop": ("geopop", True),
        "geopops": ("geopop", True),
        "het": ("het", True),
        "l": ("log", True), "log": ("log", True),
        "and_log": ("and_log", True),
        "c": ("clusterpop", True), "clusterpop": ("clusterpop", True),
        "stream_fit": ("stream_fit", True),
        "overwrite": ("overwrite", True),
        "plot": ("plots", True), "plots": ("plots", True),
        "median": ("median", True),
        "diploid": ("ploidy", 2),
        "haploid": ("ploidy", 1),
        "global_het": ("global_het", True),
        "coercemat": ("coercemat", True),
    }

    # Numeric options: option -> (attribute, converter).
    _NUMBER_OPTS = {
        "seed": ("seed", int),
        "perm": ("permutations", int),
        "min_samples": ("min_samples", int),
        "epsilon": ("epsilon", float),
    }

    # Options getopt accepts but for which no handler exists. They used
    # to abort with an AssertionError; they are now reported and skipped.
    # NOTE(review): 'iterative', 'sdist_agg' and 'sclusterpop' have
    # default attributes but were never wired up - confirm the intent.
    _IGNORED_OPTS = frozenset((
        "a", "agg_method", "S", "snp", "snps", "msat", "msats",
        "iterative", "sdist_agg", "sclusterpop",
    ))

    _CONCAT_MODES = ("none", "all", "loc")
    _RUN_MODES = ("ALL", "GENDIST", "IBD", "STREAMDIST", "STREAMTREE",
                  "DISTANCES", "RUNLOCI")
    _DIST_METRICS = ("PDIST", "FST", "LINFST", "JOST", "NEI83", "CHORD",
                     "GST", "HARMD")
    _WEIGHTS = ("CSE67", "FM67", "BEYER74")
    _DRIVERS = ("GPKG", "SHP", "GDB")
    _AGG_METHODS = ("HARM", "ADJHARM", "ARITH", "GEOM", "MEDIAN", "MAX",
                    "MIN")

    # Shorthand spellings for --weight. The formulas follow the help
    # text: FM67 is w = 1/D^2, BEYER74 is w = 1/D, CSE67 is w = 1.
    _WEIGHT_ALIASES = {
        "FM": "FM67", "1/D^2": "FM67",
        "BEYER": "BEYER74", "1/D": "BEYER74",
        "1": "CSE67", "CSE": "CSE67",
    }

    def __init__(self):
        """Read ``sys.argv``, populate the settings and validate them.

        Raises:
            SystemExit: If the help menu is requested or any option is
                invalid or missing (raised through ``display_help``).
        """
        try:
            options, _ = getopt.getopt(
                sys.argv[1:], self._SHORT_OPTS, self._LONG_OPTS)
        except getopt.GetoptError as err:
            print(err)
            self.display_help(
                "\nExiting because getopt returned non-zero exit status.",
                exit_code=1)

        # Default values for params
        # Input params
        self.shapefile = None
        self.locmatdir = None
        self.geodb = None
        self.vcf = None
        self.concat = "none"
        self.run = "ALL"
        self.network = None
        self.pop = False
        self.overwrite = False
        self.geopop = False
        self.clusterpop = False
        self.sclusterpop = False
        self.output_driver = "GPKG"
        self.dist = "LINFST"
        self.het = False
        self.genmat = None
        self.log = False
        self.and_log = False
        self.iterative = True
        self.weight = "CSE67"
        self.permutations = 1000
        self.coercemat = False
        self.method = "PEARSON"
        self.edge_list = None
        self.plots = False
        self.out = "out"
        self.median = False
        self.ploidy = 2
        self.global_het = False
        self.loc_agg = "ARITH"
        self.pop_agg = "ARITH"
        self.sdist_agg = "ARITH"
        self.seed = None
        self.reachid_col = "HYRIV_ID"
        self.length_col = "LENGTH_KM"
        # Previously only created when the matching flag was passed.
        self.phased = False
        self.stream_fit = False

        # dbscan Options
        self.min_samples = 1
        self.epsilon = 20

        # A help request wins over everything else on the command line.
        if any(flag in ("-h", "--help") for flag, _ in options):
            self.display_help("Exiting because help menu was called.")

        # Later occurrences of an option override earlier ones.
        for flag, value in options:
            self._apply_option(flag.lstrip("-"), value.strip())

        self._validate()

    def _apply_option(self, opt, arg):
        """Store one parsed option (dashes already stripped from *opt*)."""
        if opt in ("h", "help"):
            return
        if opt in self._VALUE_OPTS:
            setattr(self, self._VALUE_OPTS[opt], arg)
        elif opt in self._FLAG_OPTS:
            attr, value = self._FLAG_OPTS[opt]
            setattr(self, attr, value)
        elif opt in self._NUMBER_OPTS:
            attr, caster = self._NUMBER_OPTS[opt]
            setattr(self, attr, self._to_number(caster, arg, opt))
        elif opt in ("C", "concat"):
            self._set_choice(
                "concat", arg.lower(), self._CONCAT_MODES, "<--concat>")
        elif opt in ("r", "run"):
            self._set_choice(
                "run", arg.upper(), self._RUN_MODES, "<-r/--run>")
        elif opt in ("d", "dist"):
            self._set_choice(
                "dist", arg.upper(), self._DIST_METRICS, "<-d/--dist>")
        elif opt in ("w", "weight"):
            weight = arg.upper()
            weight = self._WEIGHT_ALIASES.get(weight, weight)
            self._set_choice(
                "weight", weight, self._WEIGHTS, "<-w/--weight>")
        elif opt in ("O", "gdf_out"):
            self._set_choice(
                "output_driver", arg.upper(), self._DRIVERS, "<--gdf_out>")
        elif opt in ("P", "pop_agg"):
            self._set_choice(
                "pop_agg", arg.upper(), self._AGG_METHODS, "<--pop_agg>")
        elif opt in ("L", "loc_agg"):
            self._set_choice(
                "loc_agg", arg.upper(), self._AGG_METHODS, "<--loc_agg>")
        elif opt == "method":
            print("Sorry: Option --method is not yet implemented.")
            sys.exit(0)
        elif opt == "phased":
            self.phased = True
            print("WARNING: Option <--phased> not yet implemented")
        elif opt in self._IGNORED_OPTS:
            print(f"WARNING: Option <{opt}> is not yet implemented "
                  "and will be ignored")
        else:
            # Explicit exit instead of `assert`, which vanishes under -O.
            self.display_help(f"Unhandled option {opt}", exit_code=1)

    def _set_choice(self, attr, value, valid, flag):
        """Assign *value* to *attr*, or exit if it is not in *valid*."""
        if value not in valid:
            self.display_help(
                f"Invalid option {value} for option {flag}", exit_code=1)
        setattr(self, attr, value)

    def _to_number(self, caster, text, opt):
        """Convert *text* with *caster*; exit with a usage error if bad."""
        try:
            return caster(text)
        except ValueError:
            self.display_help(
                f"Invalid value {text} for option <--{opt}>", exit_code=1)

    def _validate(self):
        """Check required inputs and option combinations after parsing."""
        if not self.geodb:
            self.display_help(
                "No input provided <-i/--input>", exit_code=1)
        if not self.shapefile and self.run != "GENDIST":
            self.display_help(
                "No shapefile provided <-s/--shp>", exit_code=1)
        if not 1 <= self.ploidy <= 2:
            self.display_help(
                f"Ploidy of {self.ploidy} not currently "
                "allowable. Please choose 1 (haploid) or 2 (diploid)",
                exit_code=1)
        # Every metric other than PDIST needs a population assignment.
        # NOTE(review): --clusterpop is not counted here - confirm
        # whether DBSCAN-derived populations should satisfy this check.
        if self.dist != "PDIST" and not (self.pop or self.geopop):
            self.display_help(
                f"ERROR: Distance metric {self.dist} not possible "
                "without --pop or --geopop data.",
                exit_code=1)

    def display_help(self, message=None, exit_code=None):
        """Print an optional message and the usage menu, then exit.

        Args:
            message: Text printed ahead of the menu; skipped when
                ``None``.
            exit_code: Value passed to ``sys.exit``. The default
                ``None`` keeps the historical exit status of 0; the
                error paths in this class pass 1.

        Raises:
            SystemExit: Always.
        """
        if message is not None:
            print("\n", message)
        print("\nautostreamtree\n")
        print("Author: Tyler K Chafin, Biomathematics and Statistics Scotland")
        print(
            "Description: Methods for analysing genetic distances in "
            "networks."
        )
        print(
            "\nMandatory arguments:\n"
            " -s, --shp : Path to shapefile containing cleaned, "
            "contiguous stream reaches\n"
            " (can also support geodatabase or GPKG "
            "files)\n"
            " -i, --input : Input .tsv file containing sample "
            "coordinates\n"
            " -v, --vcf : Input VCF file containing genotypes\n\n"
            "General options:\n"
            " -o, --out : Output prefix [default=\"out\"]\n"
            " -O, --gdf_out : Output driver for annotated geodataframe "
            "(options \"SHP\", \"GPKG\", \"GDB\")\n"
            " -C, --concat : Concatenate all SNPs (\"all\"), by locus "
            "(\"loc\"), or not at all (\"none\")\n"
            " -n, --network : Provide an already optimized network "
            "output from a previous run\n"
            " --overwrite : Overwrite an input network (Only relevant "
            "with --network)\n"
            " -h, --help : Displays help menu\n"
            " -r, --run : Run which steps? Options:\n"
            " ALL : Run all steps\n"
            " GENDIST : Only calculate genetic distance matrix\n"
            " STREAMDIST : Only compute pairwise stream distances\n"
            " DISTANCES : Only compute GENDIST + STREAMDIST\n"
            " IBD : GENDIST + STREAMDIST + Mantel test\n"
            " STREAMTREE : GENDIST + STREAMDIST + fit StreamTree "
            "model\n"
            " RUNLOCI : Run STREAMTREE fitting on each locus\n"
            " -p, --pop : Pool individuals based on an input "
            "population map tsv file\n"
            " NOTE: The location will be taken as the centroid among "
            "individual samples\n"
            " -g, --geopop : Pool individuals having identical "
            "coordinates\n"
            " -c, --clusterpop: Use DBSCAN algorithm to automatically "
            "cluster populations\n"
            " --reachid_col : Attribute name representing primary key in "
            "shapefile [default=\"HYRIV_ID\"]\n"
            " --length_col : Attribute name giving length in kilometers "
            "[default=\"LENGTH_KM\"]\n\n"
            " --seed : Seed for RNG\n\n"
            "Genetic distance options:\n"
            " -d, --dist : Use which metric of distance? Options:\n"
            " Individual-based:\n"
            " PDIST : Uncorrected p-distances [# Differences / "
            "Length]\n"
            " Frequency models (when using --pop):\n"
            " FST : Weir and Cockerham's Fst formulation "
            "(=THETAst)\n"
            " LINFST : [default] Rousset's (1997) Fst "
            "[=Fst/(1-Fst)]\n"
            " JOST : Global estimate Jost's (2008) D\n"
            " HARMD : Harmonic mean of Jost's D per-locus\n"
            " CHORD : Cavalli-Sforza and Edwards (1967) chord "
            "distance\n"
            " --NOTE: Individual-based metrics can also be computed "
            "for\n"
            " populations. You can set how these are "
            "aggregated w/ --pop_agg\n"
            " --NOTE: Multiple loci for PDIST\n"
            " will be reported using the method defined in "
            "--loc_agg\n"
            " -G, --genmat : Skip calculation and use the provided "
            "labeled .tsv matrix\n"
            " --coercemat : [Boolean] Coerce negative values in input "
            "matrix to zero\n"
            " --het : [Boolean] Count partial differences [e.g. "
            "ind1=T, ind2=W]\n"
            " --global_het : Estimate Ht using global frequencies "
            "(default is averaged over pops)\n\n"
            "DBSCAN options (only when --clusterpop):\n"
            " --min_samples : Minimum samples per cluster [default=1]\n"
            " --epsilon : Maximum distance (in km) within a cluster "
            "[default=20]\n\n"
            "Aggregation options:\n"
            " -P, --pop_agg : Define aggregator function for certain "
            "genetic distances in pop samples\n"
            " -L, --loc_agg : Define aggregator function for aggregating "
            "locus-wise distances\n"
            " All of these can take the following options:\n"
            " ARITH : [default] Use arithmetic mean\n"
            " MEDIAN : Use median distance\n"
            " HARM : Use harmonic mean\n"
            " ADJHARM : Adjusted harmonic mean (see docs)\n"
            " GEOM : Use geometric mean\n"
            " MIN : Use minimum distance\n"
            " MAX : Use maximum distance\n\n"
            "IBD options:\n"
            " --perm : Number of permutations for mantel test "
            "[def=1000]\n"
            " --and_log : Also perform IBD steps with log geographic "
            "distances\n\n"
            "StreamTree options (see Kalinowski et al. 2008) :\n"
            " -w, --weight : Desired weighting for least-squares "
            "fitting:\n"
            " Options:\n"
            " FM67 : Fitch and Margoliash (1967) [w = 1/D^2]\n"
            " BEYER74 : Beyer et al. (1974) weights [w = 1/D]\n"
            " CSE67 : [default] Cavalli-Sforza & Edwards (1967) "
            "[w = 1]\n\n"
        )
        print()
        sys.exit(exit_code)
Methods
def display_help(self, message=None)
-
Expand source code
def display_help(self, message=None, exit_code=None):
    """Print an optional message and the usage menu, then exit.

    Args:
        message: Text printed ahead of the menu; skipped when ``None``.
        exit_code: Value passed to ``sys.exit``. The default ``None``
            keeps the historical exit status of 0; callers may pass a
            non-zero value to signal a usage error.

    Raises:
        SystemExit: Always.
    """
    if message is not None:
        print("\n", message)
    print("\nautostreamtree\n")
    print("Author: Tyler K Chafin, Biomathematics and Statistics Scotland")
    print(
        "Description: Methods for analysing genetic distances in "
        "networks."
    )
    # Adjacent literals are used instead of backslash continuations so
    # that source indentation can never leak into the printed text.
    print(
        "\nMandatory arguments:\n"
        " -s, --shp : Path to shapefile containing cleaned, "
        "contiguous stream reaches\n"
        " (can also support geodatabase or GPKG "
        "files)\n"
        " -i, --input : Input .tsv file containing sample "
        "coordinates\n"
        " -v, --vcf : Input VCF file containing genotypes\n\n"
        "General options:\n"
        " -o, --out : Output prefix [default=\"out\"]\n"
        " -O, --gdf_out : Output driver for annotated geodataframe "
        "(options \"SHP\", \"GPKG\", \"GDB\")\n"
        " -C, --concat : Concatenate all SNPs (\"all\"), by locus "
        "(\"loc\"), or not at all (\"none\")\n"
        " -n, --network : Provide an already optimized network "
        "output from a previous run\n"
        " --overwrite : Overwrite an input network (Only relevant "
        "with --network)\n"
        " -h, --help : Displays help menu\n"
        " -r, --run : Run which steps? Options:\n"
        " ALL : Run all steps\n"
        " GENDIST : Only calculate genetic distance matrix\n"
        " STREAMDIST : Only compute pairwise stream distances\n"
        " DISTANCES : Only compute GENDIST + STREAMDIST\n"
        " IBD : GENDIST + STREAMDIST + Mantel test\n"
        " STREAMTREE : GENDIST + STREAMDIST + fit StreamTree "
        "model\n"
        " RUNLOCI : Run STREAMTREE fitting on each locus\n"
        " -p, --pop : Pool individuals based on an input "
        "population map tsv file\n"
        " NOTE: The location will be taken as the centroid among "
        "individual samples\n"
        " -g, --geopop : Pool individuals having identical "
        "coordinates\n"
        " -c, --clusterpop: Use DBSCAN algorithm to automatically "
        "cluster populations\n"
        " --reachid_col : Attribute name representing primary key in "
        "shapefile [default=\"HYRIV_ID\"]\n"
        " --length_col : Attribute name giving length in kilometers "
        "[default=\"LENGTH_KM\"]\n\n"
        " --seed : Seed for RNG\n\n"
        "Genetic distance options:\n"
        " -d, --dist : Use which metric of distance? Options:\n"
        " Individual-based:\n"
        " PDIST : Uncorrected p-distances [# Differences / "
        "Length]\n"
        " Frequency models (when using --pop):\n"
        " FST : Weir and Cockerham's Fst formulation "
        "(=THETAst)\n"
        " LINFST : [default] Rousset's (1997) Fst "
        "[=Fst/(1-Fst)]\n"
        " JOST : Global estimate Jost's (2008) D\n"
        " HARMD : Harmonic mean of Jost's D per-locus\n"
        " CHORD : Cavalli-Sforza and Edwards (1967) chord "
        "distance\n"
        " --NOTE: Individual-based metrics can also be computed "
        "for\n"
        " populations. You can set how these are "
        "aggregated w/ --pop_agg\n"
        " --NOTE: Multiple loci for PDIST\n"
        " will be reported using the method defined in "
        "--loc_agg\n"
        " -G, --genmat : Skip calculation and use the provided "
        "labeled .tsv matrix\n"
        " --coercemat : [Boolean] Coerce negative values in input "
        "matrix to zero\n"
        " --het : [Boolean] Count partial differences [e.g. "
        "ind1=T, ind2=W]\n"
        " --global_het : Estimate Ht using global frequencies "
        "(default is averaged over pops)\n\n"
        "DBSCAN options (only when --clusterpop):\n"
        " --min_samples : Minimum samples per cluster [default=1]\n"
        " --epsilon : Maximum distance (in km) within a cluster "
        "[default=20]\n\n"
        "Aggregation options:\n"
        " -P, --pop_agg : Define aggregator function for certain "
        "genetic distances in pop samples\n"
        " -L, --loc_agg : Define aggregator function for aggregating "
        "locus-wise distances\n"
        " All of these can take the following options:\n"
        " ARITH : [default] Use arithmetic mean\n"
        " MEDIAN : Use median distance\n"
        " HARM : Use harmonic mean\n"
        " ADJHARM : Adjusted harmonic mean (see docs)\n"
        " GEOM : Use geometric mean\n"
        " MIN : Use minimum distance\n"
        " MAX : Use maximum distance\n\n"
        "IBD options:\n"
        " --perm : Number of permutations for mantel test "
        "[def=1000]\n"
        " --and_log : Also perform IBD steps with log geographic "
        "distances\n\n"
        "StreamTree options (see Kalinowski et al. 2008) :\n"
        " -w, --weight : Desired weighting for least-squares "
        "fitting:\n"
        " Options:\n"
        " FM67 : Fitch and Margoliash (1967) [w = 1/D^2]\n"
        " BEYER74 : Beyer et al. (1974) weights [w = 1/D]\n"
        " CSE67 : [default] Cavalli-Sforza & Edwards (1967) "
        "[w = 1]\n\n"
    )
    print()
    sys.exit(exit_code)