Source code for pysep.recsec

#!/usr/bin/env python3
"""
RECord SECtion plotting tool for seismic waveforms (observed and synthetic)

This is a refactor of Pysep's Python utility `plotw_rs`, a record section
plotting script. The intent of this script is to plot multiple time series
based on source-receiver characteristics (i.e., src-rcv distance, backazimuth).

.. note:: 
    Code History:  
    - Written by Carl Tape (11/21/2011) and Yun Wang (11/2011) in Matlab  
    - Translated to Python by Nealy Sims (1/2021)  
    - Upgraded by Aakash Gupta (9/2021)  
    - Refactored by Bryant Chow (3/2022)  
    - Currently maintained by adjTomo Dev Team

.. requires::
    obspy >= 1.2 (expected to bring in numpy and matplotlib)

.. rubric:: 

    1) Print the help message to see available options and flags
    $ python recsec.py -h

    2) From the command line: The following example code blocks work with pysep 
        to download data for a southern California event.

        # cd path/to/pysep
        $ python rungetwaveform.py event_input_mtuq2022 2  # 20200404015318920

    a) Plot a record section for the socal event with default values

        $ python recsec.py --pysep_path 20200404015318920 

    b) Plot high-passed data with 7 km/s move out (focused on direct arrivals),
        show direct arrivals through to surface waves for all traces, thin lines
        to accentuate high frequencies. Overwrite previous figure and split
        figure onto multiple pages

        $ python recsec.py --pysep_path 20200404015318920 \
            --move_out 7 --min_period_s 1 --xlim_s 100 175 \
            --linewidth .25 --max_traces_per_rs 60 --overwrite

    c) Plot bandpassed data with 4 km/s move out (focused on surface waves),
        horizontal components only (radial, transverse),
        thicken up default linewidth and increase spacing between adjacent 
        seismograms 

        $ python recsec.py --pysep_path 20200404015318920 \
            --components RT --move_out 4 --min_period_s 2 --max_period_s 50 \
            --xlim_s 50 200 --y_axis_spacing 3 --linewidth 1 \
            --amplitude_scale_factor 4 --overwrite 

    d) Plot bandpassed transverse component, sort by azimuth, scale by the 
        maximum amplitude of ALL traces shown on the figure. 
        Scale amplitudes by factor 2 for better visualization
        and start azimuth plotting at 180° (as opposed to the default 0°).

        $ python recsec.py --pysep_path 20200404015318920 \
            --sort_by azimuth --scale_by global_norm --components T \
            --min_period_s 2 --max_period_s 30 --move_out 4 \
            --amplitude_scale_factor 2 --azimuth_start_deg 180 \
            --linewidth 1 --overwrite

    e) Plot bandpassed vertical components sorted by absolute distance.
        Reduce amplitudes by 1/4 (0.25). Set y label fontsize smaller than 
        default and at the max X value of the figure (far to the right)

        $ python recsec.py --pysep_path 20200404015318920 \
            --sort_by abs_distance_r --components Z --min_period_s 2 \
            --max_period_s 50 --amplitude_scale_factor 0.25 \
            --y_label_loc x_max --y_label_fontsize 7 --overwrite \
            --linewidth 1

    3) From the Python interpreter: this is an example code block that can be
        written into a Python script or run from inside a Python interactive 
        environment. Code block assumes we have downloaded event data with Pysep

        >>> import os
        >>> from glob import glob
        >>> from obspy import read, Stream
        >>> from recsec import plotw_rs  # NOQA
        >>> st = Stream()
        >>> for fid in glob(os.path.join("20200404015318920", "*.?")):
        ...     st += read(fid)
        >>> plotw_rs(st=st, sort_by="distance_r")
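
        To filter waveforms before plotting, the same keyword arguments
        documented in `RecordSection` below can be passed through (a hedged
        sketch; it assumes `plotw_rs` forwards its keyword arguments on to
        `RecordSection`)

        >>> plotw_rs(st=st, sort_by="distance", preprocess="st",
        ...          min_period_s=2, max_period_s=50)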
"""
import os
import sys
import argparse
import textwrap
import numpy as np
import matplotlib.pyplot as plt

from glob import glob
from datetime import datetime
from matplotlib.ticker import MultipleLocator
from obspy import read, Stream
from obspy.geodetics import (kilometers2degrees, gps2dist_azimuth)

from pysep import logger
from pysep.utils.cap_sac import origin_time_from_sac_header, SACDICT
from pysep.utils.io import read_sem, read_sem_cartesian
from pysep.utils.curtail import subset_streams
from pysep.utils.plot import plot_geometric_spreading, set_plot_aesthetic

# Unicode degree symbol for plot text
DEG = u"\N{DEGREE SIGN}"


class Dict(dict):
    """Simple dictionary overload for nicer get/set attribute
    characteristics"""
    def __setattr__(self, key, value):
        self[key] = value

    def __getattr__(self, key):
        return self[key]
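
# A minimal sketch of the attribute-style access that `Dict` provides, as
# used internally for the `kwargs` and `stats` containers (illustrative
# values only):
# >>> d = Dict(linewidth=0.25)
# >>> d.linewidth        # equivalent to d["linewidth"]
# 0.25
# >>> d.color = "k"      # equivalent to d["color"] = "k"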
class RecordSection:
    """
    Record section plotting tool which takes ObsPy streams and:

    1) preprocesses and filters waveforms,
    2) sorts source-receiver pairs based on User input,
    3) produces record section waveform figures.
    """
    def __init__(self, pysep_path=None, syn_path=None, stations=None,
                 cmtsolution=None, st=None, st_syn=None, sort_by="default",
                 scale_by=None, time_shift_s=None, zero_pad_s=None,
                 move_out=None, min_period_s=None, max_period_s=None,
                 preprocess=None, max_traces_per_rs=None, integrate=0,
                 xlim_s=None, components="ZRTNE12", y_label_loc="default",
                 y_axis_spacing=1, amplitude_scale_factor=1,
                 azimuth_start_deg=0., distance_units="km", tmarks=None,
                 geometric_spreading_factor=0.5,
                 geometric_spreading_k_val=None,
                 geometric_spreading_exclude=None,
                 geometric_spreading_ymax=None, geometric_spreading_save=None,
                 figsize=(9, 11), show=True, save="./record_section.png",
                 overwrite=False, log_level="DEBUG", cartesian=False,
                 synsyn=False, **kwargs):
        """
        .. note:: Used for reading in Pysep-generated waveforms

        :type pysep_path: str
        :param pysep_path: path to Pysep output, which is expected to contain
            trace-wise SAC waveform files which will be read in.

        .. note:: Used for reading in SPECFEM-generated synthetic waveforms

        :type syn_path: str
        :param syn_path: full path to directory containing synthetic
            seismograms that have been outputted by SPECFEM. The synthetics
            are expected in the format: '*??.*.*.sem?*', which generally
            matches SPECFEM files like 'NZ.BFZ.BXE.semd'
        :type stations: str
        :param stations: full path to STATIONS file used to define the station
            coordinates. Format is dictated by SPECFEM
        :type cmtsolution: str
        :param cmtsolution: required for synthetics, full path to SPECFEM
            source file, which was used to generate SPECFEM synthetics.
            Example filenames are CMTSOLUTION, FORCESOLUTION, SOURCE.
        :type cartesian: bool
        :param cartesian: lets RecSec know that the domain is set in Cartesian
            coordinates (SPECFEM2D or SPECFEM3D_CARTESIAN in UTM), such that
            data/metadata reading will need to adjust accordingly because the
            ObsPy tools used to read metadata will fail for domains defined
            in XYZ
        :type synsyn: bool
        :param synsyn: flag to let RecSec know that we are plotting two sets
            of synthetic seismograms, such that both `pysep_path` and
            `syn_path` will attempt to read in synthetic data. Both sets of
            synthetics MUST share the same `cmtsolution` and `stations`
            metadata

        .. note:: Used for defining user-input waveform data

        :type st: obspy.core.stream.Stream
        :param st: Stream object containing observed time series to be
            plotted on the record section. Can contain any number of traces
        :type st_syn: obspy.core.stream.Stream
        :param st_syn: Stream object containing synthetic time series to be
            plotted on the record section. Must be the same length as `st`

        .. note:: Waveform plotting organization parameters

        :type sort_by: str
        :param sort_by: How to sort the Y-axis of the record section,
            available:

            - 'default': Don't sort, just iterate directly through Stream
            - 'alphabetical': sort alphabetically A->Z. Components are sorted
              separately with parameter `components`
            - 'azimuth': sort by source-receiver azimuth (deg) with constant
              vertical spacing on the y-axis. Requires `azimuth_start_deg`
            - 'backazimuth': sort by source-receiver backazimuth (deg) with
              constant vertical spacing. Requires `azimuth_start_deg`
            - 'distance': sort by source-receiver distance (km) with constant
              vertical spacing. Smallest distances at the top of the figure.
              Requires `distance_units`
            - 'abs_distance': absolute vertical spacing of waveforms defined
              by source-receiver distance. Smallest distance at the top of
              the figure. Requires `distance_units`
            - 'abs_azimuth': absolute vertical spacing of waveforms defined
              by source-receiver azimuth (deg)
            - 'abs_backazimuth': absolute vertical spacing of waveforms by
              source-receiver backazimuth (deg)
            - '*_r': append '_r' to any of the values above to REVERSE the
              sort, e.g., 'alphabetical_r' sorts Z->A
        :type scale_by: str
        :param scale_by: scale amplitude of waveforms by available:

            - None: Not set, no amplitude scaling, waveforms shown raw
            - 'normalize': scale each trace by its maximum amplitude, i.e.,
              a /= max(abs(a))  # where 'a' is the time series amplitudes
            - 'global_norm': scale by the largest amplitude to be displayed
              on the screen. Will not consider waveforms which have been
              excluded on other bases (e.g., wrong component), i.e.,
              st[i].max /= max([max(abs(tr.data)) for tr in st])
            - 'geometric_spreading': scale amplitudes by the expected
              reduction through geometric spreading. Related parameters are:

              - `geometric_spreading_factor`
              - `geometric_spreading_k_val`
              - `geometric_spreading_exclude`
              - `geometric_spreading_ymax`

              The equation is A(d) = k / sin(d) ** f, where A(d) is the
              amplitude reduction factor as a function of distance, d; 'k'
              is the `geometric_spreading_k_val` and 'f' is the
              `geometric_spreading_factor`. 'k' is calculated automatically
              if not given.
        :type time_shift_s: float OR list of float OR str
        :param time_shift_s: apply static time shift to waveforms, three
            options:

            1. float (e.g., -10.2), will shift ALL waveforms by that number
               (i.e., a -10.2 second time shift is applied)
            2. list (e.g., [5., -2., ... 11.2]), will apply individual time
               shifts to EACH trace in the stream. The length of this list
               MUST match the number of traces in your input stream.
            3. str: apply time shift based on a theoretical TauP phase
               arrival if available in the SAC header. These should have been
               appended by PySEP during data download. If no value is
               available in the SAC header, defaults to 0. This may have
               unintended consequences, so you should manually check that all
               values are okay. Available options are:

               - 'first_arrival_time': shift based on earliest phase arrival
               - 'p_arrival_time': shift based on earliest P phase arrival
               - 's_arrival_time': shift based on earliest S phase arrival
        :type zero_pad_s: list
        :param zero_pad_s: zero pad data in units of seconds. Applied after
            tapering and before filtering. Input as a tuple of floats,

            * (start, end): a list of floats will zero-pad the START and END
              of the trace. Either can be 0 to ignore zero-padding at that end
        :type amplitude_scale_factor: float OR list of float
        :param amplitude_scale_factor: apply scale factor to all amplitudes.
            Used as a dial to adjust amplitudes manually. Defaults to 1.
            Two options:

            1. float (e.g., 1.2), will multiply ALL waveforms by that number
            2. list (e.g., [5., -2., ... 11.2]), will apply individual
               amplitude scaling to EACH trace in the stream. The length of
               this list MUST match the number of traces in your input stream.
        :type move_out: float
        :param move_out: Optional. A velocity value that will be used to
            calculate move out, which will time shift seismograms based on
            their source-receiver distance. This parameter will be ADDED to
            `time_shift_s` (both float and list), if it is provided. Should
            be in units of `distance_units`/s
        :type azimuth_start_deg: float
        :param azimuth_start_deg: If sorting by azimuth, this defines the
            azimuthal angle for the waveform at the top of the figure. Set to
            0 for default behavior
        :type distance_units: str
        :param distance_units: Y-axis units when sorting by epicentral
            distance:

            - 'km': kilometers on the sphere
            - 'deg': degrees on the sphere
            - 'km_utm': kilometers on a flat plane, UTM coordinate system

        .. note:: Data processing parameters

        :type preprocess: str
        :param preprocess: choose which data to preprocess, options are:

            - None: do not run preprocessing step, including filter (default)
            - 'st': process waveforms in `st`
            - 'st_syn': process waveforms in `st_syn`. `st` must still be
              given
            - 'both': process waveforms in both `st` and `st_syn`
        :type min_period_s: float
        :param min_period_s: minimum filter period in seconds
        :type max_period_s: float
        :param max_period_s: maximum filter period in seconds
        :type components: str
        :param components: a sequence of strings representing acceptable
            components from the data. Also determines the order these are
            shown EVEN when sorted by other variables. For example,
            components=='ZR' would only display Z and R components, and Z
            components would be shown BEFORE R components for the SAME
            station.
        :type integrate: int
        :param integrate: apply integration `integrate` times on all traces.
            Acceptable values [-inf, inf], where positive values are
            integration and negative values are differentiation. E.g., if
            integrate == 2, each trace will be integrated twice; if
            integrate == -1, each trace will be differentiated once; if
            integrate == 0, nothing is applied (default)

        .. note:: Geometric spreading parameters, used for amplitude scaling

        :type geometric_spreading_factor: float
        :param geometric_spreading_factor: factor to scale amplitudes by
            predicting the expected geometric spreading amplitude reduction
            and correcting for this factor. Related optional parameter:
            `geometric_spreading_k_val`. For Rayleigh waves,
            `geometric_spreading_factor` == 0.5 (default)
        :type geometric_spreading_k_val: float
        :param geometric_spreading_k_val: Optional constant scaling value
            used to scale the geometric spreading factor equation. If not
            given, calculated automatically using max amplitudes. Value
            should be between 0.5 and 1.0 for regional surface waves.
        :type geometric_spreading_exclude: list
        :param geometric_spreading_exclude: a list of station names that
            should match the input stations. Used to exclude stations from
            the automatic calculation of the geometric spreading.
        :type geometric_spreading_ymax: float
        :param geometric_spreading_ymax: Optional value for the geometric
            spreading plot. Sets the max y-value on the plot. If not set,
            defaults to whatever the peak plotted y-value is.
        :type geometric_spreading_save: str
        :param geometric_spreading_save: file id to save a separate geometric
            spreading scatter plot iff `scale_by`=='geometric_spreading'. If
            NoneType, will not save. By default, turned OFF

        .. note:: Figure generation control parameters

        :type max_traces_per_rs: int
        :param max_traces_per_rs: maximum number of traces to show on a
            single record section plot. Defaults to all traces in the Stream
        :type xlim_s: list of float
        :param xlim_s: [start, stop] in units of time, seconds, to set the
            xlimits of the figure
        :type y_axis_spacing: float
        :param y_axis_spacing: spacing between adjacent seismograms applied
            to the Y-axis on relative (not absolute) scales. Defaults to 1.
        :type y_label_loc: str
        :param y_label_loc: Location to place waveform labels on the y-axis

            - 'default': auto-choose the best location based on `sort_by`
            - 'y_axis': replace tick labels on the y-axis (left side of
              figure). This won't work if using absolute sorting and will be
              overwritten by 'default'
            - 'y_axis_abs': for absolute y-axis only. Waveform labels plotted
              on the left side outside the border, with y-axis labels
              overlapping (showing distance or azimuth)
            - 'y_axis_abs_right': for absolute y-axis only. Waveform labels
              plotted on the right side outside the border, with y-axis
              labels on the left side of the figure (showing distance or
              azimuth)
            - 'y_axis_right': replace tick labels on the right side of the
              y-axis. This option won't work with absolute sorting
            - 'x_min': place labels on top of the waveforms at the global min
              x-value on the figure
            - 'x_max': place labels on top of the waveforms at the global max
              x-value on the figure
            - None: don't plot any text labels
        :type tmarks: list of float
        :param tmarks: place vertical lines at given reference times. Used
            for marking reference times such as the event origin, or a phase
            arrival. Input as a list of times in units of seconds (where T=0
            is the event origin time). For example, `tmarks`=[0, 100, 200]
            would set vertical lines at 0s, 100s and 200s
        :type figsize: tuple of float
        :param figsize: size of the figure, passed into plt.subplots()
        :type show: bool
        :param show: show the figure as a graphical output
        :type save: str
        :param save: path to save output figure, will create the parent
            directory if it doesn't exist. If None, will not save (default).

        .. note:: Internal RecSec parameters

        :type overwrite: bool
        :param overwrite: if the path defined by `save` exists, will
            overwrite the existing figure
        :type log_level: str
        :param log_level: level of the internal logger. In order of ascending
            verbosity: 'CRITICAL', 'WARNING', 'INFO', 'DEBUG'.
        :raises AssertionError: if any parameters are set incorrectly
        """
        # Set the logger level before running anything
        logger.setLevel(log_level)

        # Check file path existence before we do any reading
        if pysep_path is not None:
            assert(os.path.exists(pysep_path)), \
                f"`pysep_path` given but does not exist: '{pysep_path}'"
        if syn_path is not None:
            assert(os.path.exists(syn_path)), \
                f"`syn_path` given but does not exist: '{syn_path}'"

        # Read files from path if provided
        if pysep_path is not None:
            if not synsyn:
                # Expecting to find SAC files labelled as such
                fids = glob(os.path.join(pysep_path, "*.sac"))
                if not fids:
                    # Or if legacy naming schema, assume that the SAC files
                    # have a given file format: event_tag.NN.SSS..LL.CC.c
                    fids = glob(os.path.join(pysep_path, "*.*.*.*.*.?"))
                if fids:
                    logger.info(f"Reading {len(fids)} files from: "
                                f"{pysep_path}")
                    # Overwrite stream, so reading takes precedence
                    st = Stream()
                    for fid in fids:
                        st += read(fid)
            else:
                # User has told us that we want to read 'data' as synthetics,
                # useful for comparing e.g., a synthetic-synthetic inversion
                logger.debug("reading `pysep_path` expecting "
                             "SPECFEM-generated synthetic data")
                st = self._generate_synthetic_stream(syn_path=pysep_path,
                                                     source=cmtsolution,
                                                     stations=stations,
                                                     cartesian=cartesian)

        # Read in SPECFEM-generated synthetics and generate SAC headed streams
        if syn_path is not None:
            assert(os.path.exists(syn_path)), \
                f"`syn_path` given but does not exist: '{syn_path}'"
            st_syn = self._generate_synthetic_stream(syn_path=syn_path,
                                                     source=cmtsolution,
                                                     stations=stations,
                                                     cartesian=cartesian)

        # Allow plotting ONLY synthetics and no data
        if st is None:
            st = st_syn.copy()
            st_syn = None
        assert st, ("Stream object not found, please check inputs `st` "
                    "and `pysep_path`")

        # User defined parameters, do some type-setting
        self.st = st.copy()
        try:
            self.st_syn = st_syn.copy()
        except AttributeError:
            self.st_syn = None

        # Y-Axis sorting parameters
        self.sort_by = sort_by.lower()
        self.y_axis_spacing = float(y_axis_spacing)
        self.azimuth_start_deg = float(azimuth_start_deg)
        self.components = str(components)

        # Amplitude scaling parameters
        self.scale_by = scale_by
        self.amplitude_scale_factor = amplitude_scale_factor
        self.geometric_spreading_factor = float(geometric_spreading_factor)
        self.geometric_spreading_k_val = geometric_spreading_k_val
        self.geometric_spreading_exclude = geometric_spreading_exclude or []
        self.geometric_spreading_ymax = geometric_spreading_ymax
        self.geometric_spreading_save = geometric_spreading_save

        # Time shift parameters
        self.move_out = move_out
        # Float check in case command line input provides this as a string,
        # and also to ensure int -> float
        try:
            self.time_shift_s = float(time_shift_s)
        except (TypeError, ValueError):
            self.time_shift_s = time_shift_s
        self.zero_pad_s = zero_pad_s

        # Filtering parameters
        self.min_period_s = min_period_s
        self.max_period_s = max_period_s
        self.preprocess = preprocess
        self.max_traces_per_rs = max_traces_per_rs
        self.integrate = int(integrate)

        # Plotting parameters
        self.xlim_s = xlim_s
        self.distance_units = distance_units.lower()
        self.y_label_loc = y_label_loc
        self.tmarks = tmarks
        self.figsize = figsize
        self.show = bool(show)
        self.save = save
        self.overwrite = bool(overwrite)
        self.kwargs = Dict(kwargs)

        # Run checks to ensure that all the parameters are set properly
        # and get some new, initial parameters that are required for other
        # 'get' functions
        self.check_parameters()
        self.stats = self.get_srcrcv_stats()
        self.distances, self.azimuths, self.backazimuths = \
            self.get_srcrcv_dist_az_baz()

        # Internally used parameters that will be filled out by other
        # functions
        self.f = None
        self.ax = None
        self.idx = []
        self.station_ids = []
        self.max_amplitudes = []
        self.max_amplitudes_syn = []
        self.amplitude_scaling = []
        self.amplitude_scaling_syn = []
        self.y_axis = []
        self.xlim = []  # unit: samples
        self.xlim_syn = []
        self.sorted_idx = []
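
    # A minimal construction sketch using the parameters documented above
    # and the example event tag from the module docstring. The typical entry
    # point is the module-level `plotw_rs` function; direct class usage here
    # is a hedged illustration only.
    # >>> rs = RecordSection(pysep_path="20200404015318920",
    # ...                    sort_by="distance", min_period_s=2,
    # ...                    max_period_s=50, preprocess="st", show=False)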
    def _generate_synthetic_stream(self, syn_path, source, stations,
                                   cartesian=False, fmt="*.*.*.sem?*"):
        """
        Convenience function to read in synthetic seismograms from SPECFEM2D,
        SPECFEM3D or SPECFEM3D_GLOBE. Can be used to read in both `st` and
        `st_syn`

        :type syn_path: str
        :param syn_path: full path to directory containing synthetic
            seismograms
        :type source: str
        :param source: path to the source file which defined the source that
            generated the synthetics. Acceptable values are CMTSOLUTION (from
            SPECFEM3D/GLOBE), and SOURCE (from SPECFEM2D)
        :type stations: str
        :param stations: full path to STATIONS file used to define the
            station coordinates. Format is dictated by SPECFEM
        :type fmt: str
        :param fmt: the expected filename format of the synthetics. Based on
            ASCII style files generated by SPECFEM. Defaults to
            '*.*.*.sem?*'. Expected filename looks something like:
            'NN.SSS.BXC.semd'
        :rtype: obspy.core.stream.Stream
        :return: Stream object with synthetic waveforms
        """
        assert(source is not None and os.path.exists(source)), (
            f"If `syn_path` is given, RecSec also requires `cmtsolution`"
        )
        assert(stations is not None and os.path.exists(stations)), (
            f"If `syn_path` is given, RecSec also requires `stations`"
        )
        fids = glob(os.path.join(syn_path, fmt))
        if fids:
            logger.info(f"Reading {len(fids)} synthetics from: {syn_path}")
            st_syn = Stream()
            for fid in fids:
                logger.debug(fid)
                if not cartesian:
                    st_syn += read_sem(fid=fid, source=source,
                                       stations=stations)
                else:
                    # If we are using SPECFEM2D synthetics, trying to read
                    # the SOURCE file with the standard reader will fail, so
                    # use the Cartesian-specific reader instead
                    st_syn += read_sem_cartesian(fid=fid, source=source,
                                                 stations=stations)
            return st_syn
    def check_parameters(self):
        """
        Check that parameters are set properly and in line with how they are
        expected by the program

        .. note::
            Not using assertions here because we want all incorrect
            parameters to be evaluated together and displayed at once, that
            way the user doesn't have to run this function multiple times to
            figure out how to set their parameters correctly

        :raises AssertionError: If any parameters are not set as expected by
            plotw_rs functionality
        """
        logger.info("checking parameter acceptability")

        # Used to keep track of which parameters failed and in what way
        err = Dict()

        # Check to make sure there is data
        if not bool(self.st):
            err.st = f"stream has {len(self.st)} traces, no data"

        # Check that the stream has SAC headers if we want to sort by dist
        # or (b)az. Pysep should have created these
        if self.sort_by != "default" and \
                any(_ in self.sort_by for _ in ["azimuth", "distance"]):
            _idx = []
            for i, tr in enumerate(self.st):
                if not hasattr(tr.stats, "sac"):
                    _idx.append(i)
            if _idx:
                err.st = (f"{len(_idx)} traces have no SAC header, recsec "
                          f"expects SAC headers for sorting. Trace indexes "
                          f"are: {_idx}")

        # Make sure stream and synthetic stream have the same length. If they
        # don't, subset so that they do.
        if self.st_syn is not None:
            self.st, self.st_syn = subset_streams(self.st, self.st_syn)
            if len(self.st) != len(self.st_syn):
                err.st_syn = (f"length must match `st` (which is "
                              f"{len(self.st)})")

        # Check the `sort_by` sorting parameter options
        acceptable_sort_by = ["default", "azimuth", "backazimuth",
                              "distance", "alphabetical", "abs_azimuth",
                              "abs_distance", "abs_backazimuth"]
        # Allow reverse sorts
        acceptable_sort_by += [f"{_}_r" for _ in acceptable_sort_by]
        if self.sort_by not in acceptable_sort_by:
            err.sort_by = f"must be in {acceptable_sort_by}"

        if "azimuth" in self.sort_by:
            if not (0 <= self.azimuth_start_deg <= 360):
                err.azimuth_start_deg = \
                    f"must satisfy 0 <= azimuth_start_deg <= 360"

        acceptable_distance_units = ["km", "km_utm", "deg"]
        if ("distance" in self.sort_by) and \
                (self.distance_units not in acceptable_distance_units):
            err.distance_units = f"must be in {acceptable_distance_units}"

        if self.scale_by is not None:
            acceptable_scale_by = ["normalize", "global_norm",
                                   "geometric_spreading"]
            if self.scale_by not in acceptable_scale_by:
                err.scale_by = f"must be in {acceptable_scale_by}"

        if self.time_shift_s is not None:
            acceptable_time_shift_s = [float, list, str]
            if type(self.time_shift_s) not in acceptable_time_shift_s:
                err.time_shift_s = f"must be in {acceptable_time_shift_s}"
            if isinstance(self.time_shift_s, list) and \
                    len(self.time_shift_s) != len(self.st):
                err.time_shift_s = f"must be list of length {len(self.st)}"
            elif isinstance(self.time_shift_s, str):
                if self.time_shift_s not in SACDICT:
                    err.time_shift_s = \
                        f"must be in {SACDICT} ending w/ '_time'"

        if self.zero_pad_s is not None:
            assert(isinstance(self.zero_pad_s, list)), (
                f"`zero_pad_s` must be a list of floats representing the "
                f"[`start`, `end`] amount to pad around the trace, units "
                f"seconds"
            )
            assert(len(self.zero_pad_s) == 2), (
                f"`zero_pad_s` must be a list of floats of length 2"
            )
            _start, _end = self.zero_pad_s
            assert(_start >= 0. and _end >= 0.), \
                f"`zero_pad_s` values must be >= 0"

        # Defaults to float value of 1
        acceptable_scale_factor = [int, float, list]
        if type(self.amplitude_scale_factor) not in acceptable_scale_factor:
            err.amplitude_scale_factor = \
                f"must be in {acceptable_scale_factor}"
        if isinstance(self.amplitude_scale_factor, list) and \
                len(self.amplitude_scale_factor) != len(self.st):
            err.amplitude_scale_factor = \
                f"must be list length {len(self.st)}"

        if self.min_period_s is not None and self.max_period_s is not None:
            if self.min_period_s >= self.max_period_s:
                err.min_period_s = "must be less than `max_period_s`"
        if self.min_period_s is not None or self.max_period_s is not None:
            assert(self.preprocess is not None), \
                f"Setting filter bounds requires `preprocess` flag to be set"

        if self.preprocess is not None:
            acceptable_preprocess = ["both", "st", "st_syn"]
            if self.preprocess not in acceptable_preprocess:
                err.preprocess = f"must be in {acceptable_preprocess}"
            if self.preprocess in ["st_syn", "both"]:
                assert(self.st is not None and self.st_syn is not None), (
                    f"`preprocess` choice requires both `st` & `st_syn` to "
                    f"exist. If you only have one or the other, set: "
                    f"`preprocess`=='st'"
                )

        # Overwrite the max traces per record section, enforce type int
        if self.max_traces_per_rs is None:
            self.max_traces_per_rs = int(len(self.st))
        else:
            self.max_traces_per_rs = int(self.max_traces_per_rs)

        if self.xlim_s is not None:
            if len(self.xlim_s) != 2:
                err.xlim_s = f"must be of length 2, [start, stop]"
            elif self.xlim_s[0] > self.xlim_s[1]:
                err.xlim_s = f"start time must be less than stop time"

        acceptable_y_label_loc = ["default", "y_axis", "y_axis_right",
                                  "x_min", "x_max", "y_axis_abs",
                                  "y_axis_abs_right", None]
        if self.y_label_loc not in acceptable_y_label_loc:
            err.y_label_loc = f"must be in {acceptable_y_label_loc}"
        if "abs" in self.sort_by and \
                self.y_label_loc in ["y_axis", "y_axis_right"]:
            err.y_label_loc = (f"absolute sorting means 'y_axis' label loc "
                               f"is not available")

        if len(self.figsize) != 2:
            err.figsize = \
                "must be tuple defining (horizontal, vertical) extent"

        if self.save:
            if os.path.exists(self.save) and not self.overwrite:
                err.save = (f"path {self.save} already exists. Use "
                            f"'--overwrite' flag to save over existing "
                            f"figures")
            else:
                _dirname = os.path.abspath(os.path.dirname(self.save))
                if not os.path.exists(_dirname):
                    logger.info(f"creating output directory {_dirname}")
                    os.makedirs(_dirname)

        if err:
            out = "ERROR - Parameter errors, please make following changes:\n"
            out += "\n".join([f"`{key}`: {val}" for key, val in err.items()])
            logger.info(out)
            sys.exit(-1)
    def get_skip_idx(self):
        """
        Get a list of any traces that don't adhere to user-defined boundaries
        such as dist, az, baz, id, or component matches. Don't actually
        remove the traces from the stream but rather just collect indices we
        can use to skip when plotting.

        TODO add distance, azi and backazi skip criteria

        :rtype: np.array
        :return: returns an indexing list which can be used to skip over
            traces that don't adhere to certain criteria
        """
        logger.info(f"determining if any stations/channels should be skipped")
        skip_idx = []
        for idx in self.idx:
            tr = self.st[idx]
            # Component-wise removal
            if tr.stats.component not in self.components:
                logger.debug(f"skip '{tr.get_id()}' for non-matching "
                             f"component")
                skip_idx.append(idx)
            # !!! Add more here
        logger.info(f"criteria check will remove "
                    f"{len(skip_idx)}/{len(self.st)} traces")

        return np.array(skip_idx)
    def get_parameters(self):
        """
        Calculate parameters in a specific order and based on the
        user-defined information.

        .. note::
            The order of function calls here is important! Some of the 'get'
            functions require the results of other 'get' functions.

        Calculated Parameters ::

            np.array idx:
                a linear indexing of all the traces in the stream
            np.array station_ids:
                an ordered list of station ids, used to get station names
                that match the index defined in `idx`
            np.array max_amplitudes:
                abs max amplitudes of each trace, used for normalization
            np.array amplitude_scaling:
                An array to scale amplitudes based on user choices
            np.array time_shift_s:
                An array to time shift time series based on user choices
            np.array y_axis:
                Y-Axis values based on sorting algorithm, used for plotting
            np.array distances:
                source-receiver distances in `distance_units` units
            np.array azimuths:
                source-receiver azimuths in degrees
            np.array backazimuths:
                source-receiver backazimuths in degrees
            np.array sorted_idx:
                sorted indexing on all the traces of the stream based on the
                chosen sorting algorithm
        """
        # Extract basic information from the Stream
        self.idx = np.arange(0, len(self.st), 1)
        self.station_ids = np.array([tr.get_id() for tr in self.st])

        self.time_shift_s = self.get_time_shifts()  # !!! OVERWRITES user input
        # Needs to be run before getting xlims so that we know the time offset
        self.get_time_offsets()
        self.xlim = self.get_xlims()

        # Get xlims for synthetic waveforms which may have different
        # start/end times
        if self.st_syn is not None:
            self.xlim_syn = self.get_xlims(st=self.st_syn)

        # Max amplitudes should be RELATIVE to what we're showing (e.g., if
        # zoomed in on the P-wave, max amplitude should NOT be the surface
        # wave)
        for tr, xlim in zip(self.st, self.xlim):
            start, stop = xlim
            self.max_amplitudes = np.append(self.max_amplitudes,
                                            max(abs(tr.data[start:stop])))
        self.max_amplitudes = np.array(self.max_amplitudes)

        # Max amplitudes will be DIFFERENT for synthetics, which affects
        # normalizations and thus requires its own array
        if self.st_syn is not None:
            for tr, xlim in zip(self.st_syn, self.xlim):
                start, stop = xlim
                self.max_amplitudes_syn = np.append(
                    self.max_amplitudes_syn, max(abs(tr.data[start:stop]))
                )
            self.max_amplitudes_syn = np.array(self.max_amplitudes_syn)

        # Figure out which indices we'll be plotting
        sorted_idx = self.get_sorted_idx()
        skip_idx = self.get_skip_idx()
        # Remove skip indexes from sorted index to get the final ordered list
        self.sorted_idx = np.array([_ for _ in sorted_idx
                                    if _ not in skip_idx])

        # Figure out how to manipulate each of the traces in the Stream
        self.y_axis = self.get_y_axis_positions()
        self.amplitude_scaling = self.get_amplitude_scaling(_choice="st")
        if self.st_syn:
            self.amplitude_scaling_syn = \
                self.get_amplitude_scaling(_choice="st_syn")
    def get_xlims(self, st=None):
        """
        The x-limits of each trace depend on the overall time shift (either
        static or applied through move out), as well as the sampling rate of
        each trace (which can vary). Retrieve an index-dependent list of
        x-limits which can be used to truncate the time series during
        plotting.

        .. note:: Requires that get_time_shifts() has already been run

        :type st: obspy.core.stream.Stream
        :param st: stream object to get xlims for. By default this is the
            'data' stored in `st` but it can also be given `st_syn` to get
            synthetic x limits which may differ
        :rtype: np.array
        :return: an array of tuples defining the start and stop indices for
            EACH trace to be used during plotting. Already includes time
            shift information so xlim can be applied DIRECTLY to the time
            shifted data
        """
        if st is None:
            st = self.st

        xlim = []
        if self.xlim_s is None:
            # None's will index the entire trace
            xlim = np.array([(None, None) for _ in range(len(st))])
        else:
            # Looping to allow for delta varying among traces,
            # AND apply the time shift so that indices can be used directly
            # in the plotting function
            for tr, tshift in zip(st, self.time_shift_s):
                start, stop = [int(_/tr.stats.delta) for _ in self.xlim_s]
                sshift = int(tshift / tr.stats.delta)  # unit: samples
                # These indices define the index of the user-chosen timestamp
                start_index = start - sshift
                end_index = stop - sshift

                # Shift by the total amount that the zero pad adjusted the
                # starttime
                if self.zero_pad_s:
                    zero_pad_index = int(self.zero_pad_s[0]/tr.stats.delta)
                    start_index += zero_pad_index
                    end_index += zero_pad_index

                # Address time shift introduced by traces which do not start
                # at the origin time
                if hasattr(tr.stats, "time_offset"):
                    start_index += abs(int(tr.stats.time_offset /
                                           tr.stats.delta))
                    end_index += abs(int(tr.stats.time_offset /
                                         tr.stats.delta))

                # When setting xlimits, we cannot access out of bounds
                # (negative indices or greater than max). This might happen
                # when the User asks for `xlim_s` that is outside data
                # bounds. In this case we just plot up to the end of the data
                if start_index < 0:
                    logger.warning(f"trying to access negative time axis "
                                   f"index for xlimit of {tr.get_id()}, "
                                   f"setting to 0")
                    start_index = 0
                if end_index > len(tr.data):
                    logger.warning(f"trying to access past time axis index "
                                   f"for xlimit of {tr.get_id()}, setting "
                                   f"to length of data trace")
                    end_index = len(tr.data)

                xlim.append((start_index, end_index))
            xlim = np.array(xlim)

        return xlim
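
    # A worked example of the index math above (illustrative values): with
    # delta=0.02 s, xlim_s=[100, 175] gives start, stop = 5000, 8750 samples.
    # A time shift of -20 s gives sshift = -1000 samples, so
    # start_index = 5000 - (-1000) = 6000 and end_index = 8750 + 1000 = 9750,
    # i.e., the index window slides opposite to the applied shift so the same
    # absolute timestamps remain in view.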
    def get_srcrcv_stats(self):
        """
        Get source-receiver information such as min/max values, and
        count-related numbers (e.g., num stations) to be used mainly for
        print statements and text information

        Stats Arguments ::

            np.array event_names:
                unique event names taken from the SAC header
            int nevents:
                number of unique events in the stream
            np.array unique_sta_ids:
                unique station codes taken from trace stats
            int nstation_ids:
                number of unique station codes
            np.array network_codes:
                unique network codes taken from station ids
            int nnetwork:
                number of unique network codes
            np.array station_codes:
                unique station codes taken from station ids
            int nstation:
                number of unique station codes
            np.array location_codes:
                unique location codes taken from station ids
            int nlocation:
                number of unique location codes
            np.array channel_codes:
                unique channel codes taken from station ids
            int nchannel:
                number of unique channel codes
            bool reverse_sort:
                determine if the user wants to reverse their sort, they do
                this by appending '_r' to the end of the `sort_by` argument
        """
        logger.info("getting source-receiver stats")

        def _unique(list_):
            """return a unique numpy array derived from a list"""
            return np.unique(np.array(list_, dtype=str))

        stats = Dict()
        stats.event_names = _unique([tr.stats.sac.kevnm for tr in self.st])
        stats.nevents = len(stats.event_names)
        stats.unique_sta_ids = _unique([tr.get_id() for tr in self.st])
        stats.longest_id = max([len(_) for _ in stats.unique_sta_ids])
        stats.nstation_ids = len(stats.unique_sta_ids)

        # Get unique network, station, location and channel codes. Also
        # count the number of each
        for name in ["network", "station", "location", "channel",
                     "component"]:
            stats[f"{name}_codes"] = _unique(
                [getattr(tr.stats, name) for tr in self.st]
            )
            stats[f"n{name}"] = len(stats[f"{name}_codes"])

        # We use `not` in `reverse_sort` to make the top of the y-axis the
        # starting point, which seems more intuitive for record sections, but
        # is opposite the behavior when you increment from 0
        stats.reverse_sort = not bool("_r" in self.sort_by)

        # Initiate empty lists for _plot_trace() to fill with min and max
        # data values which can be used for global plotting parameters like
        # xlims
        stats.xmin, stats.xmax, stats.ymin, stats.ymax = [], [], [], []

        return stats
    def get_time_offsets(self):
        """
        Find the time offset of each trace, constituting the difference
        between trace start time and event origin time. Both synthetics and
        data should have the correct timing. Time offsets will be used during
        plotting to make sure all traces have the same T=0

        .. note::
            Appends the time offset directly to the stats header, overwriting
            any value that might have been there already
        """
        event_origin_time = origin_time_from_sac_header(self.st[0].stats.sac)
        logger.info(f"calculating starttime offsets from event origin time "
                    f"{event_origin_time}")
        for tr in self.st:
            tr.stats.time_offset = tr.stats.starttime - event_origin_time
        if self.st_syn is not None:
            for tr in self.st_syn:
                tr.stats.time_offset = tr.stats.starttime - event_origin_time
    def get_time_shifts(self):
        """
        Very simple function which allows float inputs for time shifts and
        ensures that time shifts are always per-trace arrays. Applies the
        move out by calculating a time shift using src-rcv distance

        :rtype: np.array
        :return: a stream-lengthed array of time shifts that can be applied
            per trace
        """
        # No user input means time shifts will be 0, so nothing happens
        time_shift_arr = np.zeros(len(self.st))
        if self.time_shift_s is not None:
            # User inputs a static time shift
            if isinstance(self.time_shift_s, float):
                logger.info(f"apply constant time shift {self.time_shift_s}s")
                time_shift_arr += self.time_shift_s
            # User input an array which should have already been checked for
            # length
            elif isinstance(self.time_shift_s, list):
                logger.info(f"apply user-defined array of time shift values")
                time_shift_arr = self.time_shift_s
            # Allow shifting by a given phase arrival in the SAC header
            elif isinstance(self.time_shift_s, str):
                sac_key = SACDICT[self.time_shift_s]
                logger.info(f"apply time shift by {self.time_shift_s}")
                time_shift_arr = [-1 * tr.stats.sac[sac_key]
                                  for tr in self.st]
        time_shift_arr = np.array(time_shift_arr)

        # Further change the time shift if we have move out input
        if self.move_out:
            logger.info(f"apply {self.move_out} {self.distance_units}/s "
                        f"move out")
            move_out_arr = self.distances / self.move_out
            time_shift_arr -= move_out_arr

        return time_shift_arr
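
    # A worked example of the move out arithmetic above (illustrative
    # numbers): with `move_out`=7 (km/s) and a source-receiver distance of
    # 140 km, the trace is shifted by -140/7 = -20 s, so a phase traveling
    # at 7 km/s appears vertically aligned across the record section.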
    def get_srcrcv_dist_az_baz(self):
        """
        Convenience function to wrap _get_srcrcv_dist_az_baz_trace into a
        loop over the whole stream and return lists of distances, azimuths,
        and backazimuths

        :rtype distances: np.array
        :return distances: source-receiver distances in user-defined units
            in the original order of Stream
        :rtype azimuths: np.array
        :return azimuths: source-receiver azimuths (deg) in the original
            order of Stream
        :rtype backazimuths: np.array
        :return backazimuths: source-receiver backazimuths (deg) in the
            original order of Stream
        """
        logger.info("calculating source-receiver distance and "
                    "(back)azimuths")

        distances, azimuths, backazimuths = [], [], []
        for tr in self.st:
            gcd, az, baz = self._get_srcrcv_dist_az_baz_trace(tr=tr)
            distances.append(gcd)
            azimuths.append(az)
            backazimuths.append(baz)

        distances = np.array(distances)
        azimuths = np.array(azimuths)
        backazimuths = np.array(backazimuths)

        return distances, azimuths, backazimuths
    def _get_srcrcv_dist_az_baz_trace(self, tr=None, idx=0):
        """
        Check the source-receiver characteristics such as src-rcv distance,
        azimuth, backazimuth for a given trace.

        .. note::
            This function ASSUMES that SAC headers have been written to the
            traces. Otherwise we will need more complicated ways to get event
            lat and lon

        :type tr: obspy.core.trace.Trace
        :param tr: trace to get srcrcv information for. If None, will use
            `idx`
        :type idx: int
        :param idx: if `tr`==None, user can provide index of self.st
            (Stream), defaults to index 0
        :rtype gcdist: float
        :return gcdist: great circle distance in units specified by
            `distance_units`, can be 'km' or 'deg'
        :rtype az: float
        :return az: azimuth (degrees) between source and receiver
        :rtype baz: float
        :return baz: backazimuth (degrees) between source and receiver
        """
        # Default to the first trace in the Stream
        if tr is None:
            tr = self.st[int(idx)]
        try:
            dist = tr.stats.sac.dist  # units: km
            az = tr.stats.sac.az      # units: deg
            baz = tr.stats.sac.baz    # units: deg
        except AttributeError:
            # If for whatever reason the SAC headers don't contain this info
            # already, use ObsPy to get great circle distance, azimuth and
            # backazimuth
            dist, az, baz = gps2dist_azimuth(lat1=tr.stats.sac.evla,
                                             lon1=tr.stats.sac.evlo,
                                             lat2=tr.stats.sac.stla,
                                             lon2=tr.stats.sac.stlo)
            dist *= 1E-3  # units: m -> km

        # Ensure that 0 <= (b)az <= 360
        az = az % 360
        baz = baz % 360

        # Make sure distance is in the correct units, default units 'km'
        if self.distance_units == "deg":
            dist = kilometers2degrees(dist)  # units: km -> deg
        elif self.distance_units == "km_utm":
            # Overwrite `dist`, could probably skip that calc above but
            # leaving for now as I don't think this option will be used
            # heavily.
            dist_deg = np.sqrt(
                ((tr.stats.sac.stlo - tr.stats.sac.evlo) ** 2) +
                ((tr.stats.sac.stla - tr.stats.sac.evla) ** 2)
            )
            dist = kilometers2degrees(dist_deg)  # units: km

        return dist, az, baz
    def get_amplitude_scaling(self, _choice="st"):
        """
        Scale the amplitudes of all the waveforms by producing a
        Stream-dependent scale factor based on user choice. The data arrays
        are expected to be DIVIDED by the output array:

        i.e., st[i].data /= self.amplitude_scaling[i]

        .. note::
            Needs to be run AFTER preprocessing because filtering etc. will
            affect the final amplitudes of the waveforms

        :type _choice: str
        :param _choice: Internal choice only required for 'normalize'
            scaling. `st` for amplitude scaling w.r.t the data array,
            `st_syn` for amplitude scaling w.r.t synthetics
        :rtype: np.array
        :return: an array corresponding to the Stream indexes which provides
            a per-trace scaling coefficient
        """
        logger.info(f"determining amplitude scaling w.r.t {_choice} with: "
                    f"'{self.scale_by}'")

        # Don't scale by anything
        if self.scale_by is None:
            amp_scaling = np.ones(len(self.st))
        # Scale by the max amplitude of each trace
        elif self.scale_by == "normalize":
            if _choice == "st":
                amp_scaling = self.max_amplitudes
            elif _choice == "st_syn":
                amp_scaling = self.max_amplitudes_syn
            # When using absolute sorting, scale waveforms to the spread of
            # the sorting value
            if "abs" in self.sort_by:
                if "distance" in self.sort_by:
                    logger.info("scaling amplitudes for absolute distance")
                    scale = np.mean(self.distances) / 10
                elif "backazimuth" in self.sort_by:
                    logger.info("scaling amplitudes for absolute "
                                "backazimuth")
                    scale = self.backazimuths.max() - self.backazimuths.min()
                    scale /= 100
                elif "azimuth" in self.sort_by:
                    logger.info("scaling amplitudes for absolute azimuth")
                    scale = self.azimuths.max() - self.azimuths.min()
                    scale /= 50
                # Divide to make waveforms larger
                amp_scaling /= scale
        # Scale by the largest amplitude shown
        elif self.scale_by == "global_norm":
            global_max = self.max_amplitudes[self.sorted_idx].max()
            if self.max_amplitudes_syn.any():
                global_max = max(
                    global_max,
                    self.max_amplitudes_syn[self.sorted_idx].max()
                )
            amp_scaling = np.ones(len(self.st)) * global_max
        # Scale by the theoretical geometric spreading factor
        elif self.scale_by == "geometric_spreading":
            amp_scaling = self.calculate_geometric_spreading()

        # Apply manual scale factor if provided; the default value is 1 so
        # nothing changes. Divide because the amplitude scaling divides the
        # data array, which means `amplitude_scale_factor` is effectively
        # MULTIPLIED with the data array
        amp_scaling /= self.amplitude_scale_factor

        return amp_scaling
    def calculate_geometric_spreading(self):
        """
        Stations with larger source-receiver distances will have their
        amplitude scaled by a larger value.

        For information on geometric spreading, see Stein and Wysession,
        Section 4.3.4, Eq 20 (for Rayleigh waves, geometric spreading
        factor = 0.5). For our purposes, this will fold the attenuation
        factor into the same single factor that accounts for geometric
        spreading.

        The equation for amplitude reduction 'A' as a function of distance
        'd' is:

            A(d) = k / sin(d) ** f

        where 'k' is the `geometric_spreading_k_val` and 'f' is the
        `geometric_spreading_factor`. 'k' is calculated automatically if not
        given by the User. In the code, A(d) is represented by `w_vector`

        .. note::
            This does not take into account the variation in amplitude from
            the focal mechanism

        TODO
            - Look in Stein and Wysession and figure out vector names
            - Allow ignoring specific components, currently only allowed to
              exclude on a per-station basis

        :rtype: list
        :return: scale factor per trace in the stream based on the
            theoretical geometric spreading factor. This is meant to be
            MULTIPLIED by the data arrays
        """
        logger.info("using geometric spreading factor: "
                    f"{self.geometric_spreading_factor}")

        # Ignore any traces with 0 max amplitude, which will throw off calc
        for i, max_amp in enumerate(self.max_amplitudes):
            if max_amp == 0:
                station_name = self.st[i].get_id().split(".")[1]
                if station_name not in self.geometric_spreading_exclude:
                    self.geometric_spreading_exclude.append(station_name)
                    logger.warning(f"{station_name} has 0 max amplitude "
                                   f"which will negatively affect geometric "
                                   f"spreading calculation. excluding from "
                                   f"list")

        # To selectively remove stations from this calculation, we will need
        # to gather a list of indexes to skip which can be applied to lists
        indices = self.sorted_idx  # already some stations have been removed
        for station in set(self.geometric_spreading_exclude):
            # Matching station names to get indices
            remove_idx = [i for i, id_ in enumerate(self.station_ids)
                          if station in id_]
            if remove_idx:
                logger.debug(f"remove '{station}' from geometric spreading "
                             f"eq.")
                indices = [i for i in indices if i not in remove_idx]
        logger.info(f"excluded {len(self.sorted_idx) - len(indices)} traces "
                    f"from geometric spreading calculation")

        # Make sure distances are in units degrees
        if "km" in self.distance_units:
            distances = kilometers2degrees(self.distances)
        else:
            distances = self.distances

        # Create a sinusoidal function based on distances in degrees
        sin_delta = np.sin(np.array(distances) / (180 / np.pi))

        # Use or calculate the K value constant scaling factor
        if self.geometric_spreading_k_val is None:
            k_vector = self.max_amplitudes[indices] * \
                (sin_delta[indices] ** self.geometric_spreading_factor)
            # OVERWRITING internal parameter here, which will be used
            # elsewhere
            self.geometric_spreading_k_val = np.median(k_vector)
            logger.info(f"geometric spreading `k` value = "
                        f"{self.geometric_spreading_k_val}")

        # This is the amplitude scaling that we are after, defined for ALL
        # stations
        w_vector = (self.geometric_spreading_k_val /
                    (sin_delta ** self.geometric_spreading_factor))

        # Plot the geometric spreading figure
        if self.geometric_spreading_save is not None:
            # Curate station names so that it is just 'STA.COMP'
            station_ids = [f"{_.split('.')[1]}.{_.split('.')[-1][-1]}"
                           for _ in self.station_ids]
            plot_geometric_spreading(
                distances=distances, max_amplitudes=self.max_amplitudes,
                geometric_spreading_factor=self.geometric_spreading_factor,
                geometric_k_value=self.geometric_spreading_k_val,
                station_ids=station_ids, units=self.distance_units,
                include=indices, ymax=self.geometric_spreading_ymax,
                save=self.geometric_spreading_save
            )

        return w_vector
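
    # A worked example of the spreading equation above (illustrative
    # numbers): with f=0.5 and k=1.0, a station at d=30 degrees has
    # A(30) = 1.0 / sin(30 deg) ** 0.5 = 1.0 / 0.5 ** 0.5 ~= 1.41,
    # i.e., its scale factor grows with distance to counteract the expected
    # amplitude decay.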
    def get_sorted_idx(self):
        """
        Sort the source-receiver pairs by the chosen parameters. Except for
        in `default` sorting, always maintains component ordering defined by
        the user, i.e., for the same station, components will be ordered by
        the `components` parameter.

        :rtype: np.array
        :return: returns an indexing list which is sorted based on the user
            defined `sort_by` argument.
        """
        logger.info(f"determining sort order with parameter: {self.sort_by}")

        # Retain input ordering but run sorting to allow reversing
        if "default" in self.sort_by:
            sorted_idx = sorted(self.idx, reverse=self.stats.reverse_sort)
        # Sort alphabetically BUT allow component sorting
        elif "alphabetical" in self.sort_by:
            sorted_idx = self._sort_by_alphabetical()
        # Sort by dist, az or baz
        else:
            components = self._get_component_order()
            # Azimuthal sorts are allowed to start at a value other than 0
            # Backazimuth needs to come first because 'azimuth' matches both
            if "backazimuth" in self.sort_by:
                sort_list = (self.backazimuths - self.azimuth_start_deg) % 360
            elif "azimuth" in self.sort_by:
                sort_list = (self.azimuths - self.azimuth_start_deg) % 360
            elif "distance" in self.sort_by:
                sort_list = self.distances

            # Sort by the values, AND the components (e.g., Z->R->T or
            # whatever order the user has defined)
            _, _, sorted_idx = \
                zip(*sorted(zip(sort_list, components, self.idx),
                            reverse=self.stats.reverse_sort)
                    )
        sorted_idx = np.array(list(sorted_idx))

        return sorted_idx
    def _sort_by_alphabetical(self):
        """
        Sort by full station name in order. That is, network is sorted, then
        within network, station is sorted, and so on. Components are sorted
        last and NOT in alphabetical order. They are ordered based on the
        user-input `components` parameter.

        :rtype: np.array
        :return: indexing of networks and stations sorted alphabetically
        """
        networks = [_.split(".")[0] for _ in self.station_ids]
        stations = [_.split(".")[1] for _ in self.station_ids]
        locations = [_.split(".")[2] for _ in self.station_ids]
        components = self._get_component_order()
        zipped = zip(networks, stations, locations, components, self.idx)
        arr_out = np.array(
            list(zip(*sorted(zipped, reverse=self.stats.reverse_sort)))
        )

        return arr_out[-1].astype(int)
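
    # A minimal sketch of the zip/sort idiom used above and in
    # get_sorted_idx(): sort parallel lists together, then recover the
    # original indices in sorted order (illustrative values):
    # >>> vals, idx = [30., 10., 20.], [0, 1, 2]
    # >>> list(zip(*sorted(zip(vals, idx))))[-1]
    # (1, 2, 0)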
    def _get_component_order(self):
        """
        When we are sorting, we want components to be sorted by the preferred
        order (i.e., for 'ZRT', Z index is 0, T is 2), which means the
        components have to be reordered to adhere to the sorting algorithm.
        ALSO we need to ensure that we honor component order even if
        everything else is being sorted reverse alphabetically, so the
        components entry needs to be the opposite of stats.reverse_sort

        :rtype: list
        :return: components converted to integer values which can be used
            for sorting algorithms.
        """
        channels = [_.split(".")[3] for _ in self.station_ids]
        # ASSUMING the component is the last entry in the channel, per SEED
        # convention
        components = [_[-1] for _ in channels]
        if self.stats.reverse_sort:
            comp_list = self.components
        else:
            comp_list = self.components[::-1]

        # Can't use list.index() blindly here in case `comp_list` does NOT
        # contain one of the available components, which would throw a
        # ValueError. Use a placeholder value to catch these.
        numbered_components = []
        for comp in components:
            try:
                numbered_components.append(comp_list.index(comp))
            except ValueError:
                numbered_components.append(-999)

        return numbered_components
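
    # A worked example of the component numbering above: with
    # components="ZRT" and reverse_sort=True, comp_list="ZRT" maps
    # Z -> 0, R -> 1, T -> 2; a channel ending in 'N' (not in the list)
    # receives the placeholder value -999.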
    def get_y_axis_positions(self):
        """
        Determine how seismograms are vertically separated on the Y-Axis when
        plotting. Allows for constant separation, or absolute separation
        based on distance or (back)azimuth separation.

        :rtype: np.array
        :return: an array of actual y-axis positions that can be passed
            directly to plt.plot() (or similar)
        """
        logger.info(f"determining y-axis positioning for sort: "
                    f"{self.sort_by}")

        # Default weights provide constant `y_axis_spacing` between seismos
        if self.sort_by == "default" or "abs_" not in self.sort_by:
            y_range = np.arange(0, len(self.sorted_idx), 1)
            y_axis = self.y_axis_spacing * y_range
        # Absolute y-axis (i.e., absolute distance or (back)azimuth) will
        # just plot the actual distance or azimuth value
        else:
            if "backazimuth" in self.sort_by:
                y_axis = self.backazimuths
            elif "azimuth" in self.sort_by:
                y_axis = self.azimuths
            elif "distance" in self.sort_by:
                y_axis = self.distances

        return y_axis
    def get_x_axis_tick_values(self):
        """
        Determine, based on the length of the plotted traces, how often tick
        marks should be applied so that they are spaced at aesthetically
        pleasing values.
        """
        # Get a rough estimate for the total length of the trace in seconds
        if self.xlim_s:
            rs_len = self.xlim_s[1] - self.xlim_s[0]
        else:
            # Assuming that all waveforms are trimmed to the same length
            rs_len = self.st[0].stats.endtime - self.st[0].stats.starttime

        # Find the order of magnitude of the length for a rough estimate
        oom = np.floor(np.log10(rs_len))
        xtick_major = 10 ** oom
        xtick_minor = xtick_major / 2

        return xtick_minor, xtick_major
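
    # A worked example of the tick spacing heuristic above: a record section
    # spanning 300 s gives oom = floor(log10(300)) = 2, so major ticks land
    # every 10**2 = 100 s and minor ticks every 50 s.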
    def process_st(self):
        """
        Preprocess the Stream with optional filtering in place.

        .. note::
            Data in memory will be irretrievably altered by running
            preprocess.

        TODO Add feature to allow list-like periods to individually filter
            seismograms. At the moment we just apply a blanket filter.
        """
        if self.preprocess is None:
            logger.info("no preprocessing (including filtering) applied")
            return
        elif self.preprocess == "st":
            logger.info(f"preprocessing {len(self.st)} `st` waveforms")
            preprocess_list = [self.st]
        elif self.preprocess == "st_syn":
            logger.info(f"preprocessing {len(self.st_syn)} `st_syn` "
                        f"waveforms")
            preprocess_list = [self.st_syn]
        elif self.preprocess == "both":
            logger.info(f"preprocessing {len(self.st) + len(self.st_syn)} "
                        f"`st` and `st_syn` waveforms")
            preprocess_list = [self.st, self.st_syn]

        for st in preprocess_list:
            # Fill any data gaps with the mean of the data, on a trace by
            # trace basis to get individual mean values
            for tr in st:
                tr.trim(starttime=tr.stats.starttime,
                        endtime=tr.stats.endtime,
                        pad=True, fill_value=tr.data.mean())
            st.detrend("demean")
            st.taper(max_percentage=0.05, type="cosine")

            # Zero pad start and end of data if requested by user
            if self.zero_pad_s:
                _start, _end = self.zero_pad_s
                logger.info(f"padding zeros to traces with {_start}s before "
                            f"and {_end}s after")
                for idx, tr in enumerate(st):
                    tr.trim(starttime=tr.stats.starttime - _start,
                            endtime=tr.stats.endtime + _end,
                            pad=True, fill_value=0)

            # Allow multiple filter options based on user input
            # Max period only == high-pass filter
            if self.max_period_s is not None and self.min_period_s is None:
                logger.info(f"apply highpass filter w/ cutoff "
                            f"{1/self.max_period_s}")
                st.filter("highpass", freq=1/self.max_period_s,
                          zerophase=True)
            # Min period only == low-pass filter
            elif self.min_period_s is not None and self.max_period_s is None:
                logger.info(f"apply lowpass filter w/ cutoff "
                            f"{1/self.min_period_s}")
                st.filter("lowpass", freq=1/self.min_period_s,
                          zerophase=True)
            # Both min and max period == band-pass filter
            elif self.min_period_s is not None and \
                    self.max_period_s is not None:
                logger.info(f"applying bandpass filter w/ "
                            f"[{1/self.max_period_s}, "
                            f"{1/self.min_period_s}]")
                st.filter("bandpass", freqmin=1/self.max_period_s,
                          freqmax=1/self.min_period_s, zerophase=True)
            else:
                logger.info("no filtering applied")

            # Integrate or differentiate the number of times specified by
            # the user
            st.detrend("simple")
            if self.integrate != 0:
                if self.integrate < 0:
                    func = "differentiate"
                elif self.integrate > 0:
                    func = "integrate"
                for i in range(np.abs(self.integrate)):
                    logger.info(f"{func} all waveform data "
                                f"x{abs(self.integrate)}")
                    getattr(st, func)()
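
    # A worked example of the period-to-frequency conversion above:
    # `min_period_s`=2 and `max_period_s`=50 give a bandpass with
    # freqmin = 1/50 = 0.02 Hz and freqmax = 1/2 = 0.5 Hz.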
    def plot(self, subset=None, page_num=None, **kwargs):
        """
        Plot record sections based on all the available information

        :type subset: list of int
        :param subset: subset of `sorted_idx` if there are too many waveforms
            to plot on one page (set by `max_traces_per_rs`). E.g., to get
            the first 10 entries, subset=[0, 10]
        :type page_num: int
        :param page_num: If multiple pages are required due to exceeding
            `max_traces_per_rs`, page_num will make adjustments to the figure
            name and title to differentiate different pages of the same
            record section
        """
        if subset is None:
            start, stop = 0, None  # None will allow full list traversal
            nwav = len(self.sorted_idx)
        else:
            start, stop = subset
            nwav = stop - start

        logger.info(f"plotting record section for {nwav} waveforms")
        assert(nwav > 0), (
            f"no waveforms available for plotting. Check skip criteria if "
            f"you think this is an issue"
        )

        # Do a text output of station information so the user can check
        # that the plot is doing things correctly
        logger.debug("plotting line check starting from bottom (y=0)")
        logger.debug(
            "\nIDX\tY\t\tID\tDIST\tAZ\tBAZ\tTSHIFT\tTOFFSET\tYABSMAX"
        )
        self.f, self.ax = plt.subplots(figsize=self.figsize)

        log_str = "\n"
        # Allow choosing observed or synthetic data, defaults to observed
        # Allow different waveform looks based on observed vs. synthetic
        for choice in ["st", "st_syn"]:
            if getattr(self, choice) is None:
                continue
            # Main plotting call. Indexes should already be sorted
            for y_idx, idx in enumerate(self.sorted_idx[start:stop]):
                # Absolute scaling requires plotting actual dist or az values
                # Relative scaling just requires a linear index to stack
                # seismograms
                if "abs_" in self.sort_by:
                    y_index = idx
                else:
                    y_index = y_idx + start
                log_str += self._plot_trace(idx=idx, y_index=y_index,
                                            choice=choice, **kwargs)
        logger.debug(log_str)

        # If plotting both observed AND synthetic, log some relative
        # information
        if self.st is not None and self.st_syn is not None:
            self._log_relative_information()

        # Plot vertical bars at given reference times
        if self.tmarks:
            self._plot_tmarks()

        # Change the aesthetic look of the figure, should be run before other
        # set functions as they may overwrite what is done here
        _xtick_minor, _xtick_major = self.get_x_axis_tick_values()
        # Use kwarg values to avoid double inputs of the same parameter
        for name, val in zip(["xtick_minor", "xtick_major"],
                             [_xtick_minor, _xtick_major]):
            if name not in self.kwargs:
                self.kwargs[name] = val
        self.ax = set_plot_aesthetic(ax=self.ax, **self.kwargs)

        # Partition the figure by user-specified azimuth bins for relative
        # (back)azimuth sorting only (not absolute)
        if self.sort_by and ("azimuth" in self.sort_by) and \
                ("abs_" not in self.sort_by):
            self._plot_azimuth_bins(start=start, stop=stop)

        # Finalize the figure accoutrements
        self._plot_title(nwav=nwav)
        self._plot_axes(start=start, stop=stop)

        if self.save:
            # Allow appending page numbers to figure names,
            # e.g., default.png -> default_01.png
            if page_num:
                fid, ext = os.path.splitext(self.save)
                save_fid = f"{fid}_{page_num:0>2}{ext}"
            else:
                save_fid = self.save
            logger.info(f"saving figure to {save_fid}")
            plt.savefig(save_fid)

        if self.show:
            plt.show()
[docs]
    def _log_relative_information(self, start=0, stop=None):
        """
        If both `st` and `st_syn` are being plotted, write some relative
        information about their amplitudes to the main logger.
        Related to #116
        """
        log_str = (
            "relative amplitude information"
            f"\nIDX{'[O]BS':>13}{'[S]YN':>15}"
            f"{'[O_A]BSMAX':>15}{'[S_A]BSMAX':>12} "
            f"{'O_A/S_A':>8}{'LN(O_A/S_A)':>14}\n"
        )
        for idx in self.sorted_idx[start:stop]:
            tr = self.st[idx]
            tr_syn = self.st_syn[idx]
            # Get absolute maximum value of both obs and syn traces
            tr_max = np.abs(tr.max())
            tr_syn_max = np.abs(tr_syn.max())
            log_str += (
                f"{idx:<3}"
                f"{tr.get_id():>15}"
                f"{tr_syn.get_id():>15}"
                f"{tr_max:12.2E}"
                f"{tr_syn_max:12.2E}"
                f"{tr_max / tr_syn_max:12.2E}"
                f"{np.log(tr_max / tr_syn_max):12.2E}\n"
            )
        logger.debug(log_str)
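The logged ratio columns reduce to a simple computation; a small sketch with
ObsPy's example trace standing in for observed and synthetic data:

    import numpy as np
    from obspy import read

    tr_obs = read()[0]
    tr_syn = tr_obs.copy()
    tr_syn.data = tr_syn.data * 0.5  # pretend the synthetic is half amplitude

    ratio = np.abs(tr_obs.max()) / np.abs(tr_syn.max())
    print(f"O_A/S_A={ratio:.2E}, LN(O_A/S_A)={np.log(ratio):.2E}")
    # O_A/S_A=2.00E+00, LN(O_A/S_A)=6.93E-01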
[docs]
    def _plot_trace(self, idx, y_index, choice="st", **kwargs):
        """
        Plot a single trace on the record section, with amplitude scaling,
        time shifts, etc. Observed waveforms are black, synthetics are red.

        :type idx: int
        :param idx: index of the trace to plot and all trace-ordered values
            like amplitude scaling and time shifts
        :type y_index: int
        :param y_index: numerical order in which this trace is plotted, as
            each trace is plotted on top of the previous one. y_index should
            be iterated linearly in the loop that calls this function.
        :type choice: str
        :param choice: choice of 'st' or 'st_syn' depending on whether you
            want to plot the observed or synthetic waveforms
        """
        linewidth = self.kwargs.get("linewidth", .25)

        # Used to differentiate the two types of streams for plotting diffs
        choices = ["st", "st_syn"]
        assert choice in choices
        c = choices.index(choice)

        tr = getattr(self, choice)[idx]  # i.e., tr = self.st[idx]

        # Plot actual data with amplitude scaling, time shift, and y-offset
        tshift = self.time_shift_s[idx]

        # These are still the entire waveform. Make sure we honor zero padding
        # and any time shift applied
        x = tr.times() + tshift
        if self.zero_pad_s is not None:
            x -= self.zero_pad_s[0]  # index 0 is start, index 1 is end

        # Synthetics will already have a time offset stat from the
        # 'read_sem' function which grabs the T0 value from the ASCII.
        # Data will have a time offset relative to event origin time
        if hasattr(tr.stats, "time_offset"):
            x += tr.stats.time_offset
            toffset_str = f"{tr.stats.time_offset:4.2f}"
        else:
            toffset_str = " None"

        # Flip the sign of the y-axis if we are doing normal absolute
        # sorting because we are flipping the y-axis in _plot_axes()
        if "abs_" in self.sort_by and "_r" not in self.sort_by:
            sign = -1
        else:
            sign = 1

        # Amplitude scaling may change between observed and synthetic if e.g.,
        # we are doing trace-wise normalization
        if choice == "st":
            amplitude_scaling = self.amplitude_scaling
            max_amplitudes = self.max_amplitudes
        elif choice == "st_syn":
            amplitude_scaling = self.amplitude_scaling_syn
            max_amplitudes = self.max_amplitudes_syn
        y = sign * tr.data / amplitude_scaling[idx] + self.y_axis[y_index]

        # Truncate waveforms to get figure scaling correct. Make the
        # distinction between data and synthetics, which may have different
        # native start and end times
        if choice == "st":
            start, stop = self.xlim[idx]
        elif choice == "st_syn":
            start, stop = self.xlim_syn[idx]
        x = x[start:stop]
        y = y[start:stop]

        self.ax.plot(x, y, c=["k", "r"][c], linewidth=linewidth, zorder=10)

        # Sanity check: print station information to check against plot
        log_str = (f"{idx}"
                   f"\t{int(self.y_axis[y_index])}"
                   f"\t{tr.get_id():<6}"
                   f"\t{self.distances[idx]:6.2f}"
                   f"\t{self.azimuths[idx]:6.2f}"
                   f"\t{self.backazimuths[idx]:6.2f}"
                   f"\t{self.time_shift_s[idx]:4.2f}"
                   f"\t{toffset_str}"
                   f"\t{max_amplitudes[idx]:.2E}\n"
                   )

        # Retain some stats for global plot args
        self.stats.xmin.append(x.min())
        self.stats.xmax.append(x.max())
        self.stats.ymin.append(y.min())
        self.stats.ymax.append(y.max())

        return log_str
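The core of the plot call above is a scale-and-offset of the raw data.
Schematically, with illustrative values and ObsPy's example data:

    import numpy as np
    from obspy import read

    tr = read()[0]
    y_axis_position = 3.0               # where this trace sits on the section
    amplitude_scale = np.abs(tr.max())  # trace-wise normalization

    x = tr.times()  # time axis [s], before any time shift
    y = tr.data / amplitude_scale + y_axis_position  # peak amplitude now ~1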
[docs]
    def _plot_tmarks(self):
        """
        Plot vertical lines at given reference times based on user input
        values
        """
        c = self.kwargs.get("tmark_c", "r")
        lw = self.kwargs.get("tmark_lw", 1.5)
        ls = self.kwargs.get("tmark_ls", "-")
        alpha = self.kwargs.get("tmark_alpha", 0.75)
        z = self.kwargs.get("tmark_zorder", 5)
        for tmark in self.tmarks:
            plt.axvline(x=tmark, c=c, linewidth=lw, linestyle=ls, zorder=z,
                        alpha=alpha)
[docs]
    def _plot_azimuth_bins(self, start=None, stop=None):
        """
        If plotting by azimuth, create visual bin separators so the user has
        a visual understanding of radiation patterns etc.

        :type start: int
        :param start: optional start index to determine the bounds of the
            azimuth bins
        :type stop: int
        :param stop: optional stop index to determine the bounds of the
            azimuth bins
        """
        azimuth_binsize = self.kwargs.get("azimuth_binsize", 45)

        # Look of the azimuth bin lines
        c = self.kwargs.get("azimuth_bin_c", "r")
        lw = self.kwargs.get("azimuth_bin_lw", .75)
        ls = self.kwargs.get("azimuth_bin_ls", "-")
        z = self.kwargs.get("azimuth_bin_zorder", 5)

        azimuth_bins = np.arange(self.azimuth_start_deg,
                                 self.azimuth_start_deg + 360,
                                 azimuth_binsize)
        # Make sure that the bins go from 0 <= azimuth_bins <= 360
        azimuth_bins = azimuth_bins % 360

        # Cut down the azimuth bins to the closest values if we are plotting
        # multi-page record sections so that we don't plot too many bins
        if start is not None and stop is not None:
            min_az = self.azimuths[self.sorted_idx[start:stop]].min()
            max_az = self.azimuths[self.sorted_idx[start:stop]].max()
            # Find the closest azimuth bins
            idx_start = np.argmin(np.abs(azimuth_bins - min_az))
            idx_end = np.argmin(np.abs(azimuth_bins - max_az))
            azimuth_bins = azimuth_bins[idx_start:idx_end]

        # In an absolute plot, the y values are simply the azimuth bins
        if "abs" in self.sort_by:
            y_vals = azimuth_bins
            s_vals = [f"{azbin}{DEG}" for azbin in azimuth_bins]
        # In a relative plot, the y values need to be found between
        # seismograms
        else:
            s_vals, y_vals = [], []
            # Brute force determine where these azimuth bins fit into the
            # actual plotted azimuths, drawing a line between adjacent
            # seismograms, i.e., iterating and checking if the bin value fits
            # between two adjacent azimuth values
            for azbin in azimuth_bins:
                # Edge case for 0 deg azimuth bin since azimuth wraps on 0.
                # Look for the top minimum azimuth value, place line above it
                if azbin == 0:
                    dy = abs(self.y_axis[1] - self.y_axis[0])
                    azis = self.azimuths[self.sorted_idx]
                    i = max(np.where(azis == azis.min())[0])
                    y_vals.append(self.y_axis[i] + dy / 2)
                else:
                    for i, idx in enumerate(self.sorted_idx[1:]):
                        j = i + 1
                        idx_minus_one = self.sorted_idx[i]
                        azi_low = self.azimuths[idx]
                        azi_high = self.azimuths[idx_minus_one]
                        # Break if bin is between azi values for two seismos
                        if azi_low <= azbin <= azi_high:
                            break
                    # for-else: no matching pair found, skip this bin entirely
                    else:
                        continue
                    # Mean gives the space between two adjacent seismograms
                    y_vals.append(np.mean([self.y_axis[i], self.y_axis[j]]))
                s_vals.append(f"{azbin}{DEG}")

        for y, (s_val, y_val) in enumerate(zip(s_vals, y_vals)):
            # Deal with the case where two bins occupy the same space,
            # only plot the first one that occurs. The y > 0 guard avoids
            # comparing the first bin against the last via index -1
            if y > 0 and y_val == y_vals[y - 1]:
                continue
            plt.axhline(y=y_val, c=c, linewidth=lw, linestyle=ls, zorder=z)
            plt.text(x=max(self.stats.xmax), y=y_val, s=s_val, c=c,
                     ha="right")
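The bin construction above starts at an arbitrary azimuth and wraps back into
the range [0, 360); for example:

    import numpy as np

    azimuth_start_deg, azimuth_binsize = 180, 45
    bins = np.arange(azimuth_start_deg, azimuth_start_deg + 360,
                     azimuth_binsize)
    print(bins % 360)  # [180 225 270 315   0  45  90 135]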
[docs]
    def _plot_axes(self, start=0, stop=None):
        """
        Contains the logic for how to handle the x- and y-axis labels, ticks
        etc.

        Logic options for how to plot the y-axis:
        - Relative: the yticklabels get replaced by station information. This
          can be on either the right or left side but will be attached to the
          actual axis
        - Absolute: the y-axis actually represents something (e.g., distance,
          azimuth). That means labels cannot replace the y-ticks and have to
          be placed within the figure

        .. note::
            Relative plotting can also place labels OFF the y-axis, at which
            point the y-axis is turned off because it contains no usable
            information

        :type start: int
        :param start: optional start index for creating text labels
        :type stop: int
        :param stop: optional stop index for creating text labels
        """
        # Sort out how the y-axis will be labeled with station information
        # and dist/azimuth if we're doing absolute sorting
        if "abs_" in self.sort_by:
            self._set_y_axis_absolute()
            if self.y_label_loc is not None:
                # By default, place absolute sorted labels on the y-axis
                if self.y_label_loc == "default":
                    loc = "y_axis_abs_right"
                else:
                    loc = self.y_label_loc
                self._set_y_axis_text_labels(start=start, stop=stop, loc=loc)
        elif self.y_label_loc is not None:
            # By default, relative plotting shows labels on the right side
            if self.y_label_loc == "default":
                loc = "y_axis"
            else:
                loc = self.y_label_loc
            self._set_y_axis_text_labels(start=start, stop=stop, loc=loc)
            logger.info(f"placing station labels on y-axis at: {loc}")

        # User requests that we turn off the y-axis
        if self.y_label_loc is None:
            logger.info("user requests turning off y-axis w/ "
                        "`y_label_loc`==None")
            self.ax.get_yaxis().set_visible(False)
        # OR EDGE CASE: Relative plotting but labels are not placed on the
        # y-axis, turn off the y-axis because it no longer means anything
        elif self.y_label_loc not in ["default", "y_axis", "y_axis_right"] \
                and "abs" not in self.sort_by:
            logger.info("turning off y-axis as it contains no information")
            self.ax.get_yaxis().set_visible(False)

        # Reverse the y-axis for absolute sorting so that smaller values
        # appear at the top of the figure, opposite the default y-axis.
        # !!! This also requires flipping the sign of the plotted data in
        # !!! _plot_trace() to deal with the axis flip.
        if "abs_" in self.sort_by:
            if "_r" not in self.sort_by:
                self.ax.invert_yaxis()
            else:
                logger.info("user requests inverting y-axis with absolute "
                            "reverse sort")

        # X-axis label is different if we time shift
        if self.time_shift_s.sum() == 0:
            plt.xlabel("Time [s]")
        else:
            plt.xlabel("Relative Time [s]")

        # Allow user-defined x-axis limits
        if self.xlim_s is None:
            self.ax.set_xlim([min(self.stats.xmin), max(self.stats.xmax)])
        else:
            self.ax.set_xlim(self.xlim_s)

        plt.tight_layout()
[docs]
    def _set_y_axis_absolute(self):
        """
        If 'abs_' in sort_by, then the y-axis should be shown on an absolute
        scale. That means we need to format the tick labels, add some
        labelling etc.
        """
        # Reset tick label size to be larger to match absolute x-axis size
        ytick_fontsize = self.kwargs.get("ytick_fontsize", 12)
        self.ax.tick_params(axis="y", labelsize=ytick_fontsize)

        if "distance" in self.sort_by:
            # Dynamically determine y-axis ticks based on the total span
            # of the y-axis
            ymin, ymax = self.ax.get_ylim()
            dist_span = ymax - ymin
            oom = np.floor(np.log10(dist_span))
            _ytick_major = 10 ** oom
            _ytick_minor = _ytick_major / 2
            ytick_minor = self.kwargs.get("ytick_minor", _ytick_minor)
            ytick_major = self.kwargs.get("ytick_major", _ytick_major)
            ylabel = f"Distance [{self.distance_units}]"
        elif "azimuth" in self.sort_by:
            ytick_minor = self.kwargs.get("ytick_minor", 45)
            ytick_major = self.kwargs.get("ytick_major", 90)
            ylabel = f"Azimuth [{DEG}]"

        # Set ytick label major and minor, which is either dist or az
        self.ax.yaxis.set_major_locator(MultipleLocator(ytick_major))
        self.ax.yaxis.set_minor_locator(MultipleLocator(ytick_minor))
        self.ax.set_ylabel(ylabel)
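The dynamic tick spacing keys off the order of magnitude of the distance
span; e.g., a section spanning 750 km gets 100 km major and 50 km minor
ticks:

    import numpy as np

    dist_span = 750.0
    oom = np.floor(np.log10(dist_span))  # 2.0
    ytick_major = 10 ** oom              # 100.0
    ytick_minor = ytick_major / 2        # 50.0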
[docs]
    def _set_y_axis_text_labels(self, start=0, stop=-1, loc="y_axis"):
        """
        Plot a text label next to each trace describing the station, azimuth
        and source-receiver distance. We need to do this all at once because
        we have to replace all ticks and tick labels at once.

        .. note::
            if using the 'subset' option in plot, you also need to tell the
            y-axis plotter that we are only plotting a subset of data by
            using the `start` and `stop` parameters

        :type start: int
        :param start: start index for plotting, defaults to 0
        :type stop: int
        :param stop: stop index for plotting, defaults to -1 (end)
        :type loc: str
        :param loc: location to place the y_axis text labels, available:
            - y_axis: Place labels along the y-axis (left side of the figure).
              Will replace the actual y-tick labels so not applicable for
              absolute sorting, which requires the y-axis labels
            - y_axis_abs: for absolute plotting, place waveform labels to the
              left side of the figure (outside border), co-existing with
              y-axis labels
            - y_axis_right: same as `y_axis` but on the right side of figure
            - x_min: Place labels on the waveforms at the minimum x value
            - x_max: Place labels on the waveforms at the maximum x value
        """
        c = self.kwargs.get("y_label_c", "k")
        fontsize = self.kwargs.get("y_label_fontsize", 10)

        y_tick_labels = []
        for idx in self.sorted_idx[start:stop]:
            str_id = self.station_ids[idx]
            if self.sort_by is not None and "backazimuth" in self.sort_by:
                # Named `str_az` but it actually holds backazimuths
                str_az = f"{self.backazimuths[idx]:6.2f}{DEG}"
            else:
                str_az = f"{self.azimuths[idx]:6.2f}{DEG}"
            # Allow degree distances to use the unicode degree symbol
            if self.distance_units == "deg":
                du = DEG
            else:
                du = self.distance_units
            str_dist = f"{self.distances[idx]:5.2f}{du}"

            # Looks something like: NN.SSS.LL.CC|30°|250.03km
            label = \
                f"{str_id:>{self.stats.longest_id}}|{str_az:>8}|{str_dist:>8}"
            # Add time shift if we have shifted at all
            if self.time_shift_s[idx] != 0:
                label += f"|{self.time_shift_s[idx]:.2f}s"
            y_tick_labels.append(label)

        # Generate a 'template' y-axis format to help users decipher labels
        if self.sort_by is not None and "backazimuth" in self.sort_by:
            az_str = "BAZ"
        else:
            az_str = "AZ"
        # Y_FMT only includes the time shift IF there are time shifts
        y_fmt = f"NET.STA.LOC.CHA|{az_str}|DIST"
        if self.time_shift_s.sum() != 0:
            y_fmt += "|TSHIFT"

        # Option 1: Replace y-axis tick labels with waveform labels
        # Set the y-axis labels to the left side of the left figure border
        if loc == "y_axis":
            # For relative plotting (not abs_), replace y-tick labels with
            # station information
            self.ax.set_yticks(self.y_axis[start:stop])
            self.ax.set_yticklabels(y_tick_labels)
            plt.text(-.01, .99, y_fmt, ha="right", va="top",
                     transform=self.ax.transAxes, fontsize=fontsize)
        # Set the y-axis labels to the right side of the right figure border
        elif loc == "y_axis_right":
            self.ax.set_yticks(self.y_axis[start:stop])
            self.ax.set_yticklabels(y_tick_labels)
            self.ax.yaxis.tick_right()
            self.ax.yaxis.set_label_position("right")
            plt.text(1.01, .99, y_fmt, ha="left", va="top",
                     transform=self.ax.transAxes, fontsize=fontsize)
        # Option 2: Plot labels as text objects, separate from y-axis labels
        else:
            # 2a: Set the y-axis labels inside the figure border (xmin or
            # xmax), figuring out where the min or max X value is on the plot
            if loc == "x_min":
                ha = "left"
                va = "top"
                func = min
                x_val = func(self.stats.xmin)
                plt.text(.01, .99, y_fmt, ha=ha, va=va,
                         transform=self.ax.transAxes, fontsize=fontsize)
            elif loc == "x_max":
                ha = "right"
                va = "top"
                func = max
                x_val = func(self.stats.xmax)
                plt.text(.99, .99, y_fmt, ha=ha, va=va,
                         transform=self.ax.transAxes, fontsize=fontsize)
            # 2b: Set the y-axis labels outside the figure border, co-existing
            # with y-labels showing distance or azimuth
            elif loc == "y_axis_abs":
                ha = "right"
                va = "center"
                func = min
                x_val = func(self.stats.xmin)
                plt.text(0, .99, y_fmt, ha=ha, va=va,
                         transform=self.ax.transAxes, fontsize=fontsize)
            elif loc == "y_axis_abs_right":
                ha = "left"
                va = "center"
                func = max
                x_val = func(self.stats.xmax)
                plt.text(1., .99, y_fmt, ha=ha, va=va,
                         transform=self.ax.transAxes, fontsize=fontsize)

            if self.xlim_s is not None:
                x_val = func([func(self.xlim_s), x_val])

            # Plot y-axis labels for absolute scales
            if len(self.y_axis) == len(self.st):
                for idx, s in zip(self.sorted_idx[start:stop], y_tick_labels):
                    plt.text(x=x_val, y=self.y_axis[idx], s=s, ha=ha, va=va,
                             c=c, fontsize=fontsize)
            # Plot y-axis labels for relative scales
            elif len(self.y_axis) == len(y_tick_labels):
                for y, s in zip(self.y_axis, y_tick_labels):
                    plt.text(x=x_val, y=y, s=s, ha=ha, va=va, c=c,
                             fontsize=fontsize)
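The station labels assembled above are fixed-width, pipe-delimited strings;
with made-up values:

    str_id, longest_id = "AK.CAPN..BHZ", 14
    str_az = f"{141.72:6.2f}°"
    str_dist = f"{253.43:5.2f}km"
    print(f"{str_id:>{longest_id}}|{str_az:>8}|{str_dist:>8}")
    # '  AK.CAPN..BHZ| 141.72°|253.43km'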
[docs]
    def _plot_title(self, nwav=None):
        """
        Create the title of the plot based on event and station information.
        Allow dynamic creation of the title based on user input parameters

        :type nwav: int
        :param nwav: if using subset, the title needs to know how many
            waveforms it's showing on the page. self.plot() should tell it
        """
        title = self.kwargs.get("title", None)
        if title is not None:
            self.ax.set_title(title)
            return

        # Defines the number of waveforms plotted on a single page, allowing
        # for subsets per page
        if nwav is None:
            nwav = len(self.sorted_idx)

        # T0: Zero pad is either a list of length 2 or None
        if self.zero_pad_s:
            _start, _end = self.zero_pad_s
        else:
            _start, _end = 0, 0
        # Origin time needs to be shifted by the zero pad offset value
        origintime = str(min([tr.stats.starttime + _start for tr in self.st]))
        # YYYY-MM-DDTHH:MM:SS.ssssssZ -> YYYY-MM-DD HH:MM:SS.ss
        origintime = origintime.replace("T", " ")[:-5]

        # HYPO: If only one event, show hypocenter information
        if self.stats.nevents == 1:
            sac = self.st[0].stats.sac
            # Accommodate unknown depth and magnitude
            if "evdp" in sac:
                evdp = f"{sac['evdp']:.2f}km"
            else:
                evdp = "None"
            if "mag" in sac:
                mag = f"M{sac['mag']:.2f}"
            else:
                mag = "None"
            # HYPO: lon, lat, depth, mag
            hypo = (
                f"\nHYPO: ({sac['evlo']:.2f}{DEG}, {sac['evla']:.2f}{DEG}), "
                f"Z={evdp}, Mag={mag}"
            )
        else:
            hypo = ""

        # CMP: Get only the unique components that have been plotted
        cmp = "".join(np.unique([self.st[i].stats.component
                                 for i in self.sorted_idx]))

        # GEOSPREAD: if geometric spreading scaling, post equation variables
        if self.scale_by == "geometric_spreading":
            geospread = (f" (k={self.geometric_spreading_k_val:.2E}, "
                         f"f={self.geometric_spreading_factor:.2f})")
        else:
            geospread = ""

        # FILT: If preprocessing is turned OFF, no filtering was applied
        if self.preprocess is not None:
            filt = f"\nFILT: [{self.min_period_s}, {self.max_period_s}]s"
        else:
            filt = ""

        # MOVE_OUT: if move out was applied, post it
        if self.move_out:
            move_out = f"\nMOVE_OUT: {self.move_out}{self.distance_units}/s"
        else:
            move_out = ""

        title = (
            f"T0: {origintime}{hypo}\n"
            f"NWAV: {nwav}; NEVT: {self.stats.nevents}; "
            f"NSTA: {self.stats.nstation}; COMP: {cmp}\n"
            f"SORT_BY: {self.sort_by}; "
            f"SCALE_BY: {self.scale_by}{geospread}"
            # The following title parts are optional depending on application
            f"{filt}{move_out}"
        )
        self.ax.set_title(title)
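The origin time string manipulation simply swaps the ISO 'T' separator for a
space and truncates the microseconds to two decimal places:

    from obspy import UTCDateTime

    t0 = str(UTCDateTime("2020-04-04T01:53:18.920000Z"))
    print(t0.replace("T", " ")[:-5])  # 2020-04-04 01:53:18.92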
[docs]
    def run(self):
        """
        Convenience function to run the RecordSection workflow in order.
        No internal logic for breaking the figure up into multiple record
        sections. For that, see the main function `plotw_rs`.
        """
        self.process_st()
        self.get_parameters()
        self.plot()
[docs]
def parse_args():
    """
    Parse command line arguments to set record section parameters dynamically.
    This arg parser provides a simplified interface for working with plotw_rs
    BUT it limits the flexibility of the code because things like long lists
    are prohibitively verbose and not included in the arguments.

    Kwargs can be passed in the same format thanks to:
    https://stackoverflow.com/questions/37367331/is-it-possible-to-use-\
        argparse-to-capture-an-arbitrary-set-of-optional-arguments

    .. note::
        Not all parameters are set here, some are left as default values.
        Also some parameters are set differently than the class defaults, so
        that when the user runs record_section.py without arguments, they get
        a reasonable result

    .. note::
        Do NOT use the command line if you want to exploit the expanded
        capabilities of the record section plotter, rather script it or call
        it from an interactive environment.
    """
    parser = argparse.ArgumentParser(
        description="Input basic record section params",
        # formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("-p", "--pysep_path", default="./", type=str,
                        nargs="?",
                        help="path to Pysep output, which is expected to "
                             "contain trace-wise SAC waveform files which "
                             "will be read")
    parser.add_argument("--syn_path", default=None, type=str, nargs="?",
                        help="path to SPECFEM generated synthetics. Also "
                             "requires --cmtsolution and --stations")
    parser.add_argument("--cmtsolution", default=None, type=str, nargs="?",
                        help="required for synthetics, path to the "
                             "CMTSOLUTION file used to generate SPECFEM "
                             "synthetics")
    parser.add_argument("--stations", default=None, type=str, nargs="?",
                        help="required for synthetics, path to the STATIONS "
                             "file used to generate SPECFEM synthetics")
    parser.add_argument("--sort_by", default="distance", type=str, nargs="?",
                        help=textwrap.dedent("""
                How to sort the Y-axis of the record section
                - None: Not set, don't sort, just iterate directly through
                  the Stream
                - 'alphabetical': sort alphabetically
                - 'azimuth': sort by source-receiver azimuth (deg) with
                  constant vertical spacing
                - 'backazimuth': sort by source-receiver backazimuth (deg)
                  with constant vertical spacing. Requires `azimuth_start_deg`
                - 'distance': sort by source-receiver distance (km) with
                  constant vertical spacing. Smallest distances at the top of
                  the figure. Requires `distance_units`
                - 'abs_distance': absolute vertical spacing of waveforms
                  defined by source-receiver distance (km). Smallest distances
                  at the top of the figure. Requires `distance_units`
                - 'abs_azimuth': absolute vertical spacing of waveforms
                  defined by source-receiver azimuth (deg). Requires
                  `azimuth_start_deg`
                - 'abs_backazimuth': absolute vertical spacing of waveforms
                  by source-receiver backazimuth (deg).
                - '*_r': append '_r' to any of the values above to REVERSE
                  the sort, e.g., an 'alphabetical_r' sort will go Z->A
                """)
                        )
    parser.add_argument("--scale_by", default=None, type=str, nargs="?",
                        help=textwrap.dedent("""
                How to scale the amplitudes of the record section
                - None: Not set, no amplitude scaling, waveforms shown raw
                - 'normalize': scale each trace by its maximum amplitude,
                  i.e., a /= max(abs(a))  # 'a' is the time series amplitudes
                - 'global_norm': scale by the largest amplitude to be
                  displayed on the screen. Will not consider waveforms which
                  have been excluded on another basis (e.g., wrong component)
                """)
                        )
    parser.add_argument("--geometric_spreading_factor", default=0.5,
                        type=float,
                        help="Scale amplitudes by predicting the expected "
                             "geometric spreading amplitude reduction and "
                             "correcting for it. Defaults to 0.5. Optional "
                             "related parameter: --geometric_spreading_k_val")
    parser.add_argument("--geometric_spreading_exclude", default=None,
                        nargs="+", type=str,
                        help="Comma-separated list of stations to exclude "
                             "from the geometric spreading equation, e.g., "
                             "'STA1,STA2,STA3'")
    parser.add_argument("--geometric_spreading_ymax", default=None,
                        nargs="?", type=float,
                        help="Controls the y-max value on the geometric "
                             "spreading plot")
    parser.add_argument("--time_shift_s", default=None, nargs="?",
                        help="Set a constant time shift in unit: seconds, OR "
                             "shift by a given phase arrival in the SAC "
                             "header")
    parser.add_argument("--move_out", default=None, type=float, nargs="?",
                        help="Set a constant velocity-based move out in "
                             "units: `distance_units`/s")
    parser.add_argument("--min_period_s", default=None, type=float,
                        nargs="?", help="Minimum filter period in unit "
                                        "seconds")
    parser.add_argument("--max_period_s", default=None, type=float,
                        nargs="?", help="Maximum filter period in unit "
                                        "seconds")
    parser.add_argument("--max_traces_per_rs", default=None, nargs="?",
                        help="Max waveforms to show on one page. If the "
                             "number of waveforms to show exceeds this "
                             "value, multiple pages will be saved")
    parser.add_argument("--integrate", default=0, type=int, nargs="?",
                        help="Integrate (positive values) or differentiate "
                             "(negative values) `integrate` times, e.g., -2 "
                             "will differentiate all waveforms twice")
    parser.add_argument("--xlim_s", default=None, type=int, nargs="+",
                        help="Min and max x limits in units seconds. Input "
                             "as a list, e.g., --xlim_s 10 200")
    parser.add_argument("--zero_pad_s", default=None, type=float, nargs="+",
                        help="Zero pad the start and end of each trace. "
                             "Input as two values in units of seconds, i.e., "
                             "--zero_pad_s START END; e.g., --zero_pad_s 30 0 "
                             "pads 30s before the start and 0s after the end "
                             "of each trace")
    parser.add_argument("--components", default="ZRTNE12", type=str,
                        nargs="?",
                        help="Ordered component list specifying which "
                             "components will be shown, and in what order, "
                             "e.g., 'ZR' will show only Z and R components "
                             "with Z sorted above R")
    parser.add_argument("--y_label_loc", default="default", type=str,
                        nargs="?",
                        help="Station information label location on the "
                             "y-axis")
    parser.add_argument("--y_axis_spacing", default=1, type=float, nargs="?",
                        help="For relative sorting, the y-axis spacing "
                             "between adjacent seismograms. If waveforms are "
                             "normalized then a default value of 1 is "
                             "usually fine")
    parser.add_argument("--amplitude_scale_factor", default=1, type=float,
                        nargs="?",
                        help="A user dial allowing waveform amplitudes to be "
                             "scaled by an arbitrary float value")
    parser.add_argument("--azimuth_start_deg", default=0, type=float,
                        nargs="?",
                        help="When sorting by azimuth, choose the default "
                             "starting value, with azimuth 0 <= az <= 360")
    parser.add_argument("--distance_units", default="km", type=str,
                        nargs="?", help="Set units when sorting by distance")
    parser.add_argument("--tmarks", default=None, type=float, nargs="+",
                        help="Plot vertical lines at given reference times "
                             "[s]. Input as a list of values, e.g., "
                             "--tmarks 0 100 200")
    parser.add_argument("--save", default="./record_section.png", type=str,
                        nargs="?",
                        help="Path to save the resulting record section "
                             "figure")
    parser.add_argument("-o", "--overwrite", default=False,
                        action="store_true",
                        help="overwrite existing figure if path exists")
    parser.add_argument("--cartesian", default=False, action="store_true",
                        help="Let RecSec know that your domain is defined in "
                             "Cartesian coordinates. Used for SPECFEM2D and "
                             "SPECFEM3D_Cartesian domains defined in XYZ")
    parser.add_argument("--synsyn", default=False, action="store_true",
                        help="Let RecSec know that both `pysep_path` and "
                             "`syn_path` should be read in as SPECFEM-"
                             "generated synthetics")
    parser.add_argument("-L", "--log_level", default="DEBUG", type=str,
                        help="verbosity of logger: 'WARNING', 'INFO', "
                             "'DEBUG'")

    # Keyword arguments can be passed directly to the argparser in the same
    # format as the above kwargs (e.g., --linewidth 2), but they will not
    # have help messages or type checking
    parsed, unknown = parser.parse_known_args()
    for arg in unknown:
        if arg.startswith(("-", "--")):
            parser.add_argument(arg.split("=")[0])

    return parser
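The parse_known_args pattern above lets arbitrary --key value pairs through
without predefining them; a standalone illustration:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--linewidth")  # a predefined argument
    argv = ["--linewidth", "2", "--tmark_c", "b"]
    parsed, unknown = parser.parse_known_args(argv)
    # Register each unknown flag so a second parse accepts it
    for arg in unknown:
        if arg.startswith("-"):
            parser.add_argument(arg.split("=")[0])
    print(vars(parser.parse_args(argv)))
    # {'linewidth': '2', 'tmark_c': 'b'}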
[docs]
def plotw_rs(*args, **kwargs):
    """
    Plot Waveform Record Section (main call function for RecordSection)

    Instantiates the RecordSection class, runs processing and parameter
    getting, and then plots the record section. Contains additional logic for
    breaking up figures into multiple pages if requested by the user, while
    keeping sort order and waveform spacing consistent across multiple record
    sections.

    .. note::
        See RecordSection.__init__() for acceptable args and kwargs
    """
    _start = datetime.now()
    logger.info("starting record section plotter")

    rs = RecordSection(*args, **kwargs)
    rs.process_st()
    rs.get_parameters()

    # Simple case where all waveforms fit on one page
    if len(rs.sorted_idx) <= rs.max_traces_per_rs:
        rs.plot()
    # More complicated case where we need to split onto multiple pages
    else:
        # Iterate backwards through the range so that the order of pages is
        # more natural, i.e., plotting the record section from the top down,
        # rather than the bottom up
        rnge = np.arange(len(rs.sorted_idx), 0, -1 * rs.max_traces_per_rs)
        # When `max_traces_per_rs` is not an integer divisor of the number of
        # streams, the range will not hit zero, so we need to ensure it does
        if rnge[-1] != 0:
            rnge = np.append(rnge, 0)

        for i, stop in enumerate(rnge[:-1]):
            j = i + 1
            start = rnge[j]
            rs.plot(subset=[start, stop], page_num=j)

    _end = datetime.now()
    logger.info(f"finished record section in "
                f"t={(_end - _start).total_seconds():.2f}s")
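The paging logic walks the sorted waveform list backwards so that page 1
holds the top of the record section; e.g., 25 waveforms at 10 traces per
page:

    import numpy as np

    n_waveforms, max_traces_per_rs = 25, 10
    rnge = np.arange(n_waveforms, 0, -1 * max_traces_per_rs)  # [25 15  5]
    if rnge[-1] != 0:
        rnge = np.append(rnge, 0)                             # [25 15  5  0]
    for i, stop in enumerate(rnge[:-1]):
        start = rnge[i + 1]
        print(f"page {i + 1}: subset=[{start}, {stop}]")
    # page 1: subset=[15, 25]
    # page 2: subset=[5, 15]
    # page 3: subset=[0, 5]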
[docs]
def main():
    """
    Convenience 'main' function to play nice with entry scripts

    .. rubric::

        $ recsec -h
    """
    parser = parse_args()
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    plotw_rs(**vars(parser.parse_args()))
if __name__ == "__main__":
    main()