This notebook works with the segy files included alongside it. Each segy file contains a set of receiver measurements for a single source position. Before we can start with any sort of data analysis, we want to bring this raw data into SalvusProject and visualize what it looks like.

import os

SALVUS_FLOW_SITE_NAME = os.environ.get("SITE_NAME", "local")
PROJECT_DIR = "project"
import zipfile
import numpy as np
import obspy
from obspy.io.segy.segy import _read_segy
import xarray as xr
import salvus.namespace as sn
from salvus.data import external_data_to_hdf5
import utils
d = sn.domain.dim2.BoxDomain(
x0=-41.0,
x1=41.0,
y0=-30.0,
y1=0.0,
)
p = sn.Project.from_domain(path=PROJECT_DIR, domain=d, load_if_exists=True)
p.domain.plot()
The measurements are stored in each segy file in accordance with the standards outlined by the Society of Exploration Geophysicists (SEG) here. obspy.read(...) is excellent at dealing with a variety of different common seismic data formats, so this may be a good point to start from. Note that since segy is a binary format, naively reading a file as text (e.g. with open(filename) as f: data = f.readlines()) will not get us anywhere useful. Instead, we'll use the function _read_segy(...) within obspy.io.segy.segy to import the segy data.

First, let's extract the data files:

with zipfile.ZipFile("./data/data.zip") as fh:
    fh.extractall("./data")
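As a quick illustration of the generic reader mentioned above (a minimal sketch; we won't use its output below):

# obspy's generic reader can parse segy files directly and returns an
# obspy.Stream object.
st = obspy.read("data/shot_00.segy", format="SEGY")
print(st)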
Now, let's read in the data for the first shot, shot_00.segy:

segy_data = _read_segy("data/shot_00.segy")
The header of the segy file contains the metadata for the shotgather. Let's get the header and print it to see what it contains.

header = segy_data.traces[0].header
print(header)
trace_sequence_number_within_line: 0
trace_sequence_number_within_segy_file: 1
original_field_record_number: 0
trace_number_within_the_original_field_record: 0
energy_source_point_number: 0
ensemble_number: 0
trace_number_within_the_ensemble: 0
trace_identification_code: 12
number_of_vertically_summed_traces_yielding_this_trace: 0
number_of_horizontally_stacked_traces_yielding_this_trace: 0
data_use: 0
distance_from_center_of_the_source_point_to_the_center_of_the_receiver_group: 0
receiver_group_elevation: 0
surface_elevation_at_source: 0
source_depth_below_surface: 0
datum_elevation_at_receiver_group: 0
datum_elevation_at_source: 0
water_depth_at_source: 0
water_depth_at_group: 0
scalar_to_be_applied_to_all_elevations_and_depths: 0
scalar_to_be_applied_to_all_coordinates: -100
source_coordinate_x: -3000
source_coordinate_y: 0
group_coordinate_x: -2400
group_coordinate_y: 0
coordinate_units: 0
weathering_velocity: 0
subweathering_velocity: 0
uphole_time_at_source_in_ms: 0
uphole_time_at_group_in_ms: 0
source_static_correction_in_ms: 0
group_static_correction_in_ms: 0
total_static_applied_in_ms: 0
lag_time_A: 0
lag_time_B: 0
delay_recording_time: 0
mute_time_start_time_in_ms: 0
mute_time_end_time_in_ms: 0
number_of_samples_in_this_trace: 9790
sample_interval_in_ms_for_this_trace: 55
gain_type_of_field_instruments: 0
instrument_gain_constant: 0
instrument_early_or_initial_gain: 0
correlated: 0
sweep_frequency_at_start: 0
sweep_frequency_at_end: 0
sweep_length_in_ms: 0
sweep_type: 0
sweep_trace_taper_length_at_start_in_ms: 0
sweep_trace_taper_length_at_end_in_ms: 0
taper_type: 0
alias_filter_frequency: 0
alias_filter_slope: 0
notch_filter_frequency: 0
notch_filter_slope: 0
low_cut_frequency: 0
high_cut_frequency: 0
low_cut_slope: 0
high_cut_slope: 0
year_data_recorded: 1969
day_of_year: 365
hour_of_day: 23
minute_of_hour: 59
second_of_minute: 59
time_basis_code: 0
trace_weighting_factor: 0
geophone_group_number_of_roll_switch_position_one: 0
geophone_group_number_of_trace_number_one: 0
geophone_group_number_of_last_trace: 0
gap_size: 0
over_travel_associated_with_taper: 0
x_coordinate_of_ensemble_position_of_this_trace: 0
y_coordinate_of_ensemble_position_of_this_trace: 0
for_3d_poststack_data_this_field_is_for_in_line_number: 0
for_3d_poststack_data_this_field_is_for_cross_line_number: 0
shotpoint_number: 0
scalar_to_be_applied_to_the_shotpoint_number: 0
trace_value_measurement_unit: 0
transduction_constant_mantissa: 0
transduction_constant_exponent: 0
transduction_units: 0
device_trace_identifier: 0
scalar_to_be_applied_to_times: 0
source_type_orientation: 0
source_energy_direction_mantissa: 0
source_energy_direction_exponent: 0
source_measurement_mantissa: 0
source_measurement_exponent: 0
source_measurement_unit: 0
As we can see, a segy file can store a lot of metadata. However, there is only a handful of information we're actually interested in: the source and receiver coordinates, the shotpoint number, the time sampling, and the trace data itself (an array of shape [n_samples]).

Note that since the segy file only contains integer values, the source/receiver coordinates (for example header.source_coordinate_x and header.source_coordinate_y) must be scaled by the factor given in header.scalar_to_be_applied_to_all_coordinates to achieve the correct decimal values (see the aforementioned SEG standards here for more details).

# Apply no scaling if it is not provided
if header.scalar_to_be_applied_to_all_coordinates == 0:
coord_scale_fac = 1
# Have the scaling factor act as a divisor if it is negative
elif header.scalar_to_be_applied_to_all_coordinates < 0:
coord_scale_fac = -1 / header.scalar_to_be_applied_to_all_coordinates
# Have the scaling factor act as a multiplier if it is positive
else:
coord_scale_fac = header.scalar_to_be_applied_to_all_coordinates
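As a quick usage example (the expected values follow from the header printed above), applying the factor to the raw integer coordinates recovers the positions in meters:

# With scalar_to_be_applied_to_all_coordinates == -100, the factor is 0.01,
# so the raw value -3000 becomes -30.0 m.
src_x = header.source_coordinate_x * coord_scale_fac
src_y = header.source_coordinate_y * coord_scale_fac
print(src_x, src_y)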
Next, let's convert the data into an obspy.Stream object which contains the information on the seismic traces along with the relevant metadata. To do this, we'll first create a list of obspy.Trace objects:

# To keep the numbering of the sources neat, we'll add some zero padding to the
# source names
src_zero_padding = 2
# Define the name of the source position
src_name = str(header.shotpoint_number).zfill(src_zero_padding)
# Initialize an empty list where we can store the `obspy.Trace` objects
traces = []
# Iterate over the traces
for trace in segy_data.traces:
# Get some of the receiver metadata
rec_nr = trace.header.trace_sequence_number_within_line
    # Define the names of the receivers; add zero padding to the receiver names
    # based on how many receivers are in the gather (np.log is the natural
    # logarithm, so the 49 traces here give 3-digit padding)
    rec_zero_padding = int(np.log(len(segy_data.traces)))
rec_name = str(rec_nr).zfill(rec_zero_padding)
# Convert the trace to an obspy trace to make it a little easier to work
# with
stats = trace.to_obspy_trace().stats
# Package everything for the given trace into an `obspy.Trace`
tr = obspy.Trace(
data=trace.data,
header={
"station": rec_name,
"starttime": stats.starttime,
"endtime": stats.endtime,
"sampling_rate": stats.sampling_rate,
"receiver_location": np.array(
[
trace.header.group_coordinate_x,
trace.header.group_coordinate_y,
]
)
* coord_scale_fac,
"source_location": np.array(
[header.source_coordinate_x, header.source_coordinate_y]
)
* coord_scale_fac,
"source_id": src_name,
},
)
traces.append(tr)
# Convert the traces to an obspy.Stream object
shotgather = obspy.Stream(traces)
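As a quick check that the custom metadata made it into the stream (a small illustrative snippet), we can inspect the stats of the first trace:

# The custom header entries are accessible via each trace's stats.
print(shotgather[0].stats.station)
print(shotgather[0].stats.receiver_location)
print(shotgather[0].stats.source_location)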
Now that the data is in an obspy.Stream object, we can use some of the handy utility functions within the included utils module to visualize the dataset. Note that this utils module is open source in case you wish to take a look "under the hood" to further customize how your data is imported into Salvus.

Let's create an ExternalData object and plot where the source/receiver positions for the given dataset are.

external_data = utils.ExternalData(shotgather)
external_data.plot_shots()
external_data.plot_receivers(src_name)
external_data.shots[0].plot_trace(10)
With the source and receiver positions in hand, we can create a Salvus Event for this shot and add it to the project:

event = sn.Event(
event_name=src_name,
sources=[
sn.simple_config.source.cartesian.VectorPoint2D(
x=shotgather.traces[0].stats.source_location[0],
y=shotgather.traces[0].stats.source_location[1],
fx=0.0,
fy=-1.0,
)
],
receivers=[
sn.simple_config.receiver.cartesian.Point2D(
x=trace.stats.receiver_location[0],
y=trace.stats.receiver_location[1],
network_code="XX",
station_code=f"{trace.stats.station}",
fields=["velocity"],
)
for trace in shotgather.traces
],
)
p += event
Next, we need to attach the recorded waveform data to this event. We can use p.waveforms.add_external(...) for this. The function takes the data as an xarray.Dataset or xarray.DataArray object. Xarray provides a convenient interface to deal with structured data. It enables storing multi-dimensional arrays together with information on the coordinates of each data point.

Salvus requires the xarray dataset to adhere to a specific format. The data should have a 3D shape of (number_of_receivers, number_of_components, number_of_time_samples). So let us arrange our data in that way.

# The following gives us a data array of shape (no_of_receivers,
# no_of_time_samples)
data = np.array([tr.data for tr in shotgather.traces])
# Since we only consider vertical component data here, we add a singleton
# dimension.
data = np.expand_dims(data, 1)
# Which should now give us the correct dimensions (49 receivers, 1 component,
# 9790 samples).
data.shape
(49, 1, 9790)
"receiver_id"
, "component"
, and "time"
. We additionally
need to specify the coordinates of each data point along these three
dimensions. We do this by specifying the coordinates like this. For
"receiver_id"
, we simply assign a unique number to each receiver, the
component we set to "Y", which corresponds to the vertical component in the
Salvus coordinate frame for 2D simulations. As the time coordinate, we
provide the time axis.dims = ["receiver_id", "component", "time"]
coords = {
"receiver_id": np.arange(data.shape[0]),
"component": ["Y"],
"time": np.arange(data.shape[2]) * shotgather[0].stats.delta,
}
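As a small sanity check (a minimal sketch using the dims and coords defined above), each coordinate array should match the size of the corresponding data dimension:

# 49 receivers, 1 component, and 9790 time samples.
for name, size in zip(dims, data.shape):
    assert len(coords[name]) == size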
"receiver_field"
specifying what field the recorded data corresponds
to. In this case, we are looking at data from geophones, which record
"particle velocity". We set this as the name of our DataArray."dim"
telling Salvus that we are considering a 2D project here.da = xr.DataArray(
data=data,
dims=dims,
coords=coords,
attrs={
"dim": 2, # The number of dimensions considered in our project.
},
name="velocity",
)
da.plot()
<matplotlib.collections.QuadMesh at 0x76c4c0da1cd0>
Now we can attach this DataArray to the event in our project:

p.waveforms.add_external(data_name="survey_data", data=da, event=event)
To verify that everything worked as expected, we can query the data back from the project and plot it:

p.waveforms.get(
"EXTERNAL_DATA:survey_data",
events=src_name,
)[0].plot(
receiver_field="velocity",
component="Y",
)
Adding the Remaining Shots

To add the remaining shots to the project, let's use the same process as above, but with everything wrapped in a single loop.
# We will keep a list of obspy.Stream objects
shotgathers = []
# Start at index 1 since we've already added shot 0 in the previous section
for i in range(1, 27):
event_name = f"shot_{str(i).zfill(2)}"
filename = f"{event_name}.segy"
segy_data = _read_segy(f"data/{filename}")
header = segy_data.traces[0].header
# Apply no scaling if it is not provided
if header.scalar_to_be_applied_to_all_coordinates == 0:
coord_scale_fac = 1
# Have the scaling factor act as a divisor if it is negative
elif header.scalar_to_be_applied_to_all_coordinates < 0:
coord_scale_fac = -1 / header.scalar_to_be_applied_to_all_coordinates
# Have the scaling factor act as a multiplier if it is positive
else:
coord_scale_fac = header.scalar_to_be_applied_to_all_coordinates
# Define the name of the source position
src_name = str(header.shotpoint_number).zfill(src_zero_padding)
# Initialize an empty list where we can store the `obspy.Trace` objects
traces = []
# Iterate over the traces
for trace in segy_data.traces:
# Get some of the receiver metadata
rec_nr = trace.header.trace_sequence_number_within_line
# Define the names of the receivers; add zero padding to the receiver
# names based on how many receivers are in the gather
rec_zero_padding = int(np.log(len(segy_data.traces)))
rec_name = str(rec_nr).zfill(rec_zero_padding)
# Convert the trace to an obspy trace to make it a little easier to
# work with
stats = trace.to_obspy_trace().stats
# Package everything for the given trace into an `obspy.Trace`
tr = obspy.Trace(
data=trace.data,
header={
"station": rec_name,
"starttime": stats.starttime,
"endtime": stats.endtime,
"sampling_rate": stats.sampling_rate,
"receiver_location": np.array(
[
trace.header.group_coordinate_x,
trace.header.group_coordinate_y,
]
)
* coord_scale_fac,
"source_location": np.array(
[header.source_coordinate_x, header.source_coordinate_y]
)
* coord_scale_fac,
"source_id": src_name,
},
)
traces.append(tr)
shotgathers.append(obspy.Stream(traces))
With all of the shotgathers read in, we can now add the corresponding events and their data to the project:

for shotgather in shotgathers:
event = sn.Event(
event_name=shotgather.traces[0].stats.source_id,
sources=[
sn.simple_config.source.cartesian.VectorPoint2D(
x=shotgather.traces[0].stats.source_location[0],
y=shotgather.traces[0].stats.source_location[1],
fx=0.0,
fy=-1.0,
)
],
receivers=[
sn.simple_config.receiver.cartesian.Point2D(
x=trace.stats.receiver_location[0],
y=trace.stats.receiver_location[1],
network_code="XX",
station_code=f"{trace.stats.station}",
fields=["velocity"],
)
for trace in shotgather.traces
],
)
p += event
data = np.array([tr.data for tr in shotgather.traces])
data = np.expand_dims(data, 1)
    # Reuse the dims and coords defined earlier; this assumes every shot has
    # the same number of receivers and time samples as shot 0.
    da = xr.DataArray(
        data=data,
        dims=dims,
        coords=coords,
attrs={
"dim": 2, # The number of dimensions considered in our project.
},
name="velocity",
)
p.waveforms.add_external(data_name="survey_data", data=da, event=event)
Let's visualize the domain again, now with all of the events added:

p.viz.nb.domain()
When retrieving external data, Salvus can apply a number of processing fragments on the fly, for example bandpass(min_frequency, max_frequency), normalize, and time_shift(time_in_seconds). These can be chained in the data_name argument by separating the name of the data and the processing fragments with the | operator. Here's an example:

p.waveforms.get(
    "EXTERNAL_DATA:survey_data | bandpass(20, 80) | normalize | time_shift(0.1)",
    ...
)
First, let's look at the raw data for one of the shots:

src_name = str(9).zfill(src_zero_padding)
p.waveforms.get(
"EXTERNAL_DATA:survey_data",
events=src_name,
)[0].plot(
receiver_field="velocity",
component="Y",
)
And here is the same gather with the normalize processing fragment applied:

p.waveforms.get(
"EXTERNAL_DATA:survey_data | normalize",
events=src_name,
)[0].plot(
receiver_field="velocity",
component="Y",
)
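Processing fragments can also be chained, as described above; for instance, bandpassing and then normalizing the same gather (an illustrative combination reusing the frequency band from the earlier example):

p.waveforms.get(
    "EXTERNAL_DATA:survey_data | bandpass(20, 80) | normalize",
    events=src_name,
)[0].plot(
    receiver_field="velocity",
    component="Y",
)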
Finally, let's plot the normalized data for a few shots across the survey line:

for i in [0, 13, 26]:
src_name = str(i).zfill(src_zero_padding)
p.waveforms.get(
"EXTERNAL_DATA:survey_data | normalize",
events=src_name,
)[0].plot(
receiver_field="velocity",
component="Y",
)