The raw data for this tutorial is provided as a set of segy files included with this notebook. Each segy file contains a set of receiver measurements for a single source position. Before we can start with any sort of data analysis, we want to bring this raw data into SalvusProject and visualize what it looks like.

```python
import os

SALVUS_FLOW_SITE_NAME = os.environ.get("SITE_NAME", "local")
PROJECT_DIR = "project"
```

```python
import zipfile
import numpy as np
import obspy
from obspy.io.segy.segy import _read_segy
import salvus.namespace as sn
import utils
```

Let's begin by defining a simple 2-D box domain and creating a project from it:

```python
d = sn.domain.dim2.BoxDomain(
    x0=-41.0,
    x1=41.0,
    y0=-30.0,
    y1=0.0,
)
p = sn.Project.from_domain(path=PROJECT_DIR, domain=d, load_if_exists=True)
p.domain.plot()
```

The raw data is stored as a segy file in accordance with the standards outlined by the Society of Exploration Geophysicists (SEG) here. `obspy.read(...)` is excellent at dealing with a variety of different common seismic data formats, so this may be a good point to start from. Note, however, that a naive approach such as

```python
with open(filename) as f:
    data = f.readlines()
```

won't get us very far, as segy is a binary format.
We'll instead use the function `_read_segy(...)` within `obspy.io.segy.segy` to import the segy data. First, let's unpack the archive containing the raw data:

```python
with zipfile.ZipFile("./data/data.zip") as fh:
    fh.extractall("./data")
```

Let's start by reading in the first shot gather, shot_00.segy:

```python
segy_data = _read_segy("data/shot_00.segy")
```
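As a quick sanity check (our addition, not part of the original notebook), we can ask how many traces, i.e. receiver recordings, the gather holds; `_read_segy` returns an object whose `traces` attribute behaves like a list:

```python
# Each trace corresponds to one receiver recording for this shot.
print(len(segy_data.traces))
```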
The header of each trace within the segy file contains the metadata for the shotgather. Let's get the header and print it to see what it contains:

```python
header = segy_data.traces[0].header
print(header)
```

```
trace_sequence_number_within_line: 0
trace_sequence_number_within_segy_file: 1
original_field_record_number: 0
trace_number_within_the_original_field_record: 0
energy_source_point_number: 0
ensemble_number: 0
trace_number_within_the_ensemble: 0
trace_identification_code: 12
number_of_vertically_summed_traces_yielding_this_trace: 0
number_of_horizontally_stacked_traces_yielding_this_trace: 0
data_use: 0
distance_from_center_of_the_source_point_to_the_center_of_the_receiver_group: 0
receiver_group_elevation: 0
surface_elevation_at_source: 0
source_depth_below_surface: 0
datum_elevation_at_receiver_group: 0
datum_elevation_at_source: 0
water_depth_at_source: 0
water_depth_at_group: 0
scalar_to_be_applied_to_all_elevations_and_depths: 0
scalar_to_be_applied_to_all_coordinates: -100
source_coordinate_x: -3000
source_coordinate_y: 0
group_coordinate_x: -2400
group_coordinate_y: 0
coordinate_units: 0
weathering_velocity: 0
subweathering_velocity: 0
uphole_time_at_source_in_ms: 0
uphole_time_at_group_in_ms: 0
source_static_correction_in_ms: 0
group_static_correction_in_ms: 0
total_static_applied_in_ms: 0
lag_time_A: 0
lag_time_B: 0
delay_recording_time: 0
mute_time_start_time_in_ms: 0
mute_time_end_time_in_ms: 0
number_of_samples_in_this_trace: 9790
sample_interval_in_ms_for_this_trace: 55
gain_type_of_field_instruments: 0
instrument_gain_constant: 0
instrument_early_or_initial_gain: 0
correlated: 0
sweep_frequency_at_start: 0
sweep_frequency_at_end: 0
sweep_length_in_ms: 0
sweep_type: 0
sweep_trace_taper_length_at_start_in_ms: 0
sweep_trace_taper_length_at_end_in_ms: 0
taper_type: 0
alias_filter_frequency: 0
alias_filter_slope: 0
notch_filter_frequency: 0
notch_filter_slope: 0
low_cut_frequency: 0
high_cut_frequency: 0
low_cut_slope: 0
high_cut_slope: 0
year_data_recorded: 1969
day_of_year: 365
hour_of_day: 23
minute_of_hour: 59
second_of_minute: 59
time_basis_code: 0
trace_weighting_factor: 0
geophone_group_number_of_roll_switch_position_one: 0
geophone_group_number_of_trace_number_one: 0
geophone_group_number_of_last_trace: 0
gap_size: 0
over_travel_associated_with_taper: 0
x_coordinate_of_ensemble_position_of_this_trace: 0
y_coordinate_of_ensemble_position_of_this_trace: 0
for_3d_poststack_data_this_field_is_for_in_line_number: 0
for_3d_poststack_data_this_field_is_for_cross_line_number: 0
shotpoint_number: 0
scalar_to_be_applied_to_the_shotpoint_number: 0
trace_value_measurement_unit: 0
transduction_constant_mantissa: 0
transduction_constant_exponent: 0
transduction_units: 0
device_trace_identifier: 0
scalar_to_be_applied_to_times: 0
source_type_orientation: 0
source_energy_direction_mantissa: 0
source_energy_direction_exponent: 0
source_measurement_mantissa: 0
source_measurement_exponent: 0
source_measurement_unit: 0
```
As you can see, a whole lot of information can be stored within the header of a segy file. However, there is only a handful of information we're actually interested in:

- the source and receiver coordinates,
- the sampling rate and start time of the recording, and
- the data for each trace, stored as an array of shape [n_samples].

Since the segy file only contains integer values, the source/receiver coordinates (for example `header.source_coordinate_x` and `header.source_coordinate_y`) must be scaled by the factor given in `header.scalar_to_be_applied_to_all_coordinates` to achieve the correct decimal values (see the aforementioned SEG standards here for more details). A worked example follows the code below.

```python
# Apply no scaling if it is not provided
if header.scalar_to_be_applied_to_all_coordinates == 0:
    coord_scale_fac = 1
# Have the scaling factor act as a divisor if it is negative
elif header.scalar_to_be_applied_to_all_coordinates < 0:
    coord_scale_fac = -1 / header.scalar_to_be_applied_to_all_coordinates
# Have the scaling factor act as a multiplier if it is positive
else:
    coord_scale_fac = header.scalar_to_be_applied_to_all_coordinates
```
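To make the scaling concrete, consider the header printed above: the scalar is -100, so it acts as a divisor and `coord_scale_fac` becomes 0.01. The quick verification below is our addition, not part of the original notebook:

```python
# scalar_to_be_applied_to_all_coordinates is -100, so:
#   coord_scale_fac = -1 / -100 = 0.01
#   source_coordinate_x = -3000  ->  -3000 * 0.01 = -30.0 m
# This places the source on the surface of our box domain, well within its
# horizontal extent of -41 m to 41 m.
print(header.source_coordinate_x * coord_scale_fac)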
Next, we'll package the data into an `obspy.Stream` object which contains the information on the seismic traces along with the relevant metadata. To do this, we'll first create a list of `obspy.Trace` objects:

```python
# To keep the numbering of the sources neat, we'll add some zero padding to the
# source names
src_zero_padding = 2
# Define the name of the source position
src_name = str(header.shotpoint_number).zfill(src_zero_padding)
# Initialize an empty list where we can store the `obspy.Trace` objects
traces = []
# Iterate over the traces
for trace in segy_data.traces:
    # Get some of the receiver metadata
    rec_nr = trace.header.trace_sequence_number_within_line
    # Define the names of the receivers; add zero padding to the receiver names
    # based on how many digits the receiver numbers have
    rec_zero_padding = int(np.log10(len(segy_data.traces))) + 1
    rec_name = str(rec_nr).zfill(rec_zero_padding)
    # Convert the trace to an obspy trace to make it a little easier to work with
    stats = trace.to_obspy_trace().stats
    # Package everything for the given trace into an `obspy.Trace`
    tr = obspy.Trace(
        data=trace.data,
        header={
            "station": rec_name,
            "starttime": stats.starttime,
            "endtime": stats.endtime,
            "sampling_rate": stats.sampling_rate,
            "receiver_location": np.array(
                [
                    trace.header.group_coordinate_x,
                    trace.header.group_coordinate_y,
                ]
            )
            * coord_scale_fac,
            "source_location": np.array(
                [header.source_coordinate_x, header.source_coordinate_y]
            )
            * coord_scale_fac,
            "source_id": src_name,
        },
    )
    traces.append(tr)
# Convert the traces to an obspy.Stream object
shotgather = obspy.Stream(traces)
```
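Before handing the gather over to Salvus, it can be worth a quick look at what we've assembled. This inspection is our addition (not part of the original notebook); the custom header entries we set above are accessible through each trace's `stats`:

```python
# Prints a one-line summary per trace in the stream.
print(shotgather)
# The custom metadata, e.g. the receiver position, survives on the stats object.
print(shotgather[0].stats.receiver_location)
```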
Now that we have our data in an `obspy.Stream` object, we can use some of the handy utility functions within the included `utils` module to bring this into Salvus. Note that this `utils` module is open source in case you wish to take a look "under-the-hood" to further customize how your data is imported into Salvus. Let's initialize an `ExternalData` object and plot where the source/receiver positions for the given dataset are:

```python
external_data = utils.ExternalData(shotgather)
external_data.plot_shots()
external_data.plot_receivers(src_name)
```

We can also plot individual traces within the gather:

```python
external_data.shots[0].plot_trace(10)
```

In rough pseudo-code, adding such a gather to the project involves creating a new event and writing the receiver data to an HDF5 file which is then registered with the project as external data:

```python
# Add the event to the project
p.events.add(...)

# Note that here we assume that there is only a single component to the data
receiver_data = np.zeros([N_RECEIVERS, 1, traces.N_POINTS])
for i, trace in enumerate(traces):
    receiver_data[i, 0, :] = trace.data

with tempfile.TemporaryDirectory() as tmpdir:
    with h5py.File(..., "w") as f:
        f.create_group("point")
        f["point"].attrs["sampling_rate_in_hertz"] = ...
        f["point"].attrs["start_time_in_seconds"] = ...
        f["point"]["velocity"] = receiver_data
        f["point"]["velocity"].attrs["PHYSICS"] = "elastic".encode()
        f["coordinates_ELASTIC_point"] = ...
        f["names_ELASTIC_point"] = ...

    p.waveforms.add_external(
        data_name="survey_data",
        event=...,
        data_filename=...,
    )
```
For convenience, this is all wrapped within the `add_to_project` method within the accompanying `utils` module:

```python
external_data.add_to_project(p)
```
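As an optional check (our addition; this assumes `p.events.list()`, which returns the names of all events registered with the project), we can confirm that the event was created:

```python
# The event created for shot 00 should now appear in this list.
print(p.events.list())
```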
"EXTERNAL_DATA:survey_data",
events=src_name,
)[0].plot(
receiver_field="velocity",
component="X",
)"X" component despite being the vertical component of the velocity. A subsequent mapping of the data will be applied during the processing to map the "X" component to the vertical component.# We will keep a list of obspy.Stream objects
shotgathers = []
# Start at index 1 since we've already added shot 0 in the previous section
for i in range(1, 27):
event_name = f"shot_{str(i).zfill(2)}"
filename = f"{event_name}.segy"
segy_data = _read_segy(f"data/{filename}")
header = segy_data.traces[0].header
# Apply no scaling if it is not provided
if header.scalar_to_be_applied_to_all_coordinates == 0:
coord_scale_fac = 1
# Have the scaling factor act as a divisor if it is negative
elif header.scalar_to_be_applied_to_all_coordinates < 0:
coord_scale_fac = -1 / header.scalar_to_be_applied_to_all_coordinates
# Have the scaling factor act as a multiplier if it is positive
else:
coord_scale_fac = header.scalar_to_be_applied_to_all_coordinates
# Define the name of the source position
src_name = str(header.shotpoint_number).zfill(src_zero_padding)
# Initialize an empty list where we can store the `obspy.Trace` objects
traces = []
# Iterate over the traces
for trace in segy_data.traces:
# Get some of the receiver metadata
rec_nr = trace.header.trace_sequence_number_within_line
# Define the names of the receivers; add zero padding to the receiver
# names based on how many receivers are in the gather
rec_zero_padding = int(np.log(len(segy_data.traces)))
rec_name = str(rec_nr).zfill(rec_zero_padding)
# Convert the trace to an obspy trace to make it a little easier to work with
stats = trace.to_obspy_trace().stats
# Package everything for the given trace into an `obspy.Trace`
tr = obspy.Trace(
data=trace.data,
header={
"station": rec_name,
"starttime": stats.starttime,
"endtime": stats.endtime,
"sampling_rate": stats.sampling_rate,
"receiver_location": np.array(
[
trace.header.group_coordinate_x,
trace.header.group_coordinate_y,
]
)
* coord_scale_fac,
"source_location": np.array(
[header.source_coordinate_x, header.source_coordinate_y]
)
* coord_scale_fac,
"source_id": src_name,
},
)
traces.append(tr)
    shotgathers.append(obspy.Stream(traces))
```

With all of the remaining gathers collected, we can add them to the project in one go:

```python
external_data = utils.ExternalData(shotgathers)
external_data.add_to_project(p)
```

Let's have a look at the domain again, now with all of the events added:

```python
p.viz.nb.domain()
```

When retrieving the data from the project, a number of processing fragments can be applied on the fly, such as:

- `bandpass(min_frequency, max_frequency)`
- `normalize`
- `time_shift(time_in_seconds)`

One or more of these fragments can be appended to the `data_name` argument by separating the name of the data and the processing fragment with the `|` operator. Here's an example:

```python
p.waveforms.get(
    "EXTERNAL_DATA:survey_data | bandpass(20, 80) | normalize | time_shift(0.1)",
    ...
)
```
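As a concrete sketch of this syntax (our addition; the corner frequencies are the placeholder values from the example above, not a recommendation), the following fetches shot 00 band-passed and normalized:

```python
# Chain two processing fragments with the | operator.
event_data = p.waveforms.get(
    "EXTERNAL_DATA:survey_data | bandpass(20, 80) | normalize",
    events=str(0).zfill(src_zero_padding),
)
```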
Let's first look at a shot gather without any processing applied:

```python
src_name = str(9).zfill(src_zero_padding)
p.waveforms.get(
    "EXTERNAL_DATA:survey_data",
    events=src_name,
)[0].plot(
    receiver_field="velocity",
    component="X",
)
```

Now let's retrieve the same data, this time with the `normalize` processing fragment:

```python
p.waveforms.get(
    "EXTERNAL_DATA:survey_data | normalize",
    events=src_name,
)[0].plot(
    receiver_field="velocity",
    component="X",
)
```

Finally, let's compare a few normalized shot gathers from across the survey:

```python
for i in [0, 13, 26]:
    src_name = str(i).zfill(src_zero_padding)
    p.waveforms.get(
        "EXTERNAL_DATA:survey_data | normalize",
        events=src_name,
    )[0].plot(
        receiver_field="velocity",
        component="X",
    )
```