In this tutorial we will explain:
- How to get bulk statistics on WellLog data created
- How to fetch WellLog bulk statistics code examples
Note: Computable WellLog's curves data types are: integer, float and date/datetime.
Computable WellLog's curves data types are: integer, float and date/datetime.
Before to start to write bulk data through Wellbore DDMS API's, you will need to install the Python packages below:
- The pandas module and its Pandas.Dataframe json format to structure log bulk data to be written to the Wellbore DDMS.
- The pyarrow module to transform Pandas.Dataframe to parquet file through the pyarrow engine.
- The httpx module that allows to post request to the Wellbore DDMS.
# Prerequisite to run this notebook
!python -m pip install pip --upgrade
!pip install pandas numpy httpx pyarrowFor any call to Wellbore DDMS API's you need to pass into the header of the request a valid bearer token. This token can be obtained from any API catalog on the developer portal. You will need first to request a developer base subscription. Then from the developer base subscription pick any API and execute it. A valid bearer token is returns in the Curl section of the response. Copy this token value and assign it to the TOKEN variable below.
TOKEN = '' # Paste here the token without the bearer prefixHelper functions used in the different sample scripts of this tutorial.
from typing import List
import httpx
import pandas as pd
import numpy as np
import io
from IPython.display import display_html, display, HTML
from itertools import chain, cycle
def generate_df_typed(columns, index):
def gen_values(col_name, size):
if col_name.startswith('float'):
return np.random.random_sample(size=size)
if col_name.startswith('str'):
return [f'string_value_{i}' for i in range(size)]
if col_name.startswith('bool'):
return np.random.choice(a=[False, True], size=size)
if col_name.startswith('date'):
return (np.datetime4('2021-01-01') + days for days in range(size))
return np.random.randint(-100, 1000, size=size)
df = pd.DataFrame({c: gen_values(c, len(index))
for c in columns}, index=index)
return df
def multi_table(table_list):
'''Acceps a list of IpyTable objects and returns a table which contains each IpyTable in a cell'''
return HTML(
'<table><tr style="background-color:white;">' +
''.join(['<td>' + table._repr_html_() + '</td>' for table in table_list]) +
'</tr></table>'
)
def gen_color(color):
def fct(val=None):
return f'color: {color}'
return fct
def display_operation(before, sent, after):
colors = ['blue', 'green', 'orange', 'red']
color_fct = [gen_color(c) for c in colors]
sent_st = [sent[i].style.set_caption(f'chunk {i+1} sent').applymap(color_fct[i]) for i in range(len(sent))]
def color_output(s):
res = []
for r in s.index:
c = ''
for i in range(len(sent)):
if s.name in sent[i] and int(r) in sent[i][s.name]:
c = color_fct[i]()#f'color: {colors[i]}'
res.append(c)
return res
margin = '65'
after_st = after.style.set_table_attributes(f"style='margin-left:{margin}px'").apply(color_output).highlight_null(null_color='lightyellow').set_caption('Final data - After session commit')
display(multi_table([before.style.set_table_attributes(f"style='margin-right:{margin}px'").set_caption('Initial data - Before session'), *sent_st, after_st]))
def display_side_by_side(dfs:list, captions:list):
"""Display tables side by side to save vertical space
Input:
dfs: list of pandas.DataFrame
captions: list of table captions
"""
output = ""
combined = dict(zip(captions, dfs))
for caption, df in combined.items():
output += df.style.set_table_attributes("style='display:inline'").set_caption(caption)._repr_html_()
output += "\xa0\xa0\xa0"
display(HTML(output))
def generate_df(columns: List[str], index):
nbrows = len(index)
df = pd.DataFrame(
np.random.randint(-100, 1000, size=(nbrows, len(columns))), index=index)
df.columns= columns
return df
def print_response(resp):
print(f'{resp.request.method} : {resp.url} -> {resp.status_code}')
if resp.status_code != httpx.codes.OK:
display(resp.content)
def create_df_from_response(response):
"""Returns a dataframe created from the WellLog bulk data response
Input:
response: a httpx.response object
Output:
dataframe: a pandas.dataframe object
"""
content_type = response.headers.get('content-type')
if content_type == 'application/json':
return pd.DataFrame.from_dict(response.json())
elif content_type == 'application/x-parquet':
f = io.BytesIO(response.content)
f.seek(0)
return pd.read_parquet(f)
raise ValueError(f"Unknown content-type: '{content_type}'")
def display_previous_and_current_well_log_data_versions(record_id):
"""Display the previous and current WellLog data versions for a given record id and highlight differences between them.
Input:
record_id: a WellLog record id
"""
# list record version
results_response = client.get(f'{welllog_dms_url}/{record_id}/versions')
wellLog_versions_response = results_response.json()
versions = wellLog_versions_response['versions']
is_previous_results = False
is_current_results = False
if len(versions) >= 2:
previous_version_id = versions[len(versions)-2]
curl = f'{welllog_dms_url}/{record_id}/versions/{previous_version_id}/data'
results_response = client.get(curl)
if results_response.status_code == 200:
previous_results = create_df_from_response(results_response)
is_previous_results = True
current_version_id = versions[len(versions)-1]
curl = f'{welllog_dms_url}/{record_id}/versions/{current_version_id}/data'
results_response = client.get(curl)
if results_response.status_code == 200:
current_results = create_df_from_response(results_response)
is_current_results = True
colors = ['blue', 'red']
color_fct = [gen_color(c) for c in colors]
def color_output(s):
res = []
for r in s.index:
c = ''
if s.name in previous_results and int(r) in previous_results[s.name]:
c = color_fct[0]()
else:
c = color_fct[1]()
res.append(c)
return res
margin = '65'
tables = []
if is_previous_results:
previous_results_st = previous_results.style.set_table_attributes(f"style='margin-left:{margin}px'").highlight_null(null_color='lightyellow').set_caption('Previous WellLog data version').applymap(color_fct[0])
tables.append(previous_results_st)
if is_current_results:
if is_previous_results:
current_results_st = current_results.style.set_table_attributes(f"style='margin-left:{margin}px'").apply(color_output).highlight_null(null_color='lightyellow').set_caption('Current WellLog data version with data chunks added in red')
tables.append(current_results_st)
else:
current_results_st = current_results.style.set_table_attributes(f"style='margin-left:{margin}px'").highlight_null(null_color='lightyellow').set_caption('Current WellLog data version')
tables.append(current_results_st)
display(multi_table(tables))Several settings as the base url end-point and the data partition id to create a WellLog to the Wellbore DDMS. Please change those settings accordingly to the environment settings that you want to target.
base_url = "" # set a base URL value
data_partition_id = "" # set a data partition id
legal_tag = "" # set a valid legal tag in the data partition
acl_domain = "" # set an Access Control Lists (ACL) domain
welllog_dms_url = f'{base_url}/api/os-wellbore-ddms/ddms/v3/welllogs'
client = httpx.Client(verify=False,
headers={
"data-partition-id": f"{data_partition_id}",
"Authorization": f"Bearer {TOKEN}",
},
timeout=120
)
# Create a new WellLog. Here is a fake body just to illustrate the API use
record = {
"kind": "osdu:wks:work-product-component--WellLog:1.0.0",
"acl": {
"viewers": [f"data.default.viewers@{data_partition_id}.{acl_domain}"],
"owners": [f"data.default.owners@{data_partition_id}.{acl_domain}"]
},
"legal": {
"legaltags": [f"{legal_tag}"],
"otherRelevantDataCountries": ["US"],
},
"data": {""
"WellboreID": "namespace:master-data--Wellbore:SomeUniqueWellboreID:",
"Curves": [
{
"CurveID": "MD",
"NumberOfColumns": 1
},
{
"CurveID": "X",
"NumberOfColumns": 1
}
]
},
}The script below is creating a WellLog record that is used in this tutorial to demonstrate how to write WellLog bulk data to the Wellbore DDMS.
response = client.post(welllog_dms_url, json=[record])
print_response(response)
record_id = response.json()["recordIds"][0]
record_idCurrent limitations to manually trigger the WellLog bulk data statistics computation are:
- In case of error, a delay of 1h before triggering the computation again for the same WellLog data.
- A maximum of 3 attempts to compute statistics for a given WellLog data is allowed.
record = {
"kind": f"{data_partition}:wks:work-product-component--WellLog:1.0.0",
"acl": {
"viewers": [f"data.default.viewers@{data_partition}.{acl_domain}"],
"owners": [f"data.default.owners@{data_partition}.{acl_domain}"]
},
"legal": {
"legaltags": [legal_tag],
"otherRelevantDataCountries": ["US"],
},
"data": {
"Curves": [
{"CurveID": 'int-A'},
{"CurveID": 'int-A-with-nan'},
{"CurveID": 'float-B'},
{"CurveID": 'float-B-with-nan'},
{"CurveID": 'date-C'},
{"CurveID": 'date-C-with-nan'},
{"CurveID": 'bool-D'},
{"CurveID": 'string-E'},
]
},
}
response_typed = client.post(welllog_url, json=[record])
print_response(response_typed)
types_col_record_id = response_typed.json()["recordIds"][0]
print(types_col_record_id)POST : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs -> 200
columns = ['int-A', 'int-A-with-nan', 'float-B', 'float-B-with-nan', 'bool-D', 'string-E', 'date-C', 'date-C-with-nan']
different_type_df = generate_df_typed(columns, range(100_000))
add_nan_values_to_df(different_type_df)
display(different_type_df)
data_to_send = different_type_df.to_parquet(engine='pyarrow')
write_response = client.post(f'{welllog_url}/{types_col_record_id}/data', data=data_to_send, headers={'content-type': 'application/parquet'})
print_response(write_response)| int-A | int-A-with-nan | float-B | float-B-with-nan | bool-D | string-E | date-C | date-C-with-nan | |
|---|---|---|---|---|---|---|---|---|
| 0 | 868 | 592.0 | 0.792575 | 0.113692 | False | string_value_0 | 2021-01-01 00:00:00.000 | 2021-01-01 00:00:00.000 |
| 1 | 222 | 624.0 | 0.529602 | 0.047647 | True | string_value_1 | 2021-01-01 00:00:00.001 | NaT |
| 2 | 842 | 359.0 | 0.184516 | 0.783715 | True | string_value_2 | 2021-01-01 00:00:00.002 | NaT |
| 3 | 879 | 280.0 | 0.526019 | 0.288487 | False | string_value_3 | 2021-01-01 00:00:00.003 | NaT |
| 4 | 456 | 619.0 | 0.512207 | 0.373447 | True | string_value_4 | 2021-01-01 00:00:00.004 | 2021-01-01 00:00:00.004 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 99995 | 560 | 220.0 | 0.714021 | 0.064975 | False | string_value_99995 | 2021-01-01 00:01:39.995 | 2021-01-01 00:01:39.995 |
| 99996 | 861 | 78.0 | 0.663497 | 0.560253 | False | string_value_99996 | 2021-01-01 00:01:39.996 | 2021-01-01 00:01:39.996 |
| 99997 | 916 | 100.0 | 0.687259 | NaN | False | string_value_99997 | 2021-01-01 00:01:39.997 | 2021-01-01 00:01:39.997 |
| 99998 | 354 | 933.0 | 0.563194 | 0.921421 | False | string_value_99998 | 2021-01-01 00:01:39.998 | 2021-01-01 00:01:39.998 |
| 99999 | -14 | 812.0 | 0.976996 | 0.644540 | True | string_value_99999 | 2021-01-01 00:01:39.999 | NaT |
Note: NaT means Not a Time, equivalent of NaN for dates.
POST : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs/osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9/data -> 200
Computation can be manually triggered if WellLog was created before M12 release or in case of computation error.
API to use: POST /ddms/v3/welllogs/{record_id}/versions/{version}/data/statistics
NOTE: record id and record version are required.
welllog_meta_response = client.get(f'{welllog_url}/{record_id}')
print_response(welllog_meta_response)
record_version = welllog_meta_response.json()['version']
post_welllog_stats_response = client.post(f'{welllog_url}/{record_id}/versions/{record_version}/data/statistics', headers={'content-type': 'application/parquet'})
print_response(post_welllog_stats_response)GET : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs/osdu:work-product-component--WellLog:89bd0debbcf1411fb240d0a906da7cd4 -> 200 POST : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs/osdu:work-product-component--WellLog:89bd0debbcf1411fb240d0a906da7cd4/versions/1653990573032433/data/statistics -> 200
Please, note that WellLog's curves with string and boolean data types are not computed
# Leave the parameter "curves" empty will select all the WellLog's curves statistics available
select_all_curves_param = {}
# To select only curves: 'int-A', 'float-B' and 'date-C' from computed statistics
select_specific_curves_params = {"curves": "int-A,float-B,date-C"}
%time post_welllog_stats_response = client.get(f'{welllog_url}/{types_col_record_id}/data/statistics', param=select_all_curves_param)
print_response(post_welllog_stats_response)
json_posted_stats = post_welllog_stats_response.json()
display(json_posted_stats)CPU times: total: 141 ms Wall time: 2.01 s GET : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs/osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9/data/statistics -> 200
{'computationStartDatetime': '2022-05-31T09:17:08.431223', 'recordId': 'osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9', 'recordVersion': 1653988624188709, 'computationStatus': 'complete', 'data': {'int-A-with-nan': {'mean': '449.8925764705882', 'std': '317.8241340241287', 'min': '-100.0', '10%': '10.0', '50%': '450.0', '90%': '890.0', 'max': '999.0', 'totalCount': '100000', 'nonAbsentValuesCount': '85000.0'}, 'int-A': {'mean': '449.63618', 'std': '317.49388998642064', 'min': '-100.0', '10%': '9.0', '50%': '451.0', '90%': '890.0', 'max': '999.0', 'totalCount': '100000', 'nonAbsentValuesCount': '100000.0'}, 'date-C': {'mean': '2021-01-01 00:00:49.999499776', 'std': 'NaN', 'min': '2021-01-01 00:00:00', '10%': '2021-01-01 00:00:09.999899904', '50%': '2021-01-01 00:00:49.999500032', '90%': '2021-01-01 00:01:29.999100160', 'max': '2021-01-01 00:01:39.999000', 'totalCount': '100000', 'nonAbsentValuesCount': '100000'}, 'date-C-with-nan': {'mean': '2021-01-01 00:00:49.995083776', 'std': 'NaN', 'min': '2021-01-01 00:00:00', '10%': '2021-01-01 00:00:10.055899904', '50%': '2021-01-01 00:00:49.966500096', '90%': '2021-01-01 00:01:29.970099968', 'max': '2021-01-01 00:01:39.998000', 'totalCount': '100000', 'nonAbsentValuesCount': '85000'}, 'float-B-with-nan': {'mean': '0.4981784333593075', 'std': '0.2888887958106551', 'min': '3.56818820279603e-06', '10%': '0.09984358734025209', '50%': '0.4974533393916296', '90%': '0.8997088186446737', 'max': '0.9999888136429178', 'totalCount': '100000', 'nonAbsentValuesCount': '85000.0'}, 'float-B': {'mean': '0.4980908954221541', 'std': '0.2885563043838143', 'min': '1.4538739944169876e-06', '10%': '0.09894710442338889', '50%': '0.4969224934210345', '90%': '0.8987151676695218', 'max': '0.9999918891377975', 'totalCount': '100000', 'nonAbsentValuesCount': '100000.0'}}}
json_posted_stats_copy = response_copy_without_data(post_welllog_stats_response)
display(json_posted_stats_copy)
create_df_from_dict(post_welllog_stats_response){'computationStartDatetime': '2022-05-31T09:17:08.431223', 'recordId': 'osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9', 'recordVersion': 1653988624188709, 'computationStatus': 'complete'}
| mean | std | min | 10% | 50% | 90% | max | totalCount | nonAbsentValuesCount | |
|---|---|---|---|---|---|---|---|---|---|
| int-A-with-nan | 449.8925764705882 | 317.8241340241287 | -100.0 | 10.0 | 450.0 | 890.0 | 999.0 | 100000 | 85000.0 |
| date-C | 2021-01-01 00:00:49.999499776 | NaN | 2021-01-01 00:00:00 | 2021-01-01 00:00:09.999899904 | 2021-01-01 00:00:49.999500032 | 2021-01-01 00:01:29.999100160 | 2021-01-01 00:01:39.999000 | 100000 | 100000 |
| float-B-with-nan | 0.4981784333593075 | 0.2888887958106551 | 3.56818820279603e-06 | 0.09984358734025209 | 0.4974533393916296 | 0.8997088186446737 | 0.9999888136429178 | 100000 | 85000.0 |
| float-B | 0.4980908954221541 | 0.2885563043838143 | 1.4538739944169876e-06 | 0.09894710442338889 | 0.4969224934210345 | 0.8987151676695218 | 0.9999918891377975 | 100000 | 100000.0 |
| int-A | 449.63618 | 317.49388998642064 | -100.0 | 9.0 | 451.0 | 890.0 | 999.0 | 100000 | 100000.0 |
| date-C-with-nan | 2021-01-01 00:00:49.995083776 | NaN | 2021-01-01 00:00:00 | 2021-01-01 00:00:10.055899904 | 2021-01-01 00:00:49.966500096 | 2021-01-01 00:01:29.970099968 | 2021-01-01 00:01:39.998000 | 100000 | 85000 |
APIs:
- GET /ddms/v3/welllogs/{record_id}/data/statistics
- GET /ddms/v3/welllogs/{record_id}/versions/{version}/data/statistics
As the GET bulk API, you can fetch statistics only for specifics curves
- GET /ddms/v3/welllogs/{record_id}/data?describe=True
welllog_bulk_response_example_1 = client.get(f'{welllog_url}/{types_col_record_id}/data', params={'describe':True})
record_columns = welllog_bulk_response_example_1.json().get('columns', [])
display(welllog_bulk_response_example_1.json())
wanted_curves = [c for c in record_columns if c.startswith('float')]
print("\nWanted WellLog's curves", wanted_curves){'numberOfRows': 100000, 'columns': ['bool-D', 'date-C', 'date-C-with-nan', 'float-B', 'float-B-with-nan', 'int-A', 'int-A-with-nan', 'string-E']}
Wanted WellLog's curves ['float-B', 'float-B-with-nan']
- GET /ddms/v3/welllogs/{record_id}/data/statistics
# Generate list of wanted curves from previous describe=True API.
wanted_curves_params = {
'curves': ','.join(wanted_curves)
}
welllog_stats_response_example_1 = client.get(f'{welllog_url}/{types_col_record_id}/data/statistics', params=wanted_curves_params)
print_response(welllog_stats_response_example_1)
json_posted_stats_example_1 = welllog_stats_response_example_1.json()
display(json_posted_stats_example_1){'curves': 'float-B,float-B-with-nan'} GET : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs/osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9/data/statistics?curves=float-B%2Cfloat-B-with-nan -> 200
{'computationStartDatetime': '2022-05-31T09:17:08.431223', 'recordId': 'osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9', 'recordVersion': 1653988624188709, 'computationStatus': 'complete', 'data': {'float-B': {'mean': '0.4980908954221541', 'std': '0.2885563043838143', 'min': '1.4538739944169876e-06', '10%': '0.09894710442338889', '50%': '0.4969224934210345', '90%': '0.8987151676695218', 'max': '0.9999918891377975', 'totalCount': '100000', 'nonAbsentValuesCount': '100000.0'}, 'float-B-with-nan': {'mean': '0.4981784333593075', 'std': '0.2888887958106551', 'min': '3.56818820279603e-06', '10%': '0.09984358734025209', '50%': '0.4974533393916296', '90%': '0.8997088186446737', 'max': '0.9999888136429178', 'totalCount': '100000', 'nonAbsentValuesCount': '85000.0'}}}
- GET /ddms/v3/welllogs/{welllogId}/versions
welllog_record_response_example_2 = client.get(f'{welllog_url}/{types_col_record_id}/versions')
record_versions = welllog_record_response_example_2.json().get('versions', [])
print("record_versions:", record_versions)
last_record_version = record_versions[-1]record_versions: [1653988602340449, 1653988624188709]
- GET /ddms/v3/welllogs/{record_id}/versions/{version}/data/statistics
wanted_curves_params = {
'curves': ','.join(wanted_curves)
}
record_version = last_record_version
welllog_stats_response_example_2 = client.get(f'{welllog_url}/{types_col_record_id}/versions/{record_version}/data/statistics', params=wanted_curves_params)
print_response(welllog_stats_response_example_2)
json_posted_stats_example_2 = welllog_stats_response_example_2.json()
display(json_posted_stats_example_2)GET : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs/osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9/versions/1653988624188709/data/statistics?curves=float-B%2Cfloat-B-with-nan -> 200
{'computationStartDatetime': '2022-05-31T09:17:08.431223', 'recordId': 'osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9', 'recordVersion': 1653988624188709, 'computationStatus': 'complete', 'data': {'float-B': {'mean': '0.4980908954221541', 'std': '0.2885563043838143', 'min': '1.4538739944169876e-06', '10%': '0.09894710442338889', '50%': '0.4969224934210345', '90%': '0.8987151676695218', 'max': '0.9999918891377975', 'totalCount': '100000', 'nonAbsentValuesCount': '100000.0'}, 'float-B-with-nan': {'mean': '0.4981784333593075', 'std': '0.2888887958106551', 'min': '3.56818820279603e-06', '10%': '0.09984358734025209', '50%': '0.4974533393916296', '90%': '0.8997088186446737', 'max': '0.9999888136429178', 'totalCount': '100000', 'nonAbsentValuesCount': '85000.0'}}}