Last updated

Introduction

In this tutorial we will explain:

  • How to get bulk statistics on WellLog data created
  • How to fetch WellLog bulk statistics code examples

Note: Computable WellLog's curves data types are: integer, float and date/datetime.

Computable WellLog's curves data types are: integer, float and date/datetime.

Prerequisites

Required Python packages

Before to start to write bulk data through Wellbore DDMS API's, you will need to install the Python packages below:

  • The pandas module and its Pandas.Dataframe json format to structure log bulk data to be written to the Wellbore DDMS.
  • The pyarrow module to transform Pandas.Dataframe to parquet file through the pyarrow engine.
  • The httpx module that allows to post request to the Wellbore DDMS.
# Prerequisite to run this notebook
!python -m pip install pip --upgrade
!pip install pandas numpy httpx pyarrow

Authorization

For any call to Wellbore DDMS API's you need to pass into the header of the request a valid bearer token. This token can be obtained from any API catalog on the developer portal. You will need first to request a developer base subscription. Then from the developer base subscription pick any API and execute it. A valid bearer token is returns in the Curl section of the response. Copy this token value and assign it to the TOKEN variable below.

TOKEN = '' # Paste here the token without the bearer prefix

Utility methods

Helper functions used in the different sample scripts of this tutorial.
from typing import List
import httpx
import pandas as pd
import numpy as np
import io
from IPython.display import display_html, display, HTML
from itertools import chain, cycle

def generate_df_typed(columns, index):
    def gen_values(col_name, size):
        if col_name.startswith('float'):
            return np.random.random_sample(size=size)
        if col_name.startswith('str'):
            return [f'string_value_{i}' for i in range(size)]
        if col_name.startswith('bool'):
            return np.random.choice(a=[False, True], size=size) 
        if col_name.startswith('date'):
            return (np.datetime4('2021-01-01') + days for days in range(size))
        return np.random.randint(-100, 1000, size=size)

    df = pd.DataFrame({c: gen_values(c, len(index))
                      for c in columns}, index=index)
    return df

def multi_table(table_list):
    '''Acceps a list of IpyTable objects and returns a table which contains each IpyTable in a cell'''
    return HTML(
        '<table><tr style="background-color:white;">' + 
        ''.join(['<td>' + table._repr_html_() + '</td>' for table in table_list]) +
        '</tr></table>'
    )

def gen_color(color):
    def fct(val=None):
         return f'color: {color}'
    return fct

def display_operation(before, sent, after):
    colors = ['blue', 'green', 'orange', 'red']
    color_fct = [gen_color(c) for c in colors]
    sent_st = [sent[i].style.set_caption(f'chunk {i+1} sent').applymap(color_fct[i]) for i in range(len(sent))]
    def color_output(s):
        res = []
        for r in s.index:
            c = ''
            for i in range(len(sent)):
                if s.name in sent[i] and int(r) in sent[i][s.name]:
                    c = color_fct[i]()#f'color: {colors[i]}'
            res.append(c)
        return res

    margin = '65'
    after_st = after.style.set_table_attributes(f"style='margin-left:{margin}px'").apply(color_output).highlight_null(null_color='lightyellow').set_caption('Final data - After session commit')   
    display(multi_table([before.style.set_table_attributes(f"style='margin-right:{margin}px'").set_caption('Initial data - Before session'), *sent_st, after_st]))
    
def display_side_by_side(dfs:list, captions:list):
    """Display tables side by side to save vertical space
    Input:
        dfs: list of pandas.DataFrame
        captions: list of table captions
    """
    output = ""
    combined = dict(zip(captions, dfs))
    for caption, df in combined.items():
        output += df.style.set_table_attributes("style='display:inline'").set_caption(caption)._repr_html_()
        output += "\xa0\xa0\xa0"
    display(HTML(output))
    
def generate_df(columns: List[str], index):
    nbrows = len(index)
    df = pd.DataFrame(
        np.random.randint(-100, 1000, size=(nbrows, len(columns))), index=index)
    df.columns= columns
    return df


def print_response(resp):
    print(f'{resp.request.method} : {resp.url} -> {resp.status_code}')
    if resp.status_code != httpx.codes.OK:
        display(resp.content)

        
def create_df_from_response(response):
    """Returns a dataframe created from the WellLog bulk data response
    Input:
        response: a httpx.response object
    Output:
        dataframe: a pandas.dataframe object
    """
    content_type = response.headers.get('content-type')
    
    if content_type == 'application/json':
        return pd.DataFrame.from_dict(response.json())
    
    elif content_type == 'application/x-parquet':
        f = io.BytesIO(response.content)
        f.seek(0)
        return pd.read_parquet(f)
    
    raise ValueError(f"Unknown content-type: '{content_type}'")
    
def display_previous_and_current_well_log_data_versions(record_id):
    """Display the previous and current WellLog data versions for a given record id and highlight differences between them.
    Input:
        record_id: a WellLog record id
    """
    # list record version
    results_response = client.get(f'{welllog_dms_url}/{record_id}/versions')
    wellLog_versions_response = results_response.json()
    versions = wellLog_versions_response['versions']
    
    is_previous_results = False
    is_current_results = False
    if len(versions) >= 2:
        previous_version_id = versions[len(versions)-2]
        curl = f'{welllog_dms_url}/{record_id}/versions/{previous_version_id}/data'
        results_response = client.get(curl)
        if results_response.status_code == 200:
            previous_results = create_df_from_response(results_response)
            is_previous_results = True
    
        current_version_id = versions[len(versions)-1]
        curl = f'{welllog_dms_url}/{record_id}/versions/{current_version_id}/data'
        results_response = client.get(curl)
        if results_response.status_code == 200:
            current_results = create_df_from_response(results_response)
            is_current_results = True
    
    colors = ['blue', 'red']
    color_fct = [gen_color(c) for c in colors]
    def color_output(s):
        res = []        
        for r in s.index:
            c = ''
            if s.name in previous_results and int(r) in previous_results[s.name]:
                c = color_fct[0]()
            else:
                c = color_fct[1]()
            res.append(c)
        return res
    
    margin = '65'
    tables = []
    if is_previous_results:
        previous_results_st = previous_results.style.set_table_attributes(f"style='margin-left:{margin}px'").highlight_null(null_color='lightyellow').set_caption('Previous WellLog data version').applymap(color_fct[0])  
        tables.append(previous_results_st)
        
    if is_current_results:
        if is_previous_results:
            current_results_st = current_results.style.set_table_attributes(f"style='margin-left:{margin}px'").apply(color_output).highlight_null(null_color='lightyellow').set_caption('Current WellLog data version with data chunks added in red')
            tables.append(current_results_st)
        else:
            current_results_st = current_results.style.set_table_attributes(f"style='margin-left:{margin}px'").highlight_null(null_color='lightyellow').set_caption('Current WellLog data version') 
            tables.append(current_results_st)
        
    display(multi_table(tables))

Settings

Several settings as the base url end-point and the data partition id to create a WellLog to the Wellbore DDMS. Please change those settings accordingly to the environment settings that you want to target.

base_url = "" # set a base URL value
data_partition_id = "" # set a data partition id
legal_tag = "" # set a valid legal tag in the data partition 
acl_domain = "" # set an Access Control Lists (ACL) domain

welllog_dms_url = f'{base_url}/api/os-wellbore-ddms/ddms/v3/welllogs'

client = httpx.Client(verify=False,
    headers={
        "data-partition-id": f"{data_partition_id}",
        "Authorization": f"Bearer {TOKEN}",
    },
    timeout=120
)

# Create a new WellLog. Here is a fake body just to illustrate the API use
record = {
    "kind": "osdu:wks:work-product-component--WellLog:1.0.0",
    "acl": {
        "viewers": [f"data.default.viewers@{data_partition_id}.{acl_domain}"],
        "owners": [f"data.default.owners@{data_partition_id}.{acl_domain}"]
      },
    "legal": {
        "legaltags": [f"{legal_tag}"],
        "otherRelevantDataCountries": ["US"],
    },
    "data": {""
        "WellboreID": "namespace:master-data--Wellbore:SomeUniqueWellboreID:",
        "Curves": [
            {
                "CurveID": "MD",
                "NumberOfColumns": 1
            },
            {
                "CurveID": "X",
                "NumberOfColumns": 1
            }
        ]
    },
}

Create a WellLog record

The script below is creating a WellLog record that is used in this tutorial to demonstrate how to write WellLog bulk data to the Wellbore DDMS.

response = client.post(welllog_dms_url, json=[record])
print_response(response)
record_id = response.json()["recordIds"][0]
record_id

2. How to trigger WellLog bulk data statistics computation

Current limitations to manually trigger the WellLog bulk data statistics computation are:

  • In case of error, a delay of 1h before triggering the computation again for the same WellLog data.
  • A maximum of 3 attempts to compute statistics for a given WellLog data is allowed.

Create WellLog record

record = {
    "kind": f"{data_partition}:wks:work-product-component--WellLog:1.0.0",
    "acl": {
        "viewers": [f"data.default.viewers@{data_partition}.{acl_domain}"],
        "owners": [f"data.default.owners@{data_partition}.{acl_domain}"]
      },
    "legal": {
        "legaltags": [legal_tag],
        "otherRelevantDataCountries": ["US"],
    },
    "data": {
        "Curves": [
            {"CurveID": 'int-A'},
            {"CurveID": 'int-A-with-nan'},
            {"CurveID": 'float-B'},
            {"CurveID": 'float-B-with-nan'}, 
            {"CurveID": 'date-C'}, 
            {"CurveID": 'date-C-with-nan'},
            {"CurveID": 'bool-D'},
            {"CurveID": 'string-E'},
        ]
    },
}

response_typed = client.post(welllog_url, json=[record])
print_response(response_typed)
types_col_record_id = response_typed.json()["recordIds"][0]
print(types_col_record_id)

POST : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs -> 200

POST WellLog data

columns = ['int-A', 'int-A-with-nan', 'float-B', 'float-B-with-nan', 'bool-D', 'string-E', 'date-C', 'date-C-with-nan']

different_type_df = generate_df_typed(columns, range(100_000))
add_nan_values_to_df(different_type_df)
display(different_type_df)

data_to_send = different_type_df.to_parquet(engine='pyarrow')

write_response = client.post(f'{welllog_url}/{types_col_record_id}/data', data=data_to_send, headers={'content-type': 'application/parquet'})
print_response(write_response)
int-Aint-A-with-nanfloat-Bfloat-B-with-nanbool-Dstring-Edate-Cdate-C-with-nan
0868592.00.7925750.113692Falsestring_value_02021-01-01 00:00:00.0002021-01-01 00:00:00.000
1222624.00.5296020.047647Truestring_value_12021-01-01 00:00:00.001NaT
2842359.00.1845160.783715Truestring_value_22021-01-01 00:00:00.002NaT
3879280.00.5260190.288487Falsestring_value_32021-01-01 00:00:00.003NaT
4456619.00.5122070.373447Truestring_value_42021-01-01 00:00:00.0042021-01-01 00:00:00.004
...........................
99995560220.00.7140210.064975Falsestring_value_999952021-01-01 00:01:39.9952021-01-01 00:01:39.995
9999686178.00.6634970.560253Falsestring_value_999962021-01-01 00:01:39.9962021-01-01 00:01:39.996
99997916100.00.687259NaNFalsestring_value_999972021-01-01 00:01:39.9972021-01-01 00:01:39.997
99998354933.00.5631940.921421Falsestring_value_999982021-01-01 00:01:39.9982021-01-01 00:01:39.998
99999-14812.00.9769960.644540Truestring_value_999992021-01-01 00:01:39.999NaT

Note: NaT means Not a Time, equivalent of NaN for dates.

POST : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs/osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9/data -> 200

Trigger bulk statistics computation

Computation can be manually triggered if WellLog was created before M12 release or in case of computation error.

API to use: POST /ddms/v3/welllogs/{record_id}/versions/{version}/data/statistics

NOTE: record id and record version are required.

welllog_meta_response = client.get(f'{welllog_url}/{record_id}')
print_response(welllog_meta_response)
record_version = welllog_meta_response.json()['version']

post_welllog_stats_response = client.post(f'{welllog_url}/{record_id}/versions/{record_version}/data/statistics', headers={'content-type': 'application/parquet'})
print_response(post_welllog_stats_response)

GET : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs/osdu:work-product-component--WellLog:89bd0debbcf1411fb240d0a906da7cd4 -> 200 POST : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs/osdu:work-product-component--WellLog:89bd0debbcf1411fb240d0a906da7cd4/versions/1653990573032433/data/statistics -> 200

3. How to fetch WellLog bulk data statistics already computed

Please, note that WellLog's curves with string and boolean data types are not computed

Display statistics data as JSON

# Leave the parameter "curves" empty will select all the WellLog's curves statistics available
select_all_curves_param = {}

 # To select only curves: 'int-A', 'float-B' and 'date-C' from computed statistics
select_specific_curves_params = {"curves": "int-A,float-B,date-C"}

%time post_welllog_stats_response = client.get(f'{welllog_url}/{types_col_record_id}/data/statistics', param=select_all_curves_param)
print_response(post_welllog_stats_response)

json_posted_stats = post_welllog_stats_response.json()
display(json_posted_stats)

CPU times: total: 141 ms Wall time: 2.01 s GET : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs/osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9/data/statistics -> 200

{'computationStartDatetime': '2022-05-31T09:17:08.431223', 'recordId': 'osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9', 'recordVersion': 1653988624188709, 'computationStatus': 'complete', 'data': {'int-A-with-nan': {'mean': '449.8925764705882', 'std': '317.8241340241287', 'min': '-100.0', '10%': '10.0', '50%': '450.0', '90%': '890.0', 'max': '999.0', 'totalCount': '100000', 'nonAbsentValuesCount': '85000.0'}, 'int-A': {'mean': '449.63618', 'std': '317.49388998642064', 'min': '-100.0', '10%': '9.0', '50%': '451.0', '90%': '890.0', 'max': '999.0', 'totalCount': '100000', 'nonAbsentValuesCount': '100000.0'}, 'date-C': {'mean': '2021-01-01 00:00:49.999499776', 'std': 'NaN', 'min': '2021-01-01 00:00:00', '10%': '2021-01-01 00:00:09.999899904', '50%': '2021-01-01 00:00:49.999500032', '90%': '2021-01-01 00:01:29.999100160', 'max': '2021-01-01 00:01:39.999000', 'totalCount': '100000', 'nonAbsentValuesCount': '100000'}, 'date-C-with-nan': {'mean': '2021-01-01 00:00:49.995083776', 'std': 'NaN', 'min': '2021-01-01 00:00:00', '10%': '2021-01-01 00:00:10.055899904', '50%': '2021-01-01 00:00:49.966500096', '90%': '2021-01-01 00:01:29.970099968', 'max': '2021-01-01 00:01:39.998000', 'totalCount': '100000', 'nonAbsentValuesCount': '85000'}, 'float-B-with-nan': {'mean': '0.4981784333593075', 'std': '0.2888887958106551', 'min': '3.56818820279603e-06', '10%': '0.09984358734025209', '50%': '0.4974533393916296', '90%': '0.8997088186446737', 'max': '0.9999888136429178', 'totalCount': '100000', 'nonAbsentValuesCount': '85000.0'}, 'float-B': {'mean': '0.4980908954221541', 'std': '0.2885563043838143', 'min': '1.4538739944169876e-06', '10%': '0.09894710442338889', '50%': '0.4969224934210345', '90%': '0.8987151676695218', 'max': '0.9999918891377975', 'totalCount': '100000', 'nonAbsentValuesCount': '100000.0'}}}

Display statistics data in a Dataframe

json_posted_stats_copy = response_copy_without_data(post_welllog_stats_response)
display(json_posted_stats_copy)

create_df_from_dict(post_welllog_stats_response)

{'computationStartDatetime': '2022-05-31T09:17:08.431223', 'recordId': 'osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9', 'recordVersion': 1653988624188709, 'computationStatus': 'complete'}

meanstdmin10%50%90%maxtotalCountnonAbsentValuesCount
int-A-with-nan449.8925764705882317.8241340241287-100.010.0450.0890.0999.010000085000.0
date-C2021-01-01 00:00:49.999499776NaN2021-01-01 00:00:002021-01-01 00:00:09.9998999042021-01-01 00:00:49.9995000322021-01-01 00:01:29.9991001602021-01-01 00:01:39.999000100000100000
float-B-with-nan0.49817843335930750.28888879581065513.56818820279603e-060.099843587340252090.49745333939162960.89970881864467370.999988813642917810000085000.0
float-B0.49809089542215410.28855630438381431.4538739944169876e-060.098947104423388890.49692249342103450.89871516766952180.9999918891377975100000100000.0
int-A449.63618317.49388998642064-100.09.0451.0890.0999.0100000100000.0
date-C-with-nan2021-01-01 00:00:49.995083776NaN2021-01-01 00:00:002021-01-01 00:00:10.0558999042021-01-01 00:00:49.9665000962021-01-01 00:01:29.9700999682021-01-01 00:01:39.99800010000085000

4. Fetch WellLog bulk statistics code examples

APIs:

  • GET /ddms/v3/welllogs/{record_id}/data/statistics
  • GET /ddms/v3/welllogs/{record_id}/versions/{version}/data/statistics

Select specifics curves statistics

As the GET bulk API, you can fetch statistics only for specifics curves

Fetch WellLog bulk information and retrieve columns from it

  • GET /ddms/v3/welllogs/{record_id}/data?describe=True
welllog_bulk_response_example_1 = client.get(f'{welllog_url}/{types_col_record_id}/data', params={'describe':True})
record_columns = welllog_bulk_response_example_1.json().get('columns', [])
display(welllog_bulk_response_example_1.json())

wanted_curves = [c for c in record_columns if c.startswith('float')]
print("\nWanted WellLog's curves", wanted_curves)

{'numberOfRows': 100000, 'columns': ['bool-D', 'date-C', 'date-C-with-nan', 'float-B', 'float-B-with-nan', 'int-A', 'int-A-with-nan', 'string-E']}

Wanted WellLog's curves ['float-B', 'float-B-with-nan']

Fetch WellLog bulk statistics of selected WellLog's curves

  • GET /ddms/v3/welllogs/{record_id}/data/statistics
# Generate list of wanted curves from previous describe=True API.
wanted_curves_params = {
    'curves': ','.join(wanted_curves)
}

welllog_stats_response_example_1 = client.get(f'{welllog_url}/{types_col_record_id}/data/statistics', params=wanted_curves_params)
print_response(welllog_stats_response_example_1)

json_posted_stats_example_1 = welllog_stats_response_example_1.json()
display(json_posted_stats_example_1)

{'curves': 'float-B,float-B-with-nan'} GET : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs/osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9/data/statistics?curves=float-B%2Cfloat-B-with-nan -> 200

{'computationStartDatetime': '2022-05-31T09:17:08.431223', 'recordId': 'osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9', 'recordVersion': 1653988624188709, 'computationStatus': 'complete', 'data': {'float-B': {'mean': '0.4980908954221541', 'std': '0.2885563043838143', 'min': '1.4538739944169876e-06', '10%': '0.09894710442338889', '50%': '0.4969224934210345', '90%': '0.8987151676695218', 'max': '0.9999918891377975', 'totalCount': '100000', 'nonAbsentValuesCount': '100000.0'}, 'float-B-with-nan': {'mean': '0.4981784333593075', 'std': '0.2888887958106551', 'min': '3.56818820279603e-06', '10%': '0.09984358734025209', '50%': '0.4974533393916296', '90%': '0.8997088186446737', 'max': '0.9999888136429178', 'totalCount': '100000', 'nonAbsentValuesCount': '85000.0'}}}

Fetch WellLog bulk statistics of anterior WellLog bulk version

List WellLog anterior versions

  • GET /ddms/v3/welllogs/{welllogId}/versions
welllog_record_response_example_2 = client.get(f'{welllog_url}/{types_col_record_id}/versions')
record_versions = welllog_record_response_example_2.json().get('versions', [])

print("record_versions:", record_versions)
last_record_version = record_versions[-1]

record_versions: [1653988602340449, 1653988624188709]

Then, fetch WellLog bulk statistics at specified version

  • GET /ddms/v3/welllogs/{record_id}/versions/{version}/data/statistics
wanted_curves_params = {
    'curves': ','.join(wanted_curves)
}

record_version = last_record_version

welllog_stats_response_example_2 = client.get(f'{welllog_url}/{types_col_record_id}/versions/{record_version}/data/statistics', params=wanted_curves_params)
print_response(welllog_stats_response_example_2)

json_posted_stats_example_2 = welllog_stats_response_example_2.json()
display(json_posted_stats_example_2)

GET : https://<ENVIRONMENT_BASE_URL>/api/os-wellbore-ddms/ddms/v3/welllogs/osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9/versions/1653988624188709/data/statistics?curves=float-B%2Cfloat-B-with-nan -> 200

{'computationStartDatetime': '2022-05-31T09:17:08.431223', 'recordId': 'osdu:work-product-component--WellLog:ac6ec4b8074941b19c4723b1dbdc0da9', 'recordVersion': 1653988624188709, 'computationStatus': 'complete', 'data': {'float-B': {'mean': '0.4980908954221541', 'std': '0.2885563043838143', 'min': '1.4538739944169876e-06', '10%': '0.09894710442338889', '50%': '0.4969224934210345', '90%': '0.8987151676695218', 'max': '0.9999918891377975', 'totalCount': '100000', 'nonAbsentValuesCount': '100000.0'}, 'float-B-with-nan': {'mean': '0.4981784333593075', 'std': '0.2888887958106551', 'min': '3.56818820279603e-06', '10%': '0.09984358734025209', '50%': '0.4974533393916296', '90%': '0.8997088186446737', 'max': '0.9999888136429178', 'totalCount': '100000', 'nonAbsentValuesCount': '85000.0'}}}