Get Deployment Pipelines from Power BI API with Python


USE PYTHON TO GET DATA FROM POWER BI APIs

…such as the deployment pipelines a user or service principal has access to


IN THIS ARTICLE

  1. Get Deployment Pipelines from Power BI Admin API using Python

Example result dataframe for deployment pipelines obtained with the Admin API


HOW TO SET IT UP?

I wrote a post about setting up a Service Principal, along with the permissions and settings needed to get you started, and an explanation of the authentication flow. You can find that post here.


#########################################################################################      
# Authentication - Replace string variables with your relevant values       
#########################################################################################

import json, requests, pandas as pd
try:
    from azure.identity import ClientSecretCredential
except ImportError:
    # Installs the azure-identity package if missing (Colab/notebook magic)
    !pip install azure-identity
    from azure.identity import ClientSecretCredential

# --------------------------------------------------------------------------------------#
# String variables: Replace with your own
tenant = 'Your-Tenant-ID'
client = 'Your-App-Client-ID'
client_secret = 'Your-Client-Secret-Value' # See Note 2: Better to use key vault
api = 'https://analysis.windows.net/powerbi/api/.default'
# --------------------------------------------------------------------------------------#

# Generates the access token for the Service Principal
auth = ClientSecretCredential(authority='https://login.microsoftonline.com/',
                              tenant_id=tenant,
                              client_id=client,
                              client_secret=client_secret)
access_token = auth.get_token(api).token

print('\nSuccessfully authenticated.')

#########################################################################################

# Note 1: This code was authored for use in a Google Colab Notebook
# Note 2: Check here for info on using Azure Key Vault:
#         https://docs.microsoft.com/en-us/azure/key-vault/secrets/quick-create-python
#         The above code should be modified to programmatically get the secret from AKV

#########################################################################################
# Admin: Get all Deployment Pipelines
#########################################################################################

base_url = 'https://api.powerbi.com/v1.0/myorg/'
header = {'Authorization': f'Bearer {access_token}'}

# The admin API doesn't work unless you provide a ?$top=n argument
# Top 5000 pipelines
topn = '$top=5000'

# All pipelines, no expand
admin_pipelines = f'{base_url}admin/pipelines?{topn}'

# All pipelines with users & stages
admin_pipelines_expand = f'{base_url}admin/pipelines?$expand=users,stages&{topn}'
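The URL construction above can be wrapped in a small helper, which makes the required `$top` parameter and the optional `$expand` argument explicit. This is a sketch; the function name `build_admin_pipelines_url` is my own, not part of any API.

```python
# Hypothetical helper mirroring the URL construction above.
# The Admin API requires a $top=n argument; $expand is optional.
def build_admin_pipelines_url(base_url, top=5000, expand=None):
    """Build the Admin API URL for deployment pipelines.

    expand: optional list of related entities, e.g. ['users', 'stages'].
    """
    url = f'{base_url}admin/pipelines?'
    if expand:
        url += f'$expand={",".join(expand)}&'
    url += f'$top={top}'
    return url

print(build_admin_pipelines_url('https://api.powerbi.com/v1.0/myorg/'))
# https://api.powerbi.com/v1.0/myorg/admin/pipelines?$top=5000
print(build_admin_pipelines_url('https://api.powerbi.com/v1.0/myorg/',
                                expand=['users', 'stages']))
# https://api.powerbi.com/v1.0/myorg/admin/pipelines?$expand=users,stages&$top=5000
```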

# HTTP GET Request
pipelines = requests.get(admin_pipelines_expand, headers=header)

# Response status code (200 = Success; 401 = Unauthorized; 404 = Not Found)
print(pipelines.status_code)
try:
    pipelines = json.loads(pipelines.content)

    # Pretty-prints the JSON
    print(json.dumps(pipelines, indent=4, sort_keys=True))

except Exception as e:
    print(e)
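Once the response is parsed, the pipelines sit in the `value` list of the returned JSON. A quick way to sanity-check the result before flattening it is to pull out just the `id` and `displayName` fields. The sample payload below is made up for illustration; the field names follow the pipelines response schema.

```python
# Sketch: extract a quick (id, displayName) summary from the parsed response.
# 'value', 'id' and 'displayName' follow the Admin API's pipeline schema;
# the sample payload here is invented for illustration.
def pipeline_summaries(payload):
    """Return (id, displayName) tuples for each pipeline in the response."""
    return [(p.get('id'), p.get('displayName')) for p in payload.get('value', [])]

sample = {
    'value': [
        {'id': 'aaaa-1111', 'displayName': 'Sales Pipeline', 'stages': []},
        {'id': 'bbbb-2222', 'displayName': 'Finance Pipeline', 'stages': []},
    ]
}
print(pipeline_summaries(sample))
# [('aaaa-1111', 'Sales Pipeline'), ('bbbb-2222', 'Finance Pipeline')]
```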

#########################################################################################
# Custom function to flatten the dataframe containing nested json / list objects
#########################################################################################
# Reference: https://stackoverflow.com/a/61269285

def flatten_nested_json_df(df):

    df = df.reset_index()

    # search for columns to explode/flatten
    s = (df.applymap(type) == list).all()
    list_columns = s[s].index.tolist()

    s = (df.applymap(type) == dict).all()
    dict_columns = s[s].index.tolist()

    while len(list_columns) > 0 or len(dict_columns) > 0:
        new_columns = []

        for col in dict_columns:

            horiz_exploded = pd.json_normalize(df[col]).add_prefix(f'{col}.')
            horiz_exploded.index = df.index
            df = pd.concat([df, horiz_exploded], axis=1).drop(columns=[col])
            new_columns.extend(horiz_exploded.columns) # inplace

        for col in list_columns:

            # explode lists vertically, adding new columns
            df = df.drop(columns=[col]).join(df[col].explode().to_frame())
            new_columns.append(col)

        # check if there are still dict or list fields to flatten
        s = (df[new_columns].applymap(type) == list).all()      
        list_columns = s[s].index.tolist()

        s = (df[new_columns].applymap(type) == dict).all()      
        dict_columns = s[s].index.tolist()

    return df
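The function above flattens arbitrary nesting without knowing the shape in advance. When you do know which list you want exploded, `pd.json_normalize` with `record_path` and `meta` does the same job directly. The sample record below is invented; the field names follow the pipelines schema.

```python
import pandas as pd

# Known-shape alternative to the generic flattener: explode the 'stages'
# list directly while carrying the pipeline-level fields along as metadata.
# The sample record is made up for illustration.
sample = [
    {'id': 'aaaa-1111', 'displayName': 'Sales Pipeline',
     'stages': [{'order': 0, 'workspaceId': 'ws-dev'},
                {'order': 1, 'workspaceId': 'ws-test'}]}
]

df = pd.json_normalize(sample, record_path='stages',
                       meta=['id', 'displayName'], record_prefix='stages.')
print(df)
# One row per stage, with 'stages.order', 'stages.workspaceId',
# 'id' and 'displayName' columns.
```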

#########################################################################################
# Return the result in a dataframe
#########################################################################################

result = flatten_nested_json_df(pd.concat([pd.json_normalize(x) for x in pipelines['value']])).reset_index(drop=True)
result

Code is provided as-is, with no guarantees or warranties of any kind; it's something I put together quickly in my spare time.

