How to Use Python Logger With Tqdm

The following example illustrates how to route tqdm's progress output into the Python logging framework. The idea is to create a custom file-like object that inherits from io.StringIO and channels everything tqdm writes to it into a logger. Using an in-memory buffer class such as StringIO lets us handle the data like a normal file that we can use for further processing.

# Copyright 2019 tiptapcode Authors. All Rights Reserved.

#

# Licensed under the Apache License, Version 2.0 (the "License");

# you may not use this file except in compliance with the License.

# You may obtain a copy of the License at

#

# http://www.apache.org/licenses/LICENSE-2.0

#

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.



import io

import os

import sys

import logging

import validators



from urllib import request

from tqdm import tqdm





class ProgressBar(tqdm):
    """A tqdm bar whose `update_progress` method can be used as a urlretrieve report hook."""

    def update_progress(self, block_num=1, block_size=1, total_size=None):
        """Advance the bar to `block_num * block_size` units.

        :param block_num: number of blocks transferred so far
        :param block_size: size of each block
        :param total_size: total size of the transfer; when given, it becomes
            the bar's `total`
        """
        if total_size is None:
            pass
        else:
            self.total = total_size

        # tqdm.update() takes an increment, so pass the difference between the
        # absolute position and the current counter; afterwards self.n equals
        # block_num * block_size.
        transferred = block_num * block_size
        self.update(transferred - self.n)





class DownloadFileHandler(object):
    """Downloads files over HTTP(S) and reports progress through a logger."""

    @staticmethod
    def download_file_by_url(url, download_dir=None):
        """Download `url` into `download_dir` and return the path of the saved file.

        Progress is rendered by a `ProgressBar` whose output stream is a
        `TqdmSystemLogger` wrapping the module-level `logger`, so `logger`
        must be defined before this method is called.

        :param url: address of the file to download
        :param download_dir: existing directory to save into; when None the
            directory containing this script is used
        :return: full path of the downloaded file
        :raises ValueError: if `validators.url` rejects `url`, or `url` has
            no trailing file name component
        :raises FileNotFoundError: if `download_dir` is given but is not an
            existing directory
        """
        if not validators.url(url):
            raise ValueError('Invalid url := {}'.format(url))

        # Fall back to the script directory only when the caller gave no
        # directory; a valid caller-supplied directory must be honoured.
        if download_dir is None:
            download_dir = os.path.abspath(os.path.dirname(__file__))
        elif not os.path.isdir(download_dir):
            raise FileNotFoundError('Directory specified := {} does not exist'.format(download_dir))

        filename = os.path.basename(url)
        if not filename:
            # e.g. a URL ending in '/': the destination would be the directory itself.
            raise ValueError('Cannot derive a file name from url := {}'.format(url))
        download_destination = os.path.join(download_dir, filename)

        # To log tqdm's output through the Python logger we hand tqdm a custom
        # file-like object that forwards whatever is written to it to the logger.
        with ProgressBar(
            file=TqdmSystemLogger(logger, suppress_new_line=False),
            unit='B',
            unit_scale=True,
            miniters=1,
            desc=filename,
        ) as progress_bar:
            # urlretrieve calls `reporthook` with (block number, block size,
            # total size); tqdm uses these to draw the bar, and knowing the
            # total size lets it estimate an ETA.
            request.urlretrieve(
                url,
                filename=download_destination,
                reporthook=progress_bar.update_progress,
                data=None,
            )

        return download_destination





class SystemLogger(object):
    """Factory for loggers that write low-severity records to stdout and the rest to stderr."""

    def __init__(self):
        pass

    @staticmethod
    def get_logger(name, level=None):
        """Return the logger called `name`, configured with stdout/stderr handlers.

        Records up to and including INFO go to `sys.stdout`; records at
        WARNING and above go to `sys.stderr`. Calling this again for the same
        name updates the level but does not attach the handlers a second time.

        :param name: logger name passed to `logging.getLogger`
        :param level: logger level; a falsy value (None or 0) selects `logging.INFO`
        :return: the configured `logging.Logger`
        """
        named_logger = logging.getLogger(name)
        named_logger.setLevel(level if level else logging.INFO)

        # Handlers installed here are tagged so repeated calls do not stack
        # duplicates (which would print every record several times).
        if any(getattr(h, '_system_logger_owned', False) for h in named_logger.handlers):
            return named_logger

        # NOTE: a formatter such as '\x1b[80D\x1b[1A\x1b[K%(message)s' can make
        # stdout output overwrite the previous line, but it does not work on
        # every terminal, so the plain format below is used.
        formatter = logging.Formatter(fmt='%(levelname)s:%(name)s: %(message)s (%(asctime)s; %(filename)s:%(lineno)d)', datefmt="%d-%m-%YT%H:%M:%S%z")

        # stdout: everything up to INFO. The handler level must be below the
        # filter's ceiling, otherwise no record can ever pass both checks.
        handler_stdout = logging.StreamHandler(sys.stdout)
        handler_stdout.setFormatter(formatter)
        handler_stdout.setLevel(logging.DEBUG)
        handler_stdout.addFilter(lambda record: record.levelno <= logging.INFO)
        handler_stdout._system_logger_owned = True
        named_logger.addHandler(handler_stdout)

        # stderr: WARNING and above.
        handler_stderr = logging.StreamHandler(sys.stderr)
        handler_stderr.setFormatter(formatter)
        handler_stderr.setLevel(logging.WARNING)
        handler_stderr._system_logger_owned = True
        named_logger.addHandler(handler_stderr)

        return named_logger





class TqdmSystemLogger(io.StringIO):
    """File-like object that forwards what tqdm writes to a `logging.Logger`.

    Pass an instance as tqdm's `file` argument: `write` keeps the most recent
    text and `flush` emits it as one log record.
    """

    def __init__(self, logger, suppress_new_line=True):
        """
        :param logger: the `logging.Logger` that receives the progress text
        :param suppress_new_line: when True, set the terminator of every
            `StreamHandler` attached to `logger` to an empty string
        """
        super(TqdmSystemLogger, self).__init__()
        self.logger = logger
        self.buf = ''
        # Only tested inside the PyCharm terminal, logging to sys.stdout:
        # dropping the default newline terminator lets the leading '\r' of the
        # next record overwrite the current line, giving a single-line progress
        # display instead of one line per update.
        if suppress_new_line:
            for handler in self.logger.handlers:
                if isinstance(handler, logging.StreamHandler):
                    handler.terminator = ""

    def write(self, buf):
        """Remember `buf`, trimmed of surrounding CR/LF/tab/space, for the next flush.

        :return: the number of characters received, as file-like `write` methods do
        """
        self.buf = buf.strip('\r\n\t ')
        return len(buf)

    def flush(self):
        """Log the remembered text, prefixed with a carriage return."""
        # Log at the level the logger actually lets through. logger.level is 0
        # when the level is inherited, and level-0 records are never enabled,
        # so use the effective level (and INFO if the whole chain is NOTSET).
        level = self.logger.getEffectiveLevel() or logging.INFO
        self.logger.log(level, '\r' + self.buf)





# Demo: download a file next to this script and report progress through the
# Python logger. `logger` is deliberately a module-level name because
# DownloadFileHandler.download_file_by_url reads it when building its
# TqdmSystemLogger.
try:
    logger = SystemLogger.get_logger(
        'DownloadFileHandler',
        level=logging.WARNING,
    )
    DownloadFileHandler.download_file_by_url(
        'https://nodejs.org/dist/v12.13.1/node-v12.13.1-darwin-x64.tar.gz'
    )
except Exception as error:
    # Any failure in the demo is reported on stdout instead of raising.
    print(error)

Adding Tqdm to python subprocesses

Python's subprocess module is the recommended way to run system commands, for example, Windows terminal commands, or bash commands if you are on a Unix-based system. The subprocess module allows us to spawn processes, connect to their input/output/error pipes, and obtain their return codes.

import sys

import subprocess



from tqdm import tqdm





def create_test_bash_script(path='hello', count=1000000):
    """Write a bash script that echoes the numbers 1 to `count`, one per line.

    This is only for illustration: it simulates a long-running shell command.

    :param path: file to write the script to (overwritten if it exists)
    :param count: last number the script prints; must be an integer
    :raises ValueError: if `count` cannot be formatted as an integer
    """
    # The shebang has to be the very first line of the file, so the script is
    # assembled without any leading blank line. `{count:d}` rejects
    # non-integers, so nothing but a number can be spliced into the script.
    script = (
        '#!/bin/bash\n'
        '# Tested using bash version 4.1.5\n'
        'for ((i=1;i<={count:d};i++));\n'
        'do\n'
        '    # your-unix-command-here\n'
        '    echo $i\n'
        'done\n'
    ).format(count=count)

    # newline='\n' keeps LF line endings regardless of platform.
    with open(path, 'w', newline='\n') as bash_file:
        bash_file.write(script)





def run_task(cmd):
    """Run `cmd` through the shell, ticking a tqdm counter for every line it prints.

    The command's length is unknown in advance, so tqdm is created without a
    total: it shows a running count and the elapsed time, which is enough to
    see that something is in progress.

    If the command exits with a non-zero status, its captured stderr and the
    exit code are written to `sys.stderr`; no exception is raised.

    NOTE: `cmd` is executed with `shell=True`, so it must come from a trusted
    source; never pass unsanitised user input.

    :param cmd: shell command line to execute
    :return: the command's exit code
    """
    import tempfile

    # stderr goes to a temporary file rather than a pipe: a pipe that nobody
    # reads fills up and blocks the child, and the file lets us report what
    # the command printed if it fails.
    with tempfile.TemporaryFile() as captured_stderr:
        # unit='B' is the label tqdm shows for each iteration.
        with tqdm(unit='B', unit_scale=True, miniters=1, desc="run_task={}".format(cmd)) as t:
            # stdout=PIPE gives us the child's output; bufsize=1 with
            # universal_newlines=True makes it a line-buffered text stream.
            # Leaving the `with` block closes stdout and waits for the child.
            with subprocess.Popen(cmd, shell=True, bufsize=1, universal_newlines=True,
                                  stdout=subprocess.PIPE, stderr=captured_stderr) as process:
                for _line in process.stdout:
                    t.update()

        return_code = process.returncode

        # A non-zero return code means the command errored out.
        if return_code != 0:
            captured_stderr.seek(0)
            error_output = captured_stderr.read().decode(errors='replace')
            sys.stderr.write(
                "common::run_command() : [ERROR]: output = {}, error code = {}\n".format(
                    error_output, return_code))

    return return_code





# Generate the sample script, then run the demo commands in order:
# the first makes the script executable and runs it, the second is not a
# valid command and is expected to fail.
create_test_bash_script()

for shell_command in ('chmod 755 hello && ./hello', 'xx*3238'):
    run_task(shell_command)

In the example above, we stream the output generated by the executed command line by line and use each line to update the tqdm progress bar. Since we do not have an iterator with a predefined length, we cannot anticipate when the iteration will end, so tqdm falls back to showing a running count and the elapsed time instead of a full progress bar.