less than 1 minute read

Multiprocessing is essential when you need to process long lists or large chunks of data. In this post I will share a simple Python program that uses multiprocessing to process the rows of a pandas DataFrame.

import multiprocessing
import pandas as pd
from tqdm import tqdm

def process(row):
    """Process one record and return the result.

    ``main`` passes this function to ``Pool.imap_unordered`` together with
    the list produced by ``df.to_dict('records')``, so ``row`` is a dict
    mapping column names to that row's values. Because the call happens in
    a worker process, both the argument and the return value must be
    picklable.

    This is a template: replace the placeholder assignment below with the
    real per-row work.
    """
    # do your process over the row
    # Placeholder result. The name must be bound before it is returned;
    # returning an unbound name raises NameError on the very first row.
    process_data = row
    return process_data


def main():
    """Read ``mydata.csv`` and process its rows in parallel, printing results.

    Each row is converted to a dict and handed to ``process`` in a pool of
    worker processes, one per CPU reported by ``multiprocessing.cpu_count()``.
    A tqdm progress bar tracks how many rows have completed.
    """
    df = pd.read_csv('mydata.csv')
    # One dict per row (column name -> value).
    data_to_process = df.to_dict('records')
    # Use the pool as a context manager so the worker processes are shut
    # down when the loop finishes or raises, instead of being left running.
    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        # imap_unordered yields results as workers finish them, so the output
        # order may differ from the row order in the file.
        results = pool.imap_unordered(process, data_to_process)
        # The result iterator has no length, so tqdm needs an explicit total.
        for result in tqdm(results, total=len(data_to_process)):
            print(result)

# Run main() only when this file is executed as a script. With the "spawn"
# start method, multiprocessing re-imports this module in each worker, so
# pool creation must stay behind this guard to avoid spawning recursively.
if __name__ == "__main__":
    main()

Comments