Optimizing Linear Programming Solver

Optimizing the provided solve function involves several steps: break the function down, identify the bottlenecks, and apply targeted techniques. The main strategies are:

  • Avoid Repeated Calculations: Compute values once and reuse them instead of recalculating them inside loops.

  • Vectorized Operations: Replace Python-level loops with vectorized NumPy or pandas operations where possible (see the short sketch after this list).

  • Efficient Data Structures: Prefer data structures suited to the access pattern, e.g. accumulate rows in a plain list rather than growing a DataFrame row by row.

  • Parallel Processing: Use parallel processing for independent tasks.
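
As a quick illustration of the vectorization point, here is a minimal, self-contained sketch (column names are invented for the example) contrasting a row-by-row loop with its vectorized equivalent:

import pandas as pd

df = pd.DataFrame({"Level": [1.0, 2.0, 3.0], "Tons": [10.0, 20.0, 30.0]})

# Row-by-row: one Python-level iteration per row (slow on large frames)
ratios_loop = [row["Level"] / row["Tons"] for _, row in df.iterrows()]

# Vectorized: a single NumPy division over whole columns
ratios_vec = (df["Level"] / df["Tons"]).tolist()

assert ratios_loop == ratios_vec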

We'll start by breaking down the function:

Step 1: Initial Data Preparation

We can optimize the initial data preparation by accumulating rows in a plain Python list and building the DataFrame once at the end, rather than constructing and concatenating DataFrames inside the loop; a short comparison after this step's listing shows why this matters.

import json  
import pandas as pd  
import numpy as np  
import math  
import time  
import openpyxl  
from pandas import json_normalize  
from gurobipy import *  
import highspy  
from fastapi import FastAPI  
from typing import Dict, Any  
  
app = FastAPI()  
  
def solve(data: Dict[str, Any]):  
    t1 = time.time()  
    opt_id = data['OptimizationId']  
    blb_id = data['BlobName']  
    num_steps = data.get('NumberOfSteps', 0)  
    is_debug = data.get('IsDebug', 0)  
      
    loc_d = pd.json_normalize(data, 'Locations', errors='ignore', record_prefix='_')  
    glo_d = pd.json_normalize(data, 'GlobalIngredients', errors='ignore', record_prefix='_')  
      
    solve_type = 1 if num_steps > 0 else 0  
  
    dummy_nut = {  
        "NutrientId": "NO_NUT",  
        "Level": 0.0  
    }  
      
    cols_2_check = ['IngredientId', 'Min', 'Max', 'Cost', 'MinStep', 'MaxStep', 'CostStep', 'Available', 'Global']  
      
    # Create a list to store all entries for loc_ing_nut  
    loc_ing_nut_entries = []  
  
    for _, loc in loc_d.iterrows():
        loc_id = loc['_LocationId']
        ingredients = pd.DataFrame(loc['_Ingredients'])
        if ingredients.empty:
            continue
        # Keep 'NutrientLevels' in the reindex; cols_2_check alone would drop it,
        # silently replacing every real nutrient list with the dummy nutrient.
        ingredients = ingredients.reindex(columns=cols_2_check + ['NutrientLevels'], fill_value=0)
        for _, ing in ingredients.iterrows():
            raw_levels = ing['NutrientLevels']
            nut_levels = pd.DataFrame(raw_levels) if isinstance(raw_levels, list) and raw_levels else pd.DataFrame([dummy_nut])
            for _, nut in nut_levels.iterrows():  
                loc_ing_nut_entries.append([  
                    loc_id, ing['IngredientId'], ing['Min'], ing['Max'], ing['Cost'],   
                    ing['MinStep'], ing['MaxStep'], ing['CostStep'], ing['Available'],   
                    ing['Global'], nut['NutrientId'], nut['Level']  
                ])  
      
    loc_ing_nut_cols = ['LocationId', 'IngredientId', 'Min', 'Max', 'Cost', 'MinStep', 'MaxStep', 'CostStep',   
                        'Available', 'Global', 'NutrientId', 'Level']  
    df_loc_ing_nut = pd.DataFrame(loc_ing_nut_entries, columns=loc_ing_nut_cols)  
    df_loc_ing_nut['Index_LIN'] = range(len(df_loc_ing_nut))  
      
    if is_debug:  
        print('Rip LIN: {:.3f}s'.format(time.time() - t1))  
      
    # Further steps can be optimized in a similar manner  
    # ...  
  
    # Return the response as per the original logic  
    resp = {  
        "OptimizationId": opt_id,  
        "Summary": {},  
        "LocationResults": [],  
        "GlobalIngredientResults": []  
    }  
      
    # GlobalmixResponse is the response model from the original service
    # (defined elsewhere in the codebase).
    return GlobalmixResponse(**resp)
  
# Define the endpoint for FastAPI  
@app.post("/solve")  
def solve_api(data: Dict[str, Any]):  
    return solve(data)  
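
Why the list-then-DataFrame pattern above is faster: appending to a Python list is cheap and constant-time, whereas growing a DataFrame with pd.concat inside a loop copies all accumulated rows on every iteration. A self-contained comparison (row contents are made up):

import pandas as pd

rows = [["LOC1", "ING1", 0.0, 1.0], ["LOC1", "ING2", 0.5, 2.0]]
cols = ["LocationId", "IngredientId", "Min", "Max"]

# Slow pattern: concat inside the loop copies the whole frame each time
df_slow = pd.DataFrame(columns=cols)
for row in rows:
    df_slow = pd.concat([df_slow, pd.DataFrame([row], columns=cols)], ignore_index=True)

# Fast pattern: accumulate plain lists, build the DataFrame once at the end,
# exactly as loc_ing_nut_entries is used above
df_fast = pd.DataFrame(rows, columns=cols)

For reference, this is the input shape the function expects, inferred from the keys it reads (all field values here are invented):

example_payload = {
    "OptimizationId": "opt-001",
    "BlobName": "inputs/opt-001.json",
    "NumberOfSteps": 0,
    "IsDebug": 1,
    "GlobalIngredients": [],
    "Locations": [
        {
            "LocationId": "LOC1",
            "Ingredients": [
                {
                    "IngredientId": "ING1",
                    "Min": 0.0, "Max": 10.0, "Cost": 4.5,
                    "MinStep": 0, "MaxStep": 0, "CostStep": 0,
                    "Available": 1, "Global": 0,
                    "NutrientLevels": [{"NutrientId": "N1", "Level": 0.18}]
                }
            ]
        }
    ]
}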

Step 2: Improve Data Handling and Constraints

We can further speed up constraint creation by replacing repeated label-based lookups with positional .iloc access on pre-built index columns and by collecting the constraints in a single list. The loop below is not yet fully vectorized; a NumPy-based alternative that removes the loop's numeric work entirely is sketched after this step's listing:

def optimize_constraints(df_ls, df_lsn, df_lin, df_lsin, x_lsi, x_lsn, is_debug):
    # df_ls / df_lsn / df_lin / df_lsin are the location-step, location-step-nutrient,
    # location-ingredient-nutrient and location-step-ingredient-nutrient frames built
    # in the preparation steps (not shown); x_lsi and x_lsn are the solver variables.
    t1 = time.time()
    constraints = []

    # Row-wise constraint creation via positional .iloc lookups on pre-built
    # index columns (cheaper than iterrows, though not fully vectorized)
    for lsin in range(len(df_lsin)):
        q_lsn = df_lsin['Index_LSN'].iloc[lsin]
        q_lsi = df_lsin['Index_LSI'].iloc[lsin]
        q_lin = df_lsin['Index_LIN'].iloc[lsin]
        q_ls = df_lsn['Index_LS'].iloc[q_lsn]

        if df_lsn['IsRatio'].iloc[q_lsn] == 0:
            constraints.append(
                (x_lsi[q_lsi] * df_lin['Level'].iloc[q_lin] / df_ls['Tons'].iloc[q_ls], x_lsn[q_lsn])
            )

    if is_debug:
        print('Constraints creation time: {:.3f}s'.format(time.time() - t1))

    return constraints
  
def solve(data: Dict[str, Any]):  
    t1 = time.time()  
    opt_id = data['OptimizationId']  
    blb_id = data['BlobName']  
    num_steps = data.get('NumberOfSteps', 0)  
    is_debug = data.get('IsDebug', 0)  
  
    loc_d = pd.json_normalize(data, 'Locations', errors='ignore', record_prefix='_')  
    glo_d = pd.json_normalize(data, 'GlobalIngredients', errors='ignore', record_prefix='_')  
      
    solve_type = 1 if num_steps > 0 else 0  
  
    dummy_nut = {  
        "NutrientId": "NO_NUT",  
        "Level": 0.0  
    }  
  
    cols_2_check = ['IngredientId', 'Min', 'Max', 'Cost', 'MinStep', 'MaxStep', 'CostStep', 'Available', 'Global']  
  
    # Create a list to store all entries for loc_ing_nut  
    loc_ing_nut_entries = []  
  
    for _, loc in loc_d.iterrows():
        loc_id = loc['_LocationId']
        ingredients = pd.DataFrame(loc['_Ingredients'])
        if ingredients.empty:
            continue
        # Keep 'NutrientLevels' so it survives the reindex (see Step 1)
        ingredients = ingredients.reindex(columns=cols_2_check + ['NutrientLevels'], fill_value=0)
        for _, ing in ingredients.iterrows():
            raw_levels = ing['NutrientLevels']
            nut_levels = pd.DataFrame(raw_levels) if isinstance(raw_levels, list) and raw_levels else pd.DataFrame([dummy_nut])
            for _, nut in nut_levels.iterrows():  
                loc_ing_nut_entries.append([  
                    loc_id, ing['IngredientId'], ing['Min'], ing['Max'], ing['Cost'],  
                    ing['MinStep'], ing['MaxStep'], ing['CostStep'], ing['Available'],  
                    ing['Global'], nut['NutrientId'], nut['Level']  
                ])  
  
    loc_ing_nut_cols = ['LocationId', 'IngredientId', 'Min', 'Max', 'Cost', 'MinStep', 'MaxStep', 'CostStep',  
                        'Available', 'Global', 'NutrientId', 'Level']  
    df_loc_ing_nut = pd.DataFrame(loc_ing_nut_entries, columns=loc_ing_nut_cols)  
    df_loc_ing_nut['Index_LIN'] = range(len(df_loc_ing_nut))  
  
    if is_debug:  
        print('Rip LIN: {:.3f}s'.format(time.time() - t1))  
  
    # Add other data preparations here  
  
    # Optimizing constraints (df_ls, df_lsn, df_lin, df_lsin, x_lsi and x_lsn are
    # produced by the preparation and model-building steps elided above)
    constraints = optimize_constraints(df_ls, df_lsn, df_lin, df_lsin, x_lsi, x_lsn, is_debug)
  
    # Further steps can be optimized in a similar manner  
  
    # Return the response as per the original logic  
    resp = {  
        "OptimizationId": opt_id,  
        "Summary": {},  
        "LocationResults": [],  
        "GlobalIngredientResults": []  
    }  
  
    return GlobalmixResponse(**resp)  
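
The numeric work of the loop above can also be removed entirely: pull the index columns out as NumPy arrays, compute every coefficient in one division, and keep Python only for pairing solver variables. A minimal sketch, assuming the same frame and variable names as above and that all frames use a default RangeIndex (so positional and label indexing coincide):

import numpy as np

def optimize_constraints_vectorized(df_ls, df_lsn, df_lin, df_lsin, x_lsi, x_lsn):
    # Cross-index columns as NumPy arrays (one extraction instead of per-row .iloc)
    q_lsn = df_lsin['Index_LSN'].to_numpy()
    q_lsi = df_lsin['Index_LSI'].to_numpy()
    q_lin = df_lsin['Index_LIN'].to_numpy()
    q_ls = df_lsn['Index_LS'].to_numpy()[q_lsn]

    # All Level/Tons coefficients in a single vectorized division
    coef = df_lin['Level'].to_numpy()[q_lin] / df_ls['Tons'].to_numpy()[q_ls]

    # Only non-ratio nutrient rows generate constraints
    mask = df_lsn['IsRatio'].to_numpy()[q_lsn] == 0

    # Solver variables cannot be vectorized, so the final pairing stays in
    # Python, but it now touches only precomputed scalars
    return [
        (x_lsi[i] * c, x_lsn[n])
        for i, c, n in zip(q_lsi[mask], coef[mask], q_lsn[mask])
    ]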

Step 3: Parallel Processing (if applicable)

If some parts of the computation are independent, they can be parallelized with Python's multiprocessing library. Worker processes do not share memory with the parent, so each worker must return its rows for the parent to collect, rather than appending to a shared list:

from multiprocessing import Pool  
  
def process_ingredient(loc_id, ingredients, dummy_nut):
    # Worker processes do not share memory with the parent, so rows are built
    # locally and returned instead of appended to a shared list
    rows = []
    for _, ing in ingredients.iterrows():
        raw_levels = ing['NutrientLevels']
        nut_levels = pd.DataFrame(raw_levels) if isinstance(raw_levels, list) and raw_levels else pd.DataFrame([dummy_nut])
        for _, nut in nut_levels.iterrows():
            rows.append([
                loc_id, ing['IngredientId'], ing['Min'], ing['Max'], ing['Cost'],
                ing['MinStep'], ing['MaxStep'], ing['CostStep'], ing['Available'],
                ing['Global'], nut['NutrientId'], nut['Level']
            ])
    return rows
  
def solve(data: Dict[str, Any]):  
    t1 = time.time()  
    opt_id = data['OptimizationId']  
    blb_id = data['BlobName']  
    num_steps = data.get('NumberOfSteps', 0)  
    is_debug = data.get('IsDebug', 0)  
  
    loc_d = pd.json_normalize(data, 'Locations', errors='ignore', record_prefix='_')  
    glo_d = pd.json_normalize(data, 'GlobalIngredients', errors='ignore', record_prefix='_')  
  
    solve_type = 1 if num_steps > 0 else 0  
  
    dummy_nut = {  
        "NutrientId": "NO_NUT",  
        "Level": 0.0  
    }  
  
    cols_2_check = ['IngredientId', 'Min', 'Max', 'Cost', 'MinStep', 'MaxStep', 'CostStep', 'Available', 'Global']  
    loc_ing_nut_entries = []
    async_results = []

    with Pool() as pool:
        for _, loc in loc_d.iterrows():
            loc_id = loc['_LocationId']
            ingredients = pd.DataFrame(loc['_Ingredients'])
            if ingredients.empty:
                continue
            # Keep 'NutrientLevels' so it survives the reindex (see Step 1)
            ingredients = ingredients.reindex(columns=cols_2_check + ['NutrientLevels'], fill_value=0)
            async_results.append(pool.apply_async(process_ingredient, (loc_id, ingredients, dummy_nut)))

        pool.close()
        pool.join()

        # Collect the rows returned by each worker, in submission order
        for res in async_results:
            loc_ing_nut_entries.extend(res.get())
  
    loc_ing_nut_cols = ['LocationId', 'IngredientId', 'Min', 'Max', 'Cost', 'MinStep', 'MaxStep', 'CostStep',  
                        'Available', 'Global', 'NutrientId', 'Level']  
    df_loc_ing_nut = pd.DataFrame(loc_ing_nut_entries, columns=loc_ing_nut_cols)  
    df_loc_ing_nut['Index_LIN'] = range(len(df_loc_ing_nut))  
  
    if is_debug:  
        print('Rip LIN: {:.3f}s'.format(time.time() - t1))  
  
    # Add other data preparations here  
  
    # Optimizing constraints (frames and solver variables come from the elided steps above)
    constraints = optimize_constraints(df_ls, df_lsn, df_lin, df_lsin, x_lsi, x_lsn, is_debug)
  
    # Further steps can be optimized in a similar manner  
  
    # Return the response as per the original logic  
    resp = {  
        "OptimizationId": opt_id,  
        "Summary": {},  
        "LocationResults": [],  
        "GlobalIngredientResults": []  
    }  
  
    return GlobalmixResponse(**resp)  
  
# Define the endpoint for FastAPI  
@app.post("/solve")  
def solve_api(data: Dict[str, Any]):  
    return solve(data)  
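
Whether the pool actually pays off depends on payload size: every task pickles a DataFrame out to a worker and pickles the resulting rows back, so for small inputs the serialization overhead can exceed the savings. It is worth comparing the IsDebug timings with and without the pool. The same pattern can also be written with concurrent.futures, which makes result collection slightly more explicit; a minimal sketch reusing process_ingredient from above (collect_entries_parallel is a hypothetical helper for illustration):

from concurrent.futures import ProcessPoolExecutor

def collect_entries_parallel(location_tasks, dummy_nut, max_workers=None):
    # location_tasks: iterable of (loc_id, ingredients_dataframe) pairs,
    # prepared exactly as in the loop above
    entries = []
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_ingredient, loc_id, ingredients, dummy_nut)
            for loc_id, ingredients in location_tasks
        ]
        for future in futures:
            entries.extend(future.result())
    return entries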
