Optimizing Linear Programming Solver
Optimizing the provided solve function involves multiple steps. We will break the function down, identify bottlenecks, and apply targeted optimization techniques. The main strategies are:
Avoid Repeated Calculations: Cache values that would otherwise be recomputed.
Vectorized Operations: Replace loops with vectorized NumPy or pandas operations where possible (see the sketch after this list).
Efficient Data Structures: Use data structures and functions suited to the access pattern.
Parallel Processing: Utilize parallel processing for independent tasks.
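To make the vectorization strategy concrete, here is a minimal, self-contained sketch; the array names and values are illustrative and not taken from the solver:
import numpy as np

levels = np.array([1.2, 0.8, 2.5])
tons = np.array([10.0, 20.0, 5.0])

# Loop version: one Python-level division per element.
scaled_loop = [levels[k] / tons[k] for k in range(len(levels))]

# Vectorized version: a single call into NumPy's compiled loop.
scaled_vec = levels / tons
assert np.allclose(scaled_loop, scaled_vec)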
We'll start by breaking down the function:
Step 1: Initial Data Preparation
We can optimize the initial data preparation by collecting rows in a plain list and building the combined DataFrame once at the end, instead of constructing DataFrames repeatedly inside the loop.
import json
import math
import time
from typing import Any, Dict, List

import numpy as np
import openpyxl
import pandas as pd
from fastapi import FastAPI
from pydantic import BaseModel

from gurobipy import *  # solver APIs used in the modelling steps elided below
import highspy

app = FastAPI()

# Minimal response model inferred from the fields assembled in solve();
# the original definition is not shown, so treat this as a stand-in.
class GlobalmixResponse(BaseModel):
    OptimizationId: str
    Summary: Dict[str, Any]
    LocationResults: List[Any]
    GlobalIngredientResults: List[Any]
def solve(data: Dict[str, Any]):
    t1 = time.time()
    opt_id = data['OptimizationId']
    blb_id = data['BlobName']
    num_steps = data.get('NumberOfSteps', 0)
    is_debug = data.get('IsDebug', 0)
    loc_d = pd.json_normalize(data, 'Locations', errors='ignore', record_prefix='_')
    glo_d = pd.json_normalize(data, 'GlobalIngredients', errors='ignore', record_prefix='_')
    solve_type = 1 if num_steps > 0 else 0
    dummy_nut = {
        "NutrientId": "NO_NUT",
        "Level": 0.0
    }
    cols_2_check = ['IngredientId', 'Min', 'Max', 'Cost', 'MinStep', 'MaxStep', 'CostStep', 'Available', 'Global']
    # Collect all rows in a plain list; the DataFrame is built once at the end.
    loc_ing_nut_entries = []
    for i, loc in loc_d.iterrows():
        loc_id = loc['_LocationId']
        ingredients = pd.DataFrame(loc['_Ingredients'])
        if ingredients.empty:
            continue
        # Keep 'NutrientLevels' in the reindex: restricting to cols_2_check
        # alone would drop it and force every ingredient onto the dummy nutrient.
        ingredients = ingredients.reindex(columns=cols_2_check + ['NutrientLevels'], fill_value=0)
        for j, ing in ingredients.iterrows():
            raw_levels = ing['NutrientLevels']
            nut_levels = (pd.DataFrame(raw_levels)
                          if isinstance(raw_levels, list) and raw_levels
                          else pd.DataFrame([dummy_nut]))
            for _, nut in nut_levels.iterrows():
                loc_ing_nut_entries.append([
                    loc_id, ing['IngredientId'], ing['Min'], ing['Max'], ing['Cost'],
                    ing['MinStep'], ing['MaxStep'], ing['CostStep'], ing['Available'],
                    ing['Global'], nut['NutrientId'], nut['Level']
                ])
    loc_ing_nut_cols = ['LocationId', 'IngredientId', 'Min', 'Max', 'Cost', 'MinStep', 'MaxStep', 'CostStep',
                        'Available', 'Global', 'NutrientId', 'Level']
    df_loc_ing_nut = pd.DataFrame(loc_ing_nut_entries, columns=loc_ing_nut_cols)
    df_loc_ing_nut['Index_LIN'] = range(len(df_loc_ing_nut))
    if is_debug:
        print('Rip LIN: {:.3f}s'.format(time.time() - t1))
    # Further steps can be optimized in a similar manner
    # ...
    # Return the response as per the original logic
    resp = {
        "OptimizationId": opt_id,
        "Summary": {},
        "LocationResults": [],
        "GlobalIngredientResults": []
    }
    return GlobalmixResponse(**resp)
# Define the endpoint for FastAPI
@app.post("/solve")
def solve_api(data: Dict[str, Any]):
    return solve(data)
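Since the hot path above is pure row assembly, a leaner variant skips pandas inside the loop entirely and walks the raw JSON lists. A minimal sketch, assuming the payload has the same Locations → Ingredients → NutrientLevels shape that solve reads:
def flatten_locations(data, cols_2_check, dummy_nut):
    # Walk the raw JSON directly: no per-location DataFrame and no per-row
    # Series access, just list and dict operations.
    entries = []
    for loc in data.get('Locations', []):
        loc_id = loc['LocationId']
        for ing in loc.get('Ingredients', []):
            base = [loc_id] + [ing.get(col, 0) for col in cols_2_check]
            for nut in (ing.get('NutrientLevels') or [dummy_nut]):
                entries.append(base + [nut['NutrientId'], nut['Level']])
    return entries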
Step 2: Improve Data Handling and Constraints
We can further speed up constraint creation by pulling the index and coefficient columns out of the DataFrames once, as NumPy arrays, instead of paying for a pandas .iloc lookup on every iteration:
def optimize_constraints(df_ls, df_lsn, df_lin, df_lsin, x_lsi, x_lsn, is_debug):
    # df_lsin, x_lsi and x_lsn come from the preparation steps elided above;
    # the signature lists exactly what the body uses.
    t1 = time.time()
    constraints = []
    # Extract the columns once as NumPy arrays; repeated .iloc calls inside
    # the loop are far more expensive than plain array indexing.
    q_lsn_arr = df_lsin['Index_LSN'].to_numpy()
    q_lsi_arr = df_lsin['Index_LSI'].to_numpy()
    q_lin_arr = df_lsin['Index_LIN'].to_numpy()
    is_ratio = df_lsn['IsRatio'].to_numpy()
    ls_index = df_lsn['Index_LS'].to_numpy()
    levels = df_lin['Level'].to_numpy()
    tons = df_ls['Tons'].to_numpy()
    for q_lsn, q_lsi, q_lin in zip(q_lsn_arr, q_lsi_arr, q_lin_arr):
        if is_ratio[q_lsn] == 0:
            q_ls = ls_index[q_lsn]
            constraints.append(
                (x_lsi[q_lsi] * levels[q_lin] / tons[q_ls], x_lsn[q_lsn])
            )
    if is_debug:
        print('Constraints creation time: {:.3f}s'.format(time.time() - t1))
    return constraints
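The payoff of hoisting columns out of the DataFrame is easy to measure in isolation. A small self-contained timing sketch; the frame and its size are made up for the comparison:
import time
import numpy as np
import pandas as pd

df = pd.DataFrame({'Level': np.random.rand(100_000)})

t0 = time.time()
total = 0.0
for k in range(len(df)):
    total += df['Level'].iloc[k]  # one pandas indexing call per row
t_iloc = time.time() - t0

t0 = time.time()
levels = df['Level'].to_numpy()  # convert once, then read a plain C array
total_np = float(levels.sum())
t_numpy = time.time() - t0

print('iloc loop: {:.3f}s, numpy: {:.3f}s'.format(t_iloc, t_numpy))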
def solve(data: Dict[str, Any]):
    t1 = time.time()
    # ... same data preparation as in Step 1: parse opt_id, is_debug, etc.,
    # and build df_loc_ing_nut from loc_ing_nut_entries ...
    # Add other data preparations here; the elided steps also build df_ls,
    # df_lsn, df_lin and df_lsin plus the x_lsi / x_lsn decision variables
    # consumed below.
    # Optimizing constraints
    constraints = optimize_constraints(df_ls, df_lsn, df_lin, df_lsin, x_lsi, x_lsn, is_debug)
    # Further steps can be optimized in a similar manner
    # Return the response as per the original logic
    resp = {
        "OptimizationId": opt_id,
        "Summary": {},
        "LocationResults": [],
        "GlobalIngredientResults": []
    }
    return GlobalmixResponse(**resp)
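The tuples returned by optimize_constraints pair a linear expression with the nutrient-level variable it determines. Feeding them into the model is then a single loop; a sketch with gurobipy, where reading each tuple as "expression == variable" is an assumption based on the tuple shape above:
def add_level_constraints(model, constraints):
    # Each tuple is (linear expression, nutrient-level variable); we read it
    # as an equality, which is an assumption about the original model.
    for expr, level_var in constraints:
        model.addConstr(expr == level_var)
    model.update()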
Step 3: Parallel Processing (if applicable)
If parts of the computation are independent, Python's multiprocessing library can run them in parallel. Keep in mind that each worker receives pickled copies of its arguments, so results must be returned from the worker and collected in the parent; appending to a shared list from inside a worker is silently lost:
from multiprocessing import Pool

def process_ingredient(loc_id, ingredients, dummy_nut):
    # Build and *return* this location's rows. Appending to a list passed in
    # from the parent would be lost: each worker operates on a pickled copy.
    entries = []
    for j, ing in ingredients.iterrows():
        raw_levels = ing['NutrientLevels']
        nut_levels = (pd.DataFrame(raw_levels)
                      if isinstance(raw_levels, list) and raw_levels
                      else pd.DataFrame([dummy_nut]))
        for _, nut in nut_levels.iterrows():
            entries.append([
                loc_id, ing['IngredientId'], ing['Min'], ing['Max'], ing['Cost'],
                ing['MinStep'], ing['MaxStep'], ing['CostStep'], ing['Available'],
                ing['Global'], nut['NutrientId'], nut['Level']
            ])
    return entries
def solve(data: Dict[str, Any]):
    t1 = time.time()
    opt_id = data['OptimizationId']
    blb_id = data['BlobName']
    num_steps = data.get('NumberOfSteps', 0)
    is_debug = data.get('IsDebug', 0)
    loc_d = pd.json_normalize(data, 'Locations', errors='ignore', record_prefix='_')
    glo_d = pd.json_normalize(data, 'GlobalIngredients', errors='ignore', record_prefix='_')
    solve_type = 1 if num_steps > 0 else 0
    dummy_nut = {
        "NutrientId": "NO_NUT",
        "Level": 0.0
    }
    cols_2_check = ['IngredientId', 'Min', 'Max', 'Cost', 'MinStep', 'MaxStep', 'CostStep', 'Available', 'Global']
    loc_ing_nut_entries = []
    async_results = []
    with Pool() as pool:
        for i, loc in loc_d.iterrows():
            loc_id = loc['_LocationId']
            ingredients = pd.DataFrame(loc['_Ingredients'])
            if ingredients.empty:
                continue
            ingredients = ingredients.reindex(columns=cols_2_check + ['NutrientLevels'], fill_value=0)
            async_results.append(
                pool.apply_async(process_ingredient, (loc_id, ingredients, dummy_nut))
            )
        pool.close()
        pool.join()
        # Gather the returned row lists; .get() also re-raises any worker error.
        for res in async_results:
            loc_ing_nut_entries.extend(res.get())
    loc_ing_nut_cols = ['LocationId', 'IngredientId', 'Min', 'Max', 'Cost', 'MinStep', 'MaxStep', 'CostStep',
                        'Available', 'Global', 'NutrientId', 'Level']
    df_loc_ing_nut = pd.DataFrame(loc_ing_nut_entries, columns=loc_ing_nut_cols)
    df_loc_ing_nut['Index_LIN'] = range(len(df_loc_ing_nut))
    if is_debug:
        print('Rip LIN: {:.3f}s'.format(time.time() - t1))
    # Add other data preparations here (df_ls, df_lsn, df_lin, df_lsin, x_lsi, x_lsn)
    # Optimizing constraints
    constraints = optimize_constraints(df_ls, df_lsn, df_lin, df_lsin, x_lsi, x_lsn, is_debug)
    # Further steps can be optimized in a similar manner
    # Return the response as per the original logic
    resp = {
        "OptimizationId": opt_id,
        "Summary": {},
        "LocationResults": [],
        "GlobalIngredientResults": []
    }
    return GlobalmixResponse(**resp)
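Whether the pool pays off depends on the payload: every task pickles an ingredients DataFrame into a worker, so for many small locations the serialization cost can outweigh the parallelism, and it is worth timing both paths. As a tidier dispatch, Pool.starmap distributes the calls and gathers the returned row lists in one call; a sketch assuming process_ingredient returns its rows as above:
def flatten_in_parallel(tasks):
    # tasks: list of (loc_id, ingredients, dummy_nut) tuples, built exactly
    # like the apply_async loop above.
    with Pool() as pool:
        per_location = pool.starmap(process_ingredient, tasks)
    entries = []
    for rows in per_location:
        entries.extend(rows)
    return entries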
# Define the endpoint for FastAPI
@app.post("/solve")
def solve_api(data: Dict[str, Any]):
    return solve(data)
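For a quick end-to-end check, FastAPI's TestClient can post a payload against the endpoint. The keys mirror the fields solve reads; the values are placeholders, and the call only succeeds once the elided preparation steps are filled in:
from fastapi.testclient import TestClient

client = TestClient(app)
payload = {
    "OptimizationId": "opt-1",
    "BlobName": "blob-1",
    "NumberOfSteps": 0,
    "IsDebug": 1,
    "Locations": [{
        "LocationId": "L1",
        "Ingredients": [{
            "IngredientId": "corn", "Min": 0, "Max": 10, "Cost": 1.5,
            "MinStep": 0, "MaxStep": 0, "CostStep": 0, "Available": 1, "Global": 0,
            "NutrientLevels": [{"NutrientId": "protein", "Level": 0.08}]
        }]
    }],
    "GlobalIngredients": []
}
response = client.post("/solve", json=payload)
print(response.json())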