% Conference paper, 2022 IEEE Conference on Control Technology and Applications (CCTA).
% The citation key is the exporter's opaque identifier; it is kept as-is so existing \cite commands keep working.
% `annote` holds the raw database-export metadata (conference code, citation count, cited references,
% conference dates); it is stored there rather than in `note` so that standard styles do not print it.
% NOTE(review): given names are stored as initials only, and the third surname may be truncated
% ("Wegenkittl"?) -- confirm both against the publisher record for the DOI.
@inproceedings{33d00a7457934a45949033ccb1c2034c,
  author    = {Harb, J. and Riedmann, S. and Wegenkitt, S.},
  title     = {Strategies for Developing a Supervisory Controller with Deep Reinforcement Learning in a Production Context},
  booktitle = {2022 {IEEE} Conference on Control Technology and Applications ({CCTA})},
  year      = {2022},
  month     = aug,
  pages     = {869--874},
  publisher = {IEEE},
  isbn      = {978-1-6654-7339-2},
  doi       = {10.1109/CCTA49430.2022.9966086},
  url       = {https://ccta2022.ieeecss.org/},
  language  = {English},
  keywords  = {Deep learning, Petri nets, Industrial production, Optimization tools, Petri nets simulation, Production system, Q-learning, Reinforcement learning agent, Reinforcement learning algorithms, Reinforcement learnings, Simulation model, Supervisory controllers, Reinforcement learning},
  abstract  = {Deep reinforcement learning (RL) algorithms are a promising optimisation tool in changing industrial production systems. We implement a supervisory controller using deep Q-learning and a Petri net simulation model. Furthermore, we identify challenges for using RL in a production context and propose three generally applicable strategies for using deep RL with production systems. Firstly, reward shaping may be used to deal with multiple goals and constraints, by allowing the RL agent to slowly adapt to the constraints. Secondly, an existing RL agent can be adapted to a different task using transfer learning and thus reducing training times. Lastly, including varying starting conditions increases the number of states the RL agent encounters during training. This increases the generalisation capabilities of the deep RL agent and allows the agent to react to unseen states more robustly. We present a setup for solving a sorting task using deep Q-learning and conduct several experiments to evaluate the proposed strategies. {\textcopyright} 2022 IEEE.},
  annote    = {Conference code: 184926 Cited By :1 Export Date: 14 December 2023 References:
    Siegert, J., Schlegel, T., Zarco, L., Miljanovic, B., Meyke, A., Bauernhansl, T., Ultra-flexible factories: An approach to manage complexity (2020) Procedia CIRP, 93, pp. 329-334. , https://www.sciencedirect.com/science/article/pii/S2212827120307472, 2020, 53rd CIRP Conference on Manufacturing Systems;
    Schwung, D., Reimann, J.N., Schwung, A., Ding, S.X., Self learning in flexible manufacturing units: A reinforcement learning approach (2018) 2018 International Conference on Intelligent Systems (IS), pp. 31-38;
    Baer, S., Bakakeu, J., Meyes, R., Meisen, T., Multi-agent reinforcement learning for job shop scheduling in flexible manufacturing systems (2019) 2019 Second International Conference on Artificial Intelligence for Industries (AI4I). IEEE, , https://doi.org/10.1109/ai4i46381.2019.00014, Sep;
    Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Hassabis, D., Human-level control through deep reinforcement learning (2015) Nature, 518 (7540), pp. 529-533. , https://doi.org/10.1038/nature14236, Feb;
    Donyanavard, B., M{\"u}ck, T., Moazzemi, K., Maity, B., Melo, C.B.D., Stewart, K., Yi, S., Dutt, N., (2021) Reflecting on Self-Aware Systems-on-Chip, pp. 79-95. , Cham: Springer International Publishing;
    (2011) VDI-2498-Vorgehen bei einer Materialflussplanung Grundlagen, , Verein Deutscher Ingenieure Richtlinie 2498;
    Riedmann, S., Harb, J., Hoher, S., Timed coloured petri net simulation model for reinforcement learning in the context of production systems (2022) Production at the Leading Edge of Technology, pp. 457-465. , B.-A. Behrens, A. Brosius, W.-G. Drossel, W. Hintze, S. Ihlenfeldt, and P. Nyhuis, Eds. Cham: Springer International Publishing;
    Waschneck, B., Reichstaller, A., Belzner, L., Altenm{\"u}ller, T., Bauernhansl, T., Knapp, A., Kyek, A., Deep reinforcement learning for semiconductor production scheduling (2018) 2018 29th Annual SEMI Advanced Semiconductor Manufacturing Conference (ASMC), pp. 301-306;
    Zinn, J., Vogel-Heuser, B., Ockier, P., Deep q-learning for the control of plc-based automated production systems (2020) 2020 IEEE 16th International Conference on Automation Science and Engineering (CASE), pp. 1434-1440;
    Ng, A.Y., Harada, D., Russell, S., Policy invariance under reward transformations: Theory and application to reward shaping (1999) Proceedings of the Sixteenth International Conference on Machine Learning. Morgan Kaufmann, pp. 278-287;
    Sutton, R.S., Barto, A.G., (2018) Reinforcement Learning: An Introduction, , http://incompleteideas.net/book/the-book-2nd.html, 2nd ed. The MIT Press, 11;
    Goodfellow, I., Bengio, Y., Courville, A., (2016) Deep Learning, , http://www.deeplearningbook.org, MIT Press;
    Taylor, M.E., Stone, P., Transfer learning for reinforcement learning domains: A survey (2009) Journal of Machine Learning Research, 10 (1), pp. 1633-1685. , http://www.cs.utexas.edu/users/ai-lab?taylor:jmlr09;
    Pommereau, F., SNAKES: A flexible high-level petri nets library (tool paper) (2015) Application and Theory of Petri Nets and Concurrency, pp. 254-265. , Springer International Publishing;
    Guadarrama, S., Korattikara, A., Ramirez, O., Castro, P., Holly, E., Fishman, S., Wang, K., Brevdo, E., (2018) TF-Agents: A library for reinforcement learning in tensorflow, , https://github.com/tensorflow/agents, Online; accessed 16-July-2021, https://github.com/tensorflow/agents;
    Wang, Z., Shi, Z., Li, Y., Tu, J., The optimization of path planning for multi-robot system using boltzmann policy based q-learning algorithm (2013) 2013 IEEE International Conference on Robotics and Biomimetics (ROBIO), , https://doi.org/10.1109/robio.2013.6739627, IEEE, Dec;
    2022 IEEE Conference on Control Technology and Applications, CCTA 2022, CCTA 2022 ; Conference date: 23-08-2022 Through 25-08-2022},
}