/*
 * Build: gcc -g ql_test.cpp -o ql_test
 *
 * A toy program written to understand reinforcement learning (Q-learning).
 * The "world" is an educational career: birth -> junior high -> high school
 * -> college, modelled as a 15-node binary tree. Exactly one leaf
 * (SUPER_COLLEGE) is the source of reward.
 *
 * The code stays within the common subset of C and C++ because the build
 * line above compiles it from a .cpp file.
 */

#include <stdio.h>
#include <stdlib.h>

/* Life stage attached to each node of the tree. */
typedef enum period{
    BIRTH = 0,
    JUNIOR_HIGH = 1,
    HIGH = 2,
    COLLEGE = 3,
    SUPER_COLLEGE = 4
}PERIOD;

/* One node of the tree. */
typedef struct state{
    struct state* future_state[2]; /* paths to the future (two for now); NULL on leaves */
    PERIOD period;                 /* life stage of this node */
    int q;                         /* learned value of being in this node */
}STATE;

enum {
    STATE_COUNT     = 15,   /* nodes in the tree */
    BRANCH_COUNT    = 7,    /* nodes 0..6 have successors, 7..14 are leaves */
    EPISODE_COUNT   = 1000, /* number of learning episodes */
    GOAL_REWARD     = 1000, /* reward source: annual income of 10 million yen (1000 x 10,000 yen) */
    INITIAL_Q_RANGE = 100   /* initial q values are drawn from 0..99 */
};

static const double EXPLORE_RATE  = 0.3; /* epsilon: probability of a random move */
static const double LEARNING_RATE = 0.1; /* alpha */
static const double DISCOUNT      = 0.9; /* gamma */

/* Returns the successor with the larger q; ties go to future_state[1]. */
static STATE* greedy_successor(const STATE* p_state)
{
    if (p_state->future_state[0]->q > p_state->future_state[1]->q) {
        return p_state->future_state[0];
    }
    return p_state->future_state[1];
}

/*
 * Picks the next state (epsilon-greedy).
 *
 * With probability EXPLORE_RATE one of the two successors is chosen at
 * random (50/50); otherwise the successor with the larger q is taken.
 * Consumes one or two rand() values per call.
 *
 * p_state: current state.
 * Returns the chosen successor, or p_state itself when a successor pointer
 * is NULL (a leaf), in which case rand() is not called.
 */
STATE* change_state(STATE* p_state)
{
    if (p_state->future_state[0] == NULL || p_state->future_state[1] == NULL) {
        return p_state; /* nowhere to go */
    }

    if ((double)rand() / RAND_MAX < EXPLORE_RATE) {
        /* explore: half and half */
        if ((double)rand() / RAND_MAX < 0.5) {
            return p_state->future_state[0];
        }
        return p_state->future_state[1];
    }

    /* exploit */
    return greedy_successor(p_state);
}

/*
 * Updates the q value of the state that was just entered.
 *
 * - SUPER_COLLEGE: q moves towards GOAL_REWARD by LEARNING_RATE.
 * - COLLEGE:       left untouched.
 * - anything else: q moves towards DISCOUNT * (larger successor q) by
 *                  LEARNING_RATE.
 *
 * NOTE: q is an int, so the floating-point result is truncated toward zero
 * when it is stored back. For a non-negative q, a positive step smaller than
 * 1 is therefore lost; e.g. the SUPER_COLLEGE value stops growing once
 * GOAL_REWARD - q drops below 10.
 */
void q_renewal(STATE* p_state)
{
    if (p_state->period == SUPER_COLLEGE) {
        p_state->q += LEARNING_RATE * (GOAL_REWARD - p_state->q);
    } else if (p_state->period != COLLEGE) {
        int best_q = greedy_successor(p_state)->q;

        p_state->q += LEARNING_RATE * (DISCOUNT * best_q - p_state->q);
    }
}

/*
 * Prints the q values of STATE_COUNT consecutive states as one
 * comma-separated line. p_state must point at the first element of an
 * array of at least STATE_COUNT states.
 */
void q_display(STATE* p_state)
{
    for (int i = 0; i < STATE_COUNT; i++) {
        printf("%d,", p_state[i].q);
    }
    printf("\n");
}

int main(void)
{
    /* Life stage of every node, in array order. */
    static const PERIOD periods[STATE_COUNT] = {
        BIRTH,
        JUNIOR_HIGH, JUNIOR_HIGH,
        HIGH, HIGH, HIGH, HIGH,
        COLLEGE, COLLEGE, COLLEGE, SUPER_COLLEGE,
        COLLEGE, COLLEGE, COLLEGE, COLLEGE
    };

    /* Zero-initialized so that the leaves carry NULL successor pointers. */
    STATE state[STATE_COUNT] = {0};

    srand(13); /* fixed seed: every run produces the same output */

    /* Initial setup. */
    for (int i = 0; i < STATE_COUNT; i++) {
        state[i].period = periods[i];
    }

    /* Node i branches to nodes 2i+1 and 2i+2. */
    for (int i = 0; i < BRANCH_COUNT; i++) {
        state[i].future_state[0] = &state[2 * i + 1];
        state[i].future_state[1] = &state[2 * i + 2];
    }

    for (int i = 0; i < STATE_COUNT; i++) {
        state[i].q = rand() % INITIAL_Q_RANGE;
    }

    printf("誕生,A中学,B中学,C高校,D高校,E高校,F高校,G大学,H大学,I大学,J大学,K大学,L大学,M大学,N大学\n");
    q_display(state);

    for (int i = 0; i < EPISODE_COUNT; i++) {
        STATE* s = state; /* every episode starts again from birth */

        /*
         * The move happens before the update, so the update is applied to
         * the state just entered; the q of state[0] is never changed.
         */
        do {
            s = change_state(s);
            q_renewal(s);
        } while ((s->period != COLLEGE) && (s->period != SUPER_COLLEGE));

        q_display(state);
    }

    printf("\n[after]\n");
    q_display(state);

    return 0;
}