QuickConversationsServiceRaceTest.java

  1package eu.siacs.conversations.test;
  2
  3import static org.mockito.ArgumentMatchers.any;
  4import static org.mockito.Mockito.mock;
  5import static org.mockito.Mockito.when;
  6
  7import android.content.Intent;
  8import android.os.IBinder;
  9
 10import eu.siacs.conversations.entities.Account;
 11import eu.siacs.conversations.services.QuickConversationsService;
 12import eu.siacs.conversations.services.XmppConnectionService;
 13import eu.siacs.conversations.xmpp.Jid;
 14import eu.siacs.conversations.xmpp.XmppConnection;
 15import eu.siacs.conversations.xmpp.manager.RosterManager;
 16
 17import org.junit.Rule;
 18import org.junit.Test;
 19import org.junit.runner.RunWith;
 20
 21import androidx.test.InstrumentationRegistry;
 22import androidx.test.ext.junit.runners.AndroidJUnit4;
 23import androidx.test.filters.FlakyTest;
 24import androidx.test.rule.ServiceTestRule;
 25
 26import com.google.common.collect.ImmutableList;
 27
 28import java.lang.reflect.Field;
 29import java.lang.reflect.Method;
 30import java.util.ArrayList;
 31import java.util.Arrays;
 32import java.util.ConcurrentModificationException;
 33import java.util.HashMap;
 34import java.util.List;
 35import java.util.Map;
 36import java.util.Set;
 37import java.util.concurrent.CountDownLatch;
 38import java.util.concurrent.TimeUnit;
 39import java.util.concurrent.atomic.AtomicInteger;
 40import java.util.concurrent.atomic.AtomicReference;
 41
/*
This test is extremely unreliable. I tried many things to make it more reliable, but ultimately
there is not much to be done: the bug it captures is triggered only about 1 in every 10 runs.
The bug is caused by unsynchronized access to `QuickConversationsService.mLastSyncAttempt`,
which leads to a whole host of problems, mostly as a result of reading the map while another
thread is resizing it. These can surface as:
- an NPE from reading a partially constructed Node and then treating the return value as non-null
- a ClassCastException, since apparently HashMap entries might be optimized into a BST while
  another thread still expects a linked-list node
- infinite loops caused by traversing a corrupted linked list
*/
 53@FlakyTest
 54@RunWith(AndroidJUnit4.class)
 55public class QuickConversationsServiceRaceTest {
 56	@Rule
 57	public final ServiceTestRule mServiceRule = new ServiceTestRule();
 58
 59    private List<Account> createTestAccounts(int count) {
 60        List<Account> accounts = new ArrayList<>();
 61        for (int i = 0; i < count; i++) {
 62            Jid jid = Jid.of("testuser" + i + "@test.example.com");
 63            Account account = new Account(jid, "testpassword" + i);
 64
 65            RosterManager mockRoster = mock(RosterManager.class);
 66            when(mockRoster.getWithSystemAccounts(any())).thenReturn(ImmutableList.of());
 67
 68            XmppConnection mockConnection = mock(XmppConnection.class);
 69            when(mockConnection.getManager(RosterManager.class)).thenReturn(mockRoster);
 70
 71            account.setXmppConnection(mockConnection);
 72
 73            accounts.add(account);
 74        }
 75        return accounts;
 76    }
 77
 78    private Method getConsiderSyncMethod(QuickConversationsService qcs) throws NoSuchMethodException {
 79        Method method = qcs.getClass().getDeclaredMethod(
 80            "considerSync", Account.class, Set.class, Map.class, boolean.class
 81        );
 82        method.setAccessible(true);
 83        return method;
 84    }
 85
 86
 87    private Map<String, ?> getLastSyncAttemptMap(QuickConversationsService qcs) throws Exception {
 88        Field field = qcs.getClass().getDeclaredField("mLastSyncAttempt");
 89        field.setAccessible(true);
 90        return (Map<String, ?>) field.get(qcs);
 91    }
 92
 93    private void setAccounts(XmppConnectionService xcs, List<Account> accounts) throws Exception {
 94        Field field = xcs.getClass().getDeclaredField("accounts");
 95        field.setAccessible(true);
 96        field.set(xcs, accounts);
 97    }
 98
 99    private void injectAlwaysResizingHashMap(QuickConversationsService qcs) throws Exception {
100        Field field = qcs.getClass().getDeclaredField("mLastSyncAttempt");
101        field.setAccessible(true);
102        field.set(qcs, new AlwaysResizingHashMap<>());
103    }
104
105    static class AlwaysResizingHashMap<K, V> extends HashMap<K, V> {
106        private final AtomicInteger churnCounter = new AtomicInteger(0);
107        private final AtomicInteger opCounter = new AtomicInteger(0);
108
109        public AlwaysResizingHashMap() {
110            super(1);
111        }
112
113        @SuppressWarnings("unchecked")
114        private void churn() {
115            int base = churnCounter.getAndAdd(10);
116            for (int i = 0; i < 10; i++) {
117                super.put((K) ("__churn_" + (base + i)), null);
118            }
119            for (int i = 0; i < 10; i++) {
120                super.remove("__churn_" + (base + i));
121            }
122            if (opCounter.incrementAndGet() % 3 == 0) {
123                super.clear();
124            }
125        }
126
127        @Override
128        public V put(K key, V value) {
129            V result = super.put(key, value);
130            churn();
131            return result;
132        }
133
134        @Override
135        public V getOrDefault(Object key, V defaultValue) {
136            churn();
137            return super.getOrDefault(key, defaultValue);
138        }
139    }
140
141    @Test
142    public void resizeRace() throws Throwable {
143		IBinder binder = mServiceRule.bindService(
144				new Intent(InstrumentationRegistry.getTargetContext(), XmppConnectionService.class));
145		XmppConnectionService xmppConnectionService =
146				((XmppConnectionService.XmppConnectionBinder) binder).getService();
147		QuickConversationsService service = xmppConnectionService.getQuickConversationsService();
148
149		final var accounts = createTestAccounts(5);
150		setAccounts(xmppConnectionService, accounts);
151		// Uncomment to make the race condition slightly more likely with a non-thread-safe HashMap.
152		// When commented out, the test verifies that the ConcurrentHashMap fix prevents races.
153		// injectAlwaysResizingHashMap(service);
154
155		final AtomicReference<Throwable> caughtException = new AtomicReference<>();
156		final CountDownLatch startLatch = new CountDownLatch(1);
157		final CountDownLatch doneLatch = new CountDownLatch(4);
158
159		Thread.UncaughtExceptionHandler exceptionHandler = (t, e) -> {
160			caughtException.compareAndSet(null, e);
161		};
162
163		final Method considerSyncForced = service.getClass().getDeclaredMethod("considerSync", boolean.class);
164		considerSyncForced.setAccessible(true);
165
166		Thread thread1 = new Thread(() -> {
167			try {
168				startLatch.await();
169				for (int i = 0; i < 500; ++i) {
170					considerSyncForced.invoke(service, true);
171				}
172			} catch (Throwable e) {
173				Throwable cause = e.getCause() != null ? e.getCause() : e;
174				caughtException.compareAndSet(null, cause);
175			} finally {
176				doneLatch.countDown();
177			}
178		});
179		thread1.setUncaughtExceptionHandler(exceptionHandler);
180
181		Thread thread2 = new Thread(() -> {
182			try {
183				startLatch.await();
184				for (int i = 0; i < 500; ++i) {
185					considerSyncForced.invoke(service, true);
186				}
187			} catch (Throwable e) {
188				Throwable cause = e.getCause() != null ? e.getCause() : e;
189				caughtException.compareAndSet(null, cause);
190			} finally {
191				doneLatch.countDown();
192			}
193		});
194		thread2.setUncaughtExceptionHandler(exceptionHandler);
195
196		Thread thread3 = new Thread(() -> {
197			try {
198				startLatch.await();
199				for (int i = 0; i < 500; ++i) {
200					considerSyncForced.invoke(service, true);
201				}
202			} catch (Throwable e) {
203				Throwable cause = e.getCause() != null ? e.getCause() : e;
204				caughtException.compareAndSet(null, cause);
205			} finally {
206				doneLatch.countDown();
207			}
208		});
209		thread3.setUncaughtExceptionHandler(exceptionHandler);
210
211		Thread thread4 = new Thread(() -> {
212			try {
213				startLatch.await();
214				for (int i = 0; i < 500; ++i) {
215					considerSyncForced.invoke(service, true);
216				}
217			} catch (Throwable e) {
218				Throwable cause = e.getCause() != null ? e.getCause() : e;
219				caughtException.compareAndSet(null, cause);
220			} finally {
221				doneLatch.countDown();
222			}
223		});
224		thread4.setUncaughtExceptionHandler(exceptionHandler);
225
226		thread1.start();
227		thread2.start();
228		thread3.start();
229		thread4.start();
230		startLatch.countDown();
231
232		boolean completed = doneLatch.await(5, TimeUnit.MINUTES);
233		if (!completed) {
234			String stuckInfo = getThreadStacks(thread1, thread2, thread3, thread4);
235			thread1.interrupt();
236			thread2.interrupt();
237			thread3.interrupt();
238			thread4.interrupt();
239			throw new AssertionError("Timeout after 5 minutes.\n" + stuckInfo);
240		}
241
242		Throwable e = caughtException.get();
243		if (e != null && isRaceException(e)) {
244			throw e;
245		}
246    }
247
248    private String getThreadStacks(Thread... threads) {
249        StringBuilder result = new StringBuilder();
250        for (Thread thread : threads) {
251            StackTraceElement[] stack = thread.getStackTrace();
252            result.append("Thread ").append(thread.getName())
253                  .append(" (").append(thread.getState()).append("):\n");
254            for (StackTraceElement ste : stack) {
255                result.append("    at ").append(ste).append("\n");
256            }
257            result.append("\n");
258        }
259        return result.toString();
260    }
261
262
263//    private String findThreadsStuckInHashMap() {
264//        StringBuilder result = new StringBuilder();
265//        Map<Thread, StackTraceElement[]> allStacks = Thread.getAllStackTraces();
266//
267//        for (Map.Entry<Thread, StackTraceElement[]> entry : allStacks.entrySet()) {
268//            Thread thread = entry.getKey();
269//            String name = thread.getName();
270//
271//            if (!name.startsWith("Writer-") && !name.startsWith("Reader-")) {
272//                continue;
273//            }
274//
275//            StackTraceElement[] stack = entry.getValue();
276//
277//
278//            if (inHashMap && fromConsiderSync) {
279//                result.append("\nThread ").append(name).append(" stack trace:\n");
280//                for (StackTraceElement ste : stack) {
281//                    result.append("    at ").append(ste).append("\n");
282//                }
283//                Log.e(TAG, "Thread " + name + " stuck in HashMap (from considerSync):");
284//                for (StackTraceElement ste : stack) {
285//                    Log.e(TAG, "    at " + ste);
286//                }
287//            }
288//        }
289//
290//        return result.length() > 0 ? result.toString() : null;
291//    }
292
293    /**
294     * Check if the exception indicates a HashMap race condition.
295     * OOM from HashMap.resize called from QuickConversationsService.considerSync
296     * indicates race-corrupted state causing massive allocation.
297     */
298    private boolean isRaceException(Throwable t) {
299        if (t instanceof NullPointerException ||
300            t instanceof ClassCastException ||
301            t instanceof ConcurrentModificationException) {
302            return true;
303        }
304        // OOM specifically from HashMap.resize in our considerSync method
305        if (t instanceof OutOfMemoryError) {
306            boolean hasHashMapResize = false;
307            boolean hasConsiderSync = false;
308            for (StackTraceElement ste : t.getStackTrace()) {
309                if ("java.util.HashMap".equals(ste.getClassName()) &&
310                    "resize".equals(ste.getMethodName())) {
311                    hasHashMapResize = true;
312                }
313                if ("eu.siacs.conversations.services.QuickConversationsService".equals(ste.getClassName()) &&
314                    "considerSync".equals(ste.getMethodName())) {
315                    hasConsiderSync = true;
316                }
317            }
318            return hasHashMapResize && hasConsiderSync;
319        }
320        return false;
321    }
322}