Is there an example of how to create a GlobalKTable to keep count from a KStream using Spring Cloud stream and using Functional approach?
Is implementing processor interface the right approach?
#Bean
public Consumer<KStream<String, Long>> processorsample() {
return input -> input.process(() -> new Processor<String, Long>() {
#Override
public void init(ProcessorContext context) {
if (state == null) {
state = (KeyValueStore<String, Long>) context.getStateStore("mystate");
}
}
#Override
public void process(String key, Long value) {
if (state != null) {
if (key != null) {
Long currentCount = state.get(key);
if (currentCount == null) {
state.put(key, value);
} else {
state.put(key, currentCount + value);
}
}
}
}
#Override
public void close() {
if (state != null) {
state.close();
}
}
}, "mystate");
}
According to the documentation GlobalKTables are read-only, you cannot modify a global table during the processing.
Since GlobalKTables are consumers of a Kafka topic, you can just send your data to the GlobalKTable's source topic, and eventually, it's going to be added to the table. But you cannot be sure that the GlobalKTable will be updated immediately.
Related
I am using Asp.Net core 3.1 . If I want to create a WebSockets backend for example for
a chat app , I need to store all the related WebSocket objects for broadcasting events , my question is what is the best way to manage removing objects that are no longer useful (if disconnected or no longer open). keeping in mind that I want other parts of the application to access the WebScoket groups to also broadcast events if needed. I store the related connections in a ConnectionNode which is the nearest layer to the Websocket objects , a class called WebsocketsManager manage these nodes, a service in the background runs to clear the unused objects every timeout period. but since I want the group(related connections)to be accessible for the application (for example other endpoints); to avoid any concurrent modification errors , if a broadcast is required during the cleaning process,the broadcast will have to wait for the cleaning process to finish, thats why the WebsocketsManager if the related connections are larger than a certain limit it will divide them into multiple related ConnectionNodes , that way the cleaning process can continue partially for related connection while broadcasting if needed. I want to know how good my solution will behave or what is the best way to do it. any help would be really appreciated.
ConnectionNode
public class ConnectionNode
{
private List<WebSocket> connections;
private BroadcastQueue BroadcastQueue = new BroadcastQueue();
private bool isBroadCasting = false;
private bool isCleaning = false;
public void AddConnection(WebSocket socket)
{
if (connections == null)
connections = new List<WebSocket>();
connections.Add(socket);
}
public void Broadcast(Broadcast broadCast)
{
while (isCleaning)
{
}
BroadcastQueue.QueueBroadcast(broadCast);
if (isBroadCasting)
{
return;
}
isBroadCasting = true;
var broadcast = BroadcastQueue.GetNext();
while (broadCast != null)
{
foreach (var ws in connections)
{
broadCast.Dispatch(ws);
}
broadCast = BroadcastQueue.GetNext();
}
isBroadCasting = false;
}
public int CleanUnUsedConnections()
{
if (isBroadCasting)
return 0;
isCleaning = true;
var i =connections.RemoveAll(s => s.State != WebSocketState.Open);
isCleaning = false;
return i;
}
public int ConnectionsCount()
{
return connections.Count;
}
}
Manager class
public class WebSocketsManager
{
static int ConnectionNodesDividerLimit = 1000;
private ConcurrentDictionary<String, List<ConnectionNode>> mConnectionNodes;
private readonly ILogger<WebSocketsManager> logger;
public WebSocketsManager(ILogger<WebSocketsManager> logger)
{
this.logger = logger;
}
public ConnectionNode RequireNode(string Id)
{
if (mConnectionNodes == null)
mConnectionNodes = new ConcurrentDictionary<String, List<ConnectionNode>>();
var node = mConnectionNodes.GetValueOrDefault(Id);
if (node == null)
{
node = new List<ConnectionNode>();
node.Add(new ConnectionNode());
mConnectionNodes.TryAdd(Id, node);
return node[0];
}
if (ConnectionNodesDividerLimit != 0)
{
if (node[0].ConnectionsCount() == ConnectionNodesDividerLimit)
{
node.Insert(0,new ConnectionNode());
}
}
return node[0];
}
public void ClearUnusedConnections()
{
logger.LogInformation("Manager is Clearing ..");
if (mConnectionNodes == null)
return;
if (mConnectionNodes.IsEmpty)
{
logger.LogInformation("Empty ## Nothing to clear ..");
return;
}
Dictionary<String,ConnectionNode> ToBeRemovedNodes = new Dictionary<String, ConnectionNode>();
foreach (var pair in mConnectionNodes)
{
bool shoudlRemoveStack = true;
foreach (var node in pair.Value)
{
int i = node.CleanUnUsedConnections();
logger.LogInformation($"Removed ${i} from connection node(s){pair.Key}");
if (node.ConnectionsCount() == 0)
{
ToBeRemovedNodes[pair.Key] = node;
logger.LogInformation($"To be Removed A node From ..{pair.Key}");
}
else
{
shoudlRemoveStack = false;
}
}
if (shoudlRemoveStack)
{
ToBeRemovedNodes.Remove(pair.Key);
List<ConnectionNode> v =null;
var b = mConnectionNodes.TryRemove(pair.Key,out v);
logger.LogInformation($"Removing the Stack ..{pair.Key} Removed ${b}");
}
}
foreach (var pair in ToBeRemovedNodes)
{
mConnectionNodes[pair.Key].Remove(pair.Value);
logger.LogInformation($"Clearing Nodes : Clearing Nodes from stack #{pair.Key}");
}
}
public void Broadcast(string id, Broadcast broadcast)
{
var c = mConnectionNodes.GetValueOrDefault(id);
foreach (var node in c)
{
node.Broadcast(broadcast);
}
}
the service
public class SocketsConnectionsCleaningService : BackgroundService
{
private readonly IServiceProvider Povider;
private Timer Timer = null;
private bool isRunning = false;
private readonly ILogger Logger;
public SocketsConnectionsCleaningService(IServiceProvider Provider, ILogger<SocketsConnectionsCleaningService> Logger )
{
this.Povider = Provider;
this.Logger = Logger;
}
protected override Task ExecuteAsync(CancellationToken stoppingToken)
{
Logger.LogInformation("Execute Sync is called ");
Timer = new Timer(DeleteClosedConnections, null, TimeSpan.FromMinutes(0), TimeSpan.FromMinutes(2));
return Task.CompletedTask;
}
private void DeleteClosedConnections(object state)
{
Logger.LogInformation("Clearing ");
if (isRunning)
{
Logger.LogInformation("A Task is Running Return ");
return;
}
isRunning = true;
var connectionManager = Povider.GetService(typeof(WebSocketsManager)) as WebSocketsManager;
connectionManager.ClearUnusedConnections();
isRunning = false;
Logger.LogInformation($"Finished Cleaning !");
}
}
Usage in a controller be like
[HttpGet("ws")]
public async Task SomeRealtimeFunction()
{
if (HttpContext.IsWebSocketsRequest())
{
using var socket = await HttpContext.AcceptSocketRequest();
try
{
await socket.SendString(" Connected! ");
webSocketsManager.RequireNode("Chat Room")
.AddConnection(socket);
var RecieverHelper = socket.GetRecieveResultsHelper();
string str = await RecieverHelper.ReceiveString();
while (!RecieverHelper.Result.CloseStatus.HasValue)
{
webSocketsManager
.Broadcast("Chat Room", new StringBroadcast(str));
str = await RecieverHelper.ReceiveString();
}
}
catch (Exception e)
{
await socket.SendString("Error!");
await socket.SendString(e.Message);
await socket.SendString(e.ToString());
}
}
else
{
HttpContext.Response.StatusCode = 400;
}
}
There is a quite simple case I would like to implement:
I have a base and DLT topics:
MessageBus:
Topic: my_topic
DltTopic: my_dlt_topic
Broker: event-serv:9092
So, those topics are already predefined, I don't need to create them automatically.
The only I need to handle broken messages automatically without retries, because they don't make any sense, so I have something like this:
#KafkaListener(topics = ["#{config.messageBus.topic}"], groupId = "group_id")
#RetryableTopic(
dltStrategy = DltStrategy.FAIL_ON_ERROR,
autoCreateTopics = "false",
attempts = "1"
)
#Throws(IOException::class)
fun consume(rawMessage: String?) {
...
}
#DltHandler
fun processMessage(rawMessage: String?) {
kafkaTemplate.send(config.messageBus.dltTopic, rawMessage)
}
That of course doesn't work properly.
I also tried to specify a kafkaTemplate
#Bean
fun kafkaTemplate(
config: Config,
producerFactory: ProducerFactory<String, String>
): KafkaTemplate<String, String> {
val template = KafkaTemplate(producerFactory)
template.defaultTopic = config.messageBus.dltTopic
return template
}
however, that does not change the situation.
In the end, I believe there is an obvious solution, so I please give me a hint about it.
See the documenation.
#SpringBootApplication
public class So69317126Application {
public static void main(String[] args) {
SpringApplication.run(So69317126Application.class, args);
}
#RetryableTopic(attempts = "1", autoCreateTopics = "false", dltStrategy = DltStrategy.FAIL_ON_ERROR)
#KafkaListener(id = "so69317126", topics = "so69317126")
void listen(String in) {
System.out.println(in);
throw new RuntimeException();
}
#DltHandler
void handler(String in) {
System.out.println("DLT: " + in);
}
#Bean
RetryTopicNamesProviderFactory namer() {
return new RetryTopicNamesProviderFactory() {
#Override
public RetryTopicNamesProvider createRetryTopicNamesProvider(Properties properties) {
if (properties.isMainEndpoint()) {
return new SuffixingRetryTopicNamesProviderFactory.SuffixingRetryTopicNamesProvider(properties) {
#Override
public String getTopicName(String topic) {
return "so69317126";
}
};
}
else if(properties.isDltTopic()) {
return new SuffixingRetryTopicNamesProviderFactory.SuffixingRetryTopicNamesProvider(properties) {
#Override
public String getTopicName(String topic) {
return "so69317126.DLT";
}
};
}
else {
throw new IllegalStateException("Shouldn't get here - attempts is only 1");
}
}
};
}
}
so69317126: partitions assigned: [so69317126-0]
so69317126-dlt: partitions assigned: [so69317126.DLT-0]
foo
DLT: foo
This is a Kafka server configuration so you must set it on the server. The relevant property is:
auto.create.topics.enable (true by default)
When onPerformSync occurs I need the current location but I do not want to set up a separate service that is constantly active requesting location because my SyncAdapter period exponentially backs off such that the periods between syncs could be many hours apart. It would be wasteful to have location requests running between each sync.
I am planning on using a GoogleApiClient and LocationServices.FusedLocationApi.requestLocationUpdates then Thread.sleep(###) the onPerformSync thread until a location is found.
However I have read that requestLocationUpdates needs to be called on the main looper and that it makes callbacks on that thread in which case I expect will it fail to return location results because I am sleeping on the thread which called it.
Will I need to start my own looper thread?
Is there another/better way to get current location from onPerformSync?
Turns out my fears were not justified, my method does work without error. I have put together a handy example class below in case anyone else wants to do this:
public class cSyncLocation implements ConnectionCallbacks, OnConnectionFailedListener, LocationListener
{
// =======================================================
// private vars
// =======================================================
private GoogleApiClient moGoogleApiClient;
private LocationRequest moLocationRequest;
private Location moCurrentLocation;
private static final int kTIMEOUT_MILLISECONDS = 2500;
// =======================================================
// public static vars
// =======================================================
// =======================================================
// public methods
// =======================================================
public void Start(Context oContext)
{
if (moGoogleApiClient == null)
{
moGoogleApiClient = new GoogleApiClient.Builder(oContext)
.addApi(LocationServices.API)
.addConnectionCallbacks(this)
.addOnConnectionFailedListener(this)
.build();
}
if (moLocationRequest == null)
{
moLocationRequest = new LocationRequest();
moLocationRequest.setInterval(1);
moLocationRequest.setFastestInterval(1);
moLocationRequest.setInterval(1);
moLocationRequest.setPriority(LocationRequest.PRIORITY_HIGH_ACCURACY);
}
// Start the connection
if (moGoogleApiClient != null)
{
if (!moGoogleApiClient.isConnecting() && !moGoogleApiClient.isConnected())
moGoogleApiClient.connect();
else if (moCurrentLocation == null)
LocationServices.FusedLocationApi.requestLocationUpdates(moGoogleApiClient, moLocationRequest, this);
}
}
public void Stop()
{
if (moGoogleApiClient != null && moGoogleApiClient.isConnected())
LocationServices.FusedLocationApi.removeLocationUpdates(moGoogleApiClient, this);
if (moGoogleApiClient != null)
moGoogleApiClient.disconnect();
}
public Location GetLocationBlocking(Context oContext)
{
if (moCurrentLocation == null)
{
intTimeout = kTIMEOUT_MILLISECONDS;
Start(oContext);
while(intTimeout > 0 && aFrmLocationActivity.IsLastLocationExpired(oContext))
{
Thread.sleep(100);
intTimeout -= 100;
}
Stop();
}
return moCurrentLocation;
}
// =======================================================
// Location API Events
// =======================================================
#Override
public void onLocationChanged(Location oLocation)
{
if (oLocation != null)
{
moCurrentLocation = oLocation;
}
}
// =======================================================
// Google API Connection Events
// =======================================================
#Override
public void onConnected(Bundle connectionHint)
{
// Connected to Google Play services! The good stuff goes here.
if (moGoogleApiClient != null)
{
Location oLocation = LocationServices.FusedLocationApi.getLastLocation(moGoogleApiClient);
if (oLocation != null)
moCurrentLocation = oLocation;
else
LocationServices.FusedLocationApi.requestLocationUpdates(moGoogleApiClient, moLocationRequest, this);
}
}
#Override
public void onConnectionSuspended(int cause)
{
//...
}
#Override
public void onConnectionFailed(ConnectionResult result)
{
//...
}
}
How to use it, in your onPerformSync method call it like this
cSyncLocation oSyncLocation = new cSyncLocation();
Location oLocation = oSyncLocation.GetLocationBlocking(getContext());
Obviously you will want to add some exception handling and deal with null location result.
I am implementing a simple data analytic functionality with RXJava, where a topic subscriber asynchronously processes the data published to a topic, depositing the output to the Redis.
When a message is received, the Spring component publishes it to an Observable. To avoid blocking the submission I used the RxJava Async to do this asynchronously.
#Override
public void onMessage(final TransactionalMessage message) {
Async.start(new Func0<Void>() {
#Override
public Void call() {
analyser.process(message);
return null;
}
});
}
I have two confusions in implementing other processing parts; 1) creating an asynchronous Observable with buffering 2) Computing different logics in parallel based on message type on list of messages.
After long experiments I found two ways to create the Async Observable and not sure which one is the right and better approach.
Way one,
private static final class Analyzer {
private Subscriber<? super TransactionalMessage> subscriber;
public Analyzer() {
OnSubscribe<TransactionalMessage> f = subscriber -> this.subscriber = subscriber;
Observable.create(f).observeOn(Schedulers.computation())
.buffer(5, TimeUnit.SECONDS, 5, Schedulers.io())
.skipWhile((list) -> list == null || list.isEmpty())
.subscribe(t -> compute(t));
}
public void process(TransactionalMessage message) {
subscriber.onNext(message);
}
}
Way two
private static final class Analyser {
private PublishSubject<TransactionalMessage> subject;
public Analyser() {
subject = PublishSubject.create();
Observable<List<TransactionalMessage>> observable = subject
.buffer(5, TimeUnit.SECONDS, 5, Schedulers.io())
.observeOn(Schedulers.computation());
observable.subscribe(new Observer<List<TransactionalMessage>>() {
#Override
public void onCompleted() {
log.debug("[Analyser] onCompleted(), completed!");
}
#Override
public void onError(Throwable e) {
log.error("[Analyser] onError(), exception, ", e);
}
#Override
public void onNext(List<TransactionalMessage> t) {
compute(t);
}
});
}
public void process(TransactionalMessage message) {
subject.onNext(message);
}
}
The TransactionalMessage comes in different types, so I want to perform different computations based on the types. One approach I tried is filter the list based on every type and process them separately, but this looks so bad and I think does not work in parallel. What way to process them in parallel?
protected void compute(List<TransactionalMessage> messages) {
Observable<TransactionalMessage> observable = Observable
.from(messages);
Observable<String> observable2 = observable
.filter(new Func1<TransactionalMessage, Boolean>() {
#Override
public Boolean call(TransactionalMessage t) {
return t.getMsgType()
.equals(OttMessageType.click.name());
}
}).flatMap(
new Func1<TransactionalMessage, Observable<String>>() {
#Override
public Observable<String> call(
TransactionalMessage t) {
return Observable.just(
t.getMsgType() + t.getAppId());
}
});
Observable<String> observable3 = observable
.filter(new Func1<TransactionalMessage, Boolean>() {
#Override
public Boolean call(TransactionalMessage t) {
return t.getMsgType()
.equals(OttMessageType.image.name());
}
}).flatMap(
new Func1<TransactionalMessage, Observable<String>>() {
#Override
public Observable<String> call(
TransactionalMessage t) {
return Observable.just(
t.getMsgType() + t.getAppId());
}
});
// I sense some code smell in filtering on type and processing it.
Observable.merge(observable2, observable3)
.subscribe(new Action1<String>() {
#Override
public void call(String t) {
// save it to redis
System.out.println(t);
}
});
}
I suggest thinking about Subjects before attempting to use create.
If you want parallel processing done based on some categorization, you could use groupBy along with observeOn to achieve the desired effect:
Observable.range(1, 100)
.groupBy(v -> v % 3)
.flatMap(g ->
g.observeOn(Schedulers.computation())
.reduce(0, (a, b) -> a + b)
.map(v -> g.getKey() + ": " + v)
)
.toBlocking().forEach(System.out::println);
What I am doing currently is adding an item to the Cache and disposing of my object when that object is removed from the Cache. The logic being that it gets removed when memory consumption gets too high. I'm open to outher suggestions but I would like to avoid creating a thread than continually measures memory statistics if possible. Here is my code:
public class WebServiceCache : ConcurrentDictionary<string, WebServiceCacheObject>, IDisposable
{
private WebServiceCache()
{
if (HttpContext.Current != null && HttpContext.Current.Cache != null)
{
HttpContext.Current.Cache.Add("CacheTest", true, null, DateTime.Now.AddYears(1), System.Web.Caching.Cache.NoSlidingExpiration,
System.Web.Caching.CacheItemPriority.Low,
(key, obj, reason) => {
if (reason != System.Web.Caching.CacheItemRemovedReason.Removed)
{
WebServiceCache.Current.ClearCache(50);
}
});
}
}
private static WebServiceCache _current;
public static WebServiceCache Current
{
get
{
if (_current != null && _current.IsDisposed)
{
// Might as well clear it fully
_current = null;
}
if (_current == null)
{
_current = new WebServiceCache();
}
return _current;
}
}
public void ClearCache(short percentage)
{
try
{
if (percentage == 100)
{
this.Dispose();
return;
}
var oldest = _current.Min(c => c.Value.LastAccessed);
var newest = _current.Max(c => c.Value.LastAccessed);
var difference = (newest - oldest).TotalSeconds;
var deleteBefore = oldest.AddSeconds((difference / 100) * percentage);
// LINQ doesn't seem to work very well on concurrent dictionaries
//var toDelete = _current.Where(c => DateTime.Compare(c.Value.LastAccessed,deleteBefore) < 0);
var keys = _current.Keys.ToArray();
foreach (var key in keys)
{
if (DateTime.Compare(_current[key].LastAccessed, deleteBefore) < 0)
{
WebServiceCacheObject tmp;
_current.TryRemove(key, out tmp);
tmp = null;
}
}
keys = null;
}
catch
{
// If we throw an exception here then we are probably really low on memory
_current = null;
GC.Collect();
}
}
public bool IsDisposed { get; set; }
public void Dispose()
{
this.Clear();
HttpContext.Current.Cache.Remove("CacheTest");
this.IsDisposed = true;
}
}
In Global.asax
void context_Error(object sender, EventArgs e)
{
Exception ex = _context.Server.GetLastError();
if (ex.InnerException is OutOfMemoryException)
{
if (_NgageWebControls.classes.Caching.WebServiceCache.Current != null)
{
_NgageWebControls.classes.Caching.WebServiceCache.Current.ClearCache(100);
}
}
}
Thanks,
Joe
You can access the ASP.NET Cache from anywhere in your application as the static property:
HttpRuntime.Cache
You don't need to be in the context of a Request (i.e. don't need HttpContext.Current) to do this.
So you should be using it instead of rolling your own caching solution.